├── tests ├── __init__.py ├── cache │ └── epub │ │ └── test ├── files │ └── 43172 │ │ ├── images │ │ └── image.jpg │ │ └── 43172-h │ │ ├── music │ │ └── test.mp3 │ │ └── images │ │ ├── image.jpg │ │ └── mathex.jpg ├── test_rst.py ├── test_txt.py ├── test_html.py ├── test_templates.py ├── test_job.py ├── test_setup.py └── test_htm.py ├── src └── ebookmaker │ ├── __init__.py │ ├── mydocutils │ ├── __init__.py │ ├── gutenberg │ │ ├── __init__.py │ │ ├── writers │ │ │ ├── __init__.py │ │ │ └── nroff.py │ │ ├── parsers │ │ │ ├── pg-header.rst │ │ │ └── __init__.py │ │ └── transforms │ │ │ └── __init__.py │ ├── transforms │ │ └── __init__.py │ ├── writers │ │ ├── rst2epub.css │ │ ├── rst2html.css │ │ ├── epub2.py │ │ └── rst2all.css │ ├── parsers │ │ └── default_style.rst │ └── nodes.py │ ├── Version.py │ ├── parsers │ ├── broken.png │ ├── AuxParser.py │ ├── WrapperParser.py │ ├── txt2all.css │ ├── ImageParser.py │ ├── CSSParser.py │ └── boilerplate.py │ ├── writers │ ├── cover.jpg │ ├── RSTWriter.py │ ├── PicsDirWriter.py │ ├── PDFWriter.py │ ├── HtmlTemplates.py │ ├── KindleWriter.py │ ├── TxtWriter.py │ └── __init__.py │ ├── packagers │ ├── PDFPackager.py │ ├── RSTPackager.py │ ├── HTMLPackager.py │ ├── GzipPackager.py │ ├── TxtPackager.py │ ├── PushPackager.py │ └── __init__.py │ ├── WriterFactory.py │ ├── utils.py │ ├── Unitame.py │ ├── ParserFactory.py │ ├── UnitameData.py │ ├── CommonCode.py │ ├── HTMLChunker.py │ └── Spider.py ├── pyproject.toml ├── setup.cfg ├── .travis.yml ├── Pipfile ├── scripts ├── ebookmaker ├── rhyme_compiler └── convert_unitame ├── .gitignore ├── ebookmaker.conf ├── docs ├── alt-text.md ├── images.md └── ebookmaker_v0_11.md ├── MANIFEST ├── setup.py ├── USAGE.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cache/epub/test: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ebookmaker/__init__.py: -------------------------------------------------------------------------------- 1 | """ This is a package. """ 2 | -------------------------------------------------------------------------------- /src/ebookmaker/mydocutils/__init__.py: -------------------------------------------------------------------------------- 1 | """ This is a package """ 2 | -------------------------------------------------------------------------------- /src/ebookmaker/mydocutils/gutenberg/__init__.py: -------------------------------------------------------------------------------- 1 | """ This is a package. """ 2 | -------------------------------------------------------------------------------- /src/ebookmaker/mydocutils/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | """ This is a package. """ 2 | -------------------------------------------------------------------------------- /src/ebookmaker/mydocutils/gutenberg/writers/__init__.py: -------------------------------------------------------------------------------- 1 | """ This is a package. 
""" 2 | -------------------------------------------------------------------------------- /src/ebookmaker/Version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.13.8' 2 | GENERATOR = 'Ebookmaker %s by Project Gutenberg' 3 | -------------------------------------------------------------------------------- /src/ebookmaker/parsers/broken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gutenbergtools/ebookmaker/HEAD/src/ebookmaker/parsers/broken.png -------------------------------------------------------------------------------- /src/ebookmaker/writers/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gutenbergtools/ebookmaker/HEAD/src/ebookmaker/writers/cover.jpg -------------------------------------------------------------------------------- /tests/files/43172/images/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gutenbergtools/ebookmaker/HEAD/tests/files/43172/images/image.jpg -------------------------------------------------------------------------------- /tests/files/43172/43172-h/music/test.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gutenbergtools/ebookmaker/HEAD/tests/files/43172/43172-h/music/test.mp3 -------------------------------------------------------------------------------- /tests/files/43172/43172-h/images/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gutenbergtools/ebookmaker/HEAD/tests/files/43172/43172-h/images/image.jpg -------------------------------------------------------------------------------- /tests/files/43172/43172-h/images/mathex.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gutenbergtools/ebookmaker/HEAD/tests/files/43172/43172-h/images/mathex.jpg -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # As per https://github.com/pypa/setuptools/blob/main/docs/userguide/quickstart.rst 2 | [build-system] 3 | requires = ["setuptools"] 4 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = ebookmaker 3 | 4 | version = 0.13.8 5 | 6 | [options] 7 | package_dir= 8 | =src 9 | packages=find: 10 | 11 | [options.packages.find] 12 | where=src -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - '3.6' 5 | 6 | before_install: 7 | - sudo apt-get update 8 | 9 | install: 10 | - 'pip install pipenv' 11 | - 'pipenv install' 12 | 13 | script: python setup.py test 14 | 15 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [dev-packages] 7 | pylint = "*" 8 | 9 | [packages] 10 | e1839a8 = {path = ".",editable = true} 11 | libgutenberg = ">=0.10.31" 12 | psycopg2 = "*" 13 | docutils = ">=0.18.1" 14 | html5lib = "*" 15 | cchardet = "==2.2.0a2" 16 | ebookmaker = {file = ".", editable = true} 17 | -------------------------------------------------------------------------------- /scripts/ebookmaker: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*- 3 | 4 | """ 5 | 6 | ebookmaker script 7 | 8 | Copyright 2014 by Marcello Perathoner 9 | 10 | Distributable under the GNU General Public License Version 3 or newer. 11 | 12 | This script starts epubmaker. 13 | 14 | """ 15 | 16 | from ebookmaker import EbookMaker 17 | 18 | EbookMaker.main () 19 | -------------------------------------------------------------------------------- /src/ebookmaker/packagers/PDFPackager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*- 3 | 4 | """ 5 | PDFPackager.py 6 | 7 | Copyright 2010 by Marcello Perathoner 8 | 9 | Distributable under the GNU General Public License Version 3 or newer. 10 | 11 | Package a PDF file for PG. 12 | 13 | """ 14 | 15 | from ebookmaker.packagers import OneFileZipPackager 16 | 17 | TYPE = 'ww' 18 | FORMATS = ''.split () 19 | 20 | class Packager (OneFileZipPackager): 21 | """ WW packager for PDF files. """ 22 | pass 23 | -------------------------------------------------------------------------------- /src/ebookmaker/packagers/RSTPackager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*- 3 | 4 | """ 5 | RSTPackager.py 6 | 7 | Copyright 2010 by Marcello Perathoner 8 | 9 | Distributable under the GNU General Public License Version 3 or newer. 10 | 11 | Package a RST file for PG. 12 | 13 | """ 14 | 15 | from ebookmaker.packagers import HTMLishPackager 16 | 17 | TYPE = 'ww' 18 | FORMATS = 'rst.gen'.split () 19 | 20 | class Packager (HTMLishPackager): 21 | """ Package a RST file with its images. 
""" 22 | pass 23 | -------------------------------------------------------------------------------- /src/ebookmaker/packagers/HTMLPackager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*- 3 | 4 | """ 5 | HTMLPackager.py 6 | 7 | Copyright 2010 by Marcello Perathoner 8 | 9 | Distributable under the GNU General Public License Version 3 or newer. 10 | 11 | Package a HTML file for PG. 12 | 13 | """ 14 | 15 | from ebookmaker.packagers import HTMLishPackager 16 | 17 | TYPE = 'ww' 18 | FORMATS = 'html.images'.split () 19 | 20 | class Packager (HTMLishPackager): 21 | """ Package a HTML file with its images. """ 22 | pass 23 | -------------------------------------------------------------------------------- /src/ebookmaker/packagers/GzipPackager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*- 3 | 4 | """ 5 | GzipPackager.py 6 | 7 | Copyright 2010 by Marcello Perathoner 8 | 9 | Distributable under the GNU General Public License Version 3 or newer. 10 | 11 | Gzip a file. 12 | 13 | """ 14 | 15 | from ebookmaker.packagers import OneFileGzipPackager 16 | 17 | TYPE = 'gzip' 18 | FORMATS = 'rst html.noimages html.images txt.us-ascii txt.iso-8859-1 txt.utf-8'.split () 19 | 20 | class Packager (OneFileGzipPackager): 21 | """ Gzip packager. """ 22 | pass 23 | -------------------------------------------------------------------------------- /src/ebookmaker/packagers/TxtPackager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*- 3 | 4 | """ 5 | TxtPackager.py 6 | 7 | Copyright 2010 by Marcello Perathoner 8 | 9 | Distributable under the GNU General Public License Version 3 or newer. 
10 | 11 | Package a Txt file for PG. 12 | 13 | """ 14 | 15 | from ebookmaker.packagers import OneFileZipPackager 16 | 17 | TYPE = 'ww' 18 | FORMATS = 'txt.us-ascii txt.iso-8859-1 txt.utf-8'.split () 19 | 20 | class Packager (OneFileZipPackager): 21 | """ WW packager for plain text files. """ 22 | pass 23 | -------------------------------------------------------------------------------- /src/ebookmaker/parsers/AuxParser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*- 3 | 4 | """ 5 | 6 | AuxParser.py 7 | 8 | Copyright 2009 by Marcello Perathoner 9 | 10 | Distributable under the GNU General Public License Version 3 or newer. 11 | 12 | Open an url and return raw data. 13 | 14 | """ 15 | 16 | 17 | from ebookmaker.parsers import ParserBase 18 | 19 | mediatypes = ('*/*', ) 20 | 21 | class Parser (ParserBase): 22 | """ Parse an auxiliary file. """ 23 | auxparser = True 24 | def __init__ (self, attribs = None): 25 | ParserBase.__init__ (self, attribs) 26 | self.data = None 27 | 28 | 29 | def pre_parse (self): 30 | """ Parse the file. """ 31 | self.data = self.bytes_content () 32 | 33 | 34 | def serialize (self): 35 | """ Serialize file to string. 
""" 36 | return self.data 37 | -------------------------------------------------------------------------------- /tests/test_rst.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import unittest 5 | import subprocess 6 | 7 | 8 | import ebookmaker 9 | 10 | class TestFromRst(unittest.TestCase): 11 | def setUp(self): 12 | self.sample_dir = os.path.join(os.path.dirname(__file__), 'files') 13 | 14 | def test_33968(self): 15 | book_id = '33968' 16 | dir = os.path.join(self.sample_dir, book_id) 17 | rstfile = os.path.join(dir, '%s-rst' % book_id, '%s-rst.rst' % book_id) 18 | cmd = 'ebookmaker --make=pdf --output-dir={dir} {rstfile}'.format( 19 | dir=dir, 20 | rstfile=rstfile, 21 | ) 22 | 23 | output = subprocess.check_output(cmd, shell=True) 24 | 25 | self.assertFalse(output) 26 | outs = [ 27 | "%s-cover.png", 28 | "%s-images-pdf.pdf", 29 | ] 30 | for out in outs: 31 | self.assertTrue(os.path.exists(os.path.join(dir, out % book_id))) 32 | os.remove(os.path.join(dir, out % book_id)) 33 | -------------------------------------------------------------------------------- /tests/test_txt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import unittest 5 | import subprocess 6 | 7 | 8 | import ebookmaker 9 | 10 | class TestFromTxt(unittest.TestCase): 11 | def setUp(self): 12 | self.sample_dir = os.path.join(os.path.dirname(__file__), 'files') 13 | self.out_dir = os.path.join(os.path.dirname(__file__), 'out') 14 | 15 | def test_69030(self): 16 | book_id = '69030' 17 | dir = os.path.join(self.sample_dir, book_id) 18 | srcfile = os.path.join(dir, '%s-0.txt' % book_id) 19 | cmd = 'ebookmaker ' 20 | cmd += f'--ebook={book_id} --make=txt --make=html --output-dir={self.out_dir} ' 21 | cmd += f'--validate {srcfile}' 22 | 23 | output = subprocess.check_output(cmd, shell=True) 24 
| 25 | self.assertFalse(output) 26 | outs = [ 27 | "%s.txt", 28 | "%s-0.txt", 29 | "%s-8.txt", 30 | "%s-h.html", 31 | "%s-cover.png", 32 | ] 33 | for out in outs: 34 | self.assertTrue(os.path.exists(os.path.join(self.out_dir, out % book_id))) 35 | os.remove(os.path.join(self.out_dir, out % book_id)) 36 | -------------------------------------------------------------------------------- /src/ebookmaker/mydocutils/writers/rst2epub.css: -------------------------------------------------------------------------------- 1 | /* 2 | Project Gutenberg EPUB docutils stylesheet. 3 | 4 | This stylesheet contains styles specific to EPUB. 5 | */ 6 | 7 | /* FONTS */ 8 | 9 | /* mostly unsupported */ 10 | .small-caps { font-style: italic } 11 | .gesperrt { font-style: italic } 12 | 13 | /* ALIGN */ 14 | 15 | /* SECTIONS */ 16 | 17 | /* reduce screen real estate waste */ 18 | body { margin: 1% } 19 | 20 | /* ugly hack to give more specifity. because ADE chucks out the whole 21 | stylesheet when it sees an !important */ 22 | 23 | .first.first { margin-top: 0; text-indent: 0 } 24 | .last.last { margin-bottom: 0 } 25 | 26 | .no-page-break.no-page-break 27 | { page-break-before: avoid } 28 | 29 | /* PAGINATION */ 30 | 31 | div.clearpage { page-break-before: always; padding-top: 10% } 32 | div.cleardoublepage { page-break-before: right; padding-top: 10% } 33 | 34 | .vfill { margin-top: 10% } 35 | h2.title { margin-top: 10% } 36 | 37 | /* DIV */ 38 | 39 | a { text-decoration: none } 40 | .toc-pageref { display: none } 41 | 42 | /* DROPCAPS */ 43 | 44 | span.dropcap { line-height: 0 } 45 | img.dropcap { vertical-align: bottom } 46 | -------------------------------------------------------------------------------- /src/ebookmaker/mydocutils/gutenberg/parsers/pg-header.rst: -------------------------------------------------------------------------------- 1 | .. -*- encoding: utf-8 -*- 2 | 3 | .. 
|pg.copyrighted-header| replace:: 4 | 5 | This is a *copyrighted* Project Gutenberg eBook, details 6 | below. 7 | 8 | .. _pg-header: 9 | 10 | .. container:: noindent pgheader language-en pg_boilerplate 11 | 12 | This ebook is for the use of anyone anywhere in the United States 13 | and most other parts of the world at no cost and with almost no 14 | restrictions whatsoever. You may copy it, give it away or re-use it 15 | under the terms of the `Project Gutenberg License`_ included with 16 | this ebook or online at https://www.gutenberg.org/license. If you 17 | are not located in the United States, you'll have to check the laws 18 | of the country where you are located before using this ebook. 19 | 20 | |pg.copyrighted-header| 21 | 22 | .. vspace:: 2 23 | 24 | .. _pg-machine-header: 25 | 26 | .. container:: noindent white-space-pre-line 27 | 28 | |pg.machine-header| 29 | 30 | .. vspace:: 2 31 | 32 | .. _pg-start-line: 33 | 34 | \*\*\* START OF THIS PROJECT GUTENBERG EBOOK |pg.upcase-title| \*\*\* 35 | 36 | .. vspace:: 4 37 | 38 | .. _pg-produced-by: 39 | 40 | |pg.produced-by| 41 | 42 | .. vspace:: 1 43 | 44 | |pg.credits| 45 | -------------------------------------------------------------------------------- /src/ebookmaker/writers/RSTWriter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*- 3 | 4 | """ 5 | RSTWriter.py 6 | 7 | Copyright 2009 by Marcello Perathoner 8 | 9 | Distributable under the GNU General Public License Version 3 or newer. 10 | 11 | Build an RST file. This is just the master RST with the PG license mixed in. 
12 | 13 | """ 14 | 15 | 16 | import os 17 | 18 | from libgutenberg.Logger import debug, info, error 19 | from libgutenberg.GutenbergGlobals import SkipOutputFormat 20 | from ebookmaker import ParserFactory 21 | from ebookmaker import writers 22 | 23 | class Writer (writers.BaseWriter): 24 | """ Class to write a reStructuredText. """ 25 | 26 | def build (self, job): 27 | """ Build RST file. """ 28 | 29 | filename = os.path.join (os.path.abspath(job.outputdir), job.outputfile) 30 | 31 | debug ("Creating RST file: %s" % filename) 32 | 33 | parser = ParserFactory.ParserFactory.create (job.url) 34 | 35 | if not hasattr (parser, 'rst2nroff'): 36 | debug ('RSTWriter can only work on a RSTParser.') 37 | raise SkipOutputFormat 38 | 39 | data = parser.preprocess ('utf-8').encode ('utf-8') 40 | 41 | self.write_with_crlf (filename, data) 42 | 43 | debug ("Done RST file: %s" % filename) 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | Pipfile.lock 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | -------------------------------------------------------------------------------- /ebookmaker.conf: -------------------------------------------------------------------------------- 1 | # copy this file to /etc/ebookmaker.conf to set config paths 2 | # 3 | # copy this file to ~/.ebookmaker to set defaults for command line arguments 4 | # or to override config paths in /etc/ebookmaker.conf 5 | 6 | [DEFAULT_ARGS] 7 | #### this section is inactive in /etc/ebookmaker.conf #### 8 | # types: all [list of output types] 9 | # max_depth: 1 10 | # strip_links: False 11 | # include_urls: [list of urls] 12 | # exclude_urls: [list] 13 | # include_mediatypes: [list of mediatypes] 14 | # exclude_mediatypes: [list of mediatypes] 15 | # mediatype_from_extension: False 16 | # rewrite: [url]>[rewritten url] 17 | # title: None 18 | # author: None 19 | # ebook: 0 20 | # outputdir: ./ 21 | # outputfile: [title].epub 22 | # section_tags: [list of classes] 23 | # packager: None ['ww', 'gzip'] 24 | # cover: None [path] 25 | # generate_cover: False 26 | # epub_validator: java -jar 
epubcheck-4.2.6/epubcheck.jar 27 | # html_validator: vnu-runtime-image/bin/vnu 28 | # production: False 29 | 30 | [PATHS] 31 | # proxies: None 32 | # xelatex: xelatex 33 | # mobigen: ebook-convert # can also be a path to kindlegen 34 | # mobilang: ebook-convert # converter to use for languages not supported by Kindlegen 35 | # mobikf8: ebook-convert # converter for kf8 36 | # groff: groff 37 | # rhyming_dict: None 38 | 39 | # default is '~' 40 | # FILESDIR = file:///Users/Shared/Documents/pg/dev/html/files 41 | 42 | # default is "~/cache/epub/" 43 | # CACHEDIR = /Users/Shared/Documents/gitenberg/cache1/epub -------------------------------------------------------------------------------- /tests/test_html.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import unittest 5 | import subprocess 6 | 7 | 8 | import ebookmaker 9 | 10 | class TestFromHtml(unittest.TestCase): 11 | def setUp(self): 12 | self.sample_dir = os.path.join(os.path.dirname(__file__), 'files') 13 | self.out_dir = os.path.join(os.path.dirname(__file__), 'out') 14 | 15 | def test_43172(self): 16 | book_id = '43172' 17 | dir = os.path.join(self.sample_dir, book_id) 18 | htmfile = os.path.join(dir, '%s-h' % book_id, '%s-h.html' % book_id) 19 | cmd = 'ebookmaker --make=test --output-dir={dir} --generate_cover {htmfile}'.format( 20 | dir=self.out_dir, 21 | htmfile=htmfile, 22 | ) 23 | 24 | output = subprocess.check_output(cmd, shell=True) 25 | 26 | self.assertFalse(output) 27 | outs = [ 28 | "%s-epub.epub", 29 | "%s-images-epub3.epub", 30 | "%s-images-epub.epub", 31 | "%s-h.html", 32 | ] 33 | for out in outs: 34 | self.assertTrue(os.path.exists(os.path.join(self.out_dir, out % book_id))) 35 | os.remove(os.path.join(self.out_dir, out % book_id)) 36 | os.remove(os.path.join(self.out_dir, 'images/image.jpg')) 37 | os.remove(os.path.join(self.out_dir, 'images/mathex.jpg')) 38 | 
os.remove(os.path.join(self.out_dir, 'music/test.mp3')) 39 | os.rmdir(os.path.join(self.out_dir, 'images')) 40 | os.rmdir(os.path.join(self.out_dir, 'music')) 41 | -------------------------------------------------------------------------------- /tests/test_templates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | run this with 5 | python -m unittest -v tests.test_templates 6 | ''' 7 | 8 | import os 9 | import unittest 10 | 11 | from libgutenberg.DublinCore import GutenbergDublinCore 12 | 13 | from ebookmaker.writers import HtmlTemplates, TemplateStrings 14 | 15 | 16 | class TestHeaders(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.dc = GutenbergDublinCore() 20 | book_id = '69030' 21 | self.sample_dir = os.path.join(os.path.dirname(__file__), 'files') 22 | dir = os.path.join(self.sample_dir, book_id) 23 | srcfile = os.path.join(dir, '%s-0.txt' % book_id) 24 | with open(srcfile, 'r') as f: 25 | sampledata = f.read() 26 | self.dc.load_from_pgheader(sampledata) 27 | 28 | def test_templates(self): 29 | self.assertTrue('in the United States' in TemplateStrings.headera) 30 | self.assertTrue('FULL PROJECT GUTENBERG LICENSE' in TemplateStrings.headerb) 31 | self.assertTrue('COPYRIGHTED' in TemplateStrings.headera_copy) 32 | self.assertTrue('This particular' in TemplateStrings.headerb_copy) 33 | self.assertTrue('
` and the warning message will be suppressed. Because of a bug in the W3C HTML validator, you can also use `data-role="presentation"` so that the validator won't complain - ebookmaker will use this to produce valid html5 and epub files.
6 |
7 | 2. when the image is well described by associated text. Often an image from a book will appear above a descriptive caption. For this reason, Ebookmaker will not emit a warning message if it appears inside a `
` But when relying on a caption text, make sure it is describing what a sighted reader sees. Some captions comment on the image without describing it.
8 |
9 |
10 | Accessibility Tutorial:
11 | https://www.w3.org/WAI/tutorials/images/
12 |
13 | Using `aria-labelledby`:
14 | https://www.w3.org/WAI/WCAG21/Techniques/aria/ARIA16
15 |
16 | Other helpful guides:
17 | https://publishers.asn.au/BooksWithoutBarriers
18 | https://axesslab.com/alt-texts/
19 | https://accessibility.huit.harvard.edu/describe-content-images
20 |
21 | W3C validator bug: https://github.com/validator/validator/issues/1599
--------------------------------------------------------------------------------
/tests/test_job.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | '''
4 | run this with
5 | python -m unittest -v tests.test_job
6 | '''
7 | import datetime
8 | import os
9 | import subprocess
10 | import sys
11 | import unittest
12 |
13 | from libgutenberg import Logger
14 | from libgutenberg.Logger import debug
15 | from libgutenberg.DublinCore import PGDCObject
16 |
17 | import ebookmaker
18 | from ebookmaker import CommonCode, ParserFactory
19 | from ebookmaker.CommonCode import Options
20 | from ebookmaker.EbookMaker import config, get_dc
21 | from ebookmaker.parsers import webify_url
22 |
# Module-level options object; the tests below write to options.config and
# options.is_job_queue through it.
# NOTE(review): presumably Options() hands back state shared with the
# ebookmaker modules -- confirm against CommonCode.Options.
options = Options()
Logger.set_log_level(10) # DEBUG
25 |
class TestJob(unittest.TestCase):
    """Check the update date that ``Job.last_updated()`` stores on ``job.dc``.

    Two cases are covered: a job whose url is a plain file path, and a job
    whose url is a ``file://`` url while ``options.is_job_queue`` is set.
    """

    def setUp(self):
        """Load config and parsers, and point the options at the test tree."""
        config()
        ParserFactory.load_parsers()
        self.sample_dir = os.path.join(os.path.dirname(__file__), 'files')
        self.out_dir = os.path.join(os.path.dirname(__file__), 'out')
        self.testfile = os.path.join(self.sample_dir, '43172/43172-h/43172-h.htm')
        # Portable replacement for shelling out to `touch`: create the file
        # if it is missing, then set its access/modification time to "now".
        # Unlike the unchecked subprocess call, a failure here (for example a
        # missing directory) raises immediately instead of passing silently.
        with open(self.testfile, 'a'):
            pass
        os.utime(self.testfile, None)
        self.testdbfile = "file://" + self.testfile
        options.config.CACHEDIR = os.path.join(os.path.dirname(__file__), 'cache/epub')
        options.config.FILESDIR = webify_url(os.path.join(os.path.dirname(__file__), 'files/'))

    def test_update(self):
        """A job on the freshly touched file reports today's date."""
        job = CommonCode.Job('html.images')
        job.ebook = 43172
        job.url = self.testfile
        job.dc = get_dc(job)
        job.last_updated()
        self.assertEqual(job.dc.update_date, datetime.date.today())

    def test_update_db(self):
        """A job-queue job on the ``file://`` url reports the fixed 2013-07-09 date."""
        job = CommonCode.Job('html.images')
        job.ebook = 43172
        options.is_job_queue = True
        job.url = self.testdbfile
        job.dc = get_dc(job)
        # the date assertion below is only meaningful if file records were loaded
        self.assertTrue(len(job.dc.files) > 0)
        job.last_updated()
        self.assertEqual(job.dc.update_date, datetime.date(2013,7,9))
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/src/ebookmaker/mydocutils/gutenberg/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-
3 |
4 | """
5 |
6 | Module parsers
7 |
8 | Copyright 2010-2012 by Marcello Perathoner
9 |
10 | Distributable under the GNU General Public License Version 3 or newer.
11 |
12 | Customized Project Gutenberg directives for RST parser.
13 |
14 | """
15 |
16 | from docutils import statemachine
17 | from docutils.parsers.rst import Directive, directives
18 |
19 | from ebookmaker.mydocutils import parsers
20 |
21 | from ebookmaker.mydocutils.gutenberg import transforms as gutenberg_transforms
22 |
23 | from libgutenberg.Logger import error, warning, info, debug
24 |
25 | # pylint: disable=W0142, W0102
26 |
27 |
class PGHeaderFooter (Directive):
    """ Common base for the directives that splice PG boilerplate into a document.

    Subclasses supply a ``resource`` attribute naming the file to insert.

    """

    required_arguments = 0
    optional_arguments = 0

    def run (self):
        """ Feed the lines of the resource file to the state machine.

        Returns an empty node list: the inserted text is parsed as regular
        input instead of being returned as nodes.

        """
        doc_settings = self.state.document.settings
        resource_text = doc_settings.get_resource (
            'mydocutils.gutenberg.parsers', self.resource)
        resource_lines = statemachine.string2lines (
            resource_text,
            doc_settings.tab_width,
            convert_whitespace = 1)
        self.state_machine.insert_input (resource_lines, '')
        return []
42 |
43 |
class PGHeader (PGHeaderFooter):
    """ Directive that inserts the PG header resource. """

    # file name the inherited run () passes to settings.get_resource ()
    resource = 'pg-header.rst'
47 |
48 |
class PGFooter (PGHeaderFooter):
    """ Directive that inserts the PG footer resource. """

    # file name the inherited run () passes to settings.get_resource ()
    resource = 'pg-footer.rst'
52 |
53 |
class Parser (parsers.Parser):
    """ RST parser extended with the Project Gutenberg directives. """

    def __init__ (self):
        """ Initialize the base parser, then register 'pgheader' and 'pgfooter'. """
        parsers.Parser.__init__ (self)

        for directive_name, directive_class in (('pgheader', PGHeader),
                                                ('pgfooter', PGFooter)):
            directives.register_directive (directive_name, directive_class)


    def get_transforms (self):
        """ Return the base parser's transforms followed by the PG variable transforms. """
        inherited = parsers.Parser.get_transforms (self)
        return inherited + [
            gutenberg_transforms.VariablesTransform,
            gutenberg_transforms.SubRefToVarTransform,
        ]
68 |
--------------------------------------------------------------------------------
/src/ebookmaker/WriterFactory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*-
3 |
4 | """
5 |
6 | WriterFactory.py
7 |
8 | Copyright 2009-14 by Marcello Perathoner
9 |
10 | Distributable under the GNU General Public License Version 3 or newer.
11 |
12 | Writer factory. Dynamically loads writers from directories.
13 |
14 | """
15 |
16 |
17 | import os.path
18 |
19 | from pkg_resources import resource_listdir # pylint: disable=E0611
20 |
21 | from libgutenberg.Logger import error, debug
22 | from ebookmaker.CommonCode import Options
23 |
# Options object; load_writers () reads options.extension_packages from it.
options = Options()

# Registry filled by __load_writers_from (): maps the lower-cased module name
# with 'writer' removed to the imported writer module.
writers = {}
27 |
def __load_writers_from (package_name):
    """ Register every ``*Writer.py`` module found in a package.

    The registry key is the lower-cased module name with 'writer' removed.
    Modules that fail to import are logged with error () and skipped.

    """

    for entry in resource_listdir (package_name, ''):
        modulename, extension = os.path.splitext (entry)
        if extension != '.py' or not modulename.endswith ('Writer'):
            continue

        type_ = modulename.lower ().replace ('writer', '')
        try:
            debug ("Loading writer type %s from module %s" % (type_, modulename))
            qualified_name = package_name + '.' + modulename
            # a non-empty fromlist makes __import__ return the submodule itself
            writers[type_] = __import__ (qualified_name, fromlist = [modulename])
        except ImportError as what:
            error (
                "Could not load writer type %s from module %s. %s" %
                (type_, modulename, what)
            )
44 |
45 |
def load_writers ():
    """ Populate the writer registry and return the writer types it holds.

    The built-in ``ebookmaker.writers`` package is scanned first, then each
    package listed in ``options.extension_packages``.

    """

    __load_writers_from ('ebookmaker.writers')

    for extension_package in options.extension_packages:
        __load_writers_from (extension_package)

    known_types = writers.keys ()
    return known_types
55 |
56 |
def unload_writers ():
    """ Unload writer modules.

    Empties the module-level ``writers`` registry in place, so every
    reference to the dict sees the change.

    """
    # Deleting entries while iterating writers.keys () raises RuntimeError on
    # Python 3 ("dictionary changed size during iteration"); clear () removes
    # everything without iterating.
    writers.clear ()
61 |
62 |
def create (type_):
    """ Instantiate the writer registered for an output type.

    The type 'kf8' is served by the 'kindle' writer. Raises KeyError with
    a descriptive message when no writer is available for the type.

    """
    writer_type = 'kindle' if type_ == 'kf8' else type_
    try:
        return writers[writer_type].Writer ()
    except KeyError:
        raise KeyError ('No writer for type %s' % writer_type)
71 |
--------------------------------------------------------------------------------
/src/ebookmaker/mydocutils/nodes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-
3 |
4 | """
5 |
6 | nodes.py
7 |
8 | Copyright 2011 by Marcello Perathoner
9 |
10 | Distributable under the GNU General Public License Version 3 or newer.
11 |
12 | Added nodes for PG.
13 |
14 | """
15 |
16 | from docutils import nodes
17 |
class page (nodes.Element, nodes.Special):
    """ Node that holds a pagination command.

    Examples of such commands are clearpage and vspace.

    The class body is empty: everything is inherited from the docutils
    bases `nodes.Element` and `nodes.Special`.

    """
24 |
class newline (nodes.Element):
    """ A line break.

    Outputs a hard line break if the node or one of its parents belongs
    to the class 'white-space-pre-line'.  Else a space.

    The class body is empty, so the output rule described above is not
    implemented in this class.

    """
32 |
class footnote_group (nodes.container):
    """ Container node that holds a group of footnotes. """
35 |
36 |
class variable (nodes.Inline, nodes.TextElement):
    """ Inline placeholder that gets substituted with actual text before output.

    We do not use docutils substitution references because they are
    resolved way too early in the transformation stage to be of much use
    to us.

    """
44 |
45 |
class node_selector (object):
    """ Callable that tests a node against a CSS-like selector.

    Meant as condition function for nodes.traverse ().  Selectors have
    the form::

        [element][.class[.class[...]]][, selector[, selector]]

    """

    def __init__ (self, selector):
        """ Compile the selector string into (node class, class names) pairs. """

        # one 2-tuple (node class, set of required class names) for each
        # comma-separated alternative
        self.matches = []

        for alternative in selector.split (','):
            element, _, class_part = alternative.strip ().partition ('.')
            required = set (class_part.split ('.')) if class_part else set ()
            # element names unknown to docutils (and the empty name)
            # fall back to nodes.Element
            node_class = getattr (nodes, element, nodes.Element)
            self.matches.append ( (node_class, required) )


    def __call__ (self, node):
        """ Return True if the node matches any alternative of the selector. """

        return any (
            isinstance (node, node_class) and required.issubset (node['classes'])
            for node_class, required in self.matches)
72 |
--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | # file GENERATED by distutils, do NOT edit
2 | CHANGES
3 | README
4 | setup.cfg
5 | setup.py
6 | ebookmaker/CommonCode.py
7 | ebookmaker/EbookMaker.py
8 | ebookmaker/HTMLChunker.py
9 | ebookmaker/ParserFactory.py
10 | ebookmaker/Spider.py
11 | ebookmaker/Unitame.py
12 | ebookmaker/UnitameData.py
13 | ebookmaker/Version.py
14 | ebookmaker/WriterFactory.py
15 | ebookmaker/__init__.py
16 | ebookmaker/mydocutils/__init__.py
17 | ebookmaker/mydocutils/nodes.py
18 | ebookmaker/mydocutils/gutenberg/__init__.py
19 | ebookmaker/mydocutils/gutenberg/parsers/__init__.py
20 | ebookmaker/mydocutils/gutenberg/parsers/pg-footer.rst
21 | ebookmaker/mydocutils/gutenberg/parsers/pg-header.rst
22 | ebookmaker/mydocutils/gutenberg/transforms/__init__.py
23 | ebookmaker/mydocutils/gutenberg/writers/__init__.py
24 | ebookmaker/mydocutils/gutenberg/writers/nroff.py
25 | ebookmaker/mydocutils/parsers/__init__.py
26 | ebookmaker/mydocutils/parsers/default_style.rst
27 | ebookmaker/mydocutils/transforms/__init__.py
28 | ebookmaker/mydocutils/transforms/parts.py
29 | ebookmaker/mydocutils/writers/__init__.py
30 | ebookmaker/mydocutils/writers/epub2.py
31 | ebookmaker/mydocutils/writers/nroff.py
32 | ebookmaker/mydocutils/writers/rst2all.css
33 | ebookmaker/mydocutils/writers/rst2epub.css
34 | ebookmaker/mydocutils/writers/rst2html.css
35 | ebookmaker/mydocutils/writers/xetex.py
36 | ebookmaker/mydocutils/writers/xhtml1.py
37 | ebookmaker/packagers/GzipPackager.py
38 | ebookmaker/packagers/HTMLPackager.py
39 | ebookmaker/packagers/PDFPackager.py
40 | ebookmaker/packagers/PushPackager.py
41 | ebookmaker/packagers/RSTPackager.py
42 | ebookmaker/packagers/TxtPackager.py
43 | ebookmaker/packagers/__init__.py
44 | ebookmaker/parsers/AuxParser.py
45 | ebookmaker/parsers/CSSParser.py
46 | ebookmaker/parsers/GutenbergTextParser.py
47 | ebookmaker/parsers/HTMLParser.py
48 | ebookmaker/parsers/ImageParser.py
49 | ebookmaker/parsers/RSTParser.py
50 | ebookmaker/parsers/__init__.py
51 | ebookmaker/parsers/broken.png
52 | ebookmaker/writers/EpubWriter.py
53 | ebookmaker/writers/HTMLWriter.py
54 | ebookmaker/writers/KindleWriter.py
55 | ebookmaker/writers/PDFWriter.py
56 | ebookmaker/writers/PicsDirWriter.py
57 | ebookmaker/writers/RSTWriter.py
58 | ebookmaker/writers/TxtWriter.py
59 | ebookmaker/writers/__init__.py
60 | ebookmaker/writers/cover.jpg
61 | scripts/convert_unitame
62 | scripts/ebookmaker
63 | scripts/rhyme_compiler
64 |
--------------------------------------------------------------------------------
/src/ebookmaker/writers/PicsDirWriter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*-
3 |
4 | """
5 |
6 | PicsDirWriter.py
7 |
8 | Copyright 2012 by Marcello Perathoner
9 |
10 | Distributable under the GNU General Public License Version 3 or newer.
11 |
12 | Copies pics into local directory. Needed for HTML and Xetex.
13 |
14 | """
15 |
16 |
17 | import os.path
18 |
19 | import libgutenberg.GutenbergGlobals as gg
20 | from libgutenberg.Logger import info, debug, error
21 |
22 | from ebookmaker.parsers import webify_url
23 | from ebookmaker import writers
24 |
25 |
class Writer(writers.BaseWriter):
    """ Writer that fills a directory with the pictures of a book. """

    def copy_aux_files(self, job, dest_dir):
        """ Write the auxiliary files of a job below dest_dir.

        Considers every parser of the job's spider that has a
        `resize_image` or an `auxparser` attribute and writes the data
        returned by the parser's serialize() to the file's location
        relative to job.base_url.  Files whose url already starts with
        dest_dir, or that are the same path as the destination, are left
        alone.  An IOError while writing is logged, not raised.

        """

        for parser in job.spider.parsers:
            wanted = hasattr(parser, 'resize_image') or hasattr(parser, 'auxparser')
            if not wanted:
                continue

            src_uri = parser.attribs.url
            if src_uri.startswith(webify_url(dest_dir)):
                debug('Not copying %s to %s: already there' % (src_uri, dest_dir))
                continue

            relative = gg.make_url_relative(webify_url(job.base_url), src_uri)
            target = os.path.join(dest_dir, relative)

            if gg.is_same_path(src_uri, target):
                debug('Not copying %s to %s: same file' % (src_uri, target))
                continue

            target = gg.normalize_path(target)
            debug('Copying %s to %s' % (src_uri, target))
            gg.mkdir_for_filename(target)
            try:
                with open(target, 'wb') as out:
                    out.write(parser.serialize())
            except IOError as what:
                error('Cannot copy %s to %s: %s' % (src_uri, target, what))


    def build(self, job):
        """ Copy the job's auxiliary files into the absolute job.outputdir. """

        pics_dir = os.path.abspath(job.outputdir)

        debug("Creating Pics directory in: %s" % pics_dir)

        self.copy_aux_files(job, pics_dir)

        debug("Done Pics directory in: %s" % pics_dir)
69 |
--------------------------------------------------------------------------------
/scripts/convert_unitame:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*-
3 |
4 | """
5 |
6 | convert_unitame.py
7 |
8 | Copyright 2010,2014 by Marcello Perathoner
9 |
10 | Distributable under the GNU General Public License Version 3 or newer.
11 |
12 | Converts unitame.dat into UnitameData module.
13 |
14 | """
15 |
16 | import codecs
17 | import unicodedata as ud
18 |
# from addhd
#
# Replacement strings indexed by (code point - 0x80): the loop at the end
# of this script pairs entry n with the character 0x80 + n and only emits
# entries for characters above 0xa0.  An empty string means that no
# replacement is emitted for that character.

i2a = (
    "Euro","",",","f","\"","...","","","^","%","S","<","OE","","Z","",
    "","'","'","\"","\"","","-","--","~","(TM)","s",">","oe","","z","Y",
    " ","i","c","L","","Y","|","Sec.","\"","(C)","","\"","","-","(R)","-",
    " deg.","+-"," squared"," cubed","'"," mu","",".","","","","\"","1/4","1/2","3/4","?",
    "A","A","A","A","Ae","A","AE","C","E","E","E","E","I","I","I","I",
    "Eth","N","O","O","O","O","Oe","x","O","U","U","U","Ue","Y","","ss",
    "a","a","a","a","ae","a","ae","c","e","e","e","e","i","i","i","i",
    "eth","n","o","o","o","o","oe","/","o","u","u","u","ue","y","","y"
)
31 |
32 |
def strip_accents (s):
    """ Strip accents from string.

    Decomposes `s` (NFKD), drops all characters of category 'Mn'
    (nonspacing marks) and recomposes the rest (NFKC).

    """
    # str.join instead of filter (): on Python 3 filter () returns an
    # iterator, which unicodedata.normalize () rejects.
    decomposed = ud.normalize ('NFKD', s)
    return ud.normalize (
        'NFKC', ''.join (c for c in decomposed if ud.category (c) != 'Mn'))


def _format_entry (c, sub):
    """ Return the source line of one dict entry: character, substitute, name. """
    comment = ud.name (c)
    if sub == "'":
        # keep the generated single-quoted literal valid
        sub = r"\'"
    return " '%s': '%s', # %s" % (c, sub, comment)


def main ():
    """ Write the source of the UnitameData module to stdout.

    Reads 'unitame.dat' (iso-8859-1, fields separated by ';': hex code
    point, an unused field, substitute) from the current directory and
    emits the dict `unicode_to_iso_8859_1`, followed by the dict
    `iso_8859_1_to_ascii` built from the `i2a` table.  Entries whose
    substitute is empty or equals the accent-stripped character are
    left out.

    This is a Python 3 port: the former print statements, unichr () and
    the 'rU' open mode do not work on Python 3.

    """
    import sys

    def emit (text):
        """ Write one line as UTF-8, independent of the locale. """
        # the generated module declares coding: utf-8
        sys.stdout.buffer.write ((text + '\n').encode ('utf-8'))

    emit ('''#!/usr/bin/env python
# -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-

""" Converted from unitame.dat """

from __future__ import unicode_literals

unicode_to_iso_8859_1 = {''')

    # `with` closes the data file, which was left open before
    with open ('unitame.dat', encoding = 'iso-8859-1') as fp:
        for line in fp:
            line = line.strip ()
            if not line:
                # a blank line has no fields to unpack
                continue
            c, dummy, sub = line.split (';', 2)
            c = chr (int (c, 16))
            if sub and c != sub and strip_accents (c) != sub:
                emit (_format_entry (c, sub))

    emit ("}\n\n")

    emit ("iso_8859_1_to_ascii = {")

    # entry n of i2a belongs to the character 0x80 + n
    for n, sub in enumerate (i2a, 0x80):
        if n > 0xa0:
            c = chr (n)
            if sub and strip_accents (c) != sub:
                emit (_format_entry (c, sub))

    emit ("}\n\n")


if __name__ == '__main__':
    main ()
75 |
--------------------------------------------------------------------------------
/tests/test_setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | '''
4 | run this with
5 | python -m unittest -v ebookmaker.tests.test_setup
6 | '''
7 | import os
8 | import unittest
9 | import subprocess
10 |
11 | from libgutenberg import Logger
12 | from libgutenberg.Logger import debug
13 |
14 | import ebookmaker
15 | from ebookmaker import CommonCode
16 | from ebookmaker import ParserFactory
17 | from ebookmaker import WriterFactory
18 | from ebookmaker.CommonCode import Options, path_from_file
19 | from ebookmaker.EbookMaker import config
20 | from ebookmaker.EbookMaker import DEPENDENCIES, BUILD_ORDER
21 | from ebookmaker.packagers import PackagerFactory
22 | from ebookmaker.parsers import BROKEN, webify_url
23 |
# Options instance that TestLoad.setUp () fills in (types,
# config.CACHEDIR, config.FILESDIR) before each test.
options = Options()
25 |
class TestLoad(unittest.TestCase):
    """ Smoke tests: the factories load and file paths resolve. """

    def setUp(self):
        """ Configure ebookmaker and point cache and files dirs at the test tree. """
        here = os.path.dirname(__file__)

        config()
        Logger.set_log_level(options.verbose)
        options.types = options.types or ['all']
        options.types = CommonCode.add_dependencies(options.types, DEPENDENCIES, BUILD_ORDER)
        options.config.CACHEDIR = os.path.join(here, 'cache/epub')
        options.config.FILESDIR = webify_url(os.path.join(here, 'files/'))
        debug("Building types: %s" % ' '.join(options.types))

    def test_parsers(self):
        """ The parser built for the BROKEN resource holds a usable image. """
        ParserFactory.load_parsers()
        factory = ParserFactory.ParserFactory()

        # check parser created from resource
        broken_parser = factory.create(BROKEN)
        self.assertTrue(hasattr(broken_parser, 'resize_image'))
        broken_parser.pre_parse()
        self.assertGreater(len(broken_parser.image_data), 0)
        self.assertGreater(broken_parser.get_image_dimen()[0], 0)

        # check conversion to jpeg
        broken_parser.resize_image(16 * 1024, (66, 100), output_format='jpeg')

    def test_writers(self):
        """ All writers load without raising. """
        WriterFactory.load_writers()

    def test_packagers(self):
        """ All packagers load without raising. """
        PackagerFactory.load_packagers()

    def test_dirs(self):
        """ path_from_file() maps file names to the expected path endings. """
        expectations = (
            ('cache/epub/1234/test', 'cache/epub/1234/test'),
            ('1/2/3/1234/test', 'files/1234/test'),
        )
        for name, expected_tail in expectations:
            resolved = path_from_file(name)
            print(resolved)
            self.assertTrue(resolved.endswith(expected_tail))
63 |
--------------------------------------------------------------------------------
/src/ebookmaker/mydocutils/writers/rst2html.css:
--------------------------------------------------------------------------------
1 | /*
2 | Project Gutenberg HTML docutils stylesheet.
3 |
4 | This stylesheet contains styles specific to HTML.
5 | */
6 |
7 | /* FONTS */
8 |
9 | /* em { font-style: normal }
10 | strong { font-weight: normal } */
11 |
12 | .small-caps { font-variant: small-caps }
13 | .gesperrt { letter-spacing: 0.1em }
14 |
15 | /* ALIGN */
16 |
17 | .align-left { clear: left;
18 | float: left;
19 | margin-right: 1em }
20 |
21 | .align-right { clear: right;
22 | float: right;
23 | margin-left: 1em }
24 |
25 | .align-center { margin-left: auto;
26 | margin-right: auto }
27 |
28 | div.shrinkwrap { display: table; }
29 |
30 | /* SECTIONS */
31 |
32 | body { margin: 5% 10% 5% 10% }
33 |
34 | /* compact list items containing just one p */
35 | li p.pfirst { margin-top: 0; margin-bottom: 0 }
36 |
37 | .first { margin-top: 0 !important;
38 | text-indent: 0 !important }
39 | .last { margin-bottom: 0 !important }
40 |
41 | span.dropcap { float: left; margin: 0 0.1em 0 0; line-height: 1 }
42 | img.dropcap { float: left; margin: 0 0.5em 0 0; max-width: 25% }
43 | span.dropspan { font-variant: small-caps }
44 |
45 | .no-page-break { page-break-before: avoid !important }
46 |
47 | /* PAGINATION */
48 |
49 | .pageno { position: absolute; right: 95%; font: medium sans-serif; text-indent: 0 }
50 | .pageno:after { color: gray; content: '[' attr(title) ']' }
51 | .lineno { position: absolute; left: 95%; font: medium sans-serif; text-indent: 0 }
52 | .lineno:after { color: gray; content: '[' attr(title) ']' }
53 | .toc-pageref { float: right }
54 |
55 | @media screen {
56 | .coverpage, .frontispiece, .titlepage, .verso, .dedication, .plainpage
57 | { margin: 10% 0; }
58 |
59 | div.clearpage, div.cleardoublepage
60 | { margin: 10% 0; border: none; border-top: 1px solid gray; }
61 |
62 | .vfill { margin: 5% 10% }
63 | }
64 |
65 | @media print {
66 | div.clearpage { page-break-before: always; padding-top: 10% }
67 | div.cleardoublepage { page-break-before: right; padding-top: 10% }
68 |
69 | .vfill { margin-top: 20% }
70 | h2.title { margin-top: 20% }
71 | }
72 |
73 | /* DIV */
74 | pre { font-family: monospace; font-size: 0.9em; white-space: pre-wrap }
75 |
--------------------------------------------------------------------------------
/src/ebookmaker/packagers/PushPackager.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-
3 |
4 | """
5 | PushPackager.py
6 |
7 | Copyright 2011 by Marcello Perathoner
8 |
9 | Distributable under the GNU General Public License Version 3 or newer.
10 |
11 | Package a zip containing everything, that can be pushed to ibiblio.
12 |
13 | """
14 |
15 |
16 | import os.path
17 | import re
18 |
19 | from libgutenberg.Logger import info, error
20 | import libgutenberg.GutenbergGlobals as gg
21 |
22 | from ebookmaker.CommonCode import Options
23 | from ebookmaker.packagers import ZipPackager
24 |
# Packager.package () reads `options.html_images_list` from this instance.
options = Options()
# NOTE(review): TYPE and FORMATS are not used inside this module; presumably
# the packager factory reads them to select this packager -- confirm.
TYPE = 'ww'
FORMATS = ['push']
28 |
class Packager (ZipPackager):
    """ Package one big zip for push.

    Zip contains one directory named after ebook_no.
    This dir mirrors structure on ibiblio::

      12345/12345.txt
      12345/12345.zip
      12345/12345-h/12345-h.html
      12345/12345-h/images/cover.jpg
      12345/12345-h.zip

    """

    def package (self, job):
        """ Build the push zip named by job.outputfile.

        The ebook number is taken from the leading digits of the output
        file name.  If the name does not start with digits an error is
        logged and nothing is written.

        """
        self.setup (job)
        zipfilename = job.outputfile # filename is zipfile

        m = re.match (r'\d+', zipfilename)
        if not m:
            error ('Invalid filename %s for push packager.' % zipfilename)
            return
        ebook_no = m.group (0)

        zip_ = self.create (zipfilename)
        try:
            # files that go directly into the ebook directory
            for suffix in '.txt -8.txt -0.txt .zip -8.zip -0.zip -rst.zip -h.zip'.split ():
                filename = '%s%s' % (ebook_no, suffix)
                memberfilename = '%s/%s' % (ebook_no, filename)
                self.add (zip_, filename, memberfilename)

            # files that go into a subdirectory of their own
            for suffix, ext in (('-h', 'html'), ('-rst', 'rst')):
                filename = '%s%s.%s' % (ebook_no, suffix, ext)
                memberfilename = '%s/%s%s/%s' % (ebook_no, ebook_no, suffix, filename)
                self.add (zip_, filename, memberfilename)

            # image files
            # These are the images of the HTML version and belong into
            # the '-h' directory.  Do not reuse `suffix` here: after the
            # loop above it holds '-rst', which filed the images under
            # 12345/12345-rst/ instead of 12345/12345-h/.
            for url in options.html_images_list:
                rel_url = gg.make_url_relative (job.base_url, url)
                # NOTE(review): self.path is presumably set by
                # self.setup () -- confirm against ZipPackager.
                filename = os.path.join (self.path, rel_url)
                memberfilename = '%s/%s-h/%s' % (ebook_no, ebook_no, rel_url)
                self.add (zip_, filename, memberfilename)
        finally:
            # close the archive even if adding a member raised
            zip_.close ()

        info ('Done Zip file: %s' % zipfilename)
75 |
--------------------------------------------------------------------------------
/tests/test_htm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import os
4 | import unittest
5 | import subprocess
6 |
7 |
8 | import ebookmaker
9 |
class TestFromHtm(unittest.TestCase):
    """ End-to-end runs of the ebookmaker command on the sample book 43172. """

    def setUp(self):
        """ Locate the sample files and the output directory next to this file. """
        here = os.path.dirname(__file__)
        self.sample_dir = os.path.join(here, 'files')
        self.out_dir = os.path.join(here, 'out')

    def test_43172(self):
        """ Build the test formats from the HTML file and clean up the results. """
        book_id = '43172'
        book_dir = os.path.join(self.sample_dir, book_id)
        htmfile = os.path.join(book_dir, '%s-h' % book_id, '%s-h.htm' % book_id)
        cmd = f'ebookmaker -v --ebook=43172 --make=test --output-dir={self.out_dir} '
        cmd += f'--validate {htmfile}'

        output = subprocess.check_output(cmd, shell=True)

        self.assertFalse(output)
        templates = (
            "%s-epub.epub",
            "%s-images-epub3.epub",
            "%s-images-epub.epub",
            "%s-h.html",
        )
        for template in templates:
            produced = os.path.join(self.out_dir, template % book_id)
            self.assertTrue(os.path.exists(produced))
            os.remove(produced)
        # copied auxiliary files first, then the emptied directories
        for leftover in ('images/image.jpg', 'images/mathex.jpg', 'music/test.mp3'):
            os.remove(os.path.join(self.out_dir, leftover))
        for subdir in ('images', 'music'):
            os.rmdir(os.path.join(self.out_dir, subdir))

    def test_43172_nocover(self):
        """ Build from the cover-less HTML file; a cover must be generated. """
        book_id = '43172'
        book_dir = os.path.join(self.sample_dir, book_id)
        htmfile = os.path.join(book_dir, '%s-h' % book_id, '%s-nocover.htm' % book_id)
        cmd = 'ebookmaker --make=test --output-dir={dir} --generate_cover {htmfile}'.format(
            dir=self.out_dir,
            htmfile=htmfile,
        )

        output = subprocess.check_output(cmd, shell=True)

        self.assertFalse(output)
        templates = (
            "%s-epub.epub",
            "%s-images-epub3.epub",
            "%s-images-epub.epub",
            "%s-h.html",
            "%s-cover.png",
        )
        for template in templates:
            produced = os.path.join(self.out_dir, template % book_id)
            self.assertTrue(os.path.exists(produced))
            os.remove(produced)
        os.remove(os.path.join(self.out_dir, 'images/image.jpg'))
        os.rmdir(os.path.join(self.out_dir, 'images'))
--------------------------------------------------------------------------------
/src/ebookmaker/parsers/WrapperParser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- mode: python; indent-tabs-mode: nil; -*- coding: UTF8 -*-
3 |
4 | """
5 |
6 | WrapperParser.py
7 |
8 | Copyright 2020 by Eric Hellman
9 |
10 | Distributable under the GNU General Public License Version 3 or newer.
11 |
12 | """
13 | from xml.sax.saxutils import escape, quoteattr
14 |
15 | import lxml
16 |
17 | from copy import copy
18 | from libgutenberg.Logger import info
19 | from libgutenberg import GutenbergGlobals as gg
20 | from ebookmaker.parsers import HTMLParserBase, IMAGE_WRAPPER
21 |
# Empty tuple: this module lists no media types.
# NOTE(review): presumably the parser factory uses `mediatypes` to pick a
# parser by media type, so this parser is only created explicitly -- confirm.
mediatypes = ()
23 |
24 | class Parser(HTMLParserBase):
25 |
26 | def __init__(self, attribs):
27 | HTMLParserBase.__init__(self, copy(attribs))
28 | self.attribs.orig_mediatype = self.attribs.mediatype
29 | self.src = attribs.url
30 | self.attribs.url = self.wrapper_url(attribs.url)
31 | self.attribs.orig_url = self.attribs.url
32 | self.attribs.nonlinear = True
33 | if not self.attribs.title:
34 | self.attribs.title = 'linked image'
35 | self.xhtml = lxml.etree.fromstring(
36 | self.unicode_content(),
37 | lxml.html.XHTMLParser(),
38 | base_url=self.attribs.url
39 | )
40 | self.fp = True # so writers won't skip it
41 |
42 | # mark the image for treatment as a linked image
43 | attribs.rel.add('linked_image')
44 | # set the referrer for the image to this wrapper
45 | attribs.referrer = self.attribs.url
46 |
47 |
48 | def unicode_content(self):
49 | """ wrapper page content """
50 | frag = ('#%s' % self.attribs.id) if self.attribs.id else ''
51 | backlink = '