├── .gitignore ├── LICENSE ├── README.md ├── examples └── converter.py ├── requirements.txt └── sample └── html_to_pdf_converter.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | *.pdf 127 | *.html 128 | chromedriver 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Maksim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-selenium-chrome-html-to-pdf-converter 2 | Simple python wrapper to convert HTML to PDF with headless Chrome via selenium. 3 | 4 | ## Installation 5 | Clone repository, move to project root dir, install virtualenv, install dependencies: 6 | ``` 7 | git clone https://github.com/maxvst/python-selenium-chrome-html-to-pdf-converter.git 8 | cd python-selenium-chrome-html-to-pdf-converter 9 | python3 -m venv venv 10 | source venv/bin/activate 11 | pip install -r requirements.txt 12 | ``` 13 | Install chrome (chromium) browser. 14 | 15 | Download chromedriver from http://chromedriver.chromium.org/ and put it to project root directory. 16 | 17 | ## Demo 18 | ``` 19 | cd examples 20 | python converter.py https://google.com google.pdf 21 | ``` 22 | 23 | ## Why use selenium? 
24 | TODO: Add description 25 | 26 | ## CSS recommendations 27 | 28 | Basic configuration for single page: 29 | ``` 30 | @page { 31 | size: A4; 32 | margin: 0mm; 33 | } 34 | ``` 35 | 36 | For printing double-sided documents use 37 | ``` 38 | @page :left { 39 | margin-left: 4cm; 40 | margin-right: 2cm; 41 | } 42 | 43 | @page :right { 44 | margin-left: 4cm; 45 | margin-right: 2cm; 46 | } 47 | 48 | @page :first { 49 | margin-top: 10cm /* Top margin on first page 10cm */ 50 | } 51 | ``` 52 | 53 | Control pagination with page-break-before, page-break-after, page-break-inside like 54 | ``` 55 | h1 { page-break-before : right } 56 | h2 { page-break-after : avoid } 57 | table { page-break-inside : avoid } 58 | ``` 59 | Control widows and orphans like 60 | ``` 61 | @page { 62 | orphans:4; 63 | widows:2; 64 | } 65 | ``` 66 | For more details, see https://www.tutorialspoint.com/css/css_paged_media.htm 67 | -------------------------------------------------------------------------------- /examples/converter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | 4 | from sample.html_to_pdf_converter import get_pdf_from_html 5 | 6 | if len(sys.argv) != 3: 7 | print ("usage: converter.py ") 8 | exit() 9 | 10 | result = get_pdf_from_html(sys.argv[1], chromedriver='../chromedriver') 11 | with open(sys.argv[2], 'wb') as file: 12 | file.write(result) 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | selenium==3.141.0 2 | 3 | -------------------------------------------------------------------------------- /sample/html_to_pdf_converter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from selenium import webdriver 3 | from selenium.webdriver.chrome.options import Options 4 | import json, base64 5 | 6 | def
def send_devtools(driver, cmd, params=None):
    """Send a Chrome DevTools command through chromedriver and return its result.

    The command is POSTed to the session's
    ``/chromium/send_command_and_get_result`` endpoint using the driver's
    command executor.

    Args:
        driver: Object exposing ``session_id`` and a ``command_executor``
            with ``_url`` and ``_request`` (private attributes of the
            executor, so this is tied to the selenium version in use).
        cmd: DevTools command name, e.g. ``"Page.printToPDF"``.
        params: Optional dict of command parameters; an empty dict is sent
            when omitted.

    Returns:
        The ``value`` entry of the response.

    Raises:
        Exception: If the response carries a truthy ``status``; the
            exception argument is the response ``value``.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if params is None:
        params = {}
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)
    if response.get('status'):
        raise Exception(response.get('value'))
    return response.get('value')


def get_pdf_from_html(path, chromedriver='./chromedriver', print_options=None):
    """Render a page in headless Chrome and return it as PDF bytes.

    Args:
        path: Address handed to ``driver.get`` (for example an ``https://``
            URL).
        chromedriver: Path to the chromedriver executable.
        print_options: Optional dict of ``Page.printToPDF`` parameters that
            override the defaults set below.

    Returns:
        The decoded PDF document as ``bytes``.

    Raises:
        Exception: Propagated from ``send_devtools`` when the print command
            fails; errors from the driver itself also propagate.
    """
    webdriver_options = Options()
    webdriver_options.add_argument('--headless')
    webdriver_options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(chromedriver, options=webdriver_options)

    # Always shut the browser down, even when navigation or printing fails;
    # otherwise the headless Chrome and chromedriver processes are leaked.
    try:
        driver.get(path)

        calculated_print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        if print_options:
            calculated_print_options.update(print_options)
        result = send_devtools(driver, "Page.printToPDF", calculated_print_options)
    finally:
        driver.quit()

    # The DevTools response carries the document base64-encoded under 'data'.
    return base64.b64decode(result['data'])


if __name__ == "__main__":
    # TODO: add short help layout
    pass