├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.rst ├── benchmark ├── invoice.html └── run.py ├── docker-entrypoint.py ├── pydf ├── __init__.py ├── bin │ └── wkhtmltopdf ├── version.py └── wkhtmltopdf.py ├── pyproject.toml ├── setup.py └── tests ├── __init__.py ├── requirements.txt ├── test_async.py ├── test_sync.py └── utils.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - '**' 9 | pull_request: {} 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: set up python 19 | uses: actions/setup-python@v1 20 | with: 21 | python-version: '3.11' 22 | 23 | - name: install dependencies 24 | run: | 25 | sudo apt install libjpeg62 libc6 26 | make install 27 | pip freeze 28 | 29 | - name: test 30 | run: | 31 | chmod +x /home/runner/work/pydf/pydf/pydf/bin/wkhtmltopdf 32 | make lint 33 | make test 34 | make benchmark 35 | python -c "import pydf; print(pydf.get_version())" 36 | 37 | - name: codecov 38 | run: bash <(curl -s https://codecov.io/bash) 39 | 40 | deploy: 41 | needs: 42 | - test 43 | if: "success() && startsWith(github.ref, 'refs/tags/')" 44 | runs-on: ubuntu-latest 45 | 46 | steps: 47 | - uses: actions/checkout@v2 48 | 49 | - name: set up python 50 | uses: actions/setup-python@v1 51 | with: 52 | python-version: '3.11' 53 | 54 | - name: install 55 | run: | 56 | pip install -U pip setuptools wheel twine 57 | pip install . 
58 | - name: set version 59 | run: VERSION_PATH='pydf/version.py' python <(curl -Ls https://git.io/JT3rm) 60 | 61 | - run: python setup.py sdist bdist_wheel 62 | 63 | - run: twine check dist/* 64 | 65 | - name: upload to pypi 66 | run: twine upload dist/* 67 | env: 68 | TWINE_USERNAME: __token__ 69 | TWINE_PASSWORD: ${{ secrets.pypi_token }} 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | dist/ 6 | *.egg-info/ 7 | .installed.cfg 8 | *.egg 9 | .idea/ 10 | .vscode/ 11 | build 12 | env 13 | .coverage 14 | .cache/ 15 | benchmark/pdf_cache/ 16 | benchmark/output/ 17 | htmlcov/ 18 | *.DS_Store 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | LABEL maintainer='s@muelcolvin.com' 4 | 5 | RUN pip install aiohttp==3.7.3 6 | ADD ./pydf /pydf 7 | ADD setup.py / 8 | RUN pip install -e . 
9 | 10 | ADD ./docker-entrypoint.py / 11 | ENTRYPOINT ["/docker-entrypoint.py"] 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014, 2016 Samuel Colvin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install 2 | install: 3 | pip install -U setuptools pip 4 | pip install -U . 
5 | pip install -r tests/requirements.txt 6 | 7 | .PHONY: format 8 | format: 9 | ruff check --fix pydf tests 10 | ruff format pydf tests 11 | 12 | .PHONY: lint 13 | lint: 14 | python setup.py check -rms 15 | ruff check pydf tests 16 | ruff format --check pydf tests 17 | 18 | .PHONY: test 19 | test: 20 | pytest --cov=pydf 21 | 22 | .PHONY: testcov 23 | testcov: 24 | pytest --cov=pydf && (echo "building coverage html"; coverage html) 25 | 26 | .PHONY: all 27 | all: testcov lint 28 | 29 | .PHONY: clean 30 | clean: 31 | rm -rf `find . -name __pycache__` 32 | rm -f `find . -type f -name '*.py[co]' ` 33 | rm -f `find . -type f -name '*~' ` 34 | rm -f `find . -type f -name '.*~' ` 35 | rm -rf .cache 36 | rm -rf htmlcov 37 | rm -rf *.egg-info 38 | rm -f .coverage 39 | rm -f .coverage.* 40 | rm -rf build 41 | python setup.py clean 42 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pydf 2 | ==== 3 | 4 | 5 | |BuildStatus| |codecov| |PyPI| |license| |docker| 6 | 7 | PDF generation in python using 8 | `wkhtmltopdf <https://wkhtmltopdf.org>`__. 9 | 10 | Wkhtmltopdf binaries are precompiled and included in the package, making 11 | pydf easier to use; in particular this means pydf works on Heroku. 12 | 13 | Currently using **wkhtmltopdf 0.12.6.1 r3 for Ubuntu 22.04 (jammy)**, requires **Python 3.6+**. 14 | 15 | **If you're not on Linux amd64:** pydf comes bundled with a wkhtmltopdf binary which will only work on Linux amd64 16 | architectures. If you're on another OS or architecture your mileage may vary; it is likely that you'll need to supply 17 | your own wkhtmltopdf binary and point pydf towards it by setting the ``WKHTMLTOPDF_PATH`` environment variable. 18 | 19 | Install 20 | ------- 21 | 22 | .. code:: shell 23 | 24 | pip install python-pdf 25 | 26 | Basic Usage 27 | ----------- 28 | 29 | .. code:: python 30 | 31 | import pydf 32 | pdf = pydf.generate_pdf('

this is html

') 33 | with open('test_doc.pdf', 'wb') as f: 34 | f.write(pdf) 35 | 36 | Async Usage 37 | ----------- 38 | 39 | Generation of lots of documents with wkhtmltopdf can be slow as wkhtmltopdf can only generate one document 40 | per process. To get round this pydf uses python 3's asyncio ``create_subprocess_exec`` to generate multiple pdfs 41 | at the same time. Thus the time taken to spin up processes doesn't slow you down. 42 | 43 | .. code:: python 44 | 45 | from pathlib import Path 46 | from pydf import AsyncPydf 47 | 48 | async def generate_async(): 49 | apydf = AsyncPydf() 50 | 51 | async def gen(i): 52 | pdf_content = await apydf.generate_pdf('

this is html

') 53 | Path(f'output_{i:03}.pdf').write_bytes(pdf_content) 54 | 55 | coros = [gen(i) for i in range(50)] 56 | await asyncio.gather(*coros) 57 | 58 | loop = asyncio.get_event_loop() 59 | loop.run_until_complete(generate_async()) 60 | 61 | 62 | See `benchmark/run.py <https://github.com/tutorcruncher/pydf/blob/master/benchmark/run.py>`__ 63 | for a full example. 64 | 65 | Locally generating an entire invoice goes from 0.372s/pdf to 0.035s/pdf with the async model. 66 | 67 | Docker 68 | ------ 69 | 70 | pydf is available as a docker image with a very simple http API for generating pdfs. 71 | 72 | Simply ``POST`` (or ``GET`` with data if possible) your HTML data to ``/generate.pdf``. 73 | 74 | Arguments can be passed using http headers; any header starting ``pdf-`` or ``pdf_`` will 75 | have that prefix removed, be converted to lower case and passed to wkhtmltopdf. 76 | 77 | For example: 78 | 79 | .. code:: shell 80 | 81 | docker run --rm -p 8000:80 -d samuelcolvin/pydf 82 | curl -d '

this is html

' -H "pdf-orientation: landscape" http://localhost:8000/generate.pdf > created.pdf 83 | open "created.pdf" 84 | 85 | In docker compose: 86 | 87 | .. code:: yaml 88 | 89 | services: 90 | pdf: 91 | image: samuelcolvin/pydf 92 | 93 | Other services can then generate PDFs by making requests to ``pdf/generate.pdf``. Pretty cool. 94 | 95 | API 96 | --- 97 | 98 | **generate\_pdf(html, [\*\*kwargs])** 99 | 100 | Generate a pdf from a html string. 101 | 102 | After the html argument all other arguments are passed straight 103 | to wkhtmltopdf. 104 | 105 | For details on extra arguments see the output of get\_help() and 106 | get\_extended\_help(). 107 | 108 | All arguments, whether specified or caught with extra\_kwargs, are 109 | converted to command line args with ``'--' + original_name.replace('_', '-')``. 110 | 111 | Arguments which are True are passed with no value, e.g. just --quiet; 112 | False and None arguments are omitted; everything else is passed with 113 | str(value). 114 | 115 | **Arguments:** 116 | 117 | - ``html``: html string to generate pdf from 118 | - ``quiet``: bool 119 | - ``grayscale``: bool 120 | - ``lowquality``: bool 121 | - ``margin_bottom``: string e.g. 10mm 122 | - ``margin_left``: string e.g. 10mm 123 | - ``margin_right``: string e.g. 10mm 124 | - ``margin_top``: string e.g. 10mm 125 | - ``orientation``: Portrait or Landscape 126 | - ``page_height``: string e.g. 10mm 127 | - ``page_width``: string e.g. 10mm 128 | - ``page_size``: string: A4, Letter, etc. 
129 | - ``image_dpi``: int default 600 130 | - ``image_quality``: int default 94 131 | - ``extra_kwargs``: any exotic extra options for wkhtmltopdf 132 | 133 | Returns bytes representing the pdf 134 | 135 | **get\_version()** 136 | 137 | Get version of pydf and wkhtmltopdf binary 138 | 139 | **get\_help()** 140 | 141 | get help string from wkhtmltopdf binary, uses the -h command line option 142 | 143 | **get\_extended\_help()** 144 | 145 | get extended help string from wkhtmltopdf binary, uses the -H command line 146 | option 147 | 148 | **execute\_wk(\*args)** 149 | 150 | Low level function to call wkhtmltopdf; arguments are added to the 151 | wkhtmltopdf binary and passed to subprocess with no processing. 152 | 153 | .. |BuildStatus| image:: https://travis-ci.org/tutorcruncher/pydf.svg?branch=master 154 | :target: https://travis-ci.org/tutorcruncher/pydf 155 | .. |codecov| image:: https://codecov.io/github/tutorcruncher/pydf/coverage.svg?branch=master 156 | :target: https://codecov.io/github/tutorcruncher/pydf?branch=master 157 | .. |PyPI| image:: https://img.shields.io/pypi/v/python-pdf.svg?style=flat 158 | :target: https://pypi.python.org/pypi/python-pdf 159 | .. |license| image:: https://img.shields.io/pypi/l/python-pdf.svg 160 | :target: https://github.com/tutorcruncher/pydf 161 | .. |docker| image:: https://img.shields.io/docker/automated/samuelcolvin/pydf.svg 162 | :target: https://hub.docker.com/r/samuelcolvin/pydf/ 163 | 164 | 165 | Heroku 166 | ------- 167 | 168 | If you are deploying onto Heroku, then you will need to install a couple of dependencies before wkhtmltopdf will work. 169 | 170 | Add the Heroku buildpack `https://buildpack-registry.s3.amazonaws.com/buildpacks/heroku-community/apt.tgz` 171 | 172 | Then create an `Aptfile` in your root directory with the dependencies: 173 | 174 | .. 
code::shell 175 | libjpeg62 176 | libc6 177 | -------------------------------------------------------------------------------- /benchmark/invoice.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Invoice INV-123 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 |
15 |

16 | Summary 17 |

18 |

19 | Invoice INV-1, Page 1 of 2 20 |
21 | Date: !TODAY! 22 |

23 |
24 |
25 | 26 |
27 |
28 |
29 |
30 | Jane cli_a
31 | cli_a House, Any Street
32 | cli_aville
33 | United Kingdom
34 | PO37 50DE
35 | 01264 730 666
36 | testing+cli_a@tutorcruncher.com 37 |
38 |
39 | XX branch display name XX
40 | XX branch street XX
41 | XX branch town XX
42 | XX branch country XX
43 | XX branch pc XX
44 | - 45 |
46 |
47 | 48 | 49 |
50 |
51 |

52 |

We would recommend that you check the hours on the attached invoices to make sure they correspond with the service you have been provided. If you think there is a discrepancy in the invoice please contact our accounts department before you settle the bill. We would ask you to settle the invoice within 7 days.

53 | 54 |

55 | Summary of invoices payable for the period 56 |
57 |
58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 |
Work: total of contractors' invoices£100.00
AMOUNT DUE FOR PAYMENT£100.00
71 | 72 |
73 |

74 | This is a summary. For invoice breakdown please see individual PDFs also attached. 75 |

76 |
77 |
78 |
79 |

Please quote reference INV-1 with your payment

80 | 81 |
82 |

Terms of payment - 10 days from invoice date.

83 | 84 |

Please pay using the link in the email sent with this invoice.

85 | 86 |
87 |
88 |
89 |
90 | 91 | 92 | 93 |
94 |
95 |

96 | Invoice 97 |

98 | 99 |

100 | Invoice INV-1, Page 2 of 2 101 |
102 | Date: !TODAY! 103 |

104 |
105 |
106 | 107 |
108 |
109 | 110 |
111 |
112 | Jane cli_a
113 | cli_a House, Any Street
114 | cli_aville
115 | United Kingdom
116 | PO37 50DE
117 | 01264 730 666 118 |
119 |
120 | Jane con_a
121 | con_a House, Any Street
122 | con_aville
123 | United Kingdom
124 | PO37 50DE 125 |
126 |
127 | 128 | 129 | 130 | 131 | 134 | 137 | 140 | 143 | 144 | 145 | 146 | 147 | 151 | 155 | 156 | 157 | 158 | 159 |
132 | Date 133 | 135 | Item Description 136 | 138 | Units 139 | 141 | Amount 142 |
148 | !DATE!
149 | 12:00 150 |
152 | XX appointment topic XX
153 | Service #123
Appointment #123 154 |
1 unit£100.00
160 | 161 |
162 | 163 |
164 |

Total:

165 |
166 | 167 |
168 | 169 |

£100.00

170 |
171 |
172 | 173 |
174 | 175 |
176 |
177 |

To ease administration XX branch display name XX collects fees on behalf of tutors. Please make a single payment to XX branch display name XX for the total amount shown on the statement page of this PDF and if paying by BACS or cheque please use the reference number supplied on this statement.

178 | 179 |
180 |
181 |
182 | 183 | 184 | -------------------------------------------------------------------------------- /benchmark/run.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | from time import time 4 | 5 | from pydf import AsyncPydf, generate_pdf 6 | 7 | 8 | THIS_DIR = Path(__file__).parent.resolve() 9 | html = (THIS_DIR / 'invoice.html').read_text() 10 | OUT_DIR = THIS_DIR / 'output' 11 | if not OUT_DIR.exists(): 12 | Path.mkdir(OUT_DIR) 13 | 14 | 15 | def go_sync(): 16 | count = 10 17 | for i in range(count): 18 | pdf = generate_pdf( 19 | html, 20 | page_size='A4', 21 | zoom='1.25', 22 | margin_left='8mm', 23 | margin_right='8mm', 24 | ) 25 | print(f'{i:03}: {len(pdf)}') 26 | file = OUT_DIR / f'output_{i:03}.pdf' 27 | file.write_bytes(pdf) 28 | return count 29 | 30 | start = time() 31 | count = go_sync() 32 | time_taken = (time() - start) / count 33 | print(f'sync, time taken per pdf: {time_taken:0.3f}s') 34 | 35 | async def go_async(): 36 | apydf = AsyncPydf() 37 | 38 | async def gen(i_): 39 | pdf = await apydf.generate_pdf( 40 | html, 41 | page_size='A4', 42 | zoom='1.25', 43 | margin_left='8mm', 44 | margin_right='8mm', 45 | ) 46 | print(f'{i_:03}: {len(pdf)}') 47 | f = OUT_DIR / f'output_{i_:03}.pdf' 48 | f.write_bytes(pdf) 49 | 50 | count = 20 51 | coros = map(gen, range(count)) 52 | await asyncio.gather(*coros) 53 | return count 54 | 55 | 56 | start = time() 57 | loop = asyncio.get_event_loop() 58 | count = loop.run_until_complete(go_async()) 59 | time_taken = (time() - start) / count 60 | print(f'async time taken per pdf: {time_taken:0.3f}s') 61 | -------------------------------------------------------------------------------- /docker-entrypoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.8 2 | """ 3 | pydf 4 | 5 | pdf generation in docker. 
6 | 7 | To generate PDF POST (or GET with data if possible) you HTML data to /generate.pdf. 8 | 9 | Extra arguments can be passed using http headers; any header starting "pdf-" or "pdf_" will 10 | have that prefix removed, be converted to lower case and passed to wkhtmltopdf. 11 | 12 | For example: 13 | 14 | docker run -rm -p 8000:80 -d samuelcolvin/pydf 15 | curl -d '

this is html

' -H "pdf-orientation: landscape" http://localhost:8000/generate.pdf > created.pdf 16 | open "created.pdf" 17 | """ 18 | import os 19 | import logging 20 | from time import time 21 | 22 | from aiohttp import web 23 | from pydf import AsyncPydf 24 | 25 | logger = logging.getLogger('main') 26 | logger.setLevel(logging.INFO) 27 | handler = logging.StreamHandler() 28 | handler.setLevel(logging.INFO) 29 | logger.addHandler(handler) 30 | 31 | 32 | async def index(request): 33 | return web.Response(text=__doc__) 34 | 35 | 36 | async def generate(request): 37 | start = time() 38 | config = {} 39 | for k, v in request.headers.items(): 40 | if k.startswith('Pdf-') or k.startswith('Pdf_'): 41 | config[k[4:].lower()] = v 42 | data = await request.read() 43 | if not data: 44 | logger.info('Request with no body data') 45 | raise web.HTTPBadRequest(text='400: no HTML data to convert to PDF in request body\n') 46 | try: 47 | pdf_content = await app['apydf'].generate_pdf(data.decode(), **config) 48 | except RuntimeError as e: 49 | logger.info('Error generating PDF, time %0.2fs, config: %s', time() - start, config) 50 | return web.Response(text=str(e) + '\n', status=418) 51 | else: 52 | logger.info('PDF generated in %0.2fs, html-len %d, pdf-len %d', time() - start, len(data), len(pdf_content)) 53 | return web.Response(body=pdf_content, content_type='application/pdf') 54 | 55 | app = web.Application() 56 | app.router.add_get('/', index) 57 | app.router.add_route('*', '/generate.pdf', generate) 58 | app['apydf'] = AsyncPydf() 59 | 60 | port = int(os.getenv('PORT', '80')) 61 | logger.info('starting pydf server on port %s', port) 62 | web.run_app(app, port=port, print=lambda v: None) 63 | -------------------------------------------------------------------------------- /pydf/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import VERSION # noqa 2 | from .wkhtmltopdf import * # noqa 3 | 
-------------------------------------------------------------------------------- /pydf/bin/wkhtmltopdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tutorcruncher/pydf/7dee5b58a522e096c712e2b0dce38644a1514ba7/pydf/bin/wkhtmltopdf -------------------------------------------------------------------------------- /pydf/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.40.0' 2 | -------------------------------------------------------------------------------- /pydf/wkhtmltopdf.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import subprocess 4 | import tempfile 5 | from pathlib import Path 6 | 7 | from .version import VERSION 8 | 9 | __all__ = [ 10 | 'AsyncPydf', 11 | 'generate_pdf', 12 | 'get_version', 13 | 'get_help', 14 | 'get_extended_help', 15 | ] 16 | 17 | THIS_DIR = Path(__file__).parent.resolve() 18 | WK_PATH = os.getenv('WKHTMLTOPDF_PATH', str(THIS_DIR / 'bin' / 'wkhtmltopdf')) 19 | DFT_CACHE_DIR = Path(tempfile.gettempdir()) / 'pydf_cache' 20 | 21 | 22 | def _execute_wk(*args, input=None): 23 | """ 24 | Generate path for the wkhtmltopdf binary and execute command. 
25 | 26 | :param args: args to pass straight to subprocess.Popen 27 | :return: stdout, stderr 28 | """ 29 | wk_args = (WK_PATH,) + args 30 | return subprocess.run(wk_args, input=input, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 31 | 32 | 33 | def _convert_args(**py_args): 34 | cmd_args = [] 35 | for name, value in py_args.items(): 36 | if value in {None, False}: 37 | continue 38 | arg_name = '--' + name.replace('_', '-') 39 | if value is True: 40 | cmd_args.append(arg_name) 41 | else: 42 | cmd_args.extend([arg_name, str(value)]) 43 | 44 | # read from stdin and write to stdout 45 | cmd_args.extend(['-', '-']) 46 | return cmd_args 47 | 48 | 49 | class AsyncPydf: 50 | def __init__(self, *, max_processes=20, cache_dir=DFT_CACHE_DIR): 51 | self.semaphore = asyncio.Semaphore(value=max_processes) 52 | if not cache_dir.exists(): 53 | Path.mkdir(cache_dir) 54 | self.cache_dir = cache_dir 55 | 56 | async def generate_pdf(self, html, **cmd_args): 57 | cmd_args = [WK_PATH] + _convert_args(cache_dir=self.cache_dir, **cmd_args) 58 | async with self.semaphore: 59 | p = await asyncio.create_subprocess_exec( 60 | *cmd_args, 61 | stdin=asyncio.subprocess.PIPE, 62 | stdout=asyncio.subprocess.PIPE, 63 | stderr=asyncio.subprocess.PIPE, 64 | ) 65 | p.stdin.write(html.encode()) 66 | p.stdin.close() 67 | await p.wait() 68 | pdf_content = await p.stdout.read() 69 | if p.returncode != 0 and pdf_content[:4] != b'%PDF': 70 | stderr = await p.stderr.read() 71 | raise RuntimeError( 72 | 'error running wkhtmltopdf, command: {!r}\nresponse: "{}"'.format(cmd_args, stderr.decode().strip()) 73 | ) 74 | return pdf_content 75 | 76 | 77 | def generate_pdf( 78 | html, 79 | *, 80 | cache_dir: Path = DFT_CACHE_DIR, 81 | grayscale: bool = False, 82 | lowquality: bool = False, 83 | margin_bottom: str = None, 84 | margin_left: str = None, 85 | margin_right: str = None, 86 | margin_top: str = None, 87 | orientation: str = None, 88 | page_height: str = None, 89 | page_width: str = None, 90 | page_size: 
str = None, 91 | image_dpi: str = None, 92 | image_quality: str = None, 93 | **extra_kwargs, 94 | ): 95 | """ 96 | Generate a pdf from either a url or a html string. 97 | 98 | After the html and url arguments all other arguments are 99 | passed straight to wkhtmltopdf 100 | 101 | For details on extra arguments see the output of get_help() 102 | and get_extended_help() 103 | 104 | All arguments whether specified or caught with extra_kwargs are converted 105 | to command line args with "'--' + original_name.replace('_', '-')" 106 | 107 | Arguments which are True are passed with no value eg. just --quiet, False 108 | and None arguments are missed, everything else is passed with str(value). 109 | 110 | :param html: html string to generate pdf from 111 | :param grayscale: bool 112 | :param lowquality: bool 113 | :param margin_bottom: string eg. 10mm 114 | :param margin_left: string eg. 10mm 115 | :param margin_right: string eg. 10mm 116 | :param margin_top: string eg. 10mm 117 | :param orientation: Portrait or Landscape 118 | :param page_height: string eg. 10mm 119 | :param page_width: string eg. 10mm 120 | :param page_size: string: A4, Letter, etc. 
121 | :param image_dpi: int default 600 122 | :param image_quality: int default 94 123 | :param extra_kwargs: any exotic extra options for wkhtmltopdf 124 | :return: string representing pdf 125 | """ 126 | if not cache_dir.exists(): 127 | Path.mkdir(cache_dir) 128 | 129 | py_args = dict( 130 | cache_dir=cache_dir, 131 | grayscale=grayscale, 132 | lowquality=lowquality, 133 | margin_bottom=margin_bottom, 134 | margin_left=margin_left, 135 | margin_right=margin_right, 136 | margin_top=margin_top, 137 | orientation=orientation, 138 | page_height=page_height, 139 | page_width=page_width, 140 | page_size=page_size, 141 | image_dpi=image_dpi, 142 | image_quality=image_quality, 143 | ) 144 | py_args.update(extra_kwargs) 145 | cmd_args = _convert_args(**py_args) 146 | 147 | p = _execute_wk(*cmd_args, input=html.encode()) 148 | pdf_content = p.stdout 149 | 150 | # it seems wkhtmltopdf's error codes can be false, we'll ignore them if we 151 | # seem to have generated a pdf 152 | if p.returncode != 0 and pdf_content[:4] != b'%PDF': 153 | raise RuntimeError( 154 | 'error running wkhtmltopdf, command: {!r}\n' 'response: "{}"'.format(cmd_args, p.stderr.decode().strip()) 155 | ) 156 | return pdf_content 157 | 158 | 159 | def _string_execute(*args): 160 | return _execute_wk(*args).stdout.decode().strip(' \n') 161 | 162 | 163 | def get_version(): 164 | """ 165 | Get version of pydf and wkhtmltopdf binary 166 | 167 | :return: version string 168 | """ 169 | try: 170 | wk_version = _string_execute('-V') 171 | except Exception as e: 172 | # we catch all errors here to make sure we get a version no matter what 173 | wk_version = '%s: %s' % (e.__class__.__name__, e) 174 | return 'pydf version: %s\nwkhtmltopdf version: %s' % (VERSION, wk_version) 175 | 176 | 177 | def get_help(): 178 | """ 179 | get help string from wkhtmltopdf binary 180 | uses -h command line option 181 | 182 | :return: help string 183 | """ 184 | return _string_execute('-h') 185 | 186 | 187 | def get_extended_help(): 
188 | """ 189 | get extended help string from wkhtmltopdf binary 190 | uses -H command line option 191 | 192 | :return: extended help string 193 | """ 194 | return _string_execute('-H') 195 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | line-length = 120 3 | 4 | [tool.ruff.format] 5 | quote-style = 'single' 6 | 7 | [tool.ruff.lint] 8 | extend-select = ['I'] 9 | ignore = ['E402'] 10 | 11 | [tool.ruff.lint.mccabe] 12 | max-complexity = 12 13 | 14 | [tool.ruff.lint.isort] 15 | combine-as-imports = true 16 | 17 | [tool.pytest] 18 | testpaths = 'tests' 19 | timeout = 10 20 | 21 | [tool.coverage.run] 22 | source = ['pdf'] 23 | branch = true 24 | 25 | [tool.coverage.report] 26 | precision = 2 27 | exclude_lines = [ 28 | 'pragma: no cover', 29 | 'raise NotImplementedError', 30 | ] 31 | 32 | [tool.distutils.bdist_wheel] 33 | python-tag = 'py311' 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from importlib.machinery import SourceFileLoader 3 | from pathlib import Path 4 | 5 | from setuptools import setup 6 | from setuptools.command.install import install 7 | 8 | # avoid loading the package before requirements are installed: 9 | version = SourceFileLoader('version', 'pydf/version.py').load_module() 10 | 11 | description = 'PDF generation in python using wkhtmltopdf suitable for heroku' 12 | THIS_DIR = Path(__file__).resolve().parent 13 | readme_path = THIS_DIR / 'README.rst' 14 | if readme_path.exists(): 15 | long_description = readme_path.read_text() 16 | else: 17 | long_description = description 18 | 19 | 20 | class OverrideInstall(install): 21 | def run(self): 22 | install.run(self) 23 | for filepath in self.get_outputs(): 24 | if filepath.endswith('pydf/bin/wkhtmltopdf'): 
25 | os.chmod(filepath, 0o775) 26 | 27 | 28 | setup( 29 | name='python-pdf', 30 | cmdclass={'install': OverrideInstall}, 31 | version=version.VERSION, 32 | description=description, 33 | long_description=long_description, 34 | author='Samuel Colvin', 35 | license='MIT', 36 | author_email='s@muelcolvin.com', 37 | url='https://github.com/tutorcruncher/pydf', 38 | packages=['pydf'], 39 | platforms='any', 40 | package_data={'pydf': ['bin/*']}, 41 | classifiers=[ 42 | 'Development Status :: 5 - Production/Stable', 43 | 'Environment :: Web Environment', 44 | 'Intended Audience :: Developers', 45 | 'License :: OSI Approved :: MIT License', 46 | 'Programming Language :: Python', 47 | 'Programming Language :: Python :: 3', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Topic :: Internet :: WWW/HTTP', 50 | ], 51 | zip_safe=False 52 | ) 53 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tutorcruncher/pydf/7dee5b58a522e096c712e2b0dce38644a1514ba7/tests/__init__.py -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | coverage==7.4.4 2 | docutils==0.20.1 3 | pdfminer.six==20231228 4 | Pygments==2.17.2 5 | pytest==8.1.1 6 | pytest-cov==5.0.0 7 | pytest-sugar==1.0.0 8 | pytest-timeout==2.3.1 9 | ruff==0.3.4 10 | -------------------------------------------------------------------------------- /tests/test_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | import pytest 4 | 5 | from pydf import AsyncPydf 6 | 7 | from .utils import pdf_text 8 | 9 | 10 | def test_async_pdf_gen(): 11 | apydf = AsyncPydf() 12 | loop = asyncio.get_event_loop() 13 | 14 | pdf_content = loop.run_until_complete(apydf.generate_pdf('Is 
this thing on?')) 15 | assert pdf_content[:4] == b'%PDF' 16 | text = pdf_text(pdf_content) 17 | assert 'Is this thing on?\n\n\x0c' == text 18 | 19 | 20 | def test_invalid_argument(): 21 | apydf = AsyncPydf() 22 | loop = asyncio.get_event_loop() 23 | with pytest.raises(RuntimeError) as exc_info: 24 | loop.run_until_complete(apydf.generate_pdf('hello', foobar='broken')) 25 | assert 'error running wkhtmltopdf, command' in str(exc_info) 26 | -------------------------------------------------------------------------------- /tests/test_sync.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pydf import generate_pdf, get_extended_help, get_help, get_version 4 | 5 | from .utils import pdf_text 6 | 7 | 8 | def test_generate_pdf_with_html(): 9 | pdf_content = generate_pdf('Is this thing on?') 10 | assert pdf_content[:4] == b'%PDF' 11 | text = pdf_text(pdf_content) 12 | assert 'Is this thing on?\n\n\x0c' == text 13 | 14 | 15 | def test_pdf_title(): 16 | pdf_content = generate_pdf('the titlehello') 17 | assert pdf_content[:4] == b'%PDF' 18 | text = pdf_text(pdf_content) 19 | title = 'the title'.encode('utf-16be') 20 | assert b'\n/Title (\xfe\xff%s)\n' % title in pdf_content 21 | assert 'hello\n\n\x0c' == text 22 | 23 | 24 | def test_unicode(): 25 | pdf_content = generate_pdf('Schrödinger') 26 | assert pdf_content[:4] == b'%PDF' 27 | 28 | 29 | def test_extra_arguments(): 30 | pdf_content = generate_pdf( 31 | 'testing', 32 | quiet=False, 33 | grayscale=True, 34 | lowquality=True, 35 | margin_bottom='20mm', 36 | margin_left='20mm', 37 | margin_right='20mm', 38 | margin_top='20mm', 39 | orientation='Landscape', 40 | page_height=None, 41 | page_width=None, 42 | page_size='Letter', 43 | image_dpi='300', 44 | image_quality='70', 45 | ) 46 | assert pdf_content[:4] == b'%PDF' 47 | 48 | 49 | def test_custom_size(): 50 | pdf_content = generate_pdf( 51 | 'testing', 52 | page_height='50mm', 53 | page_width='50mm', 54 | ) 55 | 
assert pdf_content[:4] == b'%PDF' 56 | 57 | 58 | def test_extra_kwargs(): 59 | pdf_content = generate_pdf('testing', header_right='Page [page] of [toPage]') 60 | assert pdf_content[:4] == b'%PDF' 61 | 62 | 63 | def test_bad_arguments(): 64 | with pytest.raises(RuntimeError) as exc_info: 65 | generate_pdf('hello', foobar='broken') 66 | assert 'error running wkhtmltopdf, command' in str(exc_info) 67 | 68 | 69 | def test_get_version(): 70 | print(get_version()) 71 | 72 | 73 | def test_get_help(): 74 | get_help() 75 | 76 | 77 | def test_get_extended_help(): 78 | get_extended_help() 79 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO, StringIO 2 | 3 | import pdfminer.layout 4 | from pdfminer import high_level 5 | 6 | 7 | def pdf_text(pdf_data: bytes) -> str: 8 | laparams = pdfminer.layout.LAParams() 9 | output = StringIO() 10 | high_level.extract_text_to_fp(BytesIO(pdf_data), output, laparams=laparams) 11 | return output.getvalue() 12 | --------------------------------------------------------------------------------