├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.rst
├── benchmark
├── invoice.html
└── run.py
├── docker-entrypoint.py
├── pydf
├── __init__.py
├── bin
│ └── wkhtmltopdf
├── version.py
└── wkhtmltopdf.py
├── pyproject.toml
├── setup.py
└── tests
├── __init__.py
├── requirements.txt
├── test_async.py
├── test_sync.py
└── utils.py
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | tags:
8 | - '**'
9 | pull_request: {}
10 |
11 | jobs:
12 | test:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 |
18 | - name: set up python
19 | uses: actions/setup-python@v1
20 | with:
21 | python-version: '3.11'
22 |
23 | - name: install dependencies
24 | run: |
25 | sudo apt install libjpeg62 libc6
26 | make install
27 | pip freeze
28 |
29 | - name: test
30 | run: |
31 | chmod +x /home/runner/work/pydf/pydf/pydf/bin/wkhtmltopdf
32 | make lint
33 | make test
34 | make benchmark
35 | python -c "import pydf; print(pydf.get_version())"
36 |
37 | - name: codecov
38 | run: bash <(curl -s https://codecov.io/bash)
39 |
40 | deploy:
41 | needs:
42 | - test
43 | if: "success() && startsWith(github.ref, 'refs/tags/')"
44 | runs-on: ubuntu-latest
45 |
46 | steps:
47 | - uses: actions/checkout@v2
48 |
49 | - name: set up python
50 | uses: actions/setup-python@v1
51 | with:
52 | python-version: '3.11'
53 |
54 | - name: install
55 | run: |
56 | pip install -U pip setuptools wheel twine
57 | pip install .
58 | - name: set version
59 | run: VERSION_PATH='pydf/version.py' python <(curl -Ls https://git.io/JT3rm)
60 |
61 | - run: python setup.py sdist bdist_wheel
62 |
63 | - run: twine check dist/*
64 |
65 | - name: upload to pypi
66 | run: twine upload dist/*
67 | env:
68 | TWINE_USERNAME: __token__
69 | TWINE_PASSWORD: ${{ secrets.pypi_token }}
70 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | dist/
6 | *.egg-info/
7 | .installed.cfg
8 | *.egg
9 | .idea/
10 | .vscode/
11 | build
12 | env
13 | .coverage
14 | .cache/
15 | benchmark/pdf_cache/
16 | benchmark/output/
17 | htmlcov/
18 | *.DS_Store
19 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8
2 |
3 | LABEL maintainer='s@muelcolvin.com'
4 |
5 | RUN pip install aiohttp==3.7.3
6 | ADD ./pydf /pydf
7 | ADD setup.py /
8 | RUN pip install -e .
9 |
10 | ADD ./docker-entrypoint.py /
11 | ENTRYPOINT ["/docker-entrypoint.py"]
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014, 2016 Samuel Colvin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: install
2 | install:
3 | pip install -U setuptools pip
4 | pip install -U .
5 | pip install -r tests/requirements.txt
6 |
7 | .PHONY: format
8 | format:
9 | ruff check --fix pydf tests
10 | ruff format pydf tests
11 |
12 | .PHONY: lint
13 | lint:
14 | python setup.py check -rms
15 | ruff check pydf tests
16 | ruff format --check pydf tests
17 |
18 | .PHONY: test
19 | test:
20 | pytest --cov=pydf
21 |
22 | .PHONY: testcov
23 | testcov:
24 | pytest --cov=pydf && (echo "building coverage html"; coverage html)
25 |
26 | .PHONY: all
27 | all: testcov lint
28 |
29 | .PHONY: clean
30 | clean:
31 | rm -rf `find . -name __pycache__`
32 | rm -f `find . -type f -name '*.py[co]' `
33 | rm -f `find . -type f -name '*~' `
34 | rm -f `find . -type f -name '.*~' `
35 | rm -rf .cache
36 | rm -rf htmlcov
37 | rm -rf *.egg-info
38 | rm -f .coverage
39 | rm -f .coverage.*
40 | rm -rf build
41 | python setup.py clean
42 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | pydf
2 | ====
3 |
4 |
5 | |BuildStatus| |codecov| |PyPI| |license| |docker|
6 |
7 | PDF generation in python using
8 | `wkhtmltopdf <https://wkhtmltopdf.org/>`__.
9 |
10 | Wkhtmltopdf binaries are precompiled and included in the package making
11 | pydf easier to use, in particular this means pydf works on heroku.
12 |
13 | Currently using **wkhtmltopdf 0.12.6.1 r3 for Ubuntu 22.04 (jammy)**, requires **Python 3.6+**.
14 |
15 | **If you're not on Linux amd64:** pydf comes bundled with a wkhtmltopdf binary which will only work on Linux amd64
16 | architectures. If you're on another OS or architecture your mileage may vary, it is likely that you'll need to supply
17 | your own wkhtmltopdf binary and point pydf towards it by setting the ``WKHTMLTOPDF_PATH`` environment variable.
18 |
19 | Install
20 | -------
21 |
22 | .. code:: shell
23 |
24 | pip install python-pdf
25 |
26 | Basic Usage
27 | -----------
28 |
29 | .. code:: python
30 |
31 | import pydf
32 | pdf = pydf.generate_pdf('<h1>this is html</h1>')
33 | with open('test_doc.pdf', 'wb') as f:
34 | f.write(pdf)
35 |
36 | Async Usage
37 | -----------
38 |
39 | Generation of lots of documents with wkhtmltopdf can be slow as wkhtmltopdf can only generate one document
40 | per process. To get round this pydf uses python 3's asyncio ``create_subprocess_exec`` to generate multiple pdfs
41 | at the same time. Thus the time taken to spin up processes doesn't slow you down.
42 |
43 | .. code:: python
44 |
45 | from pathlib import Path
46 | from pydf import AsyncPydf
47 |
48 | async def generate_async():
49 | apydf = AsyncPydf()
50 |
51 | async def gen(i):
52 | pdf_content = await apydf.generate_pdf('<h1>this is html</h1>')
53 | Path(f'output_{i:03}.pdf').write_bytes(pdf_content)
54 |
55 | coros = [gen(i) for i in range(50)]
56 | await asyncio.gather(*coros)
57 |
58 | loop = asyncio.get_event_loop()
59 | loop.run_until_complete(generate_async())
60 |
61 |
62 | See `benchmark/run.py <https://github.com/tutorcruncher/pydf/blob/master/benchmark/run.py>`__
63 | for a full example.
64 |
65 | Locally generating an entire invoice goes from 0.372s/pdf to 0.035s/pdf with the async model.
66 |
67 | Docker
68 | ------
69 |
70 | pydf is available as a docker image with a very simple http API for generating pdfs.
71 |
72 | Simply ``POST`` (or ``GET`` with data if possible) your HTML data to ``/generate.pdf``.
73 |
74 | Arguments can be passed using http headers; any header starting ``pdf-`` or ``pdf_`` will
75 | have that prefix removed, be converted to lower case and passed to wkhtmltopdf.
76 |
77 | For example:
78 |
79 | .. code:: shell
80 |
81 | docker run --rm -p 8000:80 -d samuelcolvin/pydf
82 | curl -d '<h1>this is html</h1>' -H "pdf-orientation: landscape" http://localhost:8000/generate.pdf > created.pdf
83 | open "created.pdf"
84 |
85 | In docker compose:
86 |
87 | .. code:: yaml
88 |
89 | services:
90 | pdf:
91 | image: samuelcolvin/pydf
92 |
93 | Other services can then generate PDFs by making requests to ``pdf/generate.pdf``. Pretty cool.
94 |
95 | API
96 | ---
97 |
98 | **generate\_pdf(source, [\*\*kwargs])**
99 |
100 | Generate a pdf from either a url or a html string.
101 |
102 | After the html and url arguments all other arguments are passed straight
103 | to wkhtmltopdf
104 |
105 | For details on extra arguments see the output of get\_help() and
106 | get\_extended\_help()
107 |
108 | All arguments whether specified or caught with extra\_kwargs are
109 | converted to command line args with ``'--' + original_name.replace('_', '-')``.
110 |
111 | Arguments which are True are passed with no value eg. just --quiet,
112 | False and None arguments are missed, everything else is passed with
113 | str(value).
114 |
115 | **Arguments:**
116 |
117 | - ``source``: html string to generate pdf from or url to get
118 | - ``quiet``: bool
119 | - ``grayscale``: bool
120 | - ``lowquality``: bool
121 | - ``margin_bottom``: string eg. 10mm
122 | - ``margin_left``: string eg. 10mm
123 | - ``margin_right``: string eg. 10mm
124 | - ``margin_top``: string eg. 10mm
125 | - ``orientation``: Portrait or Landscape
126 | - ``page_height``: string eg. 10mm
127 | - ``page_width``: string eg. 10mm
128 | - ``page_size``: string: A4, Letter, etc.
129 | - ``image_dpi``: int default 600
130 | - ``image_quality``: int default 94
131 | - ``extra_kwargs``: any exotic extra options for wkhtmltopdf
132 |
133 | Returns the generated PDF as bytes
134 |
135 | **get\_version()**
136 |
137 | Get version of pydf and wkhtmltopdf binary
138 |
139 | **get\_help()**
140 |
141 | get help string from wkhtmltopdf binary uses -h command line option
142 |
143 | **get\_extended\_help()**
144 |
145 | get extended help string from wkhtmltopdf binary uses -H command line
146 | option
147 |
148 | **execute\_wk(\*args)**
149 |
150 | Low level function to call wkhtmltopdf, arguments are added to
151 | wkhtmltopdf binary and passed to subprocess with no processing.
152 |
153 | .. |BuildStatus| image:: https://travis-ci.org/tutorcruncher/pydf.svg?branch=master
154 | :target: https://travis-ci.org/tutorcruncher/pydf
155 | .. |codecov| image:: https://codecov.io/github/tutorcruncher/pydf/coverage.svg?branch=master
156 | :target: https://codecov.io/github/tutorcruncher/pydf?branch=master
157 | .. |PyPI| image:: https://img.shields.io/pypi/v/python-pdf.svg?style=flat
158 | :target: https://pypi.python.org/pypi/python-pdf
159 | .. |license| image:: https://img.shields.io/pypi/l/python-pdf.svg
160 | :target: https://github.com/tutorcruncher/pydf
161 | .. |docker| image:: https://img.shields.io/docker/automated/samuelcolvin/pydf.svg
162 | :target: https://hub.docker.com/r/samuelcolvin/pydf/
163 |
164 |
165 | Heroku
166 | -------
167 |
168 | If you are deploying onto Heroku, then you will need to install a couple of dependencies before WKHTMLTOPDF will work.
169 |
170 | Add the Heroku buildpack ``https://buildpack-registry.s3.amazonaws.com/buildpacks/heroku-community/apt.tgz``
171 |
172 | Then create an ``Aptfile`` in your root directory with the dependencies:
173 |
174 | .. code:: shell
175 | libjpeg62
176 | libc6
177 |
--------------------------------------------------------------------------------
/benchmark/invoice.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Invoice INV-123
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
30 | Jane cli_a
31 | cli_a House, Any Street
32 | cli_aville
33 | United Kingdom
34 | PO37 50DE
35 | 01264 730 666
36 | testing+cli_a@tutorcruncher.com
37 |
38 |
39 | XX branch display name XX
40 | XX branch street XX
41 | XX branch town XX
42 | XX branch country XX
43 | XX branch pc XX
44 | -
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
We would recommend that you check the hours on the attached invoices to make sure they correspond with the service you have been provided. If you think there is a discrepancy in the invoice please contact our accounts department before you settle the bill. We would ask you to settle the invoice within 7 days.
53 |
54 |
55 | Summary of invoices payable for the period
56 |
57 |
58 |
59 |
60 |
61 |
62 |
Work: total of contractors' invoices
63 |
£100.00
64 |
65 |
66 |
AMOUNT DUE FOR PAYMENT
67 |
£100.00
68 |
69 |
70 |
71 |
72 |
73 |
74 | This is a summary. For invoice breakdown please see individual PDFs also attached.
75 |
76 |
77 |
78 |
79 |
Please quote reference INV-1 with your payment
80 |
81 |
82 |
Terms of payment - 10 days from invoice date.
83 |
84 |
Please pay using the link in the email sent with this invoice.
112 | Jane cli_a
113 | cli_a House, Any Street
114 | cli_aville
115 | United Kingdom
116 | PO37 50DE
117 | 01264 730 666
118 |
119 |
120 | Jane con_a
121 | con_a House, Any Street
122 | con_aville
123 | United Kingdom
124 | PO37 50DE
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 | Date
133 |
134 |
135 | Item Description
136 |
137 |
138 | Units
139 |
140 |
141 | Amount
142 |
143 |
144 |
145 |
146 |
147 |
148 | !DATE!
149 | 12:00
150 |
151 |
152 | XX appointment topic XX
153 | Service #123 Appointment #123
154 |
155 |
1 unit
156 |
£100.00
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
Total:
165 |
166 |
167 |
168 |
169 |
£100.00
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
To ease administration XX branch display name XX collects fees on behalf of tutors. Please make a single payment to XX branch display name XX for the total amount shown on the statement page of this PDF and if paying by BACS or cheque please use the reference number supplied on this statement.
178 |
179 |
180 |
181 |
182 |
183 |
184 |
--------------------------------------------------------------------------------
/benchmark/run.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pathlib import Path
3 | from time import time
4 |
5 | from pydf import AsyncPydf, generate_pdf
6 |
7 |
# Resolve paths relative to this script so the benchmark can be launched from
# any working directory.
THIS_DIR = Path(__file__).parent.resolve()
# HTML document rendered by both the sync and the async benchmark.
html = (THIS_DIR / 'invoice.html').read_text()
# Directory the generated PDFs are written to.
OUT_DIR = THIS_DIR / 'output'
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate exists() test.
OUT_DIR.mkdir(exist_ok=True)
13 |
14 |
def go_sync():
    """Render the invoice ten times, one after another, with ``generate_pdf``.

    For every run the size of the result is printed and the content is saved
    as ``output_NNN.pdf`` inside ``OUT_DIR``.

    Returns the number of PDFs generated, so the caller can average the
    elapsed time.
    """
    count = 10
    # Same wkhtmltopdf options for every document.
    options = dict(
        page_size='A4',
        zoom='1.25',
        margin_left='8mm',
        margin_right='8mm',
    )
    for index in range(count):
        pdf_bytes = generate_pdf(html, **options)
        print(f'{index:03}: {len(pdf_bytes)}')
        (OUT_DIR / f'output_{index:03}.pdf').write_bytes(pdf_bytes)
    return count
29 |
# Time the sequential benchmark and report the mean wall-clock time per PDF.
start = time()
count = go_sync()
elapsed = time() - start
time_taken = elapsed / count
print(f'sync, time taken per pdf: {time_taken:0.3f}s')
34 |
async def go_async():
    """Render the invoice twenty times concurrently through ``AsyncPydf``.

    All renders share one ``AsyncPydf`` instance and are awaited together
    with ``asyncio.gather``. Each result's size is printed and its content is
    saved as ``output_NNN.pdf`` inside ``OUT_DIR``.

    Returns the number of PDFs generated, so the caller can average the
    elapsed time.
    """
    count = 20
    renderer = AsyncPydf()

    async def render_one(index):
        # One render: generate, report the size, persist to disk.
        pdf_bytes = await renderer.generate_pdf(
            html,
            page_size='A4',
            zoom='1.25',
            margin_left='8mm',
            margin_right='8mm',
        )
        print(f'{index:03}: {len(pdf_bytes)}')
        (OUT_DIR / f'output_{index:03}.pdf').write_bytes(pdf_bytes)

    await asyncio.gather(*(render_one(index) for index in range(count)))
    return count
54 |
55 |
# Time the concurrent benchmark and report the mean wall-clock time per PDF.
start = time()
# Create and install the event loop explicitly: relying on
# asyncio.get_event_loop() to create one implicitly is deprecated and fails
# on the newest Python releases. Installing it as the current loop keeps
# code that looks the loop up itself working.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    count = loop.run_until_complete(go_async())
finally:
    # Release the loop's resources even if the benchmark raises.
    loop.close()
time_taken = (time() - start) / count
print(f'async time taken per pdf: {time_taken:0.3f}s')
61 |
--------------------------------------------------------------------------------
/docker-entrypoint.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.8
2 | """
3 | pydf
4 |
5 | pdf generation in docker.
6 |
7 | To generate PDF POST (or GET with data if possible) you HTML data to /generate.pdf.
8 |
9 | Extra arguments can be passed using http headers; any header starting "pdf-" or "pdf_" will
10 | have that prefix removed, be converted to lower case and passed to wkhtmltopdf.
11 |
12 | For example:
13 |
14 | docker run -rm -p 8000:80 -d samuelcolvin/pydf
15 | curl -d '