├── .github
└── workflows
│ ├── pypi.yml
│ └── quality.yml
├── .gitignore
├── .travis.yml
├── MANIFEST
├── MANIFEST.in
├── Out
├── some_file.pdf
├── test1.pdf
└── test1.txt
├── README.rst
├── licence.txt
├── pypdftk.py
├── setup.py
├── test.py
└── test_files
├── form-filled.json
├── form.json
├── form.pdf
├── page_01.pdf
├── python-guide.pdf
└── simple.xfdf
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
---
# Builds a source distribution and uploads it to PyPI.
# Runs when a GitHub release is created, or when triggered manually.
name: Upload Python Package

on:
  workflow_dispatch:
  release:
    types:
      - created

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install setuptools wheel twine

      - name: Build dist
        run: python setup.py sdist

      - name: pypi-publish
        uses: pypa/gh-action-pypi-publish@v1.4.2
        with:
          # API token stored as a repository secret.
          password: ${{ secrets.PYPI_TOKEN }}
          verify_metadata: false
          verbose: true
32 |
--------------------------------------------------------------------------------
/.github/workflows/quality.yml:
--------------------------------------------------------------------------------
---
# Lints the code base with flake8 and runs the test suite against every
# Python version in the matrix. pdftk is installed from the distribution
# packages because the tests drive the real binary.
name: Python quality

on:
  workflow_dispatch:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the versions stay strings and are never read as floats
        # (an unquoted 3.10 would silently become 3.1).
        python-version: ['2.7', '3.7']

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          # Refresh the package index first and never wait for a prompt.
          sudo apt-get update
          sudo apt-get install -y pdftk
          python -m pip install --upgrade pip
          python -m pip install flake8 pytest
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

      - name: Test with pytest
        run: |
          mkdir test-reports
          pytest --junitxml=test-reports/junit.xml test.py
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 |
21 | # Installer logs
22 | pip-log.txt
23 |
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 |
29 | # Translations
30 | *.mo
31 |
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
---
# Travis CI: install the pdftk binary, then run the test suite.
language: python
install:
  - sudo apt-get update
  # -y keeps the install non-interactive so the build can never hang on a prompt.
  - sudo apt-get install -y pdftk
  - pdftk --version
script: pytest test.py
--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | # file GENERATED by distutils, do NOT edit
2 | README.md
3 | licence.txt
4 | pypdftk.py
5 | setup.py
6 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst
2 | include pypdftk.py
3 | include licence.txt
4 | exclude *.pyc
--------------------------------------------------------------------------------
/Out/some_file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/Out/some_file.pdf
--------------------------------------------------------------------------------
/Out/test1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/Out/test1.pdf
--------------------------------------------------------------------------------
/Out/test1.txt:
--------------------------------------------------------------------------------
1 | test1
2 |
3 | value1 = 100000
4 | value2 = 200002
5 | value3 = 333003
6 | value4 = 444404
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | pypdftk |pypi| |travis| |githubactions|
2 | ===========================================
3 |
4 | Python module to drive the awesome `pdftk`_ binary.
5 |
6 | Proudly brought to you by many `awesome contributors`_
7 |
8 | Features
9 | --------
10 |
11 | ``fill_form``
12 | ~~~~~~~~~~~~~
13 |
14 | Fill a PDF with given data and returns the output PDF path
15 |
16 | - ``pdf_path`` : input PDF
17 | - ``datas`` : dictionary of field names / values
18 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if
19 | not provided
20 | - ``flatten`` (default=True) : flatten the final PDF
21 | - ``drop_xfa`` (default=False) : omit XFA data from the output PDF
22 |
23 | ``concat``
24 | ~~~~~~~~~~
25 |
26 | Merge multiple PDFs into one single file and returns the output PDF path
27 |
28 | - ``files`` : list of PDF files to concatenate
29 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if
30 | not provided
31 |
32 | ``get_pages``
33 | ~~~~~~~~~~~~~
34 |
35 | Concatenate a list of page ranges into one single file and returns the
36 | output PDF path
37 |
38 | - ``pdf_path`` : input PDF
39 | - ``ranges`` (default=\ ``[]``) : ``[]`` for clone, ``[[2]]`` for
40 | extracting 2nd page, ``[[1],[2,5],[3]]`` for concatenating pages 1,
41 | 2-5, 3
42 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if
43 | not provided
44 |
45 | ``split``
46 | ~~~~~~~~~
47 |
48 | Split a single PDF in many pages and return a list of pages paths
49 |
50 | - ``pdf_path`` : input PDF
51 | - ``out_dir`` (default=auto) : output PDFs dir. will use tempfile if
52 | not provided
53 |
54 | **warning** if you give an ``out_dir`` parameter, ensure it is empty, or the
55 | split function may destroy your files and return incorrect results.
56 |
57 | ``gen_xfdf``
58 | ~~~~~~~~~~~~
59 |
60 | Generate a XFDF file suited for filling PDF forms and return the
61 | generated XFDF file path
62 |
63 | - ``datas`` : dictionary of field names / values
64 |
65 | ``get_num_pages``
66 | ~~~~~~~~~~~~~~~~~
67 |
68 | Return the number of pages for a given PDF
69 |
70 | - ``pdf_path`` : input PDF file
71 |
72 | ``replace_page``
73 | ~~~~~~~~~~~~~~~~
74 |
75 | Replace a page in a PDF (pdf_path) by the PDF pointed by
76 | pdf_to_insert_path.
77 |
78 | - ``pdf_path`` is the PDF that will have its page replaced.
79 | - ``page_number`` is the number of the page in pdf_path to be replaced.
80 | It is 1-based.
81 | - ``pdf_to_insert_path`` is the PDF that will be inserted at the old
82 | page.
83 |
84 | ``stamp``
85 | ~~~~~~~~~
86 |
87 | Applies a stamp (from ``stamp_pdf_path``) to the PDF file in
88 | ``pdf_path``. If no ``output_pdf_path`` is provided, it returns a
89 | temporary file with the result PDF.
90 |
91 | ``[compress | uncompress]``
92 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
93 |
94 | ::
95 |
96 | These are only useful when you want to edit PDF code in a text
97 | editor like vim or emacs. Remove PDF page stream compression by
98 | applying the uncompress filter. Use the compress filter to
99 | restore compression.
100 |
101 | - ``pdf_path`` : input PDF file
102 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if
103 | not provided
104 | - ``flatten`` (default=True) : flatten the final PDF
105 |
106 | ``dump_data_fields``
107 | ~~~~~~~~~~~~~~~~~~~~
108 |
109 | Read PDF and output form field statistics.
110 |
111 | - ``pdf_path`` : input PDF file
112 |
113 |
114 | Example
115 | -------
116 |
117 | Fill a PDF model and add a cover page :
118 |
119 | .. code:: python
120 |
121 | import pypdftk
122 |
123 | datas = {
124 | 'firstname': 'Julien',
125 | 'company': 'revolunet',
126 | 'price': 42
127 | }
128 | generated_pdf = pypdftk.fill_form('/path/to/model.pdf', datas)
129 | out_pdf = pypdftk.concat(['/path/to/cover.pdf', generated_pdf])
130 |
131 | pdftk path
132 | ----------
133 |
134 | By default, path is ``/usr/bin/pdftk`` (falling back to ``pdftk`` on the ``PATH``
135 | if that file does not exist), but you can override it with the ``PDFTK_PATH`` environment variable
136 |
137 | Licence
138 | -------
139 |
140 | This module is released under the permissive `MIT license`_. Your
141 | contributions are always welcome.
142 |
143 | .. _pdftk: http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/
144 | .. _revolunet: http://revolunet.com
145 | .. _awesome contributors: https://github.com/revolunet/pypdftk/graphs/contributors
146 | .. _MIT license: http://revolunet.mit-license.org
147 |
148 | .. |pypi| image:: https://img.shields.io/pypi/v/pypdftk
149 | :target: https://pypi.org/project/pypdftk/
150 | .. |travis| image:: https://travis-ci.org/yguarata/pypdftk.svg?branch=master
151 | :target: https://travis-ci.org/yguarata/pypdftk
152 | .. |githubactions| image:: https://github.com/revolunet/pypdftk/actions/workflows/quality.yml/badge.svg
153 |
154 |
--------------------------------------------------------------------------------
/licence.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 1998, Regents of the University of California
2 | All rights reserved.
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are met:
5 |
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of the University of California, Berkeley nor the
12 | names of its contributors may be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/pypdftk.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: UTF-8 -*-
2 |
3 | ''' pypdftk
4 |
5 | Python module to drive the awesome pdftk binary.
6 | See http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/
7 |
8 | '''
9 |
10 | import logging
11 | import os
12 | import shutil
13 | import subprocess
14 | import tempfile
15 | import itertools
16 |
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Location of the pdftk executable, resolved in this order:
#   1. the PDFTK_PATH environment variable, when set to a non-empty value;
#   2. /usr/bin/pdftk, when that file exists;
#   3. the bare command name 'pdftk'.
PDFTK_PATH = os.getenv('PDFTK_PATH')
if not PDFTK_PATH:
    PDFTK_PATH = '/usr/bin/pdftk' if os.path.isfile('/usr/bin/pdftk') else 'pdftk'
25 |
26 |
def check_output(*popenargs, **kwargs):
    '''
    Run a command through subprocess.Popen and return what it wrote to stdout.

    All positional and keyword arguments are forwarded to Popen, except that
    ``stdout`` may not be supplied by the caller (ValueError).
    Raises subprocess.CalledProcessError, carrying the captured output, when
    the command exits with a non-zero status.
    '''
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    process = subprocess.Popen(*popenargs, stdout=subprocess.PIPE, **kwargs)
    output = process.communicate()[0]
    retcode = process.poll()
    if not retcode:
        return output
    # Report the command the same way Popen received it: the ``args`` keyword
    # if given, otherwise the first positional argument.
    cmd = kwargs.get("args")
    if cmd is None:
        cmd = popenargs[0]
    raise subprocess.CalledProcessError(retcode, cmd, output=output)
39 |
40 |
def run_command(command, shell=False):
    ''' Run a system command and return its stdout, decoded as UTF-8, as a list of lines. '''
    raw_output = check_output(command, shell=shell)
    text = raw_output.decode("utf-8")
    return text.splitlines()
45 |
# Probe the configured binary once at import time so that a missing pdftk
# is reported early instead of on the first real call.
try:
    run_command([PDFTK_PATH])
except OSError:
    # Use the module logger: the module-level logging.warning() logs on the
    # root logger and implicitly calls basicConfig() when no handler is set,
    # which a library should not do to the host application.
    log.warning('pdftk test call failed (PDFTK_PATH=%r).', PDFTK_PATH)
50 |
51 |
def get_num_pages(pdf_path):
    '''
    Return the page count of a PDF, read from the ``NumberOfPages`` line of
    ``pdftk dump_data``. Returns 0 when no such line is present.
    '''
    dump = run_command([PDFTK_PATH, pdf_path, 'dump_data'])
    for entry in dump:
        if not entry.lower().startswith('numberofpages'):
            continue
        return int(entry.split(':')[1])
    return 0
58 |
def get_pages(pdf_path, ranges=[], out_file=None):
    '''
    Concatenate a list of page ranges into one single file.

    pdf_path : input PDF
    ranges   : list of page ranges; each range is a sequence whose items are
               joined with "-" (``[2]`` -> "2", ``[2, 5]`` -> "2-5").
               An empty list clones the whole document.
    out_file : output PDF path; a temp file is created if not provided.

    Return the output path. If pdftk fails, the temp file created here is
    removed and the error is re-raised.
    '''
    cleanOnFail = False
    if not out_file:
        cleanOnFail = True
        handle, out_file = tempfile.mkstemp()
        # Only the reserved path is needed; close the descriptor right away so
        # it is never leaked (a falsy fd 0 included) and never held open while
        # pdftk writes to, or we delete, the file.
        os.close(handle)

    page_ranges = ["-".join(str(i) for i in page_range) for page_range in ranges]
    args = [PDFTK_PATH, pdf_path, 'cat'] + page_ranges + ['output', out_file]
    try:
        run_command(args)
    except BaseException:
        if cleanOnFail:
            os.remove(out_file)
        raise
    return out_file
85 |
86 |
def fill_form(pdf_path, datas={}, out_file=None, flatten=True, drop_xfa=False):
    '''
    Fill a PDF form with the given dict of input data.

    pdf_path : input PDF
    datas    : dict of field names / values, written to a temporary XFDF file
    out_file : output PDF path; a temp file is created if not provided
    flatten  : append pdftk's ``flatten`` option
    drop_xfa : append pdftk's ``drop_xfa`` option

    Return the output path. The temporary XFDF file is always removed; if
    pdftk fails, the temp output file created here is removed as well and the
    error is re-raised.
    '''
    cleanOnFail = False
    tmp_fdf = gen_xfdf(datas)
    try:
        if not out_file:
            cleanOnFail = True
            handle, out_file = tempfile.mkstemp()
            # Only the path is needed; do not keep the descriptor open.
            os.close(handle)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to pdftk untouched.
        args = [PDFTK_PATH, pdf_path, 'fill_form', tmp_fdf, 'output', out_file]
        if flatten:
            args.append('flatten')
        if drop_xfa:
            args.append('drop_xfa')
        try:
            run_command(args)
        except BaseException:
            # Remove the unusable output file. The XFDF file is handled by the
            # outer ``finally``; removing it here too would make that second
            # removal fail and hide the original error.
            if cleanOnFail:
                os.remove(out_file)
            raise
    finally:
        os.remove(tmp_fdf)
    return out_file
115 |
def dump_data_fields(pdf_path, add_id=False):
    '''
    Return a list of dicts, one per form field reported by
    ``pdftk dump_data_fields``.

    Each output line is split on the first ": " into key and value; lines
    without that separator delimit consecutive fields. If a key occurs several
    times within one field (like FieldStateOption), its value is a list.
    If add_id is True, each dict also gets an 'id' key holding the field's
    index in the output.
    '''
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to pdftk untouched.
    lines = run_command([PDFTK_PATH, pdf_path, 'dump_data_fields'])
    split_lines = (line.split(': ', 1) for line in lines)
    # A one-element split is a separator line; keep only the groups between them.
    groups = [list(group)
              for is_separator, group in itertools.groupby(split_lines, lambda x: len(x) == 1)
              if not is_separator]

    field_data = []  # Container for the whole dataset
    for index, field in enumerate(groups):
        d = {'id': index} if add_id else {}
        # Sort the attributes so repeated keys are collected in a stable order.
        for key, value in sorted(field):
            if key not in d:
                d[key] = value
            elif isinstance(d[key], list):
                d[key].append(value)
            else:
                # Second occurrence of the key: promote the value to a list.
                d[key] = [d[key], value]
        field_data.append(d)
    return field_data
142 |
def concat(files, out_file=None):
    '''
    Merge multiple PDF files into one.

    files    : list of PDF paths to concatenate, in order
    out_file : output PDF path; a temp file is created if not provided

    A single input file is simply copied to the output. Return the output
    path. On failure, the temp file created here is removed and the error is
    re-raised.
    '''
    cleanOnFail = False
    if not out_file:
        cleanOnFail = True
        handle, out_file = tempfile.mkstemp()
        # Only the path is needed; do not keep the descriptor open.
        os.close(handle)
    try:
        if len(files) == 1:
            # Nothing to merge: the copy is the result. Previously execution
            # fell through and ran pdftk anyway, overwriting the copy.
            shutil.copyfile(files[0], out_file)
        else:
            run_command([PDFTK_PATH] + list(files) + ['cat', 'output', out_file])
    except BaseException:
        if cleanOnFail:
            os.remove(out_file)
        raise
    return out_file
168 |
169 |
def split(pdf_path, out_dir=None):
    '''
    Burst a PDF into one file per page with ``pdftk burst``.

    Pages are written to out_dir as ``page_%06d.pdf``; a temp directory is
    created when out_dir is not given (and removed again if pdftk fails).
    Return the sorted paths of every entry found in out_dir afterwards.
    '''
    cleanOnFail = not out_dir
    if cleanOnFail:
        out_dir = tempfile.mkdtemp()
    out_pattern = '%s/page_%%06d.pdf' % out_dir
    command = (PDFTK_PATH, pdf_path, 'burst', 'output', out_pattern)
    try:
        run_command(command)
    except:
        if cleanOnFail:
            shutil.rmtree(out_dir)
        raise
    return [os.path.join(out_dir, name) for name in sorted(os.listdir(out_dir))]
189 |
190 |
def gen_xfdf(datas={}):
    '''
    Generate a temp XFDF file suited for the fill_form function, based on dict
    input data, and return its path.

    datas : dict mapping field names to values. Names and values are converted
            to text and XML-escaped, so characters such as ``&``, ``<`` or
            quotes cannot produce a malformed document.
    '''
    # Imported locally: these helpers are not part of the module's import block.
    from xml.sax.saxutils import escape, quoteattr

    fields = []
    for key, value in datas.items():
        # "%s" % (x,) turns non-string values (e.g. numbers) into text first.
        name = quoteattr("%s" % (key,))  # result includes the surrounding quotes
        text = escape("%s" % (value,))
        fields.append("""        <field name=%s><value>%s</value></field>""" % (name, text))
    # NOTE(review): the XML markup was missing from the reviewed copy of this
    # function; element names and whitespace are reconstructed from the XFDF
    # format - confirm against test_files/simple.xfdf.
    tpl = """<?xml version="1.0" encoding="UTF-8"?>
<xfdf xmlns="http://ns.adobe.com/xfdf/" xml:space="preserve">
    <fields>
%s
    </fields>
</xfdf>""" % "\n".join(fields)
    handle, out_file = tempfile.mkstemp()
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(handle, 'wb') as f:
        f.write(tpl.encode('UTF-8'))
    return out_file
207 |
def replace_page(pdf_path, page_number, pdf_to_insert_path):
    '''
    Replace a page in a PDF (pdf_path) by the PDF pointed by pdf_to_insert_path.
    page_number is the number of the page in pdf_path to be replaced. It is 1-based.

    pdf_path is overwritten in place with the result; nothing is returned.
    '''
    A = 'A=' + pdf_path
    B = 'B=' + pdf_to_insert_path
    num_pages = get_num_pages(pdf_path)

    # Build "<pages before> B <pages after>". Each side is omitted when the
    # replaced page is the first / last one, so a single-page document yields
    # just "B" instead of an invalid range such as "A2-end".
    page_args = []
    if page_number != 1:
        page_args.append('A1-' + str(page_number - 1))
    page_args.append('B')
    if page_number != num_pages:
        page_args.append('A' + str(page_number + 1) + '-end')

    # mkstemp creates the file atomically, unlike the race-prone mktemp.
    handle, output_temp = tempfile.mkstemp(suffix='.pdf')
    os.close(handle)
    try:
        run_command([PDFTK_PATH, A, B, 'cat'] + page_args + ['output', output_temp])
        shutil.copy(output_temp, pdf_path)
    finally:
        # Never leave the scratch file behind, whether pdftk succeeded or not.
        os.remove(output_temp)
234 |
def stamp(pdf_path, stamp_pdf_path, output_pdf_path=None):
    '''
    Apply a stamp (from stamp_pdf_path) to the PDF file in pdf_path using
    pdftk's ``multistamp`` operation. Useful for watermark purposes.

    If no output_pdf_path is provided, the result is written to a temporary
    file. Return the path of the resulting PDF. If pdftk fails, the temp file
    created here is removed and the error is re-raised.
    '''
    cleanOnFail = False
    output = output_pdf_path
    if not output:
        cleanOnFail = True
        # mkstemp creates the file atomically, unlike the race-prone mktemp.
        handle, output = tempfile.mkstemp(suffix='.pdf')
        os.close(handle)
    args = [PDFTK_PATH, pdf_path, 'multistamp', stamp_pdf_path, 'output', output]
    try:
        run_command(args)
    except BaseException:
        if cleanOnFail:
            os.remove(output)
        raise
    return output
244 |
def pdftk_cmd_util(pdf_path, action="compress",out_file=None, flatten=True):
    '''
    Run ``pdftk <pdf_path> output <out_file> <action> [flatten]``.

    :param pdf_path: input PDF file
    :param action: pdftk filter to apply, either "compress" or "uncompress"
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    :raises AssertionError: if action is not one of the supported actions.
    '''
    actions = ["compress", "uncompress"]
    # Raised explicitly (same exception type as the former ``assert``) so the
    # check still runs when Python is started with -O.
    if action not in actions:
        raise AssertionError("Unknown action. Failed to perform given action '%s'." % action)

    cleanOnFail = False
    if not out_file:
        cleanOnFail = True
        handle, out_file = tempfile.mkstemp()
        # Only the path is needed; do not keep the descriptor open.
        os.close(handle)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to pdftk untouched.
    args = [PDFTK_PATH, pdf_path, 'output', out_file, action]
    if flatten:
        args.append('flatten')
    try:
        run_command(args)
    except BaseException:
        if cleanOnFail:
            os.remove(out_file)
        raise
    return out_file
276 |
277 |
def compress(pdf_path, out_file=None, flatten=True):
    '''
    Apply pdftk's ``compress`` filter to a PDF, i.e. restore page stream
    compression (typically after the file was uncompressed for editing in a
    text editor).

    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    '''
    return pdftk_cmd_util(pdf_path, action="compress", out_file=out_file, flatten=flatten)
292 |
293 |
def uncompress(pdf_path, out_file=None, flatten=True):
    '''
    Apply pdftk's ``uncompress`` filter to a PDF, i.e. remove page stream
    compression so the PDF code can be edited in a text editor like vim or
    emacs.

    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    '''
    return pdftk_cmd_util(pdf_path, action="uncompress", out_file=out_file, flatten=flatten)
308 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import io

try:
    from setuptools import setup
except ImportError:
    # setuptools is not installed: fall back to the legacy distutils, which
    # is no longer shipped with Python 3.12 and later.
    from distutils.core import setup

# io.open accepts ``encoding`` on both Python 2 and Python 3; the builtin
# open() only does on Python 3.
with io.open("README.rst", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name='pypdftk',
    description='''Python wrapper for PDFTK''',
    long_description=long_description,
    version='0.5',
    author='Julien Bouquillon',
    author_email='julien@revolunet.com',
    url='http://github.com/revolunet/pypdftk',
    py_modules=['pypdftk'],
    scripts=['pypdftk.py'],
    classifiers=['Development Status :: 4 - Beta',
                 'Environment :: Web Environment',
                 'Intended Audience :: Developers',
                 'License :: OSI Approved :: BSD License',
                 'Operating System :: OS Independent',
                 'Programming Language :: Python',
                 'Topic :: Utilities'],
)
25 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: UTF-8 -*-
2 | import os
3 | import unittest
4 | import json
5 | from tempfile import mkdtemp
6 | # Needed for comparison of XFDF XML
7 | import xml.etree.ElementTree as ET
8 |
9 | import pypdftk
10 |
# Fixture files, relative to the repository root (the tests are expected to
# be run from there).
TEST_PDF_PATH = 'test_files/python-guide.pdf'
TEST_XPDF_PATH = 'test_files/form.pdf'
TEST_XPDF_DATA_DUMP = 'test_files/form.json'
TEST_XPDF_FILLED_PATH = 'test_files/form-filled.pdf'
TEST_XPDF_FILLED_DATA_DUMP = 'test_files/form-filled.json'
TEST_XFDF_PATH = 'test_files/simple.xfdf'

# Input for test_gen_xfdf.
SAMPLE_DATA = dict(city="Paris", name="juju")

# Form values for test_fill_form; the keys are field names of form.pdf.
SAMPLE_DATA2 = dict([
    ("Given Name Text Box", "name test"),
    ("Language 3 Check Box", "Yes"),
])
25 |
def read(path):
    ''' Return the whole text content of the file at path. '''
    # The context manager closes the file even if reading raises.
    with open(path, 'r') as fd:
        return fd.read()
31 |
# json comparison... https://stackoverflow.com/a/25851972/174027
def ordered(obj):
    '''
    Recursively normalise decoded JSON for order-insensitive comparison:
    dicts become sorted lists of (key, value) pairs, lists are sorted, and
    anything else is returned unchanged.
    '''
    if isinstance(obj, dict):
        pairs = [(key, ordered(value)) for key, value in obj.items()]
        return sorted(pairs)
    if isinstance(obj, list):
        return sorted(map(ordered, obj))
    return obj
40 |
# Converts a page range list into the number of pages
def rangeCount(ranges):
    '''
    Count the pages selected by a list of page ranges: a one-item range is a
    single page, a two-item range spans both ends inclusive. Any other length
    raises ValueError.
    '''
    count = 0
    for page_range in ranges:
        size = len(page_range)
        if size not in (1, 2):
            raise ValueError(str(page_range)+" contains more than 2 values")
        if size == 1:
            count += 1
        else:
            count += abs(page_range[0]-page_range[1]) + 1
    return count
52 |
class TestPyPDFTK(unittest.TestCase):
    ''' Integration tests for pypdftk; they drive the real pdftk binary. '''

    def _copy_test_pdf(self):
        '''
        Copy TEST_PDF_PATH into a fresh temp directory and return the copy's
        path, so tests that modify a PDF in place never touch the fixture.
        The directory is removed when the test finishes.
        '''
        # Imported locally: shutil is not part of this file's import block.
        import shutil
        work_dir = mkdtemp()
        self.addCleanup(shutil.rmtree, work_dir, True)
        target = os.path.join(work_dir, os.path.basename(TEST_PDF_PATH))
        shutil.copyfile(TEST_PDF_PATH, target)
        return target

    def test_get_num_pages(self):
        num = pypdftk.get_num_pages(TEST_PDF_PATH)
        self.assertEqual(num, 129)

    def test_fill_form(self):
        result = pypdftk.fill_form(TEST_XPDF_PATH, datas=SAMPLE_DATA2, flatten=False)
        result_data = ordered(pypdftk.dump_data_fields(result))
        expected_data = ordered(json.loads(read(TEST_XPDF_FILLED_DATA_DUMP)))
        self.assertEqual(result_data, expected_data)

    def test_dump_data_fields(self):
        result_data = ordered(pypdftk.dump_data_fields(TEST_XPDF_PATH))
        expected_data = ordered(json.loads(read(TEST_XPDF_DATA_DUMP)))
        self.assertEqual(result_data, expected_data)

    def test_concat(self):
        total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
        output_file = pypdftk.concat([TEST_PDF_PATH, TEST_PDF_PATH, TEST_PDF_PATH])
        concat_total_pages = pypdftk.get_num_pages(output_file)
        self.assertEqual(total_pages * 3, concat_total_pages)

    def test_get_pages_clone(self):
        total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
        output_file = pypdftk.get_pages(TEST_PDF_PATH, [])
        concat_total_pages = pypdftk.get_num_pages(output_file)
        self.assertEqual(total_pages, concat_total_pages)

    def test_get_pages_single(self):
        pageRanges = [[1]]
        output_file = pypdftk.get_pages(TEST_PDF_PATH, pageRanges)
        concat_total_pages = pypdftk.get_num_pages(output_file)
        self.assertEqual(rangeCount(pageRanges), concat_total_pages)

    def test_get_pages_range(self):
        pageRanges = [[2, 5]]
        output_file = pypdftk.get_pages(TEST_PDF_PATH, pageRanges)
        concat_total_pages = pypdftk.get_num_pages(output_file)
        self.assertEqual(rangeCount(pageRanges), concat_total_pages)

    def test_get_pages_single_range(self):
        pageRanges = [[1], [2, 5]]
        output_file = pypdftk.get_pages(TEST_PDF_PATH, pageRanges)
        concat_total_pages = pypdftk.get_num_pages(output_file)
        self.assertEqual(rangeCount(pageRanges), concat_total_pages)

    def test_split(self):
        total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
        paths = pypdftk.split(TEST_PDF_PATH)
        # One extra entry is expected next to the pages: doc_data.txt.
        self.assertEqual(len(paths) - 1, total_pages)
        self.assertTrue('doc_data.txt' in paths[0])
        for p in paths:
            self.assertTrue(os.path.exists(p))

    def test_split_output_dir(self):
        # Imported locally: shutil is not part of this file's import block.
        import shutil
        output_dir = mkdtemp()
        self.addCleanup(shutil.rmtree, output_dir, True)
        total_pages = pypdftk.get_num_pages(TEST_PDF_PATH)
        paths = pypdftk.split(TEST_PDF_PATH, out_dir=output_dir)
        self.assertEqual(len(paths) - 1, total_pages)
        for p in paths:
            out_path = os.path.join(output_dir, os.path.basename(p))
            # Check the file really exists in the requested directory; a bare
            # assertTrue(out_path) passes for any non-empty string.
            self.assertTrue(os.path.exists(out_path))

    def test_gen_xfdf(self):
        xfdf_path = pypdftk.gen_xfdf(SAMPLE_DATA)
        xfdf = read(xfdf_path)
        expected = read(TEST_XFDF_PATH)
        # XML can have sibling elements in different order. So:
        # * Parse the XML, serialise every element of the tree, sort, compare
        xfdf_standard_order = [ET.tostring(i) for i in list(ET.fromstring(xfdf).iter())]
        expected_standard_order = [ET.tostring(i) for i in list(ET.fromstring(expected).iter())]
        xfdf_standard_order.sort()
        expected_standard_order.sort()
        self.assertEqual(xfdf_standard_order, expected_standard_order)

    def test_replace_page_at_begin(self):
        # replace_page overwrites its input, so work on a throw-away copy.
        pdf_path = self._copy_test_pdf()
        total_pages = pypdftk.get_num_pages(pdf_path)
        pdf_to_insert = 'test_files/page_01.pdf'
        pypdftk.replace_page(pdf_path, 1, pdf_to_insert)
        self.assertEqual(total_pages, pypdftk.get_num_pages(pdf_path))

    def test_replace_page_at_middle(self):
        pdf_path = self._copy_test_pdf()
        total_pages = pypdftk.get_num_pages(pdf_path)
        pdf_to_insert = 'test_files/page_01.pdf'
        pypdftk.replace_page(pdf_path, 3, pdf_to_insert)
        self.assertEqual(total_pages, pypdftk.get_num_pages(pdf_path))

    def test_replace_page_at_end(self):
        pdf_path = self._copy_test_pdf()
        total_pages = pypdftk.get_num_pages(pdf_path)
        last_page = total_pages
        pdf_to_insert = 'test_files/page_01.pdf'
        pypdftk.replace_page(pdf_path, last_page, pdf_to_insert)
        self.assertEqual(total_pages, pypdftk.get_num_pages(pdf_path))

    @unittest.skip('Not implemented yet')
    def test_stamp(self):
        pass
151 |
152 |
# Run the whole suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/test_files/form-filled.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "FieldFlags": "0",
4 | "FieldNameAlt": "First name",
5 | "FieldName": "Given Name Text Box",
6 | "FieldType": "Text",
7 | "FieldJustification": "Left",
8 | "FieldMaxLength": "40",
9 | "FieldValue": "name test"
10 | },
11 | {
12 | "FieldFlags": "0",
13 | "FieldNameAlt": "Last name",
14 | "FieldName": "Family Name Text Box",
15 | "FieldType": "Text",
16 | "FieldJustification": "Left",
17 | "FieldMaxLength": "40",
18 | "FieldValue": ""
19 | },
20 | {
21 | "FieldFlags": "0",
22 | "FieldNameAlt": "House and floor",
23 | "FieldName": "House nr Text Box",
24 | "FieldType": "Text",
25 | "FieldJustification": "Left",
26 | "FieldMaxLength": "20",
27 | "FieldValue": ""
28 | },
29 | {
30 | "FieldFlags": "0",
31 | "FieldName": "Address 2 Text Box",
32 | "FieldType": "Text",
33 | "FieldJustification": "Left",
34 | "FieldMaxLength": "40",
35 | "FieldValue": ""
36 | },
37 | {
38 | "FieldFlags": "0",
39 | "FieldName": "Postcode Text Box",
40 | "FieldType": "Text",
41 | "FieldJustification": "Left",
42 | "FieldMaxLength": "20",
43 | "FieldValue": ""
44 | },
45 | {
46 | "FieldStateOption": ["Austria", "Belgium", "Britain", "Bulgaria", "Croatia",
47 | "Cyprus", "Czech-Republic", "Denmark", "Estonia", "Finland", "France",
48 | "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia",
49 | "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal",
50 | "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"],
51 | "FieldFlags": "393216",
52 | "FieldNameAlt": "Use selection or write country name",
53 | "FieldName": "Country Combo Box",
54 | "FieldType": "Choice",
55 | "FieldJustification": "Left",
56 | "FieldValue": ""
57 | },
58 | {
59 | "FieldFlags": "0",
60 | "FieldNameAlt": "Value from 40 to 250 cm",
61 | "FieldName": "Height Formatted Field",
62 | "FieldType": "Text",
63 | "FieldJustification": "Left",
64 | "FieldMaxLength": "20",
65 | "FieldValueDefault": "150",
66 | "FieldValue": "150"
67 | },
68 | {
69 | "FieldFlags": "0",
70 | "FieldName": "City Text Box",
71 | "FieldType": "Text",
72 | "FieldJustification": "Left",
73 | "FieldMaxLength": "40",
74 | "FieldValue": ""
75 | },
76 | {
77 | "FieldStateOption": ["Off", "Yes"],
78 | "FieldFlags": "0",
79 | "FieldNameAlt": "Car driving license",
80 | "FieldName": "Driving License Check Box",
81 | "FieldType": "Button",
82 | "FieldJustification": "Left",
83 | "FieldValueDefault": "Off",
84 | "FieldValue": "Off"
85 | },
86 | {
87 | "FieldStateOption": ["Black", "Blue", "Brown", "Green", "Grey", "Orange", "Red", "Violet", "White", "Yellow"],
88 | "FieldFlags": "131072",
89 | "FieldNameAlt": "Select from colour spectrum",
90 | "FieldName": "Favourite Colour List Box",
91 | "FieldType": "Choice",
92 | "FieldJustification": "Left",
93 | "FieldValueDefault": "Red",
94 | "FieldValue": "Red"
95 | },
96 | {
97 | "FieldStateOption": ["Off", "Yes"],
98 | "FieldFlags": "0",
99 | "FieldName": "Language 1 Check Box",
100 | "FieldType": "Button",
101 | "FieldJustification": "Left",
102 | "FieldValueDefault": "Off",
103 | "FieldValue": "Off"
104 | },
105 | {
106 | "FieldStateOption": ["Off", "Yes"],
107 | "FieldFlags": "0",
108 | "FieldName": "Language 2 Check Box",
109 | "FieldType": "Button",
110 | "FieldJustification": "Left",
111 | "FieldValueDefault": "Yes",
112 | "FieldValue": "Yes"
113 | },
114 | {
115 | "FieldStateOption": ["Off", "Yes"],
116 | "FieldFlags": "0",
117 | "FieldName": "Language 3 Check Box",
118 | "FieldType": "Button",
119 | "FieldJustification": "Left",
120 | "FieldValueDefault": "Off",
121 | "FieldValue": "Yes"
122 | },
123 | {
124 | "FieldStateOption": ["Off", "Yes"],
125 | "FieldFlags": "0",
126 | "FieldName": "Language 4 Check Box",
127 | "FieldType": "Button",
128 | "FieldJustification": "Left",
129 | "FieldValueDefault": "Off",
130 | "FieldValue": "Off"
131 | },
132 | {
133 | "FieldStateOption": ["Off", "Yes"],
134 | "FieldFlags": "0",
135 | "FieldName": "Language 5 Check Box",
136 | "FieldType": "Button",
137 | "FieldJustification": "Left",
138 | "FieldValueDefault": "Off",
139 | "FieldValue": "Off"
140 | },
141 | {
142 | "FieldStateOption": ["Man", "Woman"],
143 | "FieldFlags": "131072",
144 | "FieldNameAlt": "Select from list",
145 | "FieldName": "Gender List Box",
146 | "FieldType": "Choice",
147 | "FieldJustification": "Left",
148 | "FieldValueDefault": "Man",
149 | "FieldValue": "Man"
150 | },
151 | {
152 | "FieldFlags": "0",
153 | "FieldName": "Address 1 Text Box",
154 | "FieldType": "Text",
155 | "FieldJustification": "Left",
156 | "FieldMaxLength": "40",
157 | "FieldValue": ""
158 | }
159 | ]
160 |
--------------------------------------------------------------------------------
/test_files/form.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "FieldFlags": "0",
4 | "FieldNameAlt": "First name",
5 | "FieldName": "Given Name Text Box",
6 | "FieldType": "Text",
7 | "FieldJustification": "Left",
8 | "FieldMaxLength": "40",
9 | "FieldValue": ""
10 | },
11 | {
12 | "FieldFlags": "0",
13 | "FieldNameAlt": "Last name",
14 | "FieldName": "Family Name Text Box",
15 | "FieldType": "Text",
16 | "FieldJustification": "Left",
17 | "FieldMaxLength": "40",
18 | "FieldValue": ""
19 | },
20 | {
21 | "FieldFlags": "0",
22 | "FieldNameAlt": "House and floor",
23 | "FieldName": "House nr Text Box",
24 | "FieldType": "Text",
25 | "FieldJustification": "Left",
26 | "FieldMaxLength": "20",
27 | "FieldValue": ""
28 | },
29 | {
30 | "FieldFlags": "0",
31 | "FieldName": "Address 2 Text Box",
32 | "FieldType": "Text",
33 | "FieldJustification": "Left",
34 | "FieldMaxLength": "40",
35 | "FieldValue": ""
36 | },
37 | {
38 | "FieldFlags": "0",
39 | "FieldName": "Postcode Text Box",
40 | "FieldType": "Text",
41 | "FieldJustification": "Left",
42 | "FieldMaxLength": "20",
43 | "FieldValue": ""
44 | },
45 | {
46 | "FieldStateOption": ["Austria", "Belgium", "Britain", "Bulgaria", "Croatia",
47 | "Cyprus", "Czech-Republic", "Denmark", "Estonia", "Finland", "France",
48 | "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia",
49 | "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal",
50 | "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"],
51 | "FieldFlags": "393216",
52 | "FieldNameAlt": "Use selection or write country name",
53 | "FieldName": "Country Combo Box",
54 | "FieldType": "Choice",
55 | "FieldJustification": "Left",
56 | "FieldValue": ""
57 | },
58 | {
59 | "FieldFlags": "0",
60 | "FieldNameAlt": "Value from 40 to 250 cm",
61 | "FieldName": "Height Formatted Field",
62 | "FieldType": "Text",
63 | "FieldJustification": "Left",
64 | "FieldMaxLength": "20",
65 | "FieldValueDefault": "150",
66 | "FieldValue": "150"
67 | },
68 | {
69 | "FieldFlags": "0",
70 | "FieldName": "City Text Box",
71 | "FieldType": "Text",
72 | "FieldJustification": "Left",
73 | "FieldMaxLength": "40",
74 | "FieldValue": ""
75 | },
76 | {
77 | "FieldStateOption": ["Off", "Yes"],
78 | "FieldFlags": "0",
79 | "FieldNameAlt": "Car driving license",
80 | "FieldName": "Driving License Check Box",
81 | "FieldType": "Button",
82 | "FieldJustification": "Left",
83 | "FieldValueDefault": "Off",
84 | "FieldValue": "Off"
85 | },
86 | {
87 | "FieldStateOption": ["Black", "Blue", "Brown", "Green", "Grey", "Orange", "Red", "Violet", "White", "Yellow"],
88 | "FieldFlags": "131072",
89 | "FieldNameAlt": "Select from colour spectrum",
90 | "FieldName": "Favourite Colour List Box",
91 | "FieldType": "Choice",
92 | "FieldJustification": "Left",
93 | "FieldValueDefault": "Red",
94 | "FieldValue": "Red"
95 | },
96 | {
97 | "FieldStateOption": ["Off", "Yes"],
98 | "FieldFlags": "0",
99 | "FieldName": "Language 1 Check Box",
100 | "FieldType": "Button",
101 | "FieldJustification": "Left",
102 | "FieldValueDefault": "Off",
103 | "FieldValue": "Off"
104 | },
105 | {
106 | "FieldStateOption": ["Off", "Yes"],
107 | "FieldFlags": "0",
108 | "FieldName": "Language 2 Check Box",
109 | "FieldType": "Button",
110 | "FieldJustification": "Left",
111 | "FieldValueDefault": "Yes",
112 | "FieldValue": "Yes"
113 | },
114 | {
115 | "FieldStateOption": ["Off", "Yes"],
116 | "FieldFlags": "0",
117 | "FieldName": "Language 3 Check Box",
118 | "FieldType": "Button",
119 | "FieldJustification": "Left",
120 | "FieldValueDefault": "Off",
121 | "FieldValue": "Off"
122 | },
123 | {
124 | "FieldStateOption": ["Off", "Yes"],
125 | "FieldFlags": "0",
126 | "FieldName": "Language 4 Check Box",
127 | "FieldType": "Button",
128 | "FieldJustification": "Left",
129 | "FieldValueDefault": "Off",
130 | "FieldValue": "Off"
131 | },
132 | {
133 | "FieldStateOption": ["Off", "Yes"],
134 | "FieldFlags": "0",
135 | "FieldName": "Language 5 Check Box",
136 | "FieldType": "Button",
137 | "FieldJustification": "Left",
138 | "FieldValueDefault": "Off",
139 | "FieldValue": "Off"
140 | },
141 | {
142 | "FieldStateOption": ["Man", "Woman"],
143 | "FieldFlags": "131072",
144 | "FieldNameAlt": "Select from list",
145 | "FieldName": "Gender List Box",
146 | "FieldType": "Choice",
147 | "FieldJustification": "Left",
148 | "FieldValueDefault": "Man",
149 | "FieldValue": "Man"
150 | },
151 | {
152 | "FieldFlags": "0",
153 | "FieldName": "Address 1 Text Box",
154 | "FieldType": "Text",
155 | "FieldJustification": "Left",
156 | "FieldMaxLength": "40",
157 | "FieldValue": ""
158 | }
159 | ]
160 |
--------------------------------------------------------------------------------
/test_files/form.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/test_files/form.pdf
--------------------------------------------------------------------------------
/test_files/page_01.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/test_files/page_01.pdf
--------------------------------------------------------------------------------
/test_files/python-guide.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/test_files/python-guide.pdf
--------------------------------------------------------------------------------
/test_files/simple.xfdf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Paris
5 | juju
6 |
7 |
--------------------------------------------------------------------------------