├── .github └── workflows │ ├── pypi.yml │ └── quality.yml ├── .gitignore ├── .travis.yml ├── MANIFEST ├── MANIFEST.in ├── Out ├── some_file.pdf ├── test1.pdf └── test1.txt ├── README.rst ├── licence.txt ├── pypdftk.py ├── setup.py ├── test.py └── test_files ├── form-filled.json ├── form.json ├── form.pdf ├── page_01.pdf ├── python-guide.pdf └── simple.xfdf /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [created] 7 | 8 | jobs: 9 | deploy: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: '3.x' 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install setuptools wheel twine 23 | - name: Build dist 24 | run: | 25 | python setup.py sdist 26 | - name: pypi-publish 27 | uses: pypa/gh-action-pypi-publish@v1.4.2 28 | with: 29 | password: ${{ secrets.PYPI_TOKEN }} 30 | verify_metadata: false 31 | verbose: true 32 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Python quality 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ master ] 7 | pull_request: 8 | branches: [ master ] 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: [2.7, 3.7] 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | sudo apt-get install pdftk 27 | python -m pip install --upgrade pip 28 | python -m pip install flake8 pytest 29 | if [ -f requirements.txt ]; then 
pip install -r requirements.txt; fi 30 | - name: Lint with flake8 31 | run: | 32 | # stop the build if there are Python syntax errors or undefined names 33 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 34 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 35 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 36 | - name: Test with pytest 37 | run: | 38 | mkdir test-reports 39 | pytest --junitxml=test-reports/junit.xml test.py 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | script: pytest test.py 3 | install: 4 | - sudo apt-get update 5 | - sudo apt-get install pdftk 6 | - pdftk --version -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README.md 3 | licence.txt 4 | pypdftk.py 5 | setup.py 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include pypdftk.py 3 | include 
licence.txt 4 | exclude *.pyc -------------------------------------------------------------------------------- /Out/some_file.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/Out/some_file.pdf -------------------------------------------------------------------------------- /Out/test1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/Out/test1.pdf -------------------------------------------------------------------------------- /Out/test1.txt: -------------------------------------------------------------------------------- 1 | test1 2 | 3 | value1 = 100000 4 | value2 = 200002 5 | value3 = 333003 6 | value4 = 444404 -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pypdftk |pypi| |travis| |githubactions| 2 | =========================================== 3 | 4 | Python module to drive the awesome `pdftk`_ binary. 5 | 6 | Proudly brought to you by many `awesome contributors`_ 7 | 8 | Features 9 | -------- 10 | 11 | ``fill_form`` 12 | ~~~~~~~~~~~~~ 13 | 14 | Fill a PDF with given data and returns the output PDF path 15 | 16 | - ``pdf_path`` : input PDF 17 | - ``datas`` : dictionary of field names / values 18 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if 19 | not provided 20 | - ``flatten`` (default=True) : flatten the final PDF 21 | - ``drop_xfa`` (default=False) : omit XFA data from the output PDF 22 | 23 | ``concat`` 24 | ~~~~~~~~~~ 25 | 26 | Merge multiple PDFs into one single file and returns the output PDF path 27 | 28 | - ``files`` : list of PDF files to concatenate 29 | - ``out_file`` (default=auto) : output PDF path. 
will use tempfile if 30 | not provided 31 | 32 | ``get_pages`` 33 | ~~~~~~~~~~~~~ 34 | 35 | Concatenate a list of page ranges into one single file and returns the 36 | output PDF path 37 | 38 | - ``pdf_path`` : input PDF 39 | - ``ranges`` (default=\ ``[]``) : ``[]`` for clone, ``[[2]]`` for 40 | extracting 2nd page, ``[[1],[2,5],[3]]`` for concatenating pages 1, 41 | 2-5, 3 42 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if 43 | not provided 44 | 45 | ``split`` 46 | ~~~~~~~~~ 47 | 48 | Split a single PDF into many pages and return a list of page paths 49 | 50 | - ``pdf_path`` : input PDF 51 | - ``out_dir`` (default=auto) : output PDFs dir. will use tempfile if 52 | not provided 53 | 54 | **warning** if you give an out_dir parameter, ensure it is empty, or the 55 | split function may destroy your files and return incorrect results. 56 | 57 | ``gen_xfdf`` 58 | ~~~~~~~~~~~~ 59 | 60 | Generate an XFDF file suited for filling PDF forms and return the 61 | generated XFDF file path 62 | 63 | - ``datas`` : dictionary of data 64 | 65 | ``get_num_pages`` 66 | ~~~~~~~~~~~~~~~~~ 67 | 68 | Return the number of pages for a given PDF 69 | 70 | - ``pdf_path`` : input PDF file 71 | 72 | ``replace_page`` 73 | ~~~~~~~~~~~~~~~~ 74 | 75 | Replace a page in a PDF (pdf_path) by the PDF pointed by 76 | pdf_to_insert_path. 77 | 78 | - ``pdf_path`` is the PDF that will have its page replaced. 79 | - ``page_number`` is the number of the page in pdf_path to be replaced. 80 | It is 1-based. 81 | - ``pdf_to_insert_path`` is the PDF that will be inserted at the old 82 | page. 83 | 84 | ``stamp`` 85 | ~~~~~~~~~ 86 | 87 | Applies a stamp (from ``stamp_pdf_path``) to the PDF file in 88 | ``pdf_path``. If no ``output_pdf_path`` is provided, it returns a 89 | temporary file with the result PDF. 90 | 91 | ``[compress | uncompress]`` 92 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 93 | 94 | :: 95 | 96 | These are only useful when you want to edit PDF code in a text 97 | editor like vim or emacs. 
Remove PDF page stream compression by 98 | applying the uncompress filter. Use the compress filter to 99 | restore compression. 100 | 101 | - ``pdf_path`` : input PDF file 102 | - ``out_file`` (default=auto) : output PDF path. will use tempfile if 103 | not provided 104 | - ``flatten`` (default=True) : flatten the final PDF 105 | 106 | ``dump_data_fields`` 107 | ~~~~~~~~~~~~~~~~~~~~ 108 | 109 | Read PDF and output form field statistics. 110 | 111 | - ``pdf_path`` : input PDF file 112 | 113 | 114 | Example 115 | ------- 116 | 117 | Fill a PDF model and add a cover page : 118 | 119 | .. code:: python 120 | 121 | import pypdftk 122 | 123 | datas = { 124 | 'firstname': 'Julien', 125 | 'company': 'revolunet', 126 | 'price': 42 127 | } 128 | generated_pdf = pypdftk.fill_form('/path/to/model.pdf', datas) 129 | out_pdf = pypdftk.concat(['/path/to/cover.pdf', generated_pdf]) 130 | 131 | pdftk path 132 | ---------- 133 | 134 | By default, path is ``/usr/bin/pdftk``, but you can override it with the 135 | ``PDFTK_PATH`` environment variable 136 | 137 | Licence 138 | ------- 139 | 140 | This module is released under the permissive `MIT license`_. Your 141 | contributions are always welcome. 142 | 143 | .. _pdftk: http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/ 144 | .. _revolunet: http://revolunet.com 145 | .. _awesome contributors: https://github.com/revolunet/pypdftk/graphs/contributors 146 | .. _MIT license: http://revolunet.mit-license.org 147 | 148 | .. |pypi| image:: https://img.shields.io/pypi/v/pypdftk 149 | :target: https://pypi.org/project/pypdftk/ 150 | .. |travis| image:: https://travis-ci.org/yguarata/pypdftk.svg?branch=master 151 | :target: https://travis-ci.org/yguarata/pypdftk 152 | .. 
|githubactions| image:: https://github.com/revolunet/pypdftk/actions/workflows/quality.yml/badge.svg 153 | 154 | -------------------------------------------------------------------------------- /licence.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 1998, Regents of the University of California 2 | All rights reserved. 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the University of California, Berkeley nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# -*- encoding: UTF-8 -*-

''' pypdftk

Python module to drive the awesome pdftk binary.
See http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/

'''

import itertools
import logging
import os
import shutil
import subprocess
import tempfile
from xml.sax.saxutils import escape as _xml_escape

log = logging.getLogger(__name__)

# Binary lookup order: $PDFTK_PATH, then /usr/bin/pdftk, then whatever
# "pdftk" resolves to on the PATH.
PDFTK_PATH = os.getenv('PDFTK_PATH')
if not PDFTK_PATH:
    PDFTK_PATH = '/usr/bin/pdftk'
    if not os.path.isfile(PDFTK_PATH):
        PDFTK_PATH = 'pdftk'

# Text types that can be XML-escaped directly (Python 2 and 3).
try:
    _STRING_TYPES = (basestring,)  # noqa: F821 - only defined on Python 2
except NameError:
    _STRING_TYPES = (str,)


def check_output(*popenargs, **kwargs):
    '''
    Run a command and return the bytes it wrote to stdout.

    Arguments are forwarded to ``subprocess.Popen``; ``stdout`` may not be
    passed because it is always set to a pipe.

    Raises ValueError if ``stdout`` is given, and
    subprocess.CalledProcessError (with ``output`` set) if the command
    exits with a non-zero status.
    '''
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    output, unused_err = process.communicate()
    retcode = process.poll()
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        raise subprocess.CalledProcessError(retcode, cmd, output=output)
    return output


def run_command(command, shell=False):
    '''
    Run a system command and return its stdout as a list of lines.

    ``command`` is an argv list (or a string when ``shell`` is True).
    The output is decoded as UTF-8 and split on line boundaries.
    '''
    p = check_output(command, shell=shell)
    return p.decode("utf-8").splitlines()


# Import-time probe: warn early when the binary cannot be started at all.
try:
    run_command([PDFTK_PATH])
except OSError:
    log.warning('pdftk test call failed (PDFTK_PATH=%r).', PDFTK_PATH)
except subprocess.CalledProcessError:
    # The binary exists and ran; a non-zero exit for an argument-less call
    # must not prevent the module from being imported.
    pass


def _to_text(value):
    ''' Return ``value`` unchanged if it is already text, else ``str(value)``. '''
    if isinstance(value, _STRING_TYPES):
        return value
    return str(value)


def _remove_quietly(path):
    ''' Delete ``path``; a file that is already gone is not an error. '''
    try:
        os.remove(path)
    except OSError:
        pass


def _prepare_output(out_file, suffix=''):
    '''
    Resolve the output path of an operation.

    Returns ``(path, is_temp)``. When ``out_file`` is falsy a temp file is
    created and its descriptor is closed right away: pdftk writes to the
    path itself, and an open descriptor would block removal on some
    platforms.
    '''
    if out_file:
        return out_file, False
    handle, path = tempfile.mkstemp(suffix=suffix)
    os.close(handle)
    return path, True


def _run_to_output(args, out_file, is_temp):
    '''
    Run pdftk with ``args`` and return ``out_file``.

    If the command fails and ``out_file`` is a temp file created by this
    module, the file is removed before the error is re-raised.
    '''
    try:
        run_command(args)
    except BaseException:
        if is_temp:
            _remove_quietly(out_file)
        raise
    return out_file


def get_num_pages(pdf_path):
    '''
    Return the number of pages in a given PDF file.

    Parses the ``NumberOfPages`` line of ``pdftk dump_data``; returns 0 if
    no such line is present.
    '''
    for line in run_command([PDFTK_PATH, pdf_path, 'dump_data']):
        if line.lower().startswith('numberofpages'):
            return int(line.split(':')[1])
    return 0


def get_pages(pdf_path, ranges=[], out_file=None):
    '''
    Concatenate a list of page ranges into one single file.

    ``ranges`` is a list of lists: ``[]`` clones the document, ``[[2]]``
    extracts page 2, ``[[1], [2, 5]]`` yields pages 1 and 2-5.
    Return temp file if no out_file provided.
    '''
    # Build the range arguments before creating any temp file so a bad
    # ``ranges`` value cannot leak one.
    page_ranges = ['-'.join(str(page) for page in page_range)
                   for page_range in ranges]
    out_file, is_temp = _prepare_output(out_file)
    args = [PDFTK_PATH, pdf_path, 'cat'] + page_ranges + ['output', out_file]
    return _run_to_output(args, out_file, is_temp)


def fill_form(pdf_path, datas={}, out_file=None, flatten=True, drop_xfa=False):
    '''
    Fills a PDF form with given dict input data.

    ``flatten`` and ``drop_xfa`` append the pdftk keywords of the same name.
    Return temp file if no out_file provided.
    '''
    tmp_fdf = gen_xfdf(datas)
    try:
        out_file, is_temp = _prepare_output(out_file)
        # argv list, no shell: paths with spaces or shell metacharacters
        # are passed to pdftk verbatim.
        args = [PDFTK_PATH, pdf_path, 'fill_form', tmp_fdf, 'output', out_file]
        if flatten:
            args.append('flatten')
        if drop_xfa:
            args.append('drop_xfa')
        return _run_to_output(args, out_file, is_temp)
    finally:
        # The intermediate XFDF is removed exactly once, on success or failure.
        _remove_quietly(tmp_fdf)


def dump_data_fields(pdf_path, add_id=False):
    '''
    Return list of dicts of all fields in a PDF.

    If multiple values with the same key are provided for some fields (like
    FieldStateOption), the data for that key will be a list.
    If add_id is True, a numeric ``id`` (the field's position in the dump)
    is added to each dict.
    '''
    lines = run_command([PDFTK_PATH, pdf_path, 'dump_data_fields'])
    pairs = [line.split(': ', 1) for line in lines]
    # Lines without a "key: value" shape (e.g. '---') separate the fields.
    groups = [list(group)
              for is_separator, group
              in itertools.groupby(pairs, lambda pair: len(pair) == 1)
              if not is_separator]
    field_data = []
    for index, attributes in enumerate(groups):
        record = {'id': index} if add_id else {}
        for key, value in sorted(attributes):
            if key not in record:
                record[key] = value
            elif isinstance(record[key], list):
                record[key].append(value)
            else:
                # Second occurrence of a key: promote the value to a list.
                record[key] = [record[key], value]
        field_data.append(record)
    return field_data


def concat(files, out_file=None):
    '''
    Merge multiples PDF files.
    Return temp file if no out_file provided.
    '''
    out_file, is_temp = _prepare_output(out_file)
    # A single input is handled by pdftk like any other; the former
    # pre-copy of the file was overwritten by this command anyway.
    args = [PDFTK_PATH] + list(files) + ['cat', 'output', out_file]
    return _run_to_output(args, out_file, is_temp)


def split(pdf_path, out_dir=None):
    '''
    Split a single PDF file into pages.

    Use a temp directory if no out_dir provided. Returns the sorted paths
    of every entry found in the output directory afterwards, so files
    already present in a caller-supplied ``out_dir`` are included.
    '''
    is_temp = not out_dir
    if is_temp:
        out_dir = tempfile.mkdtemp()
    out_pattern = os.path.join(out_dir, 'page_%06d.pdf')
    try:
        run_command([PDFTK_PATH, pdf_path, 'burst', 'output', out_pattern])
    except BaseException:
        if is_temp:
            shutil.rmtree(out_dir, ignore_errors=True)
        raise
    return [os.path.join(out_dir, name) for name in sorted(os.listdir(out_dir))]


def gen_xfdf(datas={}):
    '''
    Generates a temp XFDF file suited for fill_form function, based on dict
    input data.

    Field names and values are XML-escaped. Returns the path of the
    generated file; the caller is responsible for deleting it.
    '''
    fields = []
    for key, value in datas.items():
        fields.append('        <field name="%s"><value>%s</value></field>' % (
            _xml_escape(_to_text(key), {'"': '&quot;'}),
            _xml_escape(_to_text(value))))
    tpl = """<?xml version="1.0" encoding="UTF-8"?>
<xfdf xmlns="http://ns.adobe.com/xfdf/" xml:space="preserve">
    <fields>
%s
    </fields>
</xfdf>""" % "\n".join(fields)
    handle, out_file = tempfile.mkstemp()
    with os.fdopen(handle, 'wb') as f:
        f.write(tpl.encode('UTF-8'))
    return out_file


def replace_page(pdf_path, page_number, pdf_to_insert_path):
    '''
    Replace a page in a PDF (pdf_path) by the PDF pointed by pdf_to_insert_path.
    page_number is the number of the page in pdf_path to be replaced. It is 1-based.

    The file at pdf_path is overwritten in place.
    '''
    total_pages = get_num_pages(pdf_path)
    cat_args = []
    if page_number != 1:
        # Pages before the replaced one.
        cat_args.append('A1-' + str(page_number - 1))
    cat_args.append('B')
    if page_number != total_pages:
        # Pages after the replaced one; skipped for the last page, which
        # also covers page 1 of a one-page document.
        cat_args.append('A' + str(page_number + 1) + '-end')

    output_temp, _ = _prepare_output(None, suffix='.pdf')
    try:
        run_command([PDFTK_PATH, 'A=' + pdf_path, 'B=' + pdf_to_insert_path,
                     'cat'] + cat_args + ['output', output_temp])
        # Copy the content only, so the private mode of the temp file is
        # not propagated onto pdf_path.
        shutil.copyfile(output_temp, pdf_path)
    finally:
        _remove_quietly(output_temp)


def stamp(pdf_path, stamp_pdf_path, output_pdf_path=None):
    '''
    Applies a stamp (from stamp_pdf_path) to the PDF file in pdf_path. Useful for watermark purposes.
    If no output_pdf_path is provided, it returns a temporary file with the result PDF.
    '''
    output, is_temp = _prepare_output(output_pdf_path, suffix='.pdf')
    args = [PDFTK_PATH, pdf_path, 'multistamp', stamp_pdf_path, 'output', output]
    return _run_to_output(args, output, is_temp)


def pdftk_cmd_util(pdf_path, action="compress", out_file=None, flatten=True):
    '''
    Run a pdftk output filter ("compress" or "uncompress") on a PDF.

    :type action: should valid action, in string format. Eg: "uncompress"
    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    :raises AssertionError: if ``action`` is not a supported action.
    '''
    actions = ["compress", "uncompress"]
    if action not in actions:
        # Raised explicitly (not via ``assert``) so the check survives -O
        # while callers still see the same exception type.
        raise AssertionError(
            "Unknown action. Failed to perform given action '%s'." % action)

    out_file, is_temp = _prepare_output(out_file)
    args = [PDFTK_PATH, pdf_path, 'output', out_file, action]
    if flatten:
        args.append('flatten')
    return _run_to_output(args, out_file, is_temp)


def compress(pdf_path, out_file=None, flatten=True):
    '''
    Restore PDF page stream compression by applying pdftk's compress
    filter (the inverse of ``uncompress``).

    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    '''
    return pdftk_cmd_util(pdf_path, "compress", out_file, flatten)


def uncompress(pdf_path, out_file=None, flatten=True):
    '''
    Remove PDF page stream compression by applying pdftk's uncompress
    filter. Useful when you want to edit PDF code in a text editor like
    vim or emacs; use ``compress`` to restore compression.

    :param pdf_path: input PDF file
    :param out_file: (default=auto) : output PDF path. will use tempfile if not provided
    :param flatten: (default=True) : flatten the final PDF
    :return: name of the output file.
    '''
    return pdftk_cmd_util(pdf_path, "uncompress", out_file, flatten)
305 | ''' 306 | 307 | return pdftk_cmd_util(pdf_path, "uncompress", out_file, flatten) 308 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from distutils.core import setup 3 | 4 | with open("README.rst", "r", encoding="utf-8") as fh: 5 | long_description = fh.read() 6 | 7 | setup( 8 | name='pypdftk', 9 | description='''Python wrapper for PDFTK''', 10 | long_description=long_description, 11 | version='0.5', 12 | author='Julien Bouquillon', 13 | author_email='julien@revolunet.com', 14 | url='http://github.com/revolunet/pypdftk', 15 | py_modules=['pypdftk'], 16 | scripts=['pypdftk.py'], 17 | classifiers=['Development Status :: 4 - Beta', 18 | 'Environment :: Web Environment', 19 | 'Intended Audience :: Developers', 20 | 'License :: OSI Approved :: BSD License', 21 | 'Operating System :: OS Independent', 22 | 'Programming Language :: Python', 23 | 'Topic :: Utilities'], 24 | ) 25 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: UTF-8 -*- 2 | import os 3 | import unittest 4 | import json 5 | from tempfile import mkdtemp 6 | # Needed for comparison of XFDF XML 7 | import xml.etree.ElementTree as ET 8 | 9 | import pypdftk 10 | 11 | TEST_PDF_PATH = 'test_files/python-guide.pdf' 12 | TEST_XPDF_PATH = 'test_files/form.pdf' 13 | TEST_XPDF_DATA_DUMP = 'test_files/form.json' 14 | TEST_XPDF_FILLED_PATH = 'test_files/form-filled.pdf' 15 | TEST_XPDF_FILLED_DATA_DUMP = 'test_files/form-filled.json' 16 | TEST_XFDF_PATH = 'test_files/simple.xfdf' 17 | SAMPLE_DATA = { 18 | "city": "Paris", 19 | "name": "juju" 20 | } 21 | SAMPLE_DATA2 = { 22 | "Given Name Text Box": "name test", 23 | "Language 3 Check Box": "Yes" 24 | } 25 | 26 | def read(path): 27 | fd = open(path, 'r') 28 | content = 
fd.read() 29 | fd.close() 30 | return content 31 | 32 | # json comparison... https://stackoverflow.com/a/25851972/174027 33 | def ordered(obj): 34 | if isinstance(obj, dict): 35 | return sorted((k, ordered(v)) for k, v in obj.items()) 36 | if isinstance(obj, list): 37 | return sorted(ordered(x) for x in obj) 38 | else: 39 | return obj 40 | 41 | # Converts a page range list into the number of pages 42 | def rangeCount(ranges): 43 | count = 0 44 | for range in ranges: 45 | if len(range)==1: 46 | count += 1 47 | elif len(range)==2: 48 | count += abs(range[0]-range[1]) + 1 49 | else: 50 | raise ValueError(str(range)+" contains more than 2 values") 51 | return count 52 | 53 | class TestPyPDFTK(unittest.TestCase): 54 | def test_get_num_pages(self): 55 | num = pypdftk.get_num_pages(TEST_PDF_PATH) 56 | self.assertEqual(num, 129) 57 | 58 | def test_fill_form(self): 59 | result = pypdftk.fill_form(TEST_XPDF_PATH, datas=SAMPLE_DATA2, flatten=False) 60 | result_data = ordered(pypdftk.dump_data_fields(result)) 61 | expected_data = ordered(json.loads(read(TEST_XPDF_FILLED_DATA_DUMP))) 62 | self.assertEqual(result_data, expected_data) 63 | 64 | def test_dump_data_fields(self): 65 | result_data = ordered(pypdftk.dump_data_fields(TEST_XPDF_PATH)) 66 | expected_data = ordered(json.loads(read(TEST_XPDF_DATA_DUMP))) 67 | self.assertEqual(result_data, expected_data) 68 | 69 | def test_concat(self): 70 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 71 | output_file = pypdftk.concat([TEST_PDF_PATH, TEST_PDF_PATH, TEST_PDF_PATH]) 72 | concat_total_pages = pypdftk.get_num_pages(output_file) 73 | self.assertEqual(total_pages * 3, concat_total_pages) 74 | 75 | 76 | def test_get_pages_clone(self): 77 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 78 | output_file = pypdftk.get_pages(TEST_PDF_PATH,[]) 79 | concat_total_pages = pypdftk.get_num_pages(output_file) 80 | self.assertEqual(total_pages, concat_total_pages) 81 | 82 | def test_get_pages_single(self): 83 | pageRanges = [[1]] 
84 | output_file = pypdftk.get_pages(TEST_PDF_PATH,pageRanges) 85 | concat_total_pages = pypdftk.get_num_pages(output_file) 86 | self.assertEqual(rangeCount(pageRanges), concat_total_pages) 87 | 88 | def test_get_pages_range(self): 89 | pageRanges = [[2,5]] 90 | output_file = pypdftk.get_pages(TEST_PDF_PATH,pageRanges) 91 | concat_total_pages = pypdftk.get_num_pages(output_file) 92 | self.assertEqual(rangeCount(pageRanges), concat_total_pages) 93 | 94 | def test_get_pages_single_range(self): 95 | pageRanges = [[1],[2,5]] 96 | output_file = pypdftk.get_pages(TEST_PDF_PATH,pageRanges) 97 | concat_total_pages = pypdftk.get_num_pages(output_file) 98 | self.assertEqual(rangeCount(pageRanges), concat_total_pages) 99 | 100 | def test_split(self): 101 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 102 | paths = pypdftk.split(TEST_PDF_PATH) 103 | self.assertEqual(len(paths) - 1, total_pages) 104 | self.assertTrue('doc_data.txt' in paths[0]) 105 | for p in paths: 106 | self.assertTrue(os.path.exists(p)) 107 | 108 | def test_split_output_dir(self): 109 | output_dir = mkdtemp() 110 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 111 | paths = pypdftk.split(TEST_PDF_PATH, out_dir=output_dir) 112 | self.assertEqual(len(paths) - 1, total_pages) 113 | for p in paths: 114 | out_path = os.path.join(output_dir, os.path.basename(p)) 115 | self.assertTrue(out_path) 116 | 117 | def test_gen_xfdf(self): 118 | xfdf_path = pypdftk.gen_xfdf(SAMPLE_DATA) 119 | xfdf = read(xfdf_path) 120 | expected = read(TEST_XFDF_PATH) 121 | # XML can have sibling elements in different order. 
So: 122 | # * Parse the XML, get list of the root's children, convert to string, sort 123 | xfdf_standard_order = [ET.tostring(i) for i in list(ET.fromstring(xfdf).iter())] 124 | expected_standard_order = [ET.tostring(i) for i in list(ET.fromstring(expected).iter())] 125 | xfdf_standard_order.sort() 126 | expected_standard_order.sort() 127 | self.assertEqual(xfdf_standard_order, expected_standard_order) 128 | 129 | def test_replace_page_at_begin(self): 130 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 131 | pdf_to_insert = 'test_files/page_01.pdf' 132 | pypdftk.replace_page(TEST_PDF_PATH, 1, pdf_to_insert) 133 | self.assertEqual(total_pages, pypdftk.get_num_pages(TEST_PDF_PATH)) 134 | 135 | def test_replace_page_at_middle(self): 136 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 137 | pdf_to_insert = 'test_files/page_01.pdf' 138 | pypdftk.replace_page(TEST_PDF_PATH, 3, pdf_to_insert) 139 | self.assertEqual(total_pages, pypdftk.get_num_pages(TEST_PDF_PATH)) 140 | 141 | def test_replace_page_at_end(self): 142 | total_pages = pypdftk.get_num_pages(TEST_PDF_PATH) 143 | last_page = pypdftk.get_num_pages(TEST_PDF_PATH) 144 | pdf_to_insert = 'test_files/page_01.pdf' 145 | pypdftk.replace_page(TEST_PDF_PATH, last_page, pdf_to_insert) 146 | self.assertEqual(total_pages, pypdftk.get_num_pages(TEST_PDF_PATH)) 147 | 148 | @unittest.skip('Not implemented yet') 149 | def test_stamp(self): 150 | pass 151 | 152 | 153 | if __name__ == '__main__': 154 | unittest.main() -------------------------------------------------------------------------------- /test_files/form-filled.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "FieldFlags": "0", 4 | "FieldNameAlt": "First name", 5 | "FieldName": "Given Name Text Box", 6 | "FieldType": "Text", 7 | "FieldJustification": "Left", 8 | "FieldMaxLength": "40", 9 | "FieldValue": "name test" 10 | }, 11 | { 12 | "FieldFlags": "0", 13 | "FieldNameAlt": "Last name", 14 | "FieldName": 
"Family Name Text Box", 15 | "FieldType": "Text", 16 | "FieldJustification": "Left", 17 | "FieldMaxLength": "40", 18 | "FieldValue": "" 19 | }, 20 | { 21 | "FieldFlags": "0", 22 | "FieldNameAlt": "House and floor", 23 | "FieldName": "House nr Text Box", 24 | "FieldType": "Text", 25 | "FieldJustification": "Left", 26 | "FieldMaxLength": "20", 27 | "FieldValue": "" 28 | }, 29 | { 30 | "FieldFlags": "0", 31 | "FieldName": "Address 2 Text Box", 32 | "FieldType": "Text", 33 | "FieldJustification": "Left", 34 | "FieldMaxLength": "40", 35 | "FieldValue": "" 36 | }, 37 | { 38 | "FieldFlags": "0", 39 | "FieldName": "Postcode Text Box", 40 | "FieldType": "Text", 41 | "FieldJustification": "Left", 42 | "FieldMaxLength": "20", 43 | "FieldValue": "" 44 | }, 45 | { 46 | "FieldStateOption": ["Austria", "Belgium", "Britain", "Bulgaria", "Croatia", 47 | "Cyprus", "Czech-Republic", "Denmark", "Estonia", "Finland", "France", 48 | "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", 49 | "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", 50 | "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"], 51 | "FieldFlags": "393216", 52 | "FieldNameAlt": "Use selection or write country name", 53 | "FieldName": "Country Combo Box", 54 | "FieldType": "Choice", 55 | "FieldJustification": "Left", 56 | "FieldValue": "" 57 | }, 58 | { 59 | "FieldFlags": "0", 60 | "FieldNameAlt": "Value from 40 to 250 cm", 61 | "FieldName": "Height Formatted Field", 62 | "FieldType": "Text", 63 | "FieldJustification": "Left", 64 | "FieldMaxLength": "20", 65 | "FieldValueDefault": "150", 66 | "FieldValue": "150" 67 | }, 68 | { 69 | "FieldFlags": "0", 70 | "FieldName": "City Text Box", 71 | "FieldType": "Text", 72 | "FieldJustification": "Left", 73 | "FieldMaxLength": "40", 74 | "FieldValue": "" 75 | }, 76 | { 77 | "FieldStateOption": ["Off", "Yes"], 78 | "FieldFlags": "0", 79 | "FieldNameAlt": "Car driving license", 80 | "FieldName": "Driving License Check Box", 81 | "FieldType": 
"Button", 82 | "FieldJustification": "Left", 83 | "FieldValueDefault": "Off", 84 | "FieldValue": "Off" 85 | }, 86 | { 87 | "FieldStateOption": ["Black", "Blue", "Brown", "Green", "Grey", "Orange", "Red", "Violet", "White", "Yellow"], 88 | "FieldFlags": "131072", 89 | "FieldNameAlt": "Select from colour spectrum", 90 | "FieldName": "Favourite Colour List Box", 91 | "FieldType": "Choice", 92 | "FieldJustification": "Left", 93 | "FieldValueDefault": "Red", 94 | "FieldValue": "Red" 95 | }, 96 | { 97 | "FieldStateOption": ["Off", "Yes"], 98 | "FieldFlags": "0", 99 | "FieldName": "Language 1 Check Box", 100 | "FieldType": "Button", 101 | "FieldJustification": "Left", 102 | "FieldValueDefault": "Off", 103 | "FieldValue": "Off" 104 | }, 105 | { 106 | "FieldStateOption": ["Off", "Yes"], 107 | "FieldFlags": "0", 108 | "FieldName": "Language 2 Check Box", 109 | "FieldType": "Button", 110 | "FieldJustification": "Left", 111 | "FieldValueDefault": "Yes", 112 | "FieldValue": "Yes" 113 | }, 114 | { 115 | "FieldStateOption": ["Off", "Yes"], 116 | "FieldFlags": "0", 117 | "FieldName": "Language 3 Check Box", 118 | "FieldType": "Button", 119 | "FieldJustification": "Left", 120 | "FieldValueDefault": "Off", 121 | "FieldValue": "Yes" 122 | }, 123 | { 124 | "FieldStateOption": ["Off", "Yes"], 125 | "FieldFlags": "0", 126 | "FieldName": "Language 4 Check Box", 127 | "FieldType": "Button", 128 | "FieldJustification": "Left", 129 | "FieldValueDefault": "Off", 130 | "FieldValue": "Off" 131 | }, 132 | { 133 | "FieldStateOption": ["Off", "Yes"], 134 | "FieldFlags": "0", 135 | "FieldName": "Language 5 Check Box", 136 | "FieldType": "Button", 137 | "FieldJustification": "Left", 138 | "FieldValueDefault": "Off", 139 | "FieldValue": "Off" 140 | }, 141 | { 142 | "FieldStateOption": ["Man", "Woman"], 143 | "FieldFlags": "131072", 144 | "FieldNameAlt": "Select from list", 145 | "FieldName": "Gender List Box", 146 | "FieldType": "Choice", 147 | "FieldJustification": "Left", 148 | 
"FieldValueDefault": "Man", 149 | "FieldValue": "Man" 150 | }, 151 | { 152 | "FieldFlags": "0", 153 | "FieldName": "Address 1 Text Box", 154 | "FieldType": "Text", 155 | "FieldJustification": "Left", 156 | "FieldMaxLength": "40", 157 | "FieldValue": "" 158 | } 159 | ] 160 | -------------------------------------------------------------------------------- /test_files/form.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "FieldFlags": "0", 4 | "FieldNameAlt": "First name", 5 | "FieldName": "Given Name Text Box", 6 | "FieldType": "Text", 7 | "FieldJustification": "Left", 8 | "FieldMaxLength": "40", 9 | "FieldValue": "" 10 | }, 11 | { 12 | "FieldFlags": "0", 13 | "FieldNameAlt": "Last name", 14 | "FieldName": "Family Name Text Box", 15 | "FieldType": "Text", 16 | "FieldJustification": "Left", 17 | "FieldMaxLength": "40", 18 | "FieldValue": "" 19 | }, 20 | { 21 | "FieldFlags": "0", 22 | "FieldNameAlt": "House and floor", 23 | "FieldName": "House nr Text Box", 24 | "FieldType": "Text", 25 | "FieldJustification": "Left", 26 | "FieldMaxLength": "20", 27 | "FieldValue": "" 28 | }, 29 | { 30 | "FieldFlags": "0", 31 | "FieldName": "Address 2 Text Box", 32 | "FieldType": "Text", 33 | "FieldJustification": "Left", 34 | "FieldMaxLength": "40", 35 | "FieldValue": "" 36 | }, 37 | { 38 | "FieldFlags": "0", 39 | "FieldName": "Postcode Text Box", 40 | "FieldType": "Text", 41 | "FieldJustification": "Left", 42 | "FieldMaxLength": "20", 43 | "FieldValue": "" 44 | }, 45 | { 46 | "FieldStateOption": ["Austria", "Belgium", "Britain", "Bulgaria", "Croatia", 47 | "Cyprus", "Czech-Republic", "Denmark", "Estonia", "Finland", "France", 48 | "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", 49 | "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", 50 | "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"], 51 | "FieldFlags": "393216", 52 | "FieldNameAlt": "Use selection or write country name", 53 | "FieldName": 
"Country Combo Box", 54 | "FieldType": "Choice", 55 | "FieldJustification": "Left", 56 | "FieldValue": "" 57 | }, 58 | { 59 | "FieldFlags": "0", 60 | "FieldNameAlt": "Value from 40 to 250 cm", 61 | "FieldName": "Height Formatted Field", 62 | "FieldType": "Text", 63 | "FieldJustification": "Left", 64 | "FieldMaxLength": "20", 65 | "FieldValueDefault": "150", 66 | "FieldValue": "150" 67 | }, 68 | { 69 | "FieldFlags": "0", 70 | "FieldName": "City Text Box", 71 | "FieldType": "Text", 72 | "FieldJustification": "Left", 73 | "FieldMaxLength": "40", 74 | "FieldValue": "" 75 | }, 76 | { 77 | "FieldStateOption": ["Off", "Yes"], 78 | "FieldFlags": "0", 79 | "FieldNameAlt": "Car driving license", 80 | "FieldName": "Driving License Check Box", 81 | "FieldType": "Button", 82 | "FieldJustification": "Left", 83 | "FieldValueDefault": "Off", 84 | "FieldValue": "Off" 85 | }, 86 | { 87 | "FieldStateOption": ["Black", "Blue", "Brown", "Green", "Grey", "Orange", "Red", "Violet", "White", "Yellow"], 88 | "FieldFlags": "131072", 89 | "FieldNameAlt": "Select from colour spectrum", 90 | "FieldName": "Favourite Colour List Box", 91 | "FieldType": "Choice", 92 | "FieldJustification": "Left", 93 | "FieldValueDefault": "Red", 94 | "FieldValue": "Red" 95 | }, 96 | { 97 | "FieldStateOption": ["Off", "Yes"], 98 | "FieldFlags": "0", 99 | "FieldName": "Language 1 Check Box", 100 | "FieldType": "Button", 101 | "FieldJustification": "Left", 102 | "FieldValueDefault": "Off", 103 | "FieldValue": "Off" 104 | }, 105 | { 106 | "FieldStateOption": ["Off", "Yes"], 107 | "FieldFlags": "0", 108 | "FieldName": "Language 2 Check Box", 109 | "FieldType": "Button", 110 | "FieldJustification": "Left", 111 | "FieldValueDefault": "Yes", 112 | "FieldValue": "Yes" 113 | }, 114 | { 115 | "FieldStateOption": ["Off", "Yes"], 116 | "FieldFlags": "0", 117 | "FieldName": "Language 3 Check Box", 118 | "FieldType": "Button", 119 | "FieldJustification": "Left", 120 | "FieldValueDefault": "Off", 121 | "FieldValue": "Off" 122 | 
}, 123 | { 124 | "FieldStateOption": ["Off", "Yes"], 125 | "FieldFlags": "0", 126 | "FieldName": "Language 4 Check Box", 127 | "FieldType": "Button", 128 | "FieldJustification": "Left", 129 | "FieldValueDefault": "Off", 130 | "FieldValue": "Off" 131 | }, 132 | { 133 | "FieldStateOption": ["Off", "Yes"], 134 | "FieldFlags": "0", 135 | "FieldName": "Language 5 Check Box", 136 | "FieldType": "Button", 137 | "FieldJustification": "Left", 138 | "FieldValueDefault": "Off", 139 | "FieldValue": "Off" 140 | }, 141 | { 142 | "FieldStateOption": ["Man", "Woman"], 143 | "FieldFlags": "131072", 144 | "FieldNameAlt": "Select from list", 145 | "FieldName": "Gender List Box", 146 | "FieldType": "Choice", 147 | "FieldJustification": "Left", 148 | "FieldValueDefault": "Man", 149 | "FieldValue": "Man" 150 | }, 151 | { 152 | "FieldFlags": "0", 153 | "FieldName": "Address 1 Text Box", 154 | "FieldType": "Text", 155 | "FieldJustification": "Left", 156 | "FieldMaxLength": "40", 157 | "FieldValue": "" 158 | } 159 | ] 160 | -------------------------------------------------------------------------------- /test_files/form.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/test_files/form.pdf -------------------------------------------------------------------------------- /test_files/page_01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/test_files/page_01.pdf -------------------------------------------------------------------------------- /test_files/python-guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revolunet/pypdftk/cec246855b02fcbb9af5f95a39d6656ef2d110d3/test_files/python-guide.pdf 
-------------------------------------------------------------------------------- /test_files/simple.xfdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Paris 5 | juju 6 | 7 | --------------------------------------------------------------------------------