├── .gitignore
├── CHANGES.rst
├── MANIFEST.in
├── README.md
├── setup.cfg
├── setup.py
├── src
    ├── img2pdf.py
    ├── jp2.py
    └── tests
    │   ├── __init__.py
    │   ├── input
    │       ├── CMYK.jpg
    │       ├── CMYK.tif
    │       ├── animation.gif
    │       ├── mono.png
    │       ├── normal.jpg
    │       └── normal.png
    │   └── output
    │       ├── CMYK.jpg.pdf
    │       ├── CMYK.tif.pdf
    │       ├── animation.gif.pdf
    │       ├── mono.png.pdf
    │       ├── normal.jpg.pdf
    │       └── normal.png.pdf
├── test_comp.sh
└── tox.ini


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | build
3 | src/*.egg-info
4 | 
5 | .eggs
6 | .tox
7 | 


--------------------------------------------------------------------------------
/CHANGES.rst:
--------------------------------------------------------------------------------
  1 | =======
  2 | CHANGES
  3 | =======
  4 | 
  5 | 0.2.4
  6 | -----
  7 | 
  8 |  - Restore support for Python 2.7
  9 |  - Add support for PyPy
 10 |  - Add support for testing using tox
 11 | 
 12 | 0.2.3
 13 | -----
 14 | 
 15 |  - version number bump for botched pypi upload...
 16 | 
 17 | 0.2.2
 18 | -----
 19 | 
 20 |  - automatic monochrome CCITT Group4 encoding via Pillow/libtiff
 21 | 
 22 | 0.2.1
 23 | -----
 24 | 
 25 |  - set img2pdf as /producer value
 26 |  - support multi-frame images like multipage TIFF and animated GIF
 27 |  - support for palette images like GIF
 28 |  - support all colorspaces and imageformats known by PIL
 29 |  - read horizontal and vertical dpi from JPEG2000 files
 30 | 
 31 | 0.2.0
 32 | -----
 33 | 
 34 |  - now Python3 only
 35 |  - pep8 compliant code
 36 |  - update my email to josch@mister-muffin.de
 37 |  - move from github to gitlab.mister-muffin.de/josch/img2pdf
 38 |  - use logging module
 39 |  - add extensive test suite
 40 |  - ability to read from standard input
 41 |  - pdf writer:
 42 |       - make more compatible with the interface of pdfrw module
 43 |       - print floats which are equal to their integer conversion as integers
 44 |       - do not print trailing zeroes for floating point numbers
 45 |       - print more linebreaks
 46 |       - add binary string at beginning of PDF to indicate that the PDF
 47 |         contains binary data
 48 |       - handle datetime and unicode strings by using utf-16-be encoding
 49 |  - new options (see --help for more details):
 50 |       - --without-pdfrw
 51 |       - --imgsize
 52 |       - --border
 53 |       - --fit
 54 |       - --auto-orient
 55 |       - --viewer-panes
 56 |       - --viewer-initial-page
 57 |       - --viewer-magnification
 58 |       - --viewer-page-layout
 59 |       - --viewer-fit-window
 60 |       - --viewer-center-window
 61 |       - --viewer-fullscreen
 62 |  - remove short options for metadata command line arguments
 63 |  - correctly encode and escape non-ascii metadata
 64 |  - explicitly store date in UTC and allow parsing all date formats understood
 65 |    by dateutil and `date --date`
 66 | 
 67 | 0.1.5
 68 | -----
 69 | 
 70 | - Enable support for CMYK images
 71 | - Rework test suite
 72 | - support file objects as input
 73 | 
 74 | 0.1.4
 75 | -----
 76 | 
 77 | - add Python 3 support
 78 | - make output reproducible by sorting and --nodate option
 79 | 
 80 | 0.1.3
 81 | -----
 82 | 
 83 | - Avoid leaking file descriptors
 84 | - Convert unrecognized colorspaces to RGB
 85 | 
 86 | 0.1.1
 87 | -----
 88 | 
 89 | - allow running src/img2pdf.py standalone
 90 | - license change from GPL to LGPL
 91 | - Add pillow 2.4.0 support
 92 | - add options to specify pdf dimensions in points
 93 | 
 94 | 0.1.0 (unreleased)
 95 | ------------------
 96 | 
 97 | - Initial PyPI release.
 98 | - Modified code to create proper package.
 99 | - Added tests.
100 | - Added console script entry point.
101 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.md
 2 | include test_comp.sh
 3 | include CHANGES.rst
 4 | recursive-include src *.jpg
 5 | recursive-include src *.pdf
 6 | recursive-include src *.png
 7 | recursive-include src *.tif
 8 | recursive-include src *.gif
 9 | recursive-include src *.py
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | img2pdf
  2 | =======
  3 | 
  4 | Losslessly convert raster images to PDF. The file size will not unnecessarily
  5 | increase. It can for example be used to create a PDF document from a number of
  6 | scans that are only available in JPEG format. Existing solutions would either
  7 | re-encode the input JPEG files (leading to quality loss) or store them in the
  8 | zip/flate format which results in the PDF becoming unnecessarily large in
  9 | terms of its file size.
 10 | 
 11 | Background
 12 | ----------
 13 | 
 14 | Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by
 15 | embedding them into the PDF without re-encoding them. This is what img2pdf
 16 | does. It thus treats the PDF format merely as a container format for storing
 17 | one or more JPEGs without re-encoding the JPEG images themselves.
 18 | 
 19 | If you know an existing tool which allows one to embed JPEG and JPEG2000 images
 20 | into a PDF container without recompression, please contact me so that I can put
 21 | this code into the garbage bin.
 22 | 
 23 | Functionality
 24 | -------------
 25 | 
 26 | This program will take a list of raster images and produce a PDF file with the
 27 | images embedded in it.  JPEG and JPEG2000 images will be included without
 28 | recompression and the resulting PDF will only be slightly larger than the input
 29 | images due to the overhead of the PDF container.  Raster images in other
 30 | formats (like png, gif or tif) will be included using the lossless zip/flate
 31 | encoding which usually leads to a significant increase in the PDF size if the
 32 | input was for example a png image. This is unfortunately unavoidable because
 33 | there is no other way to store arbitrary RGB bitmaps in PDF in a lossless way
 34 | other than zip/flate encoding. And zip/flate compresses bitmaps worse than png
 35 | is able to compress them.
 36 | 
 37 | As a result, this tool is able to losslessly wrap raster images into a PDF
 38 | container with a quality to filesize ratio that is typically better (in case of
 39 | JPEG and JPEG2000 images) or equal (in case of other formats) than that of
 40 | existing tools.
 41 | 
 42 | For example, imagemagick will re-encode the input JPEG image (thus changing
 43 | its content):
 44 | 
 45 | 	$ convert img.jpg img.pdf
 46 | 	$ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression
 47 | 	$ compare -metric AE img.jpg img.extr-000.ppm null:
 48 | 	1.6301e+06
 49 | 
 50 | If one wants to losslessly convert from any format to PDF with
 51 | imagemagick, one has to use zip compression:
 52 | 
 53 | 	$ convert img.jpg -compress Zip img.pdf
 54 | 	$ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression
 55 | 	$ compare -metric AE img.jpg img.extr-000.ppm null:
 56 | 	0
 57 | 
 58 | However, this approach will result in PDF files that are a few times larger
 59 | than the input JPEG or JPEG2000 file.
 60 | 
 61 | img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF
 62 | container without additional overhead (aside from the PDF structure itself),
 63 | save other graphics formats using lossless zip compression, and produce
 64 | multi-page PDF files when more than one input image is given.
 65 | 
 66 | Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf
 67 | is several times faster than with other tools.
 68 | 
 69 | Usage
 70 | -----
 71 | 
 72 | The images must be provided as files because img2pdf needs to seek in the file
 73 | descriptor.
 74 | 
 75 | If no output file is specified with the `-o`/`--output` option, output will be
 76 | done to stdout. A typical invocation is:
 77 | 
 78 | 	img2pdf img1.png img2.jpg -o out.pdf
 79 | 
 80 | The detailed documentation can be accessed by running:
 81 | 
 82 | 	img2pdf --help
 83 | 
 84 | 
 85 | Bugs
 86 | ----
 87 | 
 88 | If you find a JPEG or JPEG2000 file that, when embedded, cannot be read
 89 | by the Adobe Acrobat Reader, please contact me.
 90 | 
 91 | For lossless conversion of formats other than JPEG or JPEG2000, zip/flate
 92 | encoding is used.  This choice is based on tests I did with a number of images.
 93 | I converted them into PDF using the lossless variants of the compression
 94 | formats offered by imagemagick.  In all my tests, zip/flate encoding performed
 95 | best.  You can verify my findings using the test_comp.sh script with any input
 96 | image given as a commandline argument.  If you find an input file that is
 97 | outperformed by another lossless compression method, contact me.
 98 | 
 99 | I have not yet figured out how to determine the colorspace of JPEG2000 files.
100 | Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with
101 | other colorspaces, you must explicitly specify it using the `--colorspace`
102 | option.
103 | 
104 | It might be possible to store transparency using masks but it is not clear
105 | what the utility of such a functionality would be.
106 | 
107 | Most vector graphic formats can be losslessly turned into PDF (minus some of
108 | the features unsupported by PDF) but img2pdf will currently turn vector
109 | graphics into their lossy raster representations. For converting vector
110 | graphics to PDF, use another tool like inkscape and then join the resulting
111 | pages with a tool like pdftk.
112 | 
113 | A configuration file could be used for default options.
114 | 
115 | Installation
116 | ------------
117 | 
118 | On Debian- and Ubuntu-based systems, dependencies may be installed
119 | with the following command:
120 | 
121 | 	apt-get install python3 python3-pil python3-setuptools
122 | 
123 | You can then install the package using:
124 | 
125 | 	$ pip3 install img2pdf
126 | 
127 | If you prefer to install from source code use:
128 | 
129 | 	$ cd img2pdf/
130 | 	$ pip3 install .
131 | 
132 | To test the console script without installing the package on your system,
133 | use virtualenv:
134 | 
135 | 	$ cd img2pdf/
136 | 	$ virtualenv ve
137 | 	$ ve/bin/pip3 install .
138 | 
139 | You can then test the converter using:
140 | 
141 | 	$ ve/bin/img2pdf -o test.pdf src/tests/input/normal.jpg
142 | 
143 | The package can also be used as a library:
144 | 
145 | 	import img2pdf
146 | 
147 | 	# opening from filename
148 | 	with open("name.pdf","wb") as f:
149 | 		f.write(img2pdf.convert('test.jpg'))
150 | 
151 | 	# opening from file handle
152 | 	with open("name.pdf","wb") as f1, open("test.jpg","rb") as f2:
153 | 		f1.write(img2pdf.convert(f2))
154 | 
155 | 	# using in-memory image data
156 | 	with open("name.pdf","wb") as f:
157 | 		f.write(img2pdf.convert("\x89PNG..."))
158 | 
159 | 	# multiple inputs (variant 1)
160 | 	with open("name.pdf","wb") as f:
161 | 		f.write(img2pdf.convert("test1.jpg", "test2.png"))
162 | 
163 | 	# multiple inputs (variant 2)
164 | 	with open("name.pdf","wb") as f:
165 | 		f.write(img2pdf.convert(["test1.jpg", "test2.png"]))
166 | 
167 | 	# writing to file descriptor
168 | 	with open("name.pdf","wb") as f1, open("test.jpg","rb") as f2:
169 | 		img2pdf.convert(f2, outputstream=f1)
170 | 
171 | 	# specify paper size (A4)
172 | 	a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297))
173 | 	layout_fun = img2pdf.get_layout_fun(a4inpt)
174 | 	with open("name.pdf","wb") as f:
175 | 		f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun))
176 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from setuptools import setup
 3 | 
 4 | PY3 = sys.version_info[0] >= 3
 5 | 
 6 | VERSION = "0.2.4"
 7 | 
 8 | INSTALL_REQUIRES = (
 9 |     'Pillow<7.1.0',
10 | )
11 | 
12 | TESTS_REQUIRE = (
13 |     'pdfrw',
14 | )
15 | 
16 | if not PY3:
17 |     INSTALL_REQUIRES += ('enum34',)
18 | 
19 | 
20 | setup(
21 |     name='img2pdf',
22 |     version=VERSION,
23 |     author="Johannes 'josch' Schauer",
24 |     author_email='josch@mister-muffin.de',
25 |     description="Convert images to PDF via direct JPEG inclusion.",
26 |     long_description=open('README.md').read(),
27 |     license="LGPL",
28 |     keywords="jpeg pdf converter",
29 |     classifiers=[
30 |         'Development Status :: 5 - Production/Stable',
31 |         'Intended Audience :: Developers',
32 |         'Intended Audience :: Other Audience',
33 |         'Environment :: Console',
34 |         'Programming Language :: Python',
35 |         'Programming Language :: Python :: 2',
36 |         'Programming Language :: Python :: 2.7',
37 |         'Programming Language :: Python :: 3',
38 |         'Programming Language :: Python :: 3.5',
39 |         'Programming Language :: Python :: Implementation :: CPython',
40 |         "Programming Language :: Python :: Implementation :: PyPy",
41 |         'License :: OSI Approved :: GNU Lesser General Public License v3 '
42 |         '(LGPLv3)',
43 |         'Natural Language :: English',
44 |         'Operating System :: OS Independent'],
45 |     url='https://gitlab.mister-muffin.de/josch/img2pdf',
46 |     download_url='https://gitlab.mister-muffin.de/josch/img2pdf/repository/'
47 |         'archive.tar.gz?ref=' + VERSION,
48 |     package_dir={"": "src"},
49 |     py_modules=['img2pdf', 'jp2'],
50 |     include_package_data=True,
51 |     test_suite='tests.test_suite',
52 |     zip_safe=True,
53 |     install_requires=INSTALL_REQUIRES,
54 |     tests_requires=TESTS_REQUIRE,
55 |     extras_require={
56 |         'test': TESTS_REQUIRE,
57 |     },
58 |     entry_points='''
59 |     [console_scripts]
60 |     img2pdf = img2pdf:main
61 |     ''',
62 |     )
63 | 


--------------------------------------------------------------------------------
/src/img2pdf.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python3
   2 | # -*- coding: utf-8 -*-
   3 | 
   4 | # Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
   5 | #
   6 | # This program is free software: you can redistribute it and/or
   7 | # modify it under the terms of the GNU Lesser General Public
   8 | # License as published by the Free Software Foundation, either
   9 | # version 3 of the License, or (at your option) any later
  10 | # version.
  11 | #
  12 | # This program is distributed in the hope that it will be useful,
  13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 | # GNU General Public License for more details.
  16 | #
  17 | # You should have received a copy of the GNU General Public
  18 | # License along with this program.  If not, see
  19 | # <http://www.gnu.org/licenses/>.
  20 | 
  21 | import sys
  22 | import os
  23 | import zlib
  24 | import argparse
  25 | from PIL import Image
  26 | from datetime import datetime
  27 | from jp2 import parsejp2
  28 | from enum import Enum
  29 | from io import BytesIO
  30 | import logging
  31 | 
  32 | PY3 = sys.version_info[0] >= 3
  33 | 
  34 | __version__ = "0.2.4"
  35 | default_dpi = 96.0
  36 | papersizes = {
  37 |     "letter": "8.5inx11in",
  38 |     "a0":     "841mmx1189mm",
  39 |     "a1":     "594mmx841mm",
  40 |     "a2":     "420mmx594mm",
  41 |     "a3":     "297mmx420mm",
  42 |     "a4":     "210mmx297mm",
  43 |     "a5":     "148mmx210mm",
  44 |     "a6":     "105mmx148mm",
  45 | }
  46 | papernames = {
  47 |     "letter": "Letter",
  48 |     "a0":     "A0",
  49 |     "a1":     "A1",
  50 |     "a2":     "A2",
  51 |     "a3":     "A3",
  52 |     "a4":     "A4",
  53 |     "a5":     "A5",
  54 |     "a6":     "A6",
  55 | }
  56 | 
  57 | 
  58 | FitMode = Enum('FitMode', 'into fill exact shrink enlarge')
  59 | 
  60 | PageOrientation = Enum('PageOrientation', 'portrait landscape')
  61 | 
  62 | Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other')
  63 | 
  64 | ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 other')
  65 | 
  66 | PageMode = Enum('PageMode', 'none outlines thumbs')
  67 | 
  68 | PageLayout = Enum('PageLayout',
  69 |                   'single onecolumn twocolumnright twocolumnleft')
  70 | 
  71 | Magnification = Enum('Magnification', 'fit fith fitbh')
  72 | 
  73 | ImgSize = Enum('ImgSize', 'abs perc dpi')
  74 | 
  75 | Unit = Enum('Unit', 'pt cm mm inch')
  76 | 
  77 | ImgUnit = Enum('ImgUnit', 'pt cm mm inch perc dpi')
  78 | 
  79 | 
  80 | class NegativeDimensionError(Exception):
  81 |     pass
  82 | 
  83 | 
  84 | class UnsupportedColorspaceError(Exception):
  85 |     pass
  86 | 
  87 | 
  88 | class ImageOpenError(Exception):
  89 |     pass
  90 | 
  91 | 
  92 | class JpegColorspaceError(Exception):
  93 |     pass
  94 | 
  95 | 
  96 | class PdfTooLargeError(Exception):
  97 |     pass
  98 | 
  99 | 
 100 | # without pdfrw this function is a no-op
 101 | def my_convert_load(string):
 102 |     return string
 103 | 
 104 | 
 105 | def parse(cont, indent=1):
 106 |     if type(cont) is dict:
 107 |         return b"<<\n"+b"\n".join(
 108 |             [4 * indent * b" " + k + b" " + parse(v, indent+1)
 109 |              for k, v in sorted(cont.items())])+b"\n"+4*(indent-1)*b" "+b">>"
 110 |     elif type(cont) is int:
 111 |         return str(cont).encode()
 112 |     elif type(cont) is float:
 113 |         if int(cont) == cont:
 114 |             return parse(int(cont))
 115 |         else:
 116 |             return ("%0.4f" % cont).rstrip("0").encode()
 117 |     elif isinstance(cont, MyPdfDict):
 118 |         # if cont got an identifier, then addobj() has been called with it
 119 |         # and a link to it will be added, otherwise add it inline
 120 |         if hasattr(cont, "identifier"):
 121 |             return ("%d 0 R" % cont.identifier).encode()
 122 |         else:
 123 |             return parse(cont.content, indent)
 124 |     elif type(cont) is str or isinstance(cont, bytes):
 125 |         if type(cont) is str and type(cont) is not bytes:
 126 |             raise TypeError(
 127 |                 "parse must be passed a bytes object in py3. Got: %s" % cont)
 128 |         return cont
 129 |     elif isinstance(cont, list):
 130 |         return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]"
 131 |     else:
 132 |         raise TypeError("cannot handle type %s with content %s" % (type(cont),
 133 |                                                                    cont))
 134 | 
 135 | 
 136 | class MyPdfDict(object):
 137 |     def __init__(self, *args, **kw):
 138 |         self.content = dict()
 139 |         if args:
 140 |             if len(args) == 1:
 141 |                 args = args[0]
 142 |             self.content.update(args)
 143 |         self.stream = None
 144 |         for key, value in kw.items():
 145 |             if key == "stream":
 146 |                 self.stream = value
 147 |                 self.content[MyPdfName.Length] = len(value)
 148 |             elif key == "indirect":
 149 |                 pass
 150 |             else:
 151 |                 self.content[getattr(MyPdfName, key)] = value
 152 | 
 153 |     def tostring(self):
 154 |         if self.stream is not None:
 155 |             return (
 156 |                 ("%d 0 obj\n" % self.identifier).encode() +
 157 |                 parse(self.content) +
 158 |                 b"\nstream\n" + self.stream + b"\nendstream\nendobj\n")
 159 |         else:
 160 |             return ("%d 0 obj\n" % self.identifier).encode() + \
 161 |                    parse(self.content) + b"\nendobj\n"
 162 | 
 163 |     def __setitem__(self, key, value):
 164 |         self.content[key] = value
 165 | 
 166 |     def __getitem__(self, key):
 167 |         return self.content[key]
 168 | 
 169 | 
 170 | class MyPdfName():
 171 |     def __getattr__(self, name):
 172 |         return b'/' + name.encode('ascii')
 173 | 
 174 | 
 175 | MyPdfName = MyPdfName()
 176 | 
 177 | 
 178 | class MyPdfObject(bytes):
 179 |     def __new__(cls, string):
 180 |         return bytes.__new__(cls, string.encode('ascii'))
 181 | 
 182 | 
 183 | class MyPdfArray(list):
 184 |     pass
 185 | 
 186 | 
 187 | class MyPdfWriter():
 188 |     def __init__(self, version="1.3"):
 189 |         self.objects = []
 190 |         # create an incomplete pages object so that a /Parent entry can be
 191 |         # added to each page
 192 |         self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
 193 |         self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
 194 |         self.version = version  # default pdf version 1.3
 195 |         self.pagearray = []
 196 | 
 197 |     def addobj(self, obj):
 198 |         newid = len(self.objects)+1
 199 |         obj.identifier = newid
 200 |         self.objects.append(obj)
 201 | 
 202 |     def tostream(self, info, stream):
 203 |         xreftable = list()
 204 | 
 205 |         # justification of the random binary garbage in the header from
 206 |         # adobe:
 207 |         #
 208 |         #  > Note: If a PDF file contains binary data, as most do (see Section
 209 |         #  > 3.1, “Lexical Conventions”), it is recommended that the header
 210 |         #  > line be immediately followed by a comment line containing at
 211 |         #  > least four binary characters—that is, characters whose codes are
 212 |         #  > 128 or greater. This ensures proper behavior of file transfer
 213 |         #  > applications that inspect data near the beginning of a file to
 214 |         #  > determine whether to treat the file’s contents as text or as
 215 |         #  > binary.
 216 |         #
 217 |         # the choice of binary characters is arbitrary but those four seem to
 218 |         # be used elsewhere.
 219 |         pdfheader = ('%%PDF-%s\n' % self.version).encode('ascii')
 220 |         pdfheader += b'%\xe2\xe3\xcf\xd3\n'
 221 |         stream.write(pdfheader)
 222 | 
 223 |         # From section 3.4.3 of the PDF Reference (version 1.7):
 224 |         #
 225 |         #  > Each entry is exactly 20 bytes long, including the end-of-line
 226 |         #  > marker.
 227 |         #  >
 228 |         #  > [...]
 229 |         #  >
 230 |         #  > The format of an in-use entry is
 231 |         #  > nnnnnnnnnn ggggg n eol
 232 |         #  > where
 233 |         #  > nnnnnnnnnn is a 10-digit byte offset
 234 |         #  > ggggg is a 5-digit generation number
 235 |         #  > n is a literal keyword identifying this as an in-use entry
 236 |         #  > eol is a 2-character end-of-line sequence
 237 |         #  >
 238 |         #  > [...]
 239 |         #  >
 240 |         #  > If the file’s end-of-line marker is a single character (either a
 241 |         #  > carriage return or a line feed), it is preceded by a single space;
 242 |         #
 243 |         # Since we chose to use a single character eol marker, we precede it by
 244 |         # a space
 245 |         pos = len(pdfheader)
 246 |         xreftable.append(b"0000000000 65535 f \n")
 247 |         for o in self.objects:
 248 |             xreftable.append(("%010d 00000 n \n" % pos).encode())
 249 |             content = o.tostring()
 250 |             stream.write(content)
 251 |             pos += len(content)
 252 | 
 253 |         xrefoffset = pos
 254 |         stream.write(b"xref\n")
 255 |         stream.write(("0 %d\n" % len(xreftable)).encode())
 256 |         for x in xreftable:
 257 |             stream.write(x)
 258 |         stream.write(b"trailer\n")
 259 |         stream.write(parse({b"/Size": len(xreftable), b"/Info": info,
 260 |                             b"/Root": self.catalog})+b"\n")
 261 |         stream.write(b"startxref\n")
 262 |         stream.write(("%d\n" % xrefoffset).encode())
 263 |         stream.write(b"%%EOF\n")
 264 |         return
 265 | 
 266 |     def addpage(self, page):
 267 |         page[b"/Parent"] = self.pages
 268 |         self.pagearray.append(page)
 269 |         self.pages.content[b"/Kids"].append(page)
 270 |         self.pages.content[b"/Count"] += 1
 271 |         self.addobj(page)
 272 | 
 273 | 
 274 | if PY3:
 275 |     class MyPdfString():
 276 |         @classmethod
 277 |         def encode(cls, string):
 278 |             try:
 279 |                 string = string.encode('ascii')
 280 |             except UnicodeEncodeError:
 281 |                 string = b"\xfe\xff"+string.encode("utf-16-be")
 282 |             string = string.replace(b'\\', b'\\\\')
 283 |             string = string.replace(b'(', b'\\(')
 284 |             string = string.replace(b')', b'\\)')
 285 |             return b'(' + string + b')'
 286 | else:
 287 |     class MyPdfString(object):
 288 |         @classmethod
 289 |         def encode(cls, string):
 290 |             # This mimics exactely to what pdfrw does.
 291 |             string = string.replace(b'\\', b'\\\\')
 292 |             string = string.replace(b'(', b'\\(')
 293 |             string = string.replace(b')', b'\\)')
 294 |             return b'(' + string + b')'
 295 | 
 296 | 
 297 | class pdfdoc(object):
 298 |     def __init__(self, version="1.3", title=None, author=None, creator=None,
 299 |                  producer=None, creationdate=None, moddate=None, subject=None,
 300 |                  keywords=None, nodate=False, panes=None, initial_page=None,
 301 |                  magnification=None, page_layout=None, fit_window=False,
 302 |                  center_window=False, fullscreen=False, with_pdfrw=True):
 303 |         if with_pdfrw:
 304 |             try:
 305 |                 from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
 306 |                 self.with_pdfrw = True
 307 |             except ImportError:
 308 |                 PdfWriter = MyPdfWriter
 309 |                 PdfDict = MyPdfDict
 310 |                 PdfName = MyPdfName
 311 |                 PdfString = MyPdfString
 312 |                 self.with_pdfrw = False
 313 |         else:
 314 |             PdfWriter = MyPdfWriter
 315 |             PdfDict = MyPdfDict
 316 |             PdfName = MyPdfName
 317 |             PdfString = MyPdfString
 318 |             self.with_pdfrw = False
 319 | 
 320 |         now = datetime.now()
 321 |         self.info = PdfDict(indirect=True)
 322 | 
 323 |         def datetime_to_pdfdate(dt):
 324 |             return dt.strftime("%Y%m%d%H%M%SZ")
 325 | 
 326 |         if title is not None:
 327 |             self.info[PdfName.Title] = PdfString.encode(title)
 328 |         if author is not None:
 329 |             self.info[PdfName.Author] = PdfString.encode(author)
 330 |         if creator is not None:
 331 |             self.info[PdfName.Creator] = PdfString.encode(creator)
 332 |         if producer is not None and producer != "":
 333 |             self.info[PdfName.Producer] = PdfString.encode(producer)
 334 |         if creationdate is not None:
 335 |             self.info[PdfName.CreationDate] = \
 336 |                 PdfString.encode("D:"+datetime_to_pdfdate(creationdate))
 337 |         elif not nodate:
 338 |             self.info[PdfName.CreationDate] = \
 339 |                 PdfString.encode("D:"+datetime_to_pdfdate(now))
 340 |         if moddate is not None:
 341 |             self.info[PdfName.ModDate] = \
 342 |                 PdfString.encode("D:"+datetime_to_pdfdate(moddate))
 343 |         elif not nodate:
 344 |             self.info[PdfName.ModDate] = PdfString.encode(
 345 |                     "D:"+datetime_to_pdfdate(now))
 346 |         if subject is not None:
 347 |             self.info[PdfName.Subject] = PdfString.encode(subject)
 348 |         if keywords is not None:
 349 |             self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords))
 350 | 
 351 |         self.writer = PdfWriter()
 352 |         self.writer.version = version
 353 |         # this is done because pdfrw adds info, catalog and pages as the first
 354 |         # three objects in this order
 355 |         if not self.with_pdfrw:
 356 |             self.writer.addobj(self.info)
 357 |             self.writer.addobj(self.writer.catalog)
 358 |             self.writer.addobj(self.writer.pages)
 359 | 
 360 |         self.panes = panes
 361 |         self.initial_page = initial_page
 362 |         self.magnification = magnification
 363 |         self.page_layout = page_layout
 364 |         self.fit_window = fit_window
 365 |         self.center_window = center_window
 366 |         self.fullscreen = fullscreen
 367 | 
    def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata,
                      imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth,
                      pageheight):
        """Append a page showing a single image to the document.

        color -- Colorspace member of the image data
        imgwidthpx, imgheightpx -- written as /Width and /Height of the image
        imgformat -- ImageFormat member which selects the stream filter
        imgdata -- content of the image stream
            NOTE(review): presumably already encoded to match imgformat --
            confirm against the caller
        imgwidthpdf, imgheightpdf -- scale factors of the transformation
            matrix used for drawing the image
        imgxpdf, imgypdf -- translation of that transformation matrix
        pagewidth, pageheight -- upper right corner of the page's /MediaBox

        Raises UnsupportedColorspaceError if color is not one of 1, L, RGB,
        CMYK or CMYK;I.
        """
        if self.with_pdfrw:
            from pdfrw import PdfDict, PdfName, PdfObject
            from pdfrw.py23_diffs import convert_load
        else:
            PdfDict = MyPdfDict
            PdfName = MyPdfName
            PdfObject = MyPdfObject
            convert_load = my_convert_load

        # map the colorspace of the image to a PDF device colorspace
        if color == Colorspace['1'] or color == Colorspace.L:
            colorspace = PdfName.DeviceGray
        elif color == Colorspace.RGB:
            colorspace = PdfName.DeviceRGB
        elif color == Colorspace.CMYK or color == Colorspace['CMYK;I']:
            colorspace = PdfName.DeviceCMYK
        else:
            raise UnsupportedColorspaceError("unsupported color space: %s"
                                             % color.name)

        # either embed the whole jpeg or deflate the bitmap representation
        logging.debug(imgformat)
        if imgformat is ImageFormat.JPEG:
            ofilter = [PdfName.DCTDecode]
        elif imgformat is ImageFormat.JPEG2000:
            ofilter = [PdfName.JPXDecode]
            self.writer.version = "1.5"  # jpeg2000 needs pdf 1.5
        elif imgformat is ImageFormat.CCITTGroup4:
            ofilter = [PdfName.CCITTFaxDecode]
        else:
            ofilter = [PdfName.FlateDecode]

        # the image XObject which carries the image data as its stream
        image = PdfDict(stream=convert_load(imgdata))

        image[PdfName.Type] = PdfName.XObject
        image[PdfName.Subtype] = PdfName.Image
        image[PdfName.Filter] = ofilter
        image[PdfName.Width] = imgwidthpx
        image[PdfName.Height] = imgheightpx
        image[PdfName.ColorSpace] = colorspace
        # hardcoded as PIL doesn't provide bits for non-jpeg formats
        if imgformat is ImageFormat.CCITTGroup4:
            image[PdfName.BitsPerComponent] = 1
        else:
            image[PdfName.BitsPerComponent] = 8

        if color == Colorspace['CMYK;I']:
            # Inverts all four channels
            image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]

        # parameters for the CCITTFaxDecode filter
        if imgformat is ImageFormat.CCITTGroup4:
            decodeparms = PdfDict()
            decodeparms[PdfName.K] = -1
            decodeparms[PdfName.BlackIs1] = PdfObject('true')
            decodeparms[PdfName.Columns] = imgwidthpx
            decodeparms[PdfName.Rows] = imgheightpx
            image[PdfName.DecodeParms] = [decodeparms]

        # content stream of the page: save the graphics state, set the
        # transformation matrix to scale and position the image, draw the
        # image registered as /Im0 and restore the graphics state
        text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" %
                (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii")

        content = PdfDict(stream=convert_load(text))
        resources = PdfDict(XObject=PdfDict(Im0=image))

        page = PdfDict(indirect=True)
        page[PdfName.Type] = PdfName.Page
        page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
        page[PdfName.Resources] = resources
        page[PdfName.Contents] = content

        self.writer.addpage(page)

        # the writer of this module only writes objects that were registered
        # through addobj(), so the two streams have to be added explicitly
        if not self.with_pdfrw:
            self.writer.addobj(content)
            self.writer.addobj(image)
 445 | 
 446 |     def tostring(self):
 447 |         stream = BytesIO()
 448 |         self.tostream(stream)
 449 |         return stream.getvalue()
 450 | 
    def tostream(self, outputstream):
        """Finalize the document catalog and write the PDF to outputstream.

        The viewer related catalog entries (/ViewerPreferences, /PageMode,
        /OpenAction and /PageLayout) are derived from the attributes
        self.fullscreen, self.fit_window, self.center_window, self.panes,
        self.initial_page, self.magnification and self.page_layout. After
        that the document is serialized through self.writer, either using
        pdfrw (if self.with_pdfrw is set) or the built-in writer.

        Raises ValueError if self.panes, self.magnification or
        self.page_layout hold a value that is not handled below.
        """
        # pick the PDF object classes matching the selected backend
        if self.with_pdfrw:
            from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
        else:
            PdfDict = MyPdfDict
            PdfName = MyPdfName
            PdfObject = MyPdfObject
            PdfArray = MyPdfArray
        NullObject = PdfObject('null')
        TrueObject = PdfObject('true')

        # The catalog is only filled with additional information like
        # /ViewerPreferences, /PageMode, /PageLayout or /OpenAction at this
        # point (and not earlier) because the latter refers to a page object
        # which has to be present so that we can get its id.
        #
        # Furthermore, if using pdfrw, the trailer is cleared every time a page
        # is added, so we can only start using it after all pages have been
        # written.

        if self.with_pdfrw:
            catalog = self.writer.trailer.Root
        else:
            catalog = self.writer.catalog

        # the /ViewerPreferences dictionary is only created if at least one
        # of the settings stored in it was requested
        if self.fullscreen or self.fit_window or self.center_window or \
                self.panes is not None:
            catalog[PdfName.ViewerPreferences] = PdfDict()

        if self.fullscreen:
            # this setting might be overwritten later by the page mode
            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
                    = PdfName.UseNone

        if self.panes == PageMode.thumbs:
            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
                    = PdfName.UseThumbs
            # this setting might be overwritten later if fullscreen
            catalog[PdfName.PageMode] = PdfName.UseThumbs
        elif self.panes == PageMode.outlines:
            catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \
                    = PdfName.UseOutlines
            # this setting might be overwritten later if fullscreen
            catalog[PdfName.PageMode] = PdfName.UseOutlines
        elif self.panes in [PageMode.none, None]:
            pass
        else:
            raise ValueError("unknown page mode: %s" % self.panes)

        if self.fit_window:
            catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject

        if self.center_window:
            catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = \
                    TrueObject

        # fullscreen is handled last so that it takes precedence over any
        # /PageMode that was set by the panes setting above
        if self.fullscreen:
            catalog[PdfName.PageMode] = PdfName.FullScreen

        # see table 8.2 in section 8.2.1 in
        # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf
        # Fit - Fits the page to the window.
        # FitH - Fits the width of the page to the window.
        # FitV - Fits the height of the page to the window.
        # FitR - Fits the rectangle specified by the four coordinates to the
        #        window.
        # FitB - Fits the page bounding box to the window. This basically
        #        reduces the amount of whitespace (margins) that is displayed
        #        and thus focussing more on the text content.
        # FitBH - Fits the width of the page bounding box to the window.
        # FitBV - Fits the height of the page bounding box to the window.

        # by default the initial page is the first one
        # NOTE(review): presumably this raises IndexError if no page was
        # added before - confirm how the writer behaves without pages
        initial_page = self.writer.pagearray[0]
        # we set the open action here to make sure we open on the requested
        # initial page but this value might be overwritten by a custom open
        # action later while still taking the requested initial page into
        # account
        if self.initial_page is not None:
            # self.initial_page is one-based while pagearray is zero-based
            initial_page = self.writer.pagearray[self.initial_page - 1]
            catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.XYZ,
                                                    NullObject, NullObject, 0])

        if self.magnification == Magnification.fit:
            catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
        elif self.magnification == Magnification.fith:
            # NOTE(review): the value passed along with /FitH is the third
            # /MediaBox entry (the page width) - confirm that this is the
            # operand the PDF reference expects for /FitH
            pagewidth = initial_page[PdfName.MediaBox][2]
            catalog[PdfName.OpenAction] = PdfArray(
                [initial_page, PdfName.FitH, pagewidth])
        elif self.magnification == Magnification.fitbh:
            # quick hack to determine the image width on the page
            # NOTE(review): in the content stream written by add_imagepage
            # ("q\n<w> 0 0 <h> <x> <y> cm ...") the token at index 4 is the
            # image height and not its width - confirm which one is intended
            imgwidth = float(initial_page[PdfName.Contents].stream.split()[4])
            catalog[PdfName.OpenAction] = PdfArray(
                [initial_page, PdfName.FitBH, imgwidth])
        elif isinstance(self.magnification, float):
            # a plain float is used as the zoom factor of an /XYZ destination
            catalog[PdfName.OpenAction] = PdfArray(
                [initial_page, PdfName.XYZ, NullObject, NullObject,
                 self.magnification])
        elif self.magnification is None:
            pass
        else:
            raise ValueError("unknown magnification: %s" % self.magnification)

        if self.page_layout == PageLayout.single:
            catalog[PdfName.PageLayout] = PdfName.SinglePage
        elif self.page_layout == PageLayout.onecolumn:
            catalog[PdfName.PageLayout] = PdfName.OneColumn
        elif self.page_layout == PageLayout.twocolumnright:
            catalog[PdfName.PageLayout] = PdfName.TwoColumnRight
        elif self.page_layout == PageLayout.twocolumnleft:
            catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft
        elif self.page_layout is None:
            pass
        else:
            raise ValueError("unknown page layout: %s" % self.page_layout)

        # now write out the PDF
        if self.with_pdfrw:
            # with pdfrw the document information dictionary is attached to
            # the trailer right before writing
            self.writer.trailer.Info = self.info
            self.writer.write(outputstream)
        else:
            self.writer.tostream(self.info, outputstream)
 572 | 
 573 | 
 574 | def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
 575 |     if imgformat == ImageFormat.JPEG2000 \
 576 |             and rawdata is not None and imgdata is None:
 577 |         # this codepath gets called if the PIL installation is not able to
 578 |         # handle JPEG2000 files
 579 |         imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
 580 | 
 581 |         if hdpi is None:
 582 |             hdpi = default_dpi
 583 |         if vdpi is None:
 584 |             vdpi = default_dpi
 585 |         ndpi = (hdpi, vdpi)
 586 |     else:
 587 |         imgwidthpx, imgheightpx = imgdata.size
 588 | 
 589 |         ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
 590 |         # In python3, the returned dpi value for some tiff images will
 591 |         # not be an integer but a float. To make the behaviour of
 592 |         # img2pdf the same between python2 and python3, we convert that
 593 |         # float into an integer by rounding.
 594 |         # Search online for the 72.009 dpi problem for more info.
 595 |         ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
 596 |         ics = imgdata.mode
 597 | 
 598 |     logging.debug("input dpi = %d x %d", *ndpi)
 599 | 
 600 |     if colorspace:
 601 |         color = colorspace
 602 |         logging.debug("input colorspace (forced) = %s", color)
 603 |     else:
 604 |         color = None
 605 |         for c in Colorspace:
 606 |             if c.name == ics:
 607 |                 color = c
 608 |         if color is None:
 609 |             color = Colorspace.other
 610 |         if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG:
 611 |             # Adobe inverts CMYK JPEGs for some reason, and others
 612 |             # have followed suit as well. Some software assumes the
 613 |             # JPEG is inverted if the Adobe tag (APP14), while other
 614 |             # software assumes all CMYK JPEGs are inverted. I don't
 615 |             # have enough experience with these to know which is
 616 |             # better for images currently in the wild, so I'm going
 617 |             # with the first approach for now.
 618 |             if "adobe" in imgdata.info:
 619 |                 color = Colorspace['CMYK;I']
 620 |         logging.debug("input colorspace = %s", color.name)
 621 | 
 622 |     logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
 623 | 
 624 |     return (color, ndpi, imgwidthpx, imgheightpx)
 625 | 
 626 | 
 627 | def transcode_monochrome(imgdata):
 628 |     """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""
 629 | 
 630 |     from PIL import TiffImagePlugin
 631 | 
 632 |     logging.debug("Converting monochrome to CCITT Group4")
 633 | 
 634 |     # Convert the image to Group 4 in memory. If libtiff is not installed and
 635 |     # Pillow is not compiled against it, .save() will raise an exception.
 636 |     newimgio = BytesIO()
 637 |     imgdata.save(newimgio, format='TIFF', compression='group4')
 638 | 
 639 |     # Open new image in memory
 640 |     newimgio.seek(0)
 641 |     newimg = Image.open(newimgio)
 642 | 
 643 |     # If Pillow is passed an invalid compression argument it will ignore it;
 644 |     # make sure the image actually got compressed.
 645 |     if newimg.info['compression'] != 'group4':
 646 |         raise ValueError("Image not compressed as expected")
 647 | 
 648 |     # Read the TIFF tags to find the offset(s) of the compressed data strips.
 649 |     strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS]
 650 |     strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
 651 |     rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP]
 652 | 
 653 |     # PIL always seems to create a single strip even for very large TIFFs when
 654 |     # it saves images, so assume we only have to read a single strip.
 655 |     # A test ~10 GPixel image was still encoded as a single strip. Just to be
 656 |     # safe check throw an error if there is more than one offset.
 657 |     if len(strip_offsets) > 1:
 658 |         raise NotImplementedError("Transcoding multiple strips not supported")
 659 | 
 660 |     newimgio.seek(strip_offsets[0])
 661 |     ccittdata = newimgio.read(strip_bytes[0])
 662 | 
 663 |     return ccittdata
 664 | 
 665 | 
 666 | def read_images(rawdata, colorspace, first_frame_only=False):
 667 |     im = BytesIO(rawdata)
 668 |     im.seek(0)
 669 |     imgdata = None
 670 |     try:
 671 |         imgdata = Image.open(im)
 672 |     except IOError as e:
 673 |         # test if it is a jpeg2000 image
 674 |         if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
 675 |             raise ImageOpenError("cannot read input image (not jpeg2000). "
 676 |                                  "PIL: error reading image: %s" % e)
 677 |         # image is jpeg2000
 678 |         imgformat = ImageFormat.JPEG2000
 679 |     else:
 680 |         imgformat = None
 681 |         for f in ImageFormat:
 682 |             if f.name == imgdata.format:
 683 |                 imgformat = f
 684 |         if imgformat is None:
 685 |             imgformat = ImageFormat.other
 686 | 
 687 |     logging.debug("imgformat = %s", imgformat.name)
 688 | 
 689 |     # depending on the input format, determine whether to pass the raw
 690 |     # image or the zlib compressed color information
 691 |     if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
 692 |         color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
 693 |                 imgdata, imgformat, default_dpi, colorspace, rawdata)
 694 |         if color == Colorspace['1']:
 695 |             raise JpegColorspaceError("jpeg can't be monochrome")
 696 |         if color == Colorspace['P']:
 697 |             raise JpegColorspaceError("jpeg can't have a color palette")
 698 |         if color == Colorspace['RGBA']:
 699 |             raise JpegColorspaceError("jpeg can't have an alpha channel")
 700 |         im.close()
 701 |         return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx)]
 702 |     else:
 703 |         result = []
 704 |         img_page_count = 0
 705 |         # loop through all frames of the image (example: multipage TIFF)
 706 |         while True:
 707 |             try:
 708 |                 imgdata.seek(img_page_count)
 709 |             except EOFError:
 710 |                 break
 711 | 
 712 |             if first_frame_only and img_page_count > 0:
 713 |                 break
 714 | 
 715 |             logging.debug("Converting frame: %d" % img_page_count)
 716 | 
 717 |             color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
 718 |                     imgdata, imgformat, default_dpi, colorspace)
 719 | 
 720 |             newimg = None
 721 |             if color == Colorspace['1']:
 722 |                 try:
 723 |                     ccittdata = transcode_monochrome(imgdata)
 724 |                     imgformat = ImageFormat.CCITTGroup4
 725 |                     result.append((color, ndpi, imgformat, ccittdata,
 726 |                                    imgwidthpx, imgheightpx))
 727 |                     img_page_count += 1
 728 |                     continue
 729 |                 except Exception as e:
 730 |                     logging.debug(e)
 731 |                     logging.debug("Converting colorspace 1 to L")
 732 |                     newimg = imgdata.convert('L')
 733 |                     color = Colorspace.L
 734 |             elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
 735 |                            Colorspace["CMYK;I"]]:
 736 |                 logging.debug("Colorspace is OK: %s", color)
 737 |                 newimg = imgdata
 738 |             elif color in [Colorspace.RGBA, Colorspace.P, Colorspace.other]:
 739 |                 logging.debug("Converting colorspace %s to RGB", color)
 740 |                 newimg = imgdata.convert('RGB')
 741 |                 color = Colorspace.RGB
 742 |             else:
 743 |                 raise ValueError("unknown colorspace: %s" % color.name)
 744 |             imggz = zlib.compress(newimg.tobytes())
 745 |             result.append((color, ndpi, imgformat, imggz, imgwidthpx,
 746 |                            imgheightpx))
 747 |             img_page_count += 1
 748 |         # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
 749 |         # close() method
 750 |         try:
 751 |             imgdata.close()
 752 |         except AttributeError:
 753 |             pass
 754 |         im.close()
 755 |         return result
 756 | 
 757 | 
 758 | # converts a length in pixels to a length in PDF units (1/72 of an inch)
 759 | def px_to_pt(length, dpi):
 760 |     return 72.0*length/dpi
 761 | 
 762 | 
 763 | def cm_to_pt(length):
 764 |     return (72.0*length)/2.54
 765 | 
 766 | 
 767 | def mm_to_pt(length):
 768 |     return (72.0*length)/25.4
 769 | 
 770 | 
 771 | def in_to_pt(length):
 772 |     return 72.0*length
 773 | 
 774 | 
 775 | def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None,
 776 |                    auto_orient=False):
 777 |     def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight):
 778 |         if fitwidth is None and fitheight is None:
 779 |             raise ValueError("fitwidth and fitheight cannot both be None")
 780 |         # if fit is fill or enlarge then it is okay if one of the dimensions
 781 |         # are negative but one of them must still be positive
 782 |         # if fit is not fill or enlarge then both dimensions must be positive
 783 |         if fit in [FitMode.fill, FitMode.enlarge] and \
 784 |                 fitwidth is not None and fitwidth < 0 and \
 785 |                 fitheight is not None and fitheight < 0:
 786 |             raise ValueError("cannot fit into a rectangle where both "
 787 |                              "dimensions are negative")
 788 |         elif fit not in [FitMode.fill, FitMode.enlarge] and \
 789 |                 ((fitwidth is not None and fitwidth < 0) or
 790 |                     (fitheight is not None and fitheight < 0)):
 791 |             raise Exception("cannot fit into a rectangle where either "
 792 |                             "dimensions are negative")
 793 | 
 794 |         def default():
 795 |             if fitwidth is not None and fitheight is not None:
 796 |                 newimgwidth = fitwidth
 797 |                 newimgheight = (newimgwidth * imgheight)/imgwidth
 798 |                 if newimgheight > fitheight:
 799 |                     newimgheight = fitheight
 800 |                     newimgwidth = (newimgheight * imgwidth)/imgheight
 801 |             elif fitwidth is None and fitheight is not None:
 802 |                 newimgheight = fitheight
 803 |                 newimgwidth = (newimgheight * imgwidth)/imgheight
 804 |             elif fitheight is None and fitwidth is not None:
 805 |                 newimgwidth = fitwidth
 806 |                 newimgheight = (newimgwidth * imgheight)/imgwidth
 807 |             else:
 808 |                 raise ValueError("fitwidth and fitheight cannot both be None")
 809 |             return newimgwidth, newimgheight
 810 |         if fit is None or fit == FitMode.into:
 811 |             return default()
 812 |         elif fit == FitMode.fill:
 813 |             if fitwidth is not None and fitheight is not None:
 814 |                 newimgwidth = fitwidth
 815 |                 newimgheight = (newimgwidth * imgheight)/imgwidth
 816 |                 if newimgheight < fitheight:
 817 |                     newimgheight = fitheight
 818 |                     newimgwidth = (newimgheight * imgwidth)/imgheight
 819 |             elif fitwidth is None and fitheight is not None:
 820 |                 newimgheight = fitheight
 821 |                 newimgwidth = (newimgheight * imgwidth)/imgheight
 822 |             elif fitheight is None and fitwidth is not None:
 823 |                 newimgwidth = fitwidth
 824 |                 newimgheight = (newimgwidth * imgheight)/imgwidth
 825 |             else:
 826 |                 raise ValueError("fitwidth and fitheight cannot both be None")
 827 |             return newimgwidth, newimgheight
 828 |         elif fit == FitMode.exact:
 829 |             if fitwidth is not None and fitheight is not None:
 830 |                 return fitwidth, fitheight
 831 |             elif fitwidth is None and fitheight is not None:
 832 |                 newimgheight = fitheight
 833 |                 newimgwidth = (newimgheight * imgwidth)/imgheight
 834 |             elif fitheight is None and fitwidth is not None:
 835 |                 newimgwidth = fitwidth
 836 |                 newimgheight = (newimgwidth * imgheight)/imgwidth
 837 |             else:
 838 |                 raise ValueError("fitwidth and fitheight cannot both be None")
 839 |             return newimgwidth, newimgheight
 840 |         elif fit == FitMode.shrink:
 841 |             if fitwidth is not None and fitheight is not None:
 842 |                 if imgwidth <= fitwidth and imgheight <= fitheight:
 843 |                     return imgwidth, imgheight
 844 |             elif fitwidth is None and fitheight is not None:
 845 |                 if imgheight <= fitheight:
 846 |                     return imgwidth, imgheight
 847 |             elif fitheight is None and fitwidth is not None:
 848 |                 if imgwidth <= fitwidth:
 849 |                     return imgwidth, imgheight
 850 |             else:
 851 |                 raise ValueError("fitwidth and fitheight cannot both be None")
 852 |             return default()
 853 |         elif fit == FitMode.enlarge:
 854 |             if fitwidth is not None and fitheight is not None:
 855 |                 if imgwidth > fitwidth or imgheight > fitheight:
 856 |                     return imgwidth, imgheight
 857 |             elif fitwidth is None and fitheight is not None:
 858 |                 if imgheight > fitheight:
 859 |                     return imgwidth, imgheight
 860 |             elif fitheight is None and fitwidth is not None:
 861 |                 if imgwidth > fitwidth:
 862 |                     return imgwidth, imgheight
 863 |             else:
 864 |                 raise ValueError("fitwidth and fitheight cannot both be None")
 865 |             return default()
 866 |         else:
 867 |             raise NotImplementedError
 868 |     # if no layout arguments are given, then the image size is equal to the
 869 |     # page size and will be drawn with the default dpi
 870 |     if pagesize is None and imgsize is None and border is None:
 871 |         return default_layout_fun
 872 |     if pagesize is None and imgsize is None and border is not None:
 873 |         def layout_fun(imgwidthpx, imgheightpx, ndpi):
 874 |             imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0])
 875 |             imgheightpdf = px_to_pt(imgheightpx, ndpi[1])
 876 |             pagewidth = imgwidthpdf+2*border[1]
 877 |             pageheight = imgheightpdf+2*border[0]
 878 |             return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 879 |         return layout_fun
 880 |     if border is None:
 881 |         border = (0, 0)
 882 |     # if the pagesize is given but the imagesize is not, then the imagesize
 883 |     # will be calculated from the pagesize, taking into account the border
 884 |     # and the fitting
 885 |     if pagesize is not None and imgsize is None:
 886 |         def layout_fun(imgwidthpx, imgheightpx, ndpi):
 887 |             if pagesize[0] is not None and pagesize[1] is not None and \
 888 |                     auto_orient and \
 889 |                     ((imgwidthpx > imgheightpx and
 890 |                      pagesize[0] < pagesize[1]) or
 891 |                      (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])):
 892 |                 pagewidth, pageheight = pagesize[1], pagesize[0]
 893 |                 newborder = border[1], border[0]
 894 |             else:
 895 |                 pagewidth, pageheight = pagesize[0], pagesize[1]
 896 |                 newborder = border
 897 |             if pagewidth is not None:
 898 |                 fitwidth = pagewidth-2*newborder[1]
 899 |             else:
 900 |                 fitwidth = None
 901 |             if pageheight is not None:
 902 |                 fitheight = pageheight-2*newborder[0]
 903 |             else:
 904 |                 fitheight = None
 905 |             if fit in [FitMode.fill, FitMode.enlarge] and \
 906 |                     fitwidth is not None and fitwidth < 0 and \
 907 |                     fitheight is not None and fitheight < 0:
 908 |                 raise NegativeDimensionError(
 909 |                     "at least one border dimension musts be smaller than half "
 910 |                     "the respective page dimension")
 911 |             elif fit not in [FitMode.fill, FitMode.enlarge] \
 912 |                     and ((fitwidth is not None and fitwidth < 0) or
 913 |                          (fitheight is not None and fitheight < 0)):
 914 |                 raise NegativeDimensionError(
 915 |                     "one border dimension is larger than half of the "
 916 |                     "respective page dimension")
 917 |             imgwidthpdf, imgheightpdf = \
 918 |                 fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
 919 |                        px_to_pt(imgheightpx, ndpi[1]),
 920 |                        fitwidth, fitheight)
 921 |             if pagewidth is None:
 922 |                 pagewidth = imgwidthpdf+border[1]*2
 923 |             if pageheight is None:
 924 |                 pageheight = imgheightpdf+border[0]*2
 925 |             return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 926 |         return layout_fun
 927 | 
 928 |     def scale_imgsize(s, px, dpi):
 929 |         if s is None:
 930 |             return None
 931 |         mode, value = s
 932 |         if mode == ImgSize.abs:
 933 |             return value
 934 |         if mode == ImgSize.perc:
 935 |             return (px_to_pt(px, dpi)*value)/100
 936 |         if mode == ImgSize.dpi:
 937 |             return px_to_pt(px, value)
 938 |         raise NotImplementedError
 939 |     if pagesize is None and imgsize is not None:
 940 |         def layout_fun(imgwidthpx, imgheightpx, ndpi):
 941 |             imgwidthpdf, imgheightpdf = \
 942 |                     fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
 943 |                            px_to_pt(imgheightpx, ndpi[1]),
 944 |                            scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
 945 |                            scale_imgsize(imgsize[1], imgheightpx, ndpi[1]))
 946 |             pagewidth = imgwidthpdf+2*border[1]
 947 |             pageheight = imgheightpdf+2*border[0]
 948 |             return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 949 |         return layout_fun
 950 |     if pagesize is not None and imgsize is not None:
 951 |         def layout_fun(imgwidthpx, imgheightpx, ndpi):
 952 |             if pagesize[0] is not None and pagesize[1] is not None and \
 953 |                     auto_orient and \
 954 |                     ((imgwidthpx > imgheightpx and
 955 |                       pagesize[0] < pagesize[1]) or
 956 |                      (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])):
 957 |                 pagewidth, pageheight = pagesize[1], pagesize[0]
 958 |             else:
 959 |                 pagewidth, pageheight = pagesize[0], pagesize[1]
 960 |             imgwidthpdf, imgheightpdf = \
 961 |                 fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]),
 962 |                        px_to_pt(imgheightpx, ndpi[1]),
 963 |                        scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
 964 |                        scale_imgsize(imgsize[1], imgheightpx, ndpi[1]))
 965 |             return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 966 |         return layout_fun
 967 |     raise NotImplementedError
 968 | 
 969 | 
 970 | def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
 971 |     imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0])
 972 |     imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1])
 973 |     return pagewidth, pageheight, imgwidthpdf, imgheightpdf
 974 | 
 975 | 
 976 | def get_fixed_dpi_layout_fun(fixed_dpi):
 977 |     """Layout function that overrides whatever DPI is claimed in input images.
 978 | 
 979 |     >>> layout_fun = get_fixed_dpi_layout_fun((300, 300))
 980 |     >>> convert(image1, layout_fun=layout_fun, ... outputstream=...)
 981 |     """
 982 |     def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi):
 983 |         return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi)
 984 |     return fixed_dpi_layout_fun
 985 | 
 986 | 
 987 | # given one or more input image, depending on outputstream, either return a
 988 | # string containing the whole PDF if outputstream is None or write the PDF
 989 | # data to the given file-like object and return None
 990 | #
 991 | # Input images can be given as file like objects (they must implement read()),
 992 | # as a binary string representing the image content or as filenames to the
 993 | # images.
 994 | def convert(*images, **kwargs):
 995 | 
 996 |     _default_kwargs = dict(
 997 |         title=None,
 998 |         author=None, creator=None, producer=None, creationdate=None,
 999 |         moddate=None, subject=None, keywords=None, colorspace=None,
1000 |         nodate=False, layout_fun=default_layout_fun, viewer_panes=None,
1001 |         viewer_initial_page=None, viewer_magnification=None,
1002 |         viewer_page_layout=None, viewer_fit_window=False,
1003 |         viewer_center_window=False, viewer_fullscreen=False,
1004 |         with_pdfrw=True, outputstream=None, first_frame_only=False)
1005 |     for kwname, default in _default_kwargs.items():
1006 |         if kwname not in kwargs:
1007 |             kwargs[kwname] = default
1008 | 
1009 |     pdf = pdfdoc(
1010 |         "1.3",
1011 |         kwargs['title'], kwargs['author'], kwargs['creator'],
1012 |         kwargs['producer'], kwargs['creationdate'], kwargs['moddate'],
1013 |         kwargs['subject'], kwargs['keywords'], kwargs['nodate'],
1014 |         kwargs['viewer_panes'], kwargs['viewer_initial_page'],
1015 |         kwargs['viewer_magnification'], kwargs['viewer_page_layout'],
1016 |         kwargs['viewer_fit_window'], kwargs['viewer_center_window'],
1017 |         kwargs['viewer_fullscreen'], kwargs['with_pdfrw'])
1018 | 
1019 |     # backwards compatibility with older img2pdf versions where the first
1020 |     # argument to the function had to be given as a list
1021 |     if len(images) == 1:
1022 |         # if only one argument was given and it is a list, expand it
1023 |         if isinstance(images[0], (list, tuple)):
1024 |             images = images[0]
1025 | 
1026 |     if not isinstance(images, (list, tuple)):
1027 |         images = [images]
1028 | 
1029 |     for img in images:
1030 |         # img is allowed to be a path, a binary string representing image data
1031 |         # or a file-like object (really anything that implements read())
1032 |         try:
1033 |             rawdata = img.read()
1034 |         except AttributeError:
1035 |             if not isinstance(img, (str, bytes)):
1036 |                 raise TypeError(
1037 |                         "Neither implements read() nor is str or bytes")
1038 |             # the thing doesn't have a read() function, so try if we can treat
1039 |             # it as a file name
1040 |             try:
1041 |                 with open(img, "rb") as f:
1042 |                     rawdata = f.read()
1043 |             except:
1044 |                 # whatever the exception is (string could contain NUL
1045 |                 # characters or the path could just not exist) it's not a file
1046 |                 # name so we now try treating it as raw image content
1047 |                 rawdata = img
1048 | 
1049 |         for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx \
1050 |                 in read_images(
1051 |                     rawdata, kwargs['colorspace'], kwargs['first_frame_only']):
1052 |             pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
1053 |                 kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi)
1054 |             if pagewidth < 3.00 or pageheight < 3.00:
1055 |                 logging.warning("pdf width or height is below 3.00 - too "
1056 |                                 "small for some viewers!")
1057 |             elif pagewidth > 14400.0 or pageheight > 14400.0:
1058 |                 raise PdfTooLargeError(
1059 |                         "pdf width or height must not exceed 200 inches.")
1060 |             # the image is always centered on the page
1061 |             imgxpdf = (pagewidth - imgwidthpdf)/2.0
1062 |             imgypdf = (pageheight - imgheightpdf)/2.0
1063 |             pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
1064 |                               imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
1065 |                               imgypdf, pagewidth, pageheight)
1066 | 
1067 |     if kwargs['outputstream']:
1068 |         pdf.tostream(kwargs['outputstream'])
1069 |         return
1070 | 
1071 |     return pdf.tostring()
1072 | 
1073 | 
def parse_num(num, name):
    """Turn a length given as a string into a number of PDF points.

    An empty string yields None, meaning that the dimension was not
    specified.  Otherwise the string has to be a floating point number
    which may directly be followed by one of the units pt, cm, mm or in.
    A number without a unit is taken as being in pt already; values in the
    other units are run through cm_to_pt(), mm_to_pt() or in_to_pt().

    name is only used to label the offending value in error messages.
    Anything that cannot be parsed raises argparse.ArgumentTypeError.
    """
    if num == '':
        return None

    if not num.endswith(("pt", "cm", "mm", "in")):
        # no known unit suffix, so the whole string has to be the number
        try:
            return float(num)
        except ValueError:
            msg = "%s is not a floating point number and doesn't have a " \
                  "valid unit: %s" % (name, num)
            raise argparse.ArgumentTypeError(msg)

    # every supported unit is exactly two characters long
    suffix = num[-2:]
    digits = num[:-2]
    try:
        value = float(digits)
    except ValueError:
        msg = "%s is not a floating point number: %s" % (name, digits)
        raise argparse.ArgumentTypeError(msg)

    if suffix == "cm":
        return cm_to_pt(value)
    if suffix == "mm":
        return mm_to_pt(value)
    if suffix == "in":
        return in_to_pt(value)
    # "pt" is the native unit and needs no conversion
    return value
1109 | 
1110 | 
def parse_imgsize_num(num, name):
    """Turn one image size dimension given as a string into a tagged value.

    An empty string yields None, meaning that the dimension was not
    specified.  Otherwise the string has to be a floating point number
    which may directly be followed by one of pt, cm, mm, in, dpi or %.

    The result is a two-tuple whose first element says how to interpret
    the second one:

      - (ImgSize.abs, points) for a bare number or the units pt, cm, mm
        and in (the latter three converted with cm_to_pt(), mm_to_pt() and
        in_to_pt())
      - (ImgSize.dpi, value) for the dpi suffix
      - (ImgSize.perc, value) for the % suffix

    name is only used to label the offending value in error messages.
    Anything that cannot be parsed raises argparse.ArgumentTypeError.
    """
    if num == '':
        return None

    # figure out which suffix (if any) the value carries
    if num.endswith(("pt", "cm", "mm", "in")):
        suffix = num[-2:]
    elif num.endswith("dpi"):
        suffix = "dpi"
    elif num.endswith("%"):
        suffix = "%"
    else:
        suffix = None

    if suffix is None:
        # no suffix at all: the whole string is a length in pt
        try:
            value = float(num)
        except ValueError:
            msg = "%s is not a floating point number and doesn't have a " \
                  "valid unit: %s" % (name, num)
            raise argparse.ArgumentTypeError(msg)
        return (ImgSize.abs, value)

    # strip off unit from string
    digits = num[:-len(suffix)]
    try:
        value = float(digits)
    except ValueError:
        msg = "%s is not a floating point number: %s" % (name, digits)
        raise argparse.ArgumentTypeError(msg)

    if suffix == "dpi":
        return (ImgSize.dpi, value)
    if suffix == "%":
        return (ImgSize.perc, value)

    # what remains are absolute lengths which get normalized to pt
    if suffix == "cm":
        value = cm_to_pt(value)
    elif suffix == "mm":
        value = mm_to_pt(value)
    elif suffix == "in":
        value = in_to_pt(value)
    return (ImgSize.abs, value)
1162 | 
1163 | 
def parse_pagesize_rectarg(string):
    """Parse a page size argument into a (width, height) tuple.

    The argument is either a key of the papersizes table (looked up in
    lower case) or has the form "WxH" where W and H are lengths understood
    by parse_num().  Either side of the "x" may be empty and a string
    without any "x" only sets the width; a dimension that was not given is
    returned as None.  A trailing "^T" swaps width and height.

    Raises argparse.ArgumentTypeError if neither dimension was given or if
    one of them cannot be parsed.
    """
    transposed = string.endswith("^T")
    if transposed:
        string = string[:-2]
    # a known paper size name stands in for its explicit dimension string
    string = papersizes.get(string.lower()) or string
    # without a separating "x" the whole string is the width and the height
    # stays unspecified
    first, sep, second = string.partition('x')
    w = parse_num(first, "width")
    h = parse_num(second, "height") if sep else None
    if transposed:
        w, h = h, w
    if w is None and h is None:
        raise argparse.ArgumentTypeError("at least one dimension must be "
                                         "specified")
    return w, h
1185 | 
1186 | 
def parse_imgsize_rectarg(string):
    """Parse an image size argument into a (width, height) tuple.

    The argument is either a key of the papersizes table (looked up in
    lower case) or has the form "WxH" where W and H are values understood
    by parse_imgsize_num().  Either side of the "x" may be empty and a
    string without any "x" only sets the width; a dimension that was not
    given is returned as None.  A trailing "^T" swaps width and height.

    Raises argparse.ArgumentTypeError if neither dimension was given or if
    one of them cannot be parsed.
    """
    transposed = string.endswith("^T")
    if transposed:
        string = string[:-2]
    # a known paper size name stands in for its explicit dimension string
    string = papersizes.get(string.lower()) or string
    # without a separating "x" the whole string is the width and the height
    # stays unspecified
    first, sep, second = string.partition('x')
    w = parse_imgsize_num(first, "width")
    h = parse_imgsize_num(second, "height") if sep else None
    if transposed:
        w, h = h, w
    if w is None and h is None:
        raise argparse.ArgumentTypeError("at least one dimension must be "
                                         "specified")
    return w, h
1208 | 
1209 | 
def parse_colorspacearg(string):
    """Map a colorspace name onto the matching member of Colorspace.

    The comparison against the member names is case sensitive.  A name
    that matches no member raises argparse.ArgumentTypeError with a
    message listing all allowed names.
    """
    found = next((c for c in Colorspace if c.name == string), None)
    if found is not None:
        return found
    allowed = ", ".join([c.name for c in Colorspace])
    raise argparse.ArgumentTypeError("Unsupported colorspace: %s. Must be one "
                                     "of: %s." % (string, allowed))
1217 | 
1218 | 
def parse_borderarg(string):
    """Parse a border argument of the form "L" or "L:L" into a tuple.

    A single length is used for both elements of the returned tuple.  With
    a colon, the part before it becomes the first and the part after it the
    second element; neither part may be empty.  Both parts are converted by
    parse_num(), which labels the first one "left/right border" and the
    second one "top/bottom border" in its error messages.

    NOTE(review): the --border help text describes the two values as
    top/bottom followed by left/right -- confirm against the layout code
    which of the two orders is the intended one.

    Raises argparse.ArgumentTypeError for empty or unparsable values.
    """
    first, colon, second = string.partition(':')
    if colon:
        if first == '':
            raise argparse.ArgumentTypeError("missing value before colon")
        if second == '':
            raise argparse.ArgumentTypeError("missing value after colon")
    else:
        if string == '':
            raise argparse.ArgumentTypeError("border option cannot be empty")
        # one value applies to both directions
        second = first
    h = parse_num(first, "left/right border")
    v = parse_num(second, "top/bottom border")
    if h is None and v is None:
        raise argparse.ArgumentTypeError("missing value")
    return h, v
1234 | 
1235 | 
def input_images(path):
    """Validate an input image argument early.

    For the special path "-" all of standard input is read and returned as
    bytes.  For any other path the file is checked for being non-empty and
    one byte is test-read from it; on success the path itself is returned
    unchanged.

    Every failure is reported as argparse.ArgumentTypeError.  This includes
    operating system errors other than "is a directory", "permission
    denied" and "does not exist" (for example a path component that is not
    a directory), which would otherwise escape as a raw traceback.
    """
    if path == '-':
        # we slurp in all data from stdin because we need to seek in it later
        result = sys.stdin.buffer.read()
        if len(result) == 0:
            raise argparse.ArgumentTypeError("\"%s\" is empty" % path)
        return result

    try:
        if os.path.getsize(path) == 0:
            raise argparse.ArgumentTypeError("\"%s\" is empty" % path)
        # test-read a byte from it so that we can abort early in case
        # we cannot read data from the file
        with open(path, "rb") as im:
            im.read(1)
    except IsADirectoryError:
        raise argparse.ArgumentTypeError(
            "\"%s\" is a directory" % path)
    except PermissionError:
        raise argparse.ArgumentTypeError(
            "\"%s\" permission denied" % path)
    except FileNotFoundError:
        raise argparse.ArgumentTypeError(
            "\"%s\" does not exist" % path)
    except OSError as e:
        # any other OS level failure (ENOTDIR, ENAMETOOLONG, ELOOP, ...)
        # is a problem with this argument as well; this clause must stay
        # last because the exceptions above are subclasses of OSError
        raise argparse.ArgumentTypeError(
            "\"%s\" cannot be read: %s" % (path, e.strerror or e))
    return path
1261 | 
1262 | 
def parse_fitarg(string):
    """Map a fit mode name (case insensitive) onto its FitMode member.

    Raises argparse.ArgumentTypeError if no member has that name.
    """
    wanted = string.lower()
    found = [m for m in FitMode if m.name == wanted]
    if not found:
        raise argparse.ArgumentTypeError("unknown fit mode: %s" % string)
    return found[0]
1268 | 
1269 | 
def parse_panes(string):
    """Map a page mode name (case insensitive) onto its PageMode member.

    A name that matches no member raises argparse.ArgumentTypeError with a
    message listing all allowed names.
    """
    wanted = string.lower()
    found = next((m for m in PageMode if m.name == wanted), None)
    if found is not None:
        return found
    allowed = ", ".join([m.name for m in PageMode])
    raise argparse.ArgumentTypeError("Unsupported page mode: %s. Must be one "
                                     "of: %s." % (string, allowed))
1277 | 
1278 | 
def parse_magnification(string):
    """Parse a magnification value.

    Member names of Magnification (compared case insensitively) take
    precedence and yield the respective member.  Any other value has to be
    a floating point number and is returned as a float.  If it is neither,
    argparse.ArgumentTypeError is raised with a message listing the allowed
    names.
    """
    wanted = string.lower()
    named = next((m for m in Magnification if m.name == wanted), None)
    if named is not None:
        return named
    # not a symbolic name, so it has to be an explicit zoom factor
    try:
        factor = float(string)
    except ValueError:
        factor = None
    if factor is not None:
        return factor
    allowed = ", ".join([m.name for m in Magnification])
    raise argparse.ArgumentTypeError("Unsupported magnification: %s. Must be "
                                     "a floating point number or one of: %s." %
                                     (string, allowed))
1291 | 
1292 | 
def parse_layout(string):
    """Map a page layout name (case insensitive) onto its PageLayout member.

    A name that matches no member raises argparse.ArgumentTypeError with a
    message listing all allowed names.
    """
    wanted = string.lower()
    candidates = [layout for layout in PageLayout if layout.name == wanted]
    if candidates:
        return candidates[0]
    allowed = ", ".join([layout.name for layout in PageLayout])
    raise argparse.ArgumentTypeError("Unsupported page layout: %s. Must be "
                                     "one of: %s." % (string, allowed))
1300 | 
1301 | 
def valid_date(string):
    """Parse a date given as a string into a datetime object.

    The following parsers are tried in order and the first one that
    succeeds determines the result:

      1. the fixed formats YYYY-MM-DD, YYYY-MM-DDTHH:MM and
         YYYY-MM-DDTHH:MM:SS
      2. dateutil.parser.parse(), if the dateutil module can be imported
      3. the local `date --date` utility, whose output in seconds since
         the epoch is converted with datetime.utcfromtimestamp()

    A parser that is unavailable or rejects the string is skipped.  If
    none of them succeeds, argparse.ArgumentTypeError is raised.
    """
    # first try parsing in ISO8601 format
    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(string, fmt)
        except ValueError:
            pass
    # then try dateutil
    try:
        from dateutil import parser
    except ImportError:
        pass
    else:
        try:
            return parser.parse(string)
        except (TypeError, ValueError, OverflowError):
            # dateutil signals unparsable input with ValueError (or a
            # subclass of it) and out-of-range values with OverflowError;
            # neither must prevent the last resort below from being tried
            pass
    # as a last resort, try the local date utility
    try:
        import subprocess
    except ImportError:
        pass
    else:
        try:
            utime = subprocess.check_output(["date", "--date", string, "+%s"])
        except (subprocess.CalledProcessError, OSError):
            # either date rejected the string or there is no date
            # executable on this system at all
            pass
        else:
            try:
                return datetime.utcfromtimestamp(int(utime))
            except (ValueError, OverflowError, OSError):
                # output was not a usable number of seconds
                pass
    raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
1339 | 
1340 | 
1341 | def main():
1342 |     rendered_papersizes = ""
1343 |     for k, v in sorted(papersizes.items()):
1344 |         rendered_papersizes += "    %-8s %s\n" % (papernames[k], v)
1345 | 
1346 |     parser = argparse.ArgumentParser(
1347 |             formatter_class=argparse.RawDescriptionHelpFormatter,
1348 |             description='''\
1349 | Losslessly convert raster images to PDF without re-encoding JPEG and JPEG2000
1350 | images. This leads to a lossless conversion of JPEG and JPEG2000 images with
1351 | the only added file size coming from the PDF container itself.
1352 | 
1353 | Other raster graphics formats are losslessly stored in a zip/flate encoding of
1354 | their RGB representation. This might increase file size and does not store
1355 | transparency. There is nothing that can be done about that until the PDF format
1356 | allows embedding other image formats like PNG. Thus, img2pdf is primarily
1357 | useful to convert JPEG and JPEG2000 images to PDF.
1358 | 
1359 | The output is sent to standard output so that it can be redirected into a file
1360 | or to another program as part of a shell pipe. To directly write the output
1361 | into a file, use the -o or --output option.
1362 | 
1363 | Options:
1364 | ''',
1365 |             epilog='''\
1366 | Colorspace:
1367 |   Currently, the colorspace must be forced for JPEG 2000 images that are not in
1368 |   the RGB colorspace.  Available colorspace options are based on Python Imaging
1369 |   Library (PIL) short handles.
1370 | 
1371 |     RGB      RGB color
1372 |     L        Grayscale
1373 |     1        Black and white (internally converted to grayscale)
1374 |     CMYK     CMYK color
1375 |     CMYK;I   CMYK color with inversion (for CMYK JPEG files from Adobe)
1376 | 
1377 | Paper sizes:
1378 |   You can specify the short hand paper size names shown in the first column in
1379 |   the table below as arguments to the --pagesize and --imgsize options.  The
1380 |   width and height they are mapping to is shown in the second column.  Giving
1381 |   the value in the second column has the same effect as giving the short hand
1382 |   in the first column. Appending ^T (a caret/circumflex followed by the letter
1383 |   T) turns the paper size from portrait into landscape. The postfix thus
1384 |   symbolizes the transpose. The values are case insensitive.
1385 | 
1386 | %s
1387 | 
1388 | Fit options:
1389 |   The img2pdf options for the --fit argument are shown in the first column in
1390 |   the table below. The function of these options can be mapped to the geometry
1391 |   operators of imagemagick. For users who are familiar with imagemagick, the
1392 |   corresponding operator is shown in the second column.  The third column shows
1393 |   whether or not the aspect ratio is preserved for that option (same as in
1394 |   imagemagick). Just like imagemagick, img2pdf tries hard to preserve the
1395 |   aspect ratio, so if the --fit argument is not given, then the default is
1396 |   "into" which corresponds to the absence of any operator in imagemagick.
1397 |   The value of the --fit option is case insensitive.
1398 | 
1399 |     into    |   | Y | The default. Width and height values specify maximum
1400 |             |   |   | values.
1401 |    ---------+---+---+----------------------------------------------------------
1402 |     fill    | ^ | Y | Width and height values specify the minimum values.
1403 |    ---------+---+---+----------------------------------------------------------
1404 |     exact   | ! | N | Width and height emphatically given.
1405 |    ---------+---+---+----------------------------------------------------------
1406 |     shrink  | > | Y | Shrinks an image with dimensions larger than the given
1407 |             |   |   | ones (and otherwise behaves like "into").
1408 |    ---------+---+---+----------------------------------------------------------
1409 |     enlarge | < | Y | Enlarges an image with dimensions smaller than the given
1410 |             |   |   | ones (and otherwise behaves like "into").
1411 | 
1412 | Argument parsing:
1413 |   Argument long options can be abbreviated to a prefix if the abbreviation is
1414 |   anambiguous. That is, the prefix must match a unique option.
1415 | 
1416 |   Beware of your shell interpreting argument values as special characters (like
1417 |   the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
1418 |   values in single quotes.
1419 | 
1420 |   If you want an argument value to start with one or more minus characters, you
1421 |   must use the long option name and join them with an equal sign like so:
1422 | 
1423 |     $ img2pdf --author=--test--
1424 | 
1425 |   If your input file name starts with one or more minus characters, either
1426 |   separate the input files from the other arguments by two minus signs:
1427 | 
1428 |     $ img2pdf -- --my-file-starts-with-two-minuses.jpg
1429 | 
1430 |   Or be more explicit about its relative path by prepending a ./:
1431 | 
1432 |     $ img2pdf ./--my-file-starts-with-two-minuses.jpg
1433 | 
1434 |   The order of non-positional arguments (all arguments other than the input
1435 |   images) does not matter.
1436 | 
1437 | Examples:
1438 |   Lines starting with a dollar sign denote commands you can enter into your
1439 |   terminal. The dollar sign signifies your command prompt. It is not part of
1440 |   the command you type.
1441 | 
1442 |   Convert two scans in JPEG format to a PDF document.
1443 | 
1444 |     $ img2pdf --output out.pdf page1.jpg page2.jpg
1445 | 
1446 |   Convert a directory of JPEG images into a PDF with printable A4 pages in
1447 |   landscape mode. On each page, the photo takes the maximum amount of space
1448 |   while preserving its aspect ratio and a print border of 2 cm on the top and
1449 |   bottom and 2.5 cm on the left and right hand side.
1450 | 
1451 |     $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
1452 | 
1453 |   On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
1454 |   original image size if the image is smaller than that.
1455 | 
1456 |     $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg
1457 | 
1458 |   Prepare a directory of photos to be printed borderless on photo paper with a
1459 |   3:2 aspect ratio and rotate each page so that its orientation is the same as
1460 |   the input image.
1461 | 
1462 |     $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg
1463 | 
1464 |   Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf
1465 |   cannot read it from the JPEG2000 file automatically.
1466 | 
1467 |     $ img2pdf --output out.pdf --colorspace L input.jp2
1468 | 
1469 | Written by Johannes 'josch' Schauer <josch@mister-muffin.de>
1470 | 
1471 | Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues
1472 | ''' % rendered_papersizes)
1473 | 
1474 |     parser.add_argument(
1475 |         'images', metavar='infile', type=input_images, nargs='*',
1476 |         help='Specifies the input file(s) in any format that can be read by '
1477 |         'the Python Imaging Library (PIL). If no input images are given, then '
1478 |         'a single image is read from standard input. The special filename "-" '
1479 |         'can be used once to read an image from standard input. To read a '
1480 |         'file in the current directory with the filename "-", pass it to '
1481 |         'img2pdf by explicitly stating its relative path like "./-".')
1482 |     parser.add_argument(
1483 |         '-v', '--verbose', action="store_true",
1484 |         help='Makes the program operate in verbose mode, printing messages on '
1485 |              'standard error.')
1486 |     parser.add_argument(
1487 |         '-V', '--version', action='version', version='%(prog)s '+__version__,
1488 |         help="Prints version information and exits.")
1489 | 
1490 |     outargs = parser.add_argument_group(
1491 |             title='General output arguments',
1492 |             description='Arguments controlling the output format.')
1493 | 
1494 |     outargs.add_argument(
1495 |         '-o', '--output', metavar='out', type=argparse.FileType('wb'),
1496 |         default=sys.stdout.buffer,
1497 |         help='Makes the program output to a file instead of standard output.')
1498 |     outargs.add_argument(
1499 |         '-C', '--colorspace', metavar='colorspace', type=parse_colorspacearg,
1500 |         help='''
1501 | Forces the PIL colorspace. See the epilogue for a list of possible values.
1502 | Usually the PDF colorspace would be derived from the color space of the input
1503 | image. This option overwrites the automatically detected colorspace from the
1504 | input image and thus forces a certain colorspace in the output PDF /ColorSpace
1505 | property. This is useful for JPEG 2000 images with a different colorspace than
1506 | RGB.''')
1507 | 
1508 |     outargs.add_argument(
1509 |         '-D', '--nodate', action="store_true",
1510 |         help='Suppresses timestamps in the output and thus makes the output '
1511 |               'deterministic between individual runs. You can also manually '
1512 |               'set a date using the --moddate and --creationdate options.')
1513 | 
1514 |     outargs.add_argument(
1515 |         "--without-pdfrw", action="store_true",
1516 |         help="By default, img2pdf uses the pdfrw library to create the output "
1517 |              "PDF if pdfrw is available. If you want to use the internal PDF "
1518 |              "generator of img2pdf even if pdfrw is present, then pass this "
1519 |              "option. This can be useful if you want to have unicode metadata "
1520 |              "values which pdfrw does not yet support (See "
1521 |              "https://github.com/pmaupin/pdfrw/issues/39) or if you want the "
1522 |              "PDF code to be more human readable.")
1523 | 
1524 |     outargs.add_argument(
1525 |         "--first-frame-only", action="store_true",
1526 |         help="By default, img2pdf will convert multi-frame images like "
1527 |              "multi-page TIFF or animated GIF images to one page per frame. "
1528 |              "This option will only let the first frame of every multi-frame "
1529 |              "input image be converted into a page in the resulting PDF."
1530 |             )
1531 | 
1532 |     sizeargs = parser.add_argument_group(
1533 |         title='Image and page size and layout arguments',
1534 |         description='''\
1535 | Every input image will be placed on its own page. The image size is controlled
1536 | by the dpi value of the input image or, if unset or missing, the default dpi of
1537 | %.2f. By default, each page will have the same size as the image it shows.
1538 | Thus, there will be no visible border between the image and the page border by
1539 | default. If image size and page size are made different from each other by the
1540 | options in this section, the image will always be centered in both dimensions.
1541 | 
1542 | The image size and page size can be explicitly set using the --imgsize and
1543 | --pagesize options, respectively.  If either dimension of the image size is
1544 | specified but the same dimension of the page size is not, then the latter will
1545 | be derived from the former using an optional minimal distance between the image
1546 | and the page border (given by the --border option) and/or a certain fitting
1547 | strategy (given by the --fit option). The converse happens if a dimension of
1548 | the page size is set but the same dimension of the image size is not.
1549 | 
1550 | Any length value in below options is represented by the meta variable L which
1551 | is a floating point value with an optional unit appended (without a space
1552 | between them). The default unit is pt (1/72 inch, the PDF unit) and other
1553 | allowed units are cm (centimeter), mm (millimeter), and in (inch).
1554 | 
1555 | Any size argument of the format LxL in the options below specifies the width
1556 | and height of a rectangle where the first L represents the width and the second
1557 | L represents the height with an optional unit following each value as described
1558 | above.  Either width or height may be omitted. If the height is omitted, the
1559 | separating x can be omitted as well. Omitting the width requires to prefix the
1560 | height with the separating x. The missing dimension will be chosen so to not
1561 | change the image aspect ratio. Instead of giving the width and height
1562 | explicitly, you may also specify some (case-insensitive) common page sizes such
1563 | as letter and A4.  See the epilogue at the bottom for a complete list of the
1564 | valid sizes.
1565 | 
1566 | The --fit option scales to fit the image into a rectangle that is either
1567 | derived from the --imgsize option or otherwise from the --pagesize option.
1568 | If the --border option is given in addition to the --imgsize option while the
1569 | --pagesize option is not given, then the page size will be calculated from the
1570 | image size, respecting the border setting. If the --border option is given in
1571 | addition to the --pagesize option while the --imgsize option is not given, then
1572 | the image size will be calculated from the page size, respecting the border
1573 | setting. If the --border option is given while both the --pagesize and
1574 | --imgsize options are passed, then the --border option will be ignored.
1575 | 
1576 | ''' % default_dpi)
1577 | 
1578 |     sizeargs.add_argument(
1579 |             '-S', '--pagesize', metavar='LxL', type=parse_pagesize_rectarg,
1580 |             help='''
1581 | Sets the size of the PDF pages. The short-option is the upper case S because
1582 | it is an mnemonic for being bigger than the image size.''')
1583 | 
1584 |     sizeargs.add_argument(
1585 |             '-s', '--imgsize', metavar='LxL', type=parse_imgsize_rectarg,
1586 |             help='''
1587 | Sets the size of the images on the PDF pages.  In addition, the unit dpi is
1588 | allowed which will set the image size as a value of dots per inch.  Instead of
1589 | a unit, width and height values may also have a percentage sign appended,
1590 | indicating a resize of the image by that percentage. The short-option is the
1591 | lower case s because it is an mnemonic for being smaller than the page size.
1592 | ''')
1593 |     sizeargs.add_argument(
1594 |             '-b', '--border', metavar='L[:L]', type=parse_borderarg,
1595 |             help='''
1596 | Specifies the minimal distance between the image border and the PDF page
1597 | border.  This value Is overwritten by explicit values set by --pagesize or
1598 | --imgsize.  The value will be used when calculating page dimensions from the
1599 | image dimensions or the other way round. One, or two length values can be given
1600 | as an argument, separated by a colon. One value specifies the minimal border on
1601 | all four sides. Two values specify the minimal border on the top/bottom and
1602 | left/right, respectively. It is not possible to specify asymmetric borders
1603 | because images will always be centered on the page.
1604 | ''')
1605 |     sizeargs.add_argument(
1606 |             '-f', '--fit', metavar='FIT', type=parse_fitarg,
1607 |             default=FitMode.into, help='''
1608 | 
1609 | If --imgsize is given, fits the image using these dimensions. Otherwise, fit
1610 | the image into the dimensions given by --pagesize.  FIT is one of into, fill,
1611 | exact, shrink and enlarge. The default value is "into". See the epilogue at the
1612 | bottom for a description of the FIT options.
1613 | 
1614 | ''')
1615 |     sizeargs.add_argument(
1616 |             '-a', '--auto-orient', action="store_true",
1617 |             help='''
1618 | If both dimensions of the page are given via --pagesize, conditionally swaps
1619 | these dimensions such that the page orientation is the same as the orientation
1620 | of the input image. If the orientation of a page gets flipped, then so do the
1621 | values set via the --border option.
1622 | ''')
1623 | 
1624 |     metaargs = parser.add_argument_group(
1625 |         title='Arguments setting metadata',
1626 |         description='Options handling embedded timestamps, title and author '
1627 |                     'information.')
1628 |     metaargs.add_argument(
1629 |         '--title', metavar='title', type=str,
1630 |         help='Sets the title metadata value')
1631 |     metaargs.add_argument(
1632 |         '--author', metavar='author', type=str,
1633 |         help='Sets the author metadata value')
1634 |     metaargs.add_argument(
1635 |         '--creator', metavar='creator', type=str,
1636 |         help='Sets the creator metadata value')
1637 |     metaargs.add_argument(
1638 |         '--producer', metavar='producer', type=str,
1639 |         default="img2pdf " + __version__,
1640 |         help='Sets the producer metadata value '
1641 |              '(default is: img2pdf ' + __version__ + ')')
1642 |     metaargs.add_argument(
1643 |         '--creationdate', metavar='creationdate', type=valid_date,
1644 |         help='Sets the UTC creation date metadata value in YYYY-MM-DD or '
1645 |              'YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format '
1646 |              'understood by python dateutil module or any format understood '
1647 |              'by `date --date`')
1648 |     metaargs.add_argument(
1649 |         '--moddate', metavar='moddate', type=valid_date,
1650 |         help='Sets the UTC modification date metadata value in YYYY-MM-DD '
1651 |              'or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format '
1652 |              'understood by python dateutil module or any format understood '
1653 |              'by `date --date`')
1654 |     metaargs.add_argument(
1655 |         '--subject', metavar='subject', type=str,
1656 |         help='Sets the subject metadata value')
1657 |     metaargs.add_argument(
1658 |         '--keywords', metavar='kw', type=str, nargs='+',
1659 |         help='Sets the keywords metadata value (can be given multiple times)')
1660 | 
1661 |     viewerargs = parser.add_argument_group(
1662 |         title='PDF viewer arguments',
1663 |         description='PDF files can specify how they are meant to be '
1664 |                     'presented to the user by a PDF viewer')
1665 | 
1666 |     viewerargs.add_argument(
1667 |         '--viewer-panes', metavar="PANES", type=parse_panes,
1668 |         help='Instruct the PDF viewer which side panes to show. Valid values '
1669 |              'are "outlines" and "thumbs". It is not possible to specify both '
1670 |              'at the same time.')
1671 |     viewerargs.add_argument(
1672 |         '--viewer-initial-page', metavar="NUM", type=int,
1673 |         help='Instead of showing the first page, instruct the PDF viewer to '
1674 |              'show the given page instead. Page numbers start with 1.')
1675 |     viewerargs.add_argument(
1676 |         '--viewer-magnification', metavar="MAG", type=parse_magnification,
1677 |         help='Instruct the PDF viewer to open the PDF with a certain zoom '
1678 |              'level. Valid values are either a floating point number giving '
1679 |              'the exact zoom level, "fit" (zoom to fit whole page), "fith" '
1680 |              '(zoom to fit page width) and "fitbh" (zoom to fit visible page '
1681 |              'width).')
1682 |     viewerargs.add_argument(
1683 |         '--viewer-page-layout', metavar="LAYOUT", type=parse_layout,
1684 |         help='Instruct the PDF viewer how to arrange the pages on the screen. '
1685 |              'Valid values are "single" (display single pages), "onecolumn" '
1686 |              '(one continuous column), "twocolumnright" (two continuous '
1687 |              'columns with odd number pages on the right) and "twocolumnleft" '
1688 |              '(two continuous columns with odd numbered pages on the left)')
1689 |     viewerargs.add_argument(
1690 |         '--viewer-fit-window', action="store_true",
1691 |         help='Instruct the PDF viewer to resize the window to fit the page '
1692 |              'size')
1693 |     viewerargs.add_argument(
1694 |         '--viewer-center-window', action="store_true",
1695 |         help='Instruct the PDF viewer to center the PDF viewer window')
1696 |     viewerargs.add_argument(
1697 |         '--viewer-fullscreen', action="store_true",
1698 |         help='Instruct the PDF viewer to open the PDF in fullscreen mode')
1699 | 
1700 |     args = parser.parse_args()
1701 | 
1702 |     if args.verbose:
1703 |         logging.basicConfig(level=logging.DEBUG)
1704 | 
1705 |     layout_fun = get_layout_fun(args.pagesize, args.imgsize, args.border,
1706 |                                 args.fit, args.auto_orient)
1707 | 
1708 |     # if no positional arguments were supplied, read a single image from
1709 |     # standard input
1710 |     if len(args.images) == 0:
1711 |         logging.info("reading image from standard input")
1712 |         try:
1713 |             args.images = [sys.stdin.buffer.read()]
1714 |         except KeyboardInterrupt:
1715 |             exit(0)
1716 | 
1717 |     # with the number of pages being equal to the number of images, the
1718 |     # value passed to --viewer-initial-page must be between 1 and that number
1719 |     if args.viewer_initial_page is not None:
1720 |         if args.viewer_initial_page < 1:
1721 |             parser.print_usage(file=sys.stderr)
1722 |             logging.error("%s: error: argument --viewer-initial-page: must be "
1723 |                           "greater than zero" % parser.prog)
1724 |             exit(2)
1725 |         if args.viewer_initial_page > len(args.images):
1726 |             parser.print_usage(file=sys.stderr)
1727 |             logging.error("%s: error: argument --viewer-initial-page: must be "
1728 |                           "less than or equal to the total number of pages" %
1729 |                           parser.prog)
1730 |             exit(2)
1731 | 
1732 |     try:
1733 |         convert(
1734 |             *args.images, title=args.title, author=args.author,
1735 |             creator=args.creator, producer=args.producer,
1736 |             creationdate=args.creationdate, moddate=args.moddate,
1737 |             subject=args.subject, keywords=args.keywords,
1738 |             colorspace=args.colorspace, nodate=args.nodate,
1739 |             layout_fun=layout_fun, viewer_panes=args.viewer_panes,
1740 |             viewer_initial_page=args.viewer_initial_page,
1741 |             viewer_magnification=args.viewer_magnification,
1742 |             viewer_page_layout=args.viewer_page_layout,
1743 |             viewer_fit_window=args.viewer_fit_window,
1744 |             viewer_center_window=args.viewer_center_window,
1745 |             viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not
1746 |             args.without_pdfrw, outputstream=args.output,
1747 |             first_frame_only=args.first_frame_only)
1748 |     except Exception as e:
1749 |         logging.error("error: " + str(e))
1750 |         if logging.getLogger().isEnabledFor(logging.DEBUG):
1751 |             import traceback
1752 |             traceback.print_exc(file=sys.stderr)
1753 |         exit(1)
1754 | 
1755 | 
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
1758 | 


--------------------------------------------------------------------------------
/src/jp2.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # Copyright (C) 2013 Johannes 'josch' Schauer <j.schauer at email.de>
  4 | #
  5 | # this module is heavily based upon jpylyzer which is
  6 | # KB / National Library of the Netherlands, Open Planets Foundation
  7 | # and released under the same license conditions
  8 | #
  9 | # This program is free software: you can redistribute it and/or modify
 10 | # it under the terms of the GNU Lesser General Public License as published by
 11 | # the Free Software Foundation, either version 3 of the License, or
 12 | # (at your option) any later version.
 13 | #
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU Lesser General Public License for more details.
 18 | #
 19 | # You should have received a copy of the GNU Lesser General Public License
 20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | import struct
 23 | 
 24 | 
 25 | def getBox(data, byteStart, noBytes):
 26 |     boxLengthValue = struct.unpack(">I", data[byteStart:byteStart+4])[0]
 27 |     boxType = data[byteStart+4:byteStart+8]
 28 |     contentsStartOffset = 8
 29 |     if boxLengthValue == 1:
 30 |         boxLengthValue = struct.unpack(">Q", data[byteStart+8:byteStart+16])[0]
 31 |         contentsStartOffset = 16
 32 |     if boxLengthValue == 0:
 33 |         boxLengthValue = noBytes-byteStart
 34 |     byteEnd = byteStart + boxLengthValue
 35 |     boxContents = data[byteStart+contentsStartOffset:byteEnd]
 36 |     return (boxLengthValue, boxType, byteEnd, boxContents)
 37 | 
 38 | 
 39 | def parse_ihdr(data):
 40 |     height = struct.unpack(">I", data[0:4])[0]
 41 |     width = struct.unpack(">I", data[4:8])[0]
 42 |     return width, height
 43 | 
 44 | 
 45 | def parse_colr(data):
 46 |     meth = struct.unpack(">B", data[0:1])[0]
 47 |     if meth != 1:
 48 |         raise Exception("only enumerated color method supported")
 49 |     enumCS = struct.unpack(">I", data[3:])[0]
 50 |     if enumCS == 16:
 51 |         return "RGB"
 52 |     elif enumCS == 17:
 53 |         return "L"
 54 |     else:
 55 |         raise Exception("only sRGB and greyscale color space is supported, "
 56 |                         "got %d" % enumCS)
 57 | 
 58 | 
 59 | def parse_resc(data):
 60 |     hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
 61 |     hdpi = ((hnum/hden) * (10**hexp) * 100)/2.54
 62 |     vdpi = ((vnum/vden) * (10**vexp) * 100)/2.54
 63 |     return hdpi, vdpi
 64 | 
 65 | 
 66 | def parse_res(data):
 67 |     hdpi, vdpi = None, None
 68 |     noBytes = len(data)
 69 |     byteStart = 0
 70 |     boxLengthValue = 1  # dummy value for while loop condition
 71 |     while byteStart < noBytes and boxLengthValue != 0:
 72 |         boxLengthValue, boxType, byteEnd, boxContents = \
 73 |             getBox(data, byteStart, noBytes)
 74 |         if boxType == b'resc':
 75 |             hdpi, vdpi = parse_resc(boxContents)
 76 |             break
 77 |     return hdpi, vdpi
 78 | 
 79 | 
 80 | def parse_jp2h(data):
 81 |     width, height, colorspace, hdpi, vdpi = None, None, None, None, None
 82 |     noBytes = len(data)
 83 |     byteStart = 0
 84 |     boxLengthValue = 1  # dummy value for while loop condition
 85 |     while byteStart < noBytes and boxLengthValue != 0:
 86 |         boxLengthValue, boxType, byteEnd, boxContents = \
 87 |             getBox(data, byteStart, noBytes)
 88 |         if boxType == b'ihdr':
 89 |             width, height = parse_ihdr(boxContents)
 90 |         elif boxType == b'colr':
 91 |             colorspace = parse_colr(boxContents)
 92 |         elif boxType == b'res ':
 93 |             hdpi, vdpi = parse_res(boxContents)
 94 |         byteStart = byteEnd
 95 |     return (width, height, colorspace, hdpi, vdpi)
 96 | 
 97 | 
 98 | def parsejp2(data):
 99 |     noBytes = len(data)
100 |     byteStart = 0
101 |     boxLengthValue = 1  # dummy value for while loop condition
102 |     width, height, colorspace, hdpi, vdpi = None, None, None, None, None
103 |     while byteStart < noBytes and boxLengthValue != 0:
104 |         boxLengthValue, boxType, byteEnd, boxContents = \
105 |             getBox(data, byteStart, noBytes)
106 |         if boxType == b'jp2h':
107 |             width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
108 |             break
109 |         byteStart = byteEnd
110 |     if not width:
111 |         raise Exception("no width in jp2 header")
112 |     if not height:
113 |         raise Exception("no height in jp2 header")
114 |     if not colorspace:
115 |         raise Exception("no colorspace in jp2 header")
116 |     # retrieving the dpi is optional so we do not error out if not present
117 |     return (width, height, colorspace, hdpi, vdpi)
118 | 
119 | 
if __name__ == "__main__":
    # Small command line helper: print width, height and colorspace of the
    # JP2 file named by the first command line argument.
    import sys
    # the parser works on bytes, so the file has to be read in binary mode;
    # the with-statement makes sure it is closed again
    with open(sys.argv[1], "rb") as f:
        rawdata = f.read()
    # parsejp2() returns five values; the dpi values are not printed
    width, height, colorspace, hdpi, vdpi = parsejp2(rawdata)
    sys.stdout.write("width = %d\n" % width)
    sys.stdout.write("height = %d\n" % height)
    sys.stdout.write("colorspace = %s\n" % colorspace)
126 | 


--------------------------------------------------------------------------------
/src/tests/__init__.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | import img2pdf
  4 | import os
  5 | import struct
  6 | import sys
  7 | import zlib
  8 | from PIL import Image
  9 | from io import StringIO, BytesIO
 10 | 
 11 | HERE = os.path.dirname(__file__)
 12 | 
 13 | PY3 = sys.version_info[0] >= 3
 14 | 
 15 | if PY3:
 16 |     PdfReaderIO = StringIO
 17 | else:
 18 |     PdfReaderIO = BytesIO
 19 | 
 20 | 
 21 | # convert +set date:create +set date:modify -define png:exclude-chunk=time
 22 | 
 23 | # we define some variables so that the table below can be narrower
 24 | psl = (972, 504)     # --pagesize landscape
 25 | psp = (504, 972)     # --pagesize portrait
 26 | isl = (756, 324)     # --imgsize landscape
 27 | isp = (324, 756)     # --imgsize portrait
 28 | border = (162, 270)  # --border
 29 | # there is no need to have test cases with the same images with inverted
 30 | # orientation (landscape/portrait) because --pagesize and --imgsize are
 31 | # already inverted
 32 | im1 = (864, 288)     # imgpx #1 => 648x216
 33 | im2 = (1152, 576)    # imgpx #2 => 864x432
 34 | # shortcuts for fit modes
 35 | f_into = img2pdf.FitMode.into
 36 | f_fill = img2pdf.FitMode.fill
 37 | f_exact = img2pdf.FitMode.exact
 38 | f_shrink = img2pdf.FitMode.shrink
 39 | f_enlarge = img2pdf.FitMode.enlarge
 40 | layout_test_cases = [
    # psl=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270
 42 |     # --pagesize   --border           -a pagepdf      imgpdf
 43 |     #        --imgsize     --fit
 44 |     (None, None, None,   f_into,    0, (648, 216),  (648, 216),    # 000
 45 |                                        (864, 432),  (864, 432)),
 46 |     (None, None, None,   f_into,    1, (648, 216),  (648, 216),    # 001
 47 |                                        (864, 432),  (864, 432)),
 48 |     (None, None, None,   f_fill,    0, (648, 216),  (648, 216),    # 002
 49 |                                        (864, 432),  (864, 432)),
 50 |     (None, None, None,   f_fill,    1, (648, 216),  (648, 216),    # 003
 51 |                                        (864, 432),  (864, 432)),
 52 |     (None, None, None,   f_exact,   0, (648, 216),  (648, 216),    # 004
 53 |                                        (864, 432),  (864, 432)),
 54 |     (None, None, None,   f_exact,   1, (648, 216),  (648, 216),    # 005
 55 |                                        (864, 432),  (864, 432)),
 56 |     (None, None, None,   f_shrink,  0, (648, 216),  (648, 216),    # 006
 57 |                                        (864, 432),  (864, 432)),
 58 |     (None, None, None,   f_shrink,  1, (648, 216),  (648, 216),    # 007
 59 |                                        (864, 432),  (864, 432)),
 60 |     (None, None, None,   f_enlarge, 0, (648, 216),  (648, 216),    # 008
 61 |                                        (864, 432),  (864, 432)),
 62 |     (None, None, None,   f_enlarge, 1, (648, 216),  (648, 216),    # 009
 63 |                                        (864, 432),  (864, 432)),
 64 |     (None, None, border, f_into,    0, (1188, 540), (648, 216),    # 010
 65 |                                        (1404, 756), (864, 432)),
 66 |     (None, None, border, f_into,    1, (1188, 540), (648, 216),    # 011
 67 |                                        (1404, 756), (864, 432)),
 68 |     (None, None, border, f_fill,    0, (1188, 540), (648, 216),    # 012
 69 |                                        (1404, 756), (864, 432)),
 70 |     (None, None, border, f_fill,    1, (1188, 540), (648, 216),    # 013
 71 |                                        (1404, 756), (864, 432)),
 72 |     (None, None, border, f_exact,   0, (1188, 540), (648, 216),    # 014
 73 |                                        (1404, 756), (864, 432)),
 74 |     (None, None, border, f_exact,   1, (1188, 540), (648, 216),    # 015
 75 |                                        (1404, 756), (864, 432)),
 76 |     (None, None, border, f_shrink,  0, (1188, 540), (648, 216),    # 016
 77 |                                        (1404, 756), (864, 432)),
 78 |     (None, None, border, f_shrink,  1, (1188, 540), (648, 216),    # 017
 79 |                                        (1404, 756), (864, 432)),
 80 |     (None, None, border, f_enlarge, 0, (1188, 540), (648, 216),    # 018
 81 |                                        (1404, 756), (864, 432)),
 82 |     (None, None, border, f_enlarge, 1, (1188, 540), (648, 216),    # 019
 83 |                                        (1404, 756), (864, 432)),
 84 |     (None, isp,  None,   f_into,    0, (324, 108),  (324, 108),    # 020
 85 |                                        (324, 162),  (324, 162)),
 86 |     (None, isp,  None,   f_into,    1, (324, 108),  (324, 108),    # 021
 87 |                                        (324, 162),  (324, 162)),
 88 |     (None, isp,  None,   f_fill,    0, (2268, 756), (2268, 756),   # 022
 89 |                                        (1512, 756), (1512, 756)),
 90 |     (None, isp,  None,   f_fill,    1, (2268, 756), (2268, 756),   # 023
 91 |                                        (1512, 756), (1512, 756)),
 92 |     (None, isp,  None,   f_exact,   0, (324, 756),  (324, 756),    # 024
 93 |                                        (324, 756),  (324, 756)),
 94 |     (None, isp,  None,   f_exact,   1, (324, 756),  (324, 756),    # 025
 95 |                                        (324, 756),  (324, 756)),
 96 |     (None, isp,  None,   f_shrink,  0, (324, 108),  (324, 108),    # 026
 97 |                                        (324, 162),  (324, 162)),
 98 |     (None, isp,  None,   f_shrink,  1, (324, 108),  (324, 108),    # 027
 99 |                                        (324, 162),  (324, 162)),
100 |     (None, isp,  None,   f_enlarge, 0, (648, 216),  (648, 216),    # 028
101 |                                        (864, 432),  (864, 432)),
102 |     (None, isp,  None,   f_enlarge, 1, (648, 216),  (648, 216),    # 029
103 |                                        (864, 432),  (864, 432)),
104 |     (None, isp,  border, f_into,    0, (864, 432),  (324, 108),    # 030
105 |                                        (864, 486),  (324, 162)),
106 |     (None, isp,  border, f_into,    1, (864, 432),  (324, 108),    # 031
107 |                                        (864, 486),  (324, 162)),
108 |     (None, isp,  border, f_fill,    0, (2808, 1080), (2268, 756),  # 032
109 |                                        (2052, 1080), (1512, 756)),
110 |     (None, isp,  border, f_fill,    1, (2808, 1080), (2268, 756),  # 033
111 |                                        (2052, 1080), (1512, 756)),
112 |     (None, isp,  border, f_exact,   0, (864, 1080), (324, 756),    # 034
113 |                                        (864, 1080), (324, 756)),
114 |     (None, isp,  border, f_exact,   1, (864, 1080), (324, 756),    # 035
115 |                                        (864, 1080), (324, 756)),
116 |     (None, isp,  border, f_shrink,  0, (864, 432),  (324, 108),    # 036
117 |                                        (864, 486),  (324, 162)),
118 |     (None, isp,  border, f_shrink,  1, (864, 432),  (324, 108),    # 037
119 |                                        (864, 486),  (324, 162)),
120 |     (None, isp,  border, f_enlarge, 0, (1188, 540), (648, 216),    # 038
121 |                                        (1404, 756), (864, 432)),
122 |     (None, isp,  border, f_enlarge, 1, (1188, 540), (648, 216),    # 039
123 |                                        (1404, 756), (864, 432)),
124 |     (None, isl,  None,   f_into,    0, (756, 252),  (756, 252),    # 040
125 |                                        (648, 324),  (648, 324)),
126 |     (None, isl,  None,   f_into,    1, (756, 252),  (756, 252),    # 041
127 |                                        (648, 324),  (648, 324)),
128 |     (None, isl,  None,   f_fill,    0, (972, 324),  (972, 324),    # 042
129 |                                        (756, 378),  (756, 378)),
130 |     (None, isl,  None,   f_fill,    1, (972, 324),  (972, 324),    # 043
131 |                                        (756, 378),  (756, 378)),
132 |     (None, isl,  None,   f_exact,   0, (756, 324),  (756, 324),    # 044
133 |                                        (756, 324),  (756, 324)),
134 |     (None, isl,  None,   f_exact,   1, (756, 324),  (756, 324),    # 045
135 |                                        (756, 324),  (756, 324)),
136 |     (None, isl,  None,   f_shrink,  0, (648, 216),  (648, 216),    # 046
137 |                                        (648, 324),  (648, 324)),
138 |     (None, isl,  None,   f_shrink,  1, (648, 216),  (648, 216),    # 047
139 |                                        (648, 324),  (648, 324)),
140 |     (None, isl,  None,   f_enlarge, 0, (756, 252),  (756, 252),    # 048
141 |                                        (864, 432),  (864, 432)),
142 |     (None, isl,  None,   f_enlarge, 1, (756, 252),  (756, 252),    # 049
143 |                                        (864, 432),  (864, 432)),
    # psl=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270
145 |     # --pagesize   --border           -a      pagepdf     imgpdf
146 |     #        --imgsize     --fit         imgpx
147 |     (None, isl,  border, f_into,    0, (1296, 576), (756, 252),    # 050
148 |                                        (1188, 648), (648, 324)),
149 |     (None, isl,  border, f_into,    1, (1296, 576), (756, 252),    # 051
150 |                                        (1188, 648), (648, 324)),
151 |     (None, isl,  border, f_fill,    0, (1512, 648), (972, 324),    # 052
152 |                                        (1296, 702), (756, 378)),
153 |     (None, isl,  border, f_fill,    1, (1512, 648), (972, 324),    # 053
154 |                                        (1296, 702), (756, 378)),
155 |     (None, isl,  border, f_exact,   0, (1296, 648), (756, 324),    # 054
156 |                                        (1296, 648), (756, 324)),
157 |     (None, isl,  border, f_exact,   1, (1296, 648), (756, 324),    # 055
158 |                                        (1296, 648), (756, 324)),
159 |     (None, isl,  border, f_shrink,  0, (1188, 540), (648, 216),    # 056
160 |                                        (1188, 648), (648, 324)),
161 |     (None, isl,  border, f_shrink,  1, (1188, 540), (648, 216),    # 057
162 |                                        (1188, 648), (648, 324)),
163 |     (None, isl,  border, f_enlarge, 0, (1296, 576), (756, 252),    # 058
164 |                                        (1404, 756), (864, 432)),
165 |     (None, isl,  border, f_enlarge, 1, (1296, 576), (756, 252),    # 059
166 |                                        (1404, 756), (864, 432)),
167 |     (psp,  None, None,   f_into,    0, (504, 972),  (504, 168),    # 060
168 |                                        (504, 972),  (504, 252)),
169 |     (psp,  None, None,   f_into,    1, (972, 504),  (972, 324),    # 061
170 |                                        (972, 504),  (972, 486)),
171 |     (psp,  None, None,   f_fill,    0, (504, 972),  (2916, 972),   # 062
172 |                                        (504, 972),  (1944, 972)),
173 |     (psp,  None, None,   f_fill,    1, (972, 504),  (1512, 504),   # 063
174 |                                        (972, 504),  (1008, 504)),
175 |     (psp,  None, None,   f_exact,   0, (504, 972),  (504, 972),    # 064
176 |                                        (504, 972),  (504, 972)),
177 |     (psp,  None, None,   f_exact,   1, (972, 504),  (972, 504),    # 065
178 |                                        (972, 504),  (972, 504)),
179 |     (psp,  None, None,   f_shrink,  0, (504, 972),  (504, 168),    # 066
180 |                                        (504, 972),  (504, 252)),
181 |     (psp,  None, None,   f_shrink,  1, (972, 504),  (648, 216),    # 067
182 |                                        (972, 504),  (864, 432)),
183 |     (psp,  None, None,   f_enlarge, 0, (504, 972),  (648, 216),    # 068
184 |                                        (504, 972),  (864, 432)),
185 |     (psp,  None, None,   f_enlarge, 1, (972, 504),  (972, 324),    # 069
186 |                                        (972, 504),  (972, 486)),
187 |     (psp,  None, border, f_into,    0, None,  None, None,  None),  # 070
188 |     (psp,  None, border, f_into,    1, None,  None, None,  None),  # 071
189 |     (psp,  None, border, f_fill,    0, (504, 972),  (1944, 648),   # 072
190 |                                        (504, 972),  (1296, 648)),
191 |     (psp,  None, border, f_fill,    1, (972, 504),  (648, 216),    # 073
192 |                                        (972, 504),  (648, 324)),
193 |     (psp,  None, border, f_exact,   0, None,  None, None,  None),  # 074
194 |     (psp,  None, border, f_exact,   1, None,  None, None,  None),  # 075
195 |     (psp,  None, border, f_shrink,  0, None,  None, None,  None),  # 076
196 |     (psp,  None, border, f_shrink,  1, None,  None, None,  None),  # 077
197 |     (psp,  None, border, f_enlarge, 0, (504, 972),  (648, 216),    # 078
198 |                                        (504, 972),  (864, 432)),
199 |     (psp,  None, border, f_enlarge, 1, (972, 504),  (648, 216),    # 079
200 |                                        (972, 504),  (864, 432)),
201 |     (psp,  isp,  None,   f_into,    0, (504, 972),  (324, 108),    # 080
202 |                                        (504, 972),  (324, 162)),
203 |     (psp,  isp,  None,   f_into,    1, (972, 504),  (324, 108),    # 081
204 |                                        (972, 504),  (324, 162)),
205 |     (psp,  isp,  None,   f_fill,    0, (504, 972),  (2268, 756),   # 082
206 |                                        (504, 972),  (1512, 756)),
207 |     (psp,  isp,  None,   f_fill,    1, (972, 504),  (2268, 756),   # 083
208 |                                        (972, 504),  (1512, 756)),
209 |     (psp,  isp,  None,   f_exact,   0, (504, 972),  (324, 756),    # 084
210 |                                        (504, 972),  (324, 756)),
211 |     (psp,  isp,  None,   f_exact,   1, (972, 504),  (324, 756),    # 085
212 |                                        (972, 504),  (324, 756)),
213 |     (psp,  isp,  None,   f_shrink,  0, (504, 972),  (324, 108),    # 086
214 |                                        (504, 972),  (324, 162)),
215 |     (psp,  isp,  None,   f_shrink,  1, (972, 504),  (324, 108),    # 087
216 |                                        (972, 504),  (324, 162)),
217 |     (psp,  isp,  None,   f_enlarge, 0, (504, 972),  (648, 216),    # 088
218 |                                        (504, 972),  (864, 432)),
219 |     (psp,  isp,  None,   f_enlarge, 1, (972, 504),  (648, 216),    # 089
220 |                                        (972, 504),  (864, 432)),
221 |     (psp,  isp,  border, f_into,    0, (504, 972),  (324, 108),    # 090
222 |                                        (504, 972),  (324, 162)),
223 |     (psp,  isp,  border, f_into,    1, (972, 504),  (324, 108),    # 091
224 |                                        (972, 504),  (324, 162)),
225 |     (psp,  isp,  border, f_fill,    0, (504, 972),  (2268, 756),   # 092
226 |                                        (504, 972),  (1512, 756)),
227 |     (psp,  isp,  border, f_fill,    1, (972, 504),  (2268, 756),   # 093
228 |                                        (972, 504),  (1512, 756)),
229 |     (psp,  isp,  border, f_exact,   0, (504, 972),  (324, 756),    # 094
230 |                                        (504, 972),  (324, 756)),
231 |     (psp,  isp,  border, f_exact,   1, (972, 504),  (324, 756),    # 095
232 |                                        (972, 504),  (324, 756)),
233 |     (psp,  isp,  border, f_shrink,  0, (504, 972),  (324, 108),    # 096
234 |                                        (504, 972),  (324, 162)),
235 |     (psp,  isp,  border, f_shrink,  1, (972, 504),  (324, 108),    # 097
236 |                                        (972, 504),  (324, 162)),
237 |     (psp,  isp,  border, f_enlarge, 0, (504, 972),  (648, 216),    # 098
238 |                                        (504, 972),  (864, 432)),
239 |     (psp,  isp,  border, f_enlarge, 1, (972, 504),  (648, 216),    # 099
240 |                                        (972, 504),  (864, 432)),
    # psl=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270
242 |     # --pagesize   --border           -a      pagepdf    imgpdf
243 |     #        --imgsize     --fit         imgpx
244 |     (psp,  isl,  None,   f_into,    0, (504, 972),  (756, 252),    # 100
245 |                                        (504, 972),  (648, 324)),
246 |     (psp,  isl,  None,   f_into,    1, (972, 504),  (756, 252),    # 101
247 |                                        (972, 504),  (648, 324)),
248 |     (psp,  isl,  None,   f_fill,    0, (504, 972),  (972, 324),    # 102
249 |                                        (504, 972),  (756, 378)),
250 |     (psp,  isl,  None,   f_fill,    1, (972, 504),  (972, 324),    # 103
251 |                                        (972, 504),  (756, 378)),
252 |     (psp,  isl,  None,   f_exact,   0, (504, 972),  (756, 324),    # 104
253 |                                        (504, 972),  (756, 324)),
254 |     (psp,  isl,  None,   f_exact,   1, (972, 504),  (756, 324),    # 105
255 |                                        (972, 504),  (756, 324)),
256 |     (psp,  isl,  None,   f_shrink,  0, (504, 972),  (648, 216),    # 106
257 |                                        (504, 972),  (648, 324)),
258 |     (psp,  isl,  None,   f_shrink,  1, (972, 504),  (648, 216),    # 107
259 |                                        (972, 504),  (648, 324)),
260 |     (psp,  isl,  None,   f_enlarge, 0, (504, 972),  (756, 252),    # 108
261 |                                        (504, 972),  (864, 432)),
262 |     (psp,  isl,  None,   f_enlarge, 1, (972, 504),  (756, 252),    # 109
263 |                                        (972, 504),  (864, 432)),
264 |     (psp,  isl,  border, f_into,    0, (504, 972),  (756, 252),    # 110
265 |                                        (504, 972),  (648, 324)),
266 |     (psp,  isl,  border, f_into,    1, (972, 504),  (756, 252),    # 111
267 |                                        (972, 504),  (648, 324)),
268 |     (psp,  isl,  border, f_fill,    0, (504, 972),  (972, 324),    # 112
269 |                                        (504, 972),  (756, 378)),
270 |     (psp,  isl,  border, f_fill,    1, (972, 504),  (972, 324),    # 113
271 |                                        (972, 504),  (756, 378)),
272 |     (psp,  isl,  border, f_exact,   0, (504, 972),  (756, 324),    # 114
273 |                                        (504, 972),  (756, 324)),
274 |     (psp,  isl,  border, f_exact,   1, (972, 504),  (756, 324),    # 115
275 |                                        (972, 504),  (756, 324)),
276 |     (psp,  isl,  border, f_shrink,  0, (504, 972),  (648, 216),    # 116
277 |                                        (504, 972),  (648, 324)),
278 |     (psp,  isl,  border, f_shrink,  1, (972, 504),  (648, 216),    # 117
279 |                                        (972, 504),  (648, 324)),
280 |     (psp,  isl,  border, f_enlarge, 0, (504, 972),  (756, 252),    # 118
281 |                                        (504, 972),  (864, 432)),
282 |     (psp,  isl,  border, f_enlarge, 1, (972, 504),  (756, 252),    # 119
283 |                                        (972, 504),  (864, 432)),
284 |     (psl,  None, None,   f_into,    0, (972, 504),  (972, 324),    # 120
285 |                                        (972, 504),  (972, 486)),
286 |     (psl,  None, None,   f_into,    1, (972, 504),  (972, 324),    # 121
287 |                                        (972, 504),  (972, 486)),
288 |     (psl,  None, None,   f_fill,    0, (972, 504),  (1512, 504),   # 122
289 |                                        (972, 504),  (1008, 504)),
290 |     (psl,  None, None,   f_fill,    1, (972, 504),  (1512, 504),   # 123
291 |                                        (972, 504),  (1008, 504)),
292 |     (psl,  None, None,   f_exact,   0, (972, 504),  (972, 504),    # 124
293 |                                        (972, 504),  (972, 504)),
294 |     (psl,  None, None,   f_exact,   1, (972, 504),  (972, 504),    # 125
295 |                                        (972, 504),  (972, 504)),
296 |     (psl,  None, None,   f_shrink,  0, (972, 504),  (648, 216),    # 126
297 |                                        (972, 504),  (864, 432)),
298 |     (psl,  None, None,   f_shrink,  1, (972, 504),  (648, 216),    # 127
299 |                                        (972, 504),  (864, 432)),
300 |     (psl,  None, None,   f_enlarge, 0, (972, 504),  (972, 324),    # 128
301 |                                        (972, 504),  (972, 486)),
302 |     (psl,  None, None,   f_enlarge, 1, (972, 504),  (972, 324),    # 129
303 |                                        (972, 504),  (972, 486)),
304 |     (psl,  None, border, f_into,    0, (972, 504),  (432, 144),    # 130
305 |                                        (972, 504),  (360, 180)),
306 |     (psl,  None, border, f_into,    1, (972, 504),  (432, 144),    # 131
307 |                                        (972, 504),  (360, 180)),
308 |     (psl,  None, border, f_fill,    0, (972, 504),  (540, 180),    # 132
309 |                                        (972, 504),  (432, 216)),
310 |     (psl,  None, border, f_fill,    1, (972, 504),  (540, 180),    # 133
311 |                                        (972, 504),  (432, 216)),
312 |     (psl,  None, border, f_exact,   0, (972, 504),  (432, 180),    # 134
313 |                                        (972, 504),  (432, 180)),
314 |     (psl,  None, border, f_exact,   1, (972, 504),  (432, 180),    # 135
315 |                                        (972, 504),  (432, 180)),
316 |     (psl,  None, border, f_shrink,  0, (972, 504),  (432, 144),    # 136
317 |                                        (972, 504),  (360, 180)),
318 |     (psl,  None, border, f_shrink,  1, (972, 504),  (432, 144),    # 137
319 |                                        (972, 504),  (360, 180)),
320 |     (psl,  None, border, f_enlarge, 0, (972, 504),  (648, 216),    # 138
321 |                                        (972, 504),  (864, 432)),
322 |     (psl,  None, border, f_enlarge, 1, (972, 504),  (648, 216),    # 139
323 |                                        (972, 504),  (864, 432)),
324 |     (psl,  isp,  None,   f_into,    0, (972, 504),  (324, 108),    # 140
325 |                                        (972, 504),  (324, 162)),
326 |     (psl,  isp,  None,   f_into,    1, (972, 504),  (324, 108),    # 141
327 |                                        (972, 504),  (324, 162)),
328 |     (psl,  isp,  None,   f_fill,    0, (972, 504),  (2268, 756),   # 142
329 |                                        (972, 504),  (1512, 756)),
330 |     (psl,  isp,  None,   f_fill,    1, (972, 504),  (2268, 756),   # 143
331 |                                        (972, 504),  (1512, 756)),
332 |     (psl,  isp,  None,   f_exact,   0, (972, 504),  (324, 756),    # 144
333 |                                        (972, 504),  (324, 756)),
334 |     (psl,  isp,  None,   f_exact,   1, (972, 504),  (324, 756),    # 145
335 |                                        (972, 504),  (324, 756)),
336 |     (psl,  isp,  None,   f_shrink,  0, (972, 504),  (324, 108),    # 146
337 |                                        (972, 504),  (324, 162)),
338 |     (psl,  isp,  None,   f_shrink,  1, (972, 504),  (324, 108),    # 147
339 |                                        (972, 504),  (324, 162)),
340 |     (psl,  isp,  None,   f_enlarge, 0, (972, 504),  (648, 216),    # 148
341 |                                        (972, 504),  (864, 432)),
342 |     (psl,  isp,  None,   f_enlarge, 1, (972, 504),  (648, 216),    # 149
343 |                                        (972, 504),  (864, 432)),
344 |     # psp=972x504, psl=504x972, isl=756x324, isp=324x756, border=162:270
345 |     # --pagesize   --border           -a      pagepdf     imgpdf
346 |     #        --imgsize     --fit         imgpx
347 |     (psl,  isp,  border, f_into,    0, (972, 504),  (324, 108),    # 150
348 |                                        (972, 504),  (324, 162)),
349 |     (psl,  isp,  border, f_into,    1, (972, 504),  (324, 108),    # 151
350 |                                        (972, 504),  (324, 162)),
351 |     (psl,  isp,  border, f_fill,    0, (972, 504),  (2268, 756),   # 152
352 |                                        (972, 504),  (1512, 756)),
353 |     (psl,  isp,  border, f_fill,    1, (972, 504),  (2268, 756),   # 153
354 |                                        (972, 504),  (1512, 756)),
355 |     (psl,  isp,  border, f_exact,   0, (972, 504),  (324, 756),    # 154
356 |                                        (972, 504),  (324, 756)),
357 |     (psl,  isp,  border, f_exact,   1, (972, 504),  (324, 756),    # 155
358 |                                        (972, 504),  (324, 756)),
359 |     (psl,  isp,  border, f_shrink,  0, (972, 504),  (324, 108),    # 156
360 |                                        (972, 504),  (324, 162)),
361 |     (psl,  isp,  border, f_shrink,  1, (972, 504),  (324, 108),    # 157
362 |                                        (972, 504),  (324, 162)),
363 |     (psl,  isp,  border, f_enlarge, 0, (972, 504),  (648, 216),    # 158
364 |                                        (972, 504),  (864, 432)),
365 |     (psl,  isp,  border, f_enlarge, 1, (972, 504),  (648, 216),    # 159
366 |                                        (972, 504),  (864, 432)),
367 |     (psl,  isl,  None,   f_into,    0, (972, 504),  (756, 252),    # 160
368 |                                        (972, 504),  (648, 324)),
369 |     (psl,  isl,  None,   f_into,    1, (972, 504),  (756, 252),    # 161
370 |                                        (972, 504),  (648, 324)),
371 |     (psl,  isl,  None,   f_fill,    0, (972, 504),  (972, 324),    # 162
372 |                                        (972, 504),  (756, 378)),
373 |     (psl,  isl,  None,   f_fill,    1, (972, 504),  (972, 324),    # 163
374 |                                        (972, 504),  (756, 378)),
375 |     (psl,  isl,  None,   f_exact,   0, (972, 504),  (756, 324),    # 164
376 |                                        (972, 504),  (756, 324)),
377 |     (psl,  isl,  None,   f_exact,   1, (972, 504),  (756, 324),    # 165
378 |                                        (972, 504),  (756, 324)),
379 |     (psl,  isl,  None,   f_shrink,  0, (972, 504),  (648, 216),    # 166
380 |                                        (972, 504),  (648, 324)),
381 |     (psl,  isl,  None,   f_shrink,  1, (972, 504),  (648, 216),    # 167
382 |                                        (972, 504),  (648, 324)),
383 |     (psl,  isl,  None,   f_enlarge, 0, (972, 504),  (756, 252),    # 168
384 |                                        (972, 504),  (864, 432)),
385 |     (psl,  isl,  None,   f_enlarge, 1, (972, 504),  (756, 252),    # 169
386 |                                        (972, 504),  (864, 432)),
387 |     (psl,  isl,  border, f_into,    0, (972, 504),  (756, 252),    # 170
388 |                                        (972, 504),  (648, 324)),
389 |     (psl,  isl,  border, f_into,    1, (972, 504),  (756, 252),    # 171
390 |                                        (972, 504),  (648, 324)),
391 |     (psl,  isl,  border, f_fill,    0, (972, 504),  (972, 324),    # 172
392 |                                        (972, 504),  (756, 378)),
393 |     (psl,  isl,  border, f_fill,    1, (972, 504),  (972, 324),    # 173
394 |                                        (972, 504),  (756, 378)),
395 |     (psl,  isl,  border, f_exact,   0, (972, 504),  (756, 324),    # 174
396 |                                        (972, 504),  (756, 324)),
397 |     (psl,  isl,  border, f_exact,   1, (972, 504),  (756, 324),    # 175
398 |                                        (972, 504),  (756, 324)),
399 |     (psl,  isl,  border, f_shrink,  0, (972, 504),  (648, 216),    # 176
400 |                                        (972, 504),  (648, 324)),
401 |     (psl,  isl,  border, f_shrink,  1, (972, 504),  (648, 216),    # 177
402 |                                        (972, 504),  (648, 324)),
403 |     (psl,  isl,  border, f_enlarge, 0, (972, 504),  (756, 252),    # 178
404 |                                        (972, 504),  (864, 432)),
405 |     (psl,  isl,  border, f_enlarge, 1, (972, 504),  (756, 252),    # 179
406 |                                        (972, 504),  (864, 432)),
407 | ]
408 | 
409 | 
def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4):
    """Return a minimal little-endian TIFF header for one raw CCITT strip.

    Writing the returned bytes followed by ``img_size`` bytes of CCITT data
    yields a single-strip TIFF file with one bit per sample.

    width, height -- image dimensions in pixels
    img_size      -- number of bytes of CCITT data that follow the header
    ccitt_group   -- value stored in the Compression tag
                     (4 = CCITT Group 4)
    """
    # Quick and dirty TIFF header builder from
    # https://stackoverflow.com/questions/2641770
    #
    # Layout: 8 byte file header, 2 byte entry count, eight 12 byte IFD
    # entries and a 4 byte offset to the next IFD. All fields are unsigned.
    # The trailing offset has to be a full LONG: with only two bytes the
    # first two bytes of the image data would be read as part of it.
    tiff_header_struct = '<' + '2s' + 'H' + 'L' + 'H' + 'HHLL' * 8 + 'L'
    # the strip data starts directly after the header
    header_size = struct.calcsize(tiff_header_struct)
    return struct.pack(
        tiff_header_struct,
        b'II',  # Byte order indication: little endian
        42,  # Version number (always 42)
        8,  # Offset to first IFD
        8,  # Number of tags in IFD
        256, 4, 1, width,  # ImageWidth, LONG, 1, width
        257, 4, 1, height,  # ImageLength, LONG, 1, length
        258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
        259, 3, 1, ccitt_group,  # Compression, SHORT, 1, 4 = CCITT Group 4
        262, 3, 1, 1,  # PhotometricInterpretation, SHORT, 1, 1 = BlackIsZero
        273, 4, 1, header_size,  # StripOffsets, LONG, 1, len of header
        278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
        279, 4, 1, img_size,  # StripByteCounts, LONG, 1, size of image
        0  # Offset of next IFD, 0 = this is the last IFD
        )
431 | 
432 | 
def test_suite():
    """Build and return the unittest suite for img2pdf.

    Test methods are generated at call time and attached to a fresh
    ``TestImg2Pdf`` test case class:

    * ``test_layout_NNN_im1`` / ``test_layout_NNN_im2`` -- one pair for each
      entry of ``layout_test_cases``. The layout function built from the
      entry's options is run against the module-level ``im1`` and ``im2``
      and the resulting page and image sizes are compared with the values
      expected by the entry.
    * ``test_<file>_with_pdfrw`` / ``test_<file>_without_pdfrw`` -- one pair
      for each regular file in the ``input`` directory. The file is
      converted and the result is checked structurally and against the
      expected PDF of the same name in the ``output`` directory.

    Returns a ``unittest.TestSuite`` wrapping all generated tests.
    """
    class TestImg2Pdf(unittest.TestCase):
        pass

    def close_image(image):
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
        # close() method
        try:
            image.close()
        except AttributeError:
            pass

    def format_float(f):
        # integral values are written without a fractional part, everything
        # else with at most four decimals and no trailing zeros
        if int(f) == f:
            return str(int(f))
        else:
            return ("%.4f" % f).rstrip("0")

    def layout_handler(
            self, psopt, isopt, border, fit, ao, pspdf, ispdf, im):
        layout_fun = img2pdf.get_layout_fun(psopt, isopt, border, fit, ao)
        try:
            pwpdf, phpdf, iwpdf, ihpdf = \
                layout_fun(im[0], im[1], (img2pdf.default_dpi,
                                          img2pdf.default_dpi))
        except img2pdf.NegativeDimensionError:
            # test cases expecting this error carry None as expected sizes
            self.assertEqual(None, pspdf)
            self.assertEqual(None, ispdf)
        else:
            self.assertEqual((pwpdf, phpdf), pspdf)
            self.assertEqual((iwpdf, ihpdf), ispdf)

    for i, (psopt, isopt, border, fit, ao, pspdf1, ispdf1,
            pspdf2, ispdf2) in enumerate(layout_test_cases):
        if isopt is not None:
            isopt = ((img2pdf.ImgSize.abs, isopt[0]),
                     (img2pdf.ImgSize.abs, isopt[1]))

        # the loop variables are bound as default arguments so that every
        # generated method keeps the values of its own iteration
        def layout_handler_im1(self, psopt=psopt, isopt=isopt, border=border,
                               fit=fit, ao=ao, pspdf=pspdf1, ispdf=ispdf1):
            layout_handler(self, psopt, isopt, border, fit, ao, pspdf, ispdf,
                           im1)
        setattr(TestImg2Pdf, "test_layout_%03d_im1" % i, layout_handler_im1)

        def layout_handler_im2(self, psopt=psopt, isopt=isopt, border=border,
                               fit=fit, ao=ao, pspdf=pspdf2, ispdf=ispdf2):
            layout_handler(self, psopt, isopt, border, fit, ao, pspdf, ispdf,
                           im2)
        setattr(TestImg2Pdf, "test_layout_%03d_im2" % i, layout_handler_im2)

    files = os.listdir(os.path.join(HERE, "input"))
    for with_pdfrw in (True, False):
        for test_name in files:
            inputf = os.path.join(HERE, "input", test_name)
            if not os.path.isfile(inputf):
                continue
            outputf = os.path.join(HERE, "output", test_name+".pdf")
            assert os.path.isfile(outputf)

            def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
                with open(f, "rb") as inf:
                    orig_imgdata = inf.read()
                output = img2pdf.convert(orig_imgdata, nodate=True,
                                         with_pdfrw=with_pdfrw)
                from pdfrw import PdfReader, PdfName, PdfWriter
                from pdfrw.py23_diffs import convert_load, convert_store
                x = PdfReader(PdfReaderIO(convert_load(output)))
                self.assertEqual(sorted(x.keys()),
                                 [PdfName.Info, PdfName.Root, PdfName.Size])
                self.assertIn(x.Root.Pages.Count, ('1', '2'))
                # pdfrw hands out the page count as a string, so it is
                # compared against strings here
                if x.Root.Pages.Count == '1':
                    self.assertEqual(x.Size, '7')
                    self.assertEqual(len(x.Root.Pages.Kids), 1)
                elif x.Root.Pages.Count == '2':
                    self.assertEqual(x.Size, '10')
                    self.assertEqual(len(x.Root.Pages.Kids), 2)
                self.assertEqual(x.Info, {})
                self.assertEqual(sorted(x.Root.keys()),
                                 [PdfName.Pages, PdfName.Type])
                self.assertEqual(x.Root.Type, PdfName.Catalog)
                self.assertEqual(sorted(x.Root.Pages.keys()),
                                 [PdfName.Count, PdfName.Kids, PdfName.Type])
                self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
                orig_img = Image.open(f)
                # close the input image even if one of the assertions below
                # fails
                self.addCleanup(close_image, orig_img)
                for pagenum in range(len(x.Root.Pages.Kids)):
                    # retrieve the original image frame that this page was
                    # generated from
                    orig_img.seek(pagenum)
                    cur_page = x.Root.Pages.Kids[pagenum]

                    ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                    # In python3, the returned dpi value for some tiff images
                    # will not be an integer but a float. To make the
                    # behaviour of img2pdf the same between python2 and
                    # python3, we convert that float into an integer by
                    # rounding.
                    # Search online for the 72.009 dpi problem for more info.
                    ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                    imgwidthpx, imgheightpx = orig_img.size
                    pagewidth = 72.0*imgwidthpx/ndpi[0]
                    pageheight = 72.0*imgheightpx/ndpi[1]

                    self.assertEqual(sorted(cur_page.keys()),
                                     [PdfName.Contents, PdfName.MediaBox,
                                      PdfName.Parent, PdfName.Resources,
                                      PdfName.Type])
                    self.assertEqual(cur_page.MediaBox,
                                     ['0', '0', format_float(pagewidth),
                                      format_float(pageheight)])
                    self.assertEqual(cur_page.Parent, x.Root.Pages)
                    self.assertEqual(cur_page.Type, PdfName.Page)
                    self.assertEqual(cur_page.Resources.keys(),
                                     [PdfName.XObject])
                    self.assertEqual(cur_page.Resources.XObject.keys(),
                                     [PdfName.Im0])
                    self.assertEqual(cur_page.Contents.keys(),
                                     [PdfName.Length])
                    self.assertEqual(cur_page.Contents.Length,
                                     str(len(cur_page.Contents.stream)))
                    self.assertEqual(cur_page.Contents.stream,
                                     "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                     "/Im0 Do\nQ" % (pagewidth, pageheight))

                    imgprops = cur_page.Resources.XObject.Im0

                    # test if the filter is valid:
                    self.assertIn(
                        imgprops.Filter, [[PdfName.DCTDecode],
                                          [PdfName.JPXDecode],
                                          [PdfName.FlateDecode],
                                          [PdfName.CCITTFaxDecode]])
                    # test if the colorspace is valid
                    self.assertIn(
                        imgprops.ColorSpace, [PdfName.DeviceGray,
                                              PdfName.DeviceRGB,
                                              PdfName.DeviceCMYK])

                    # test if the image has correct size
                    self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                    self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                    # if the input file is a jpeg then it should've been
                    # copied verbatim into the PDF
                    if imgprops.Filter in [[PdfName.DCTDecode],
                                           [PdfName.JPXDecode]]:
                        self.assertEqual(
                            cur_page.Resources.XObject.Im0.stream,
                            convert_load(orig_imgdata))
                    elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                        # wrap the raw CCITT data into a TIFF file so that
                        # it can be decoded and compared pixel by pixel
                        tiff_header = tiff_header_for_ccitt(
                            int(imgprops.Width), int(imgprops.Height),
                            int(imgprops.Length), 4)
                        imgio = BytesIO()
                        imgio.write(tiff_header)
                        imgio.write(convert_store(
                            cur_page.Resources.XObject.Im0.stream))
                        imgio.seek(0)
                        im = Image.open(imgio)
                        self.assertEqual(im.tobytes(), orig_img.tobytes())
                        close_image(im)
                    elif imgprops.Filter == [PdfName.FlateDecode]:
                        # otherwise, the data is flate encoded and has to be
                        # equal to the pixel data of the input image
                        imgdata = zlib.decompress(convert_store(
                            cur_page.Resources.XObject.Im0.stream))
                        colorspace = imgprops.ColorSpace
                        if colorspace == PdfName.DeviceGray:
                            colorspace = 'L'
                        elif colorspace == PdfName.DeviceRGB:
                            colorspace = 'RGB'
                        elif colorspace == PdfName.DeviceCMYK:
                            colorspace = 'CMYK'
                        else:
                            raise Exception("invalid colorspace")
                        im = Image.frombytes(colorspace,
                                             (int(imgprops.Width),
                                              int(imgprops.Height)),
                                             imgdata)
                        # NOTE(review): inputs whose mode already is RGB, L,
                        # CMYK or CMYK;I are not compared pixel by pixel
                        # here -- confirm whether that is intended
                        if orig_img.mode == '1':
                            self.assertEqual(
                                im.tobytes(),
                                orig_img.convert("L").tobytes())
                        elif orig_img.mode not in ("RGB", "L", "CMYK",
                                                   "CMYK;I"):
                            self.assertEqual(
                                im.tobytes(),
                                orig_img.convert("RGB").tobytes())
                        close_image(im)
                # now use pdfrw to parse and then write out both pdfs and
                # check the result for equality
                y = PdfReader(out)
                outx = BytesIO()
                outy = BytesIO()
                xwriter = PdfWriter()
                ywriter = PdfWriter()
                xwriter.trailer = x
                ywriter.trailer = y
                xwriter.write(outx)
                ywriter.write(outy)
                self.assertEqual(outx.getvalue(), outy.getvalue())
            if with_pdfrw:
                setattr(TestImg2Pdf, "test_%s_with_pdfrw" % test_name,
                        handle)
            else:
                setattr(TestImg2Pdf, "test_%s_without_pdfrw" % test_name,
                        handle)

    # unittest.makeSuite() is deprecated and no longer exists in Python 3.13;
    # the loader method is available in all supported versions
    return unittest.TestSuite((
            unittest.defaultTestLoader.loadTestsFromTestCase(TestImg2Pdf),
            ))


--------------------------------------------------------------------------------
/src/tests/input/CMYK.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/input/CMYK.jpg


--------------------------------------------------------------------------------
/src/tests/input/CMYK.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/input/CMYK.tif


--------------------------------------------------------------------------------
/src/tests/input/animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/input/animation.gif


--------------------------------------------------------------------------------
/src/tests/input/mono.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/input/mono.png


--------------------------------------------------------------------------------
/src/tests/input/normal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/input/normal.jpg


--------------------------------------------------------------------------------
/src/tests/input/normal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/input/normal.png


--------------------------------------------------------------------------------
/src/tests/output/CMYK.jpg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/output/CMYK.jpg.pdf


--------------------------------------------------------------------------------
/src/tests/output/CMYK.tif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/output/CMYK.tif.pdf


--------------------------------------------------------------------------------
/src/tests/output/animation.gif.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/output/animation.gif.pdf


--------------------------------------------------------------------------------
/src/tests/output/mono.png.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/output/mono.png.pdf


--------------------------------------------------------------------------------
/src/tests/output/normal.jpg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/output/normal.jpg.pdf


--------------------------------------------------------------------------------
/src/tests/output/normal.png.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/myollie/img2pdf/ff6216cf641e21e70619ac18a49deca26ff8f4a0/src/tests/output/normal.png.pdf


--------------------------------------------------------------------------------
/test_comp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | if [ $# -ne 1 ]; then
 4 | 	echo "usage: $0 image"
 5 | 	exit
 6 | fi
 7 | 
 8 | echo "converting image to pdf, trying all compressions imagemagick has to offer"
 9 | echo "if, as a result, Zip/FlateDecode should NOT be the lossless compression with the lowest size ratio, contact me j [dot] schauer [at] email [dot] de"
10 | echo "also, send me the image in question"
11 | echo
12 | 
13 | imsize=`stat -c "%s" "$1"`
14 | 
15 | for a in `convert -list compress`; do
16 | 	echo "encode:\t$a"
17 | 	convert "$1" -compress $a "`basename $1 .jpg`.pdf"
18 | 	pdfimages "`basename $1 .jpg`.pdf" "`basename $1 .jpg`"
19 | 	/bin/echo -ne "diff:\t"
20 | 	diff=`compare -metric AE "$1" "\`basename $1 .jpg\`-000.ppm" null: 2>&1`
21 | 	if [ "$diff" != "0" ]; then
22 | 		echo "lossy"
23 | 	else
24 | 		echo "lossless"
25 | 	fi
26 | 	/bin/echo -ne "size:\t"
27 | 	pdfsize=`stat -c "%s" "\`basename $1 .jpg\`.pdf"`
28 | 	echo "scale=1;$pdfsize/$imsize" | bc
29 | 	/bin/echo -ne "pdf:\t"
30 | 	grep --max-count=1 --text /Filter "`basename $1 .jpg`.pdf"
31 | 	echo
32 | done
33 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py27,py35,pypy
 3 | 
 4 | [testenv]
 5 | deps =
 6 |     .[test]
 7 | commands =
 8 |     python setup.py test -q
 9 | 
10 | 


--------------------------------------------------------------------------------