├── .gitignore ├── .mailmap ├── .travis.yml ├── CHANGES.rst ├── HACKING ├── LICENSE ├── MANIFEST.in ├── README.md ├── appveyor.yml ├── screenshot.png ├── setup.py ├── src ├── img2pdf.py ├── img2pdf_test.py ├── jp2.py └── tests │ ├── input │ ├── CMYK.jpg │ ├── CMYK.tif │ ├── animation.gif │ ├── gray.png │ ├── mono.jb2 │ ├── mono.png │ ├── mono.tif │ ├── normal.jpg │ └── normal.png │ └── output │ ├── CMYK.jpg.pdf │ ├── CMYK.tif.pdf │ ├── animation.gif.pdf │ ├── gray.png.pdf │ ├── mono.jb2.pdf │ ├── mono.png.pdf │ ├── mono.tif.pdf │ ├── normal.jpg.pdf │ └── normal.png.pdf ├── test_comp.sh └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build 3 | src/*.egg-info 4 | 5 | .eggs 6 | .tox 7 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | Johannes Schauer Marin Rodrigues 2 | Johannes Schauer Marin Rodrigues 3 | Johannes Schauer Marin Rodrigues 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - name: "Ubuntu Focal" 5 | dist: focal 6 | addons: 7 | apt: 8 | packages: 9 | - imagemagick 10 | - libtiff-tools 11 | - libimage-exiftool-perl 12 | - poppler-utils 13 | - netpbm 14 | - ghostscript 15 | - mupdf-tools 16 | - name: "python 3.9 Windows" 17 | os: windows 18 | language: shell # 'language: python' is an error on Travis CI Windows 19 | before_install: choco install python imagemagick 20 | env: PATH=/c/Python39:/c/Python39/Scripts:$PATH 21 | - name: "python 3.7 MacOs" 22 | os: osx 23 | osx_image: xcode12.2 # pikepdf import fails with earlier versions 24 | language: shell # 'language: python' is an error on Travis CI macOS 25 | cache: 26 | directories: 27 | - "$HOME/Library/Caches/Homebrew" 28 | - 
"$HOME/Library/Caches/pip" 29 | addons: 30 | homebrew: 31 | #update: true 32 | packages: 33 | - python3 34 | - imagemagick 35 | before_install: 36 | - python3 -m pip install --upgrade virtualenv 37 | - virtualenv -p python3 --system-site-packages "$HOME/venv" 38 | - source "$HOME/venv/bin/activate" 39 | install: pip install tox 40 | script: 41 | - python --version 42 | - python -m tox 43 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | CHANGES 3 | ======= 4 | 5 | 0.6.1 (2025-04-27) 6 | ------------------ 7 | 8 | - testsuite fixes 9 | 10 | 0.6.0 (2025-02-15) 11 | ------------------ 12 | 13 | - Add support for JBIG2 (generic coding) 14 | - Add convert_to_docobject() broken out from convert() 15 | - Add pil_get_dpi() broken out from get_imgmetadata() 16 | 17 | 0.5.1 (2023-11-26) 18 | ------------------ 19 | 20 | - no default ICC profile location for PDF/A-1b on Windows 21 | - workaround for PNG input without dpi units but non-square dpi aspect ratio 22 | 23 | 0.5.0 (2023-10-28) 24 | ------------------ 25 | 26 | - support MIFF for 16 bit CMYK input 27 | - accept pathlib.Path objects as input 28 | - don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG 29 | - thumbnails are no longer included by default and --include-thumbnails has to 30 | be used if you want them 31 | - support for pikepdf (>= 6.2.0) 32 | 33 | 0.4.4 (2022-04-07) 34 | ------------------ 35 | 36 | - --viewer-page-layout support for twopageright and twopageleft 37 | - Add B and JB paper sizes 38 | - support for pikepdf (>= 5.0.0) and Pillow (>= 9.1.0) 39 | 40 | 0.4.3 (2021-10-24) 41 | ------------------ 42 | 43 | - fix --viewer-initial-page (broken in last release) 44 | 45 | 0.4.2 (2021-10-11) 46 | ------------------ 47 | 48 | - add --rotation 49 | - allow palette PNG images with ICC profile 50 | - sort globbing result on windows 51 | - 
convert 8-bit PNG alpha channels to /SMasks in PDF 52 | - remove pdfrw from tests 53 | 54 | 0.4.1 (2021-05-09) 55 | ------------------ 56 | 57 | - support wildcards in paths on windows 58 | - support MPO images 59 | - fix page border computation 60 | - use "img2pdf" logger instead of "root" logger 61 | - add --from-file 62 | 63 | 0.4.0 (2020-08-07) 64 | ------------------ 65 | 66 | - replace --without-pdfrw by --engine=internal or --engine=pdfrw 67 | - add pikepdf as additional rendering engine and add --engine=pikepdf 68 | - support for creating PDF/A-1b compliant PDF using the --pdfa option 69 | (this also requires the presence of an ICC profile somewhere on the system) 70 | - support for images with embedded ICC profile as input 71 | - rewrite tests 72 | * use pytest via tox 73 | * use pikepdf instead of pdfrw 74 | * use imagemagick json output instead of identify -verbose 75 | - format all code with black 76 | 77 | 0.3.6 (2020-04-30) 78 | ------------------ 79 | 80 | - fix tests for Fedora on arm64 81 | 82 | 0.3.5 (2020-04-28) 83 | ------------------ 84 | 85 | - remove all Python 2 support 86 | - disable pdfrw by default 87 | 88 | 0.3.4 (2020-04-05) 89 | ------------------ 90 | 91 | - test.sh: replace imagemagick with custom python script to produce bit-by-bit 92 | identical results on all architectures 93 | - add --crop-border, --bleed-border, --trim-border and --art-border options 94 | - first draft of a rudimentary tkinter gui (run with --gui) 95 | 96 | 0.3.3 (2019-01-07) 97 | ------------------ 98 | 99 | - restore basic support for Python 2 100 | - also ship test.sh 101 | - add legal and tabloid paper formats 102 | - respect exif rotation tag 103 | 104 | 0.3.2 (2018-11-20) 105 | ------------------ 106 | 107 | - support big endian TIFF with lsb-to-msb FillOrder 108 | - support multipage CCITT Group 4 TIFF 109 | - also reject palette images with transparency 110 | - support PNG images with 1, 2, 4 or 16 bits per sample 111 | - support multipage TIFF with 
differently encoded images 112 | - support CCITT Group4 TIFF without rows-per-strip 113 | - add extensive test suite 114 | 115 | 0.3.1 (2018-08-04) 116 | ------------------ 117 | 118 | - Directly copy data from CCITT Group 4 encoded TIFF images into the PDF 119 | container without re-encoding 120 | 121 | 0.3.0 (2018-06-18) 122 | ------------------ 123 | 124 | - Store non-jpeg images using PNG compression 125 | - Support arbitrarily large pages via PDF /UserUnit field 126 | - Disallow input with alpha channel as it cannot be preserved 127 | - Add option --pillow-limit-break to support very large input 128 | 129 | 0.2.4 (2017-05-23) 130 | ------------------ 131 | 132 | - Restore support for Python 2.7 133 | - Add support for PyPy 134 | - Add support for testing using tox 135 | 136 | 0.2.3 (2017-01-20) 137 | ------------------ 138 | 139 | - version number bump for botched pypi upload... 140 | 141 | 0.2.2 (2017-01-20) 142 | ------------------ 143 | 144 | - automatic monochrome CCITT Group4 encoding via Pillow/libtiff 145 | 146 | 0.2.1 (2016-05-04) 147 | ------------------ 148 | 149 | - set img2pdf as /producer value 150 | - support multi-frame images like multipage TIFF and animated GIF 151 | - support for palette images like GIF 152 | - support all colorspaces and imageformats known by PIL 153 | - read horizontal and vertical dpi from JPEG2000 files 154 | 155 | 0.2.0 (2015-05-10) 156 | ------------------ 157 | 158 | - now Python3 only 159 | - pep8 compliant code 160 | - update my email to josch@mister-muffin.de 161 | - move from github to gitlab.mister-muffin.de/josch/img2pdf 162 | - use logging module 163 | - add extensive test suite 164 | - ability to read from standard input 165 | - pdf writer: 166 | - make more compatible with the interface of pdfrw module 167 | - print floats which equal to their integer conversion as integer 168 | - do not print trailing zeroes for floating point numbers 169 | - print more linebreaks 170 | - add binary string at beginning of PDF 
to indicate that the PDF 171 | contains binary data 172 | - handle datetime and unicode strings by using utf-16-be encoding 173 | - new options (see --help for more details): 174 | - --without-pdfrw 175 | - --imgsize 176 | - --border 177 | - --fit 178 | - --auto-orient 179 | - --viewer-panes 180 | - --viewer-initial-page 181 | - --viewer-magnification 182 | - --viewer-page-layout 183 | - --viewer-fit-window 184 | - --viewer-center-window 185 | - --viewer-fullscreen 186 | - remove short options for metadata command line arguments 187 | - correctly encode and escape non-ascii metadata 188 | - explicitly store date in UTC and allow parsing all date formats understood 189 | by dateutil and `date --date` 190 | 191 | 0.1.5 (2015-02-16) 192 | ------------------ 193 | 194 | - Enable support for CMYK images 195 | - Rework test suite 196 | - support file objects as input 197 | 198 | 0.1.4 (2015-01-21) 199 | ------------------ 200 | 201 | - add Python 3 support 202 | - make output reproducible by sorting and --nodate option 203 | 204 | 0.1.3 (2014-11-10) 205 | ------------------ 206 | 207 | - Avoid leaking file descriptors 208 | - Convert unrecognized colorspaces to RGB 209 | 210 | 0.1.1 (2014-09-07) 211 | ------------------ 212 | 213 | - allow running src/img2pdf.py standalone 214 | - license change from GPL to LGPL 215 | - Add pillow 2.4.0 support 216 | - add options to specify pdf dimensions in points 217 | 218 | 0.1.0 (2014-03-14, unreleased) 219 | ------------------ 220 | 221 | - Initial PyPI release. 222 | - Modified code to create proper package. 223 | - Added tests. 224 | - Added console script entry point. 
225 | -------------------------------------------------------------------------------- /HACKING: -------------------------------------------------------------------------------- 1 | Running img2pdf from source 2 | --------------------------- 3 | 4 | img2pdf can be run directly from the cloned git repository: 5 | 6 | $ python3 src/img2pdf.py img.jpg -o out.pdf 7 | 8 | Running the testsuite 9 | --------------------- 10 | 11 | $ pytest 12 | 13 | Making a new release 14 | -------------------- 15 | 16 | - CHANGES.rst: Add a new entry 17 | - setup.py: Bump VERSION 18 | - src/img2pdf.py: Bump __version__ 19 | - Commit: 20 | 21 | $ git add CHANGES.rst setup.py src/img2pdf.py 22 | $ git commit -m "release version X.Y.Z" 23 | 24 | - Add git tag: 25 | 26 | $ git tag X.Y.Z -m X.Y.Z 27 | 28 | - Build and upload to pypi: 29 | 30 | $ rm -rf dist/* 31 | $ python3 setup.py sdist 32 | $ twine upload dist/* 33 | 34 | - Push everything to git forge 35 | 36 | $ git push 37 | 38 | - Push to github 39 | 40 | $ git push github 41 | 42 | - Obtain img2pdf.exe from appveyor: 43 | 44 | https://ci.appveyor.com/project/josch/img2pdf/ 45 | 46 | - Create new release: 47 | 48 | https://gitlab.mister-muffin.de/josch/img2pdf/releases/new 49 | 50 | Using debbisect to find regressions 51 | ----------------------------------- 52 | 53 | $ debbisect --cache=./cache --depends="git,ca-certificates,python3, 54 | ghostscript,imagemagick,mupdf-tools,poppler-utils,python3-pil, 55 | python3-pytest,python3-numpy,python3-scipy,python3-pikepdf" \ 56 | --verbose 2023-09-16 2023-10-24 \ 57 | 'chroot "$1" sh -c " 58 | git clone https://gitlab.mister-muffin.de/josch/img2pdf.git 59 | && cd img2pdf 60 | && pytest 'src/img2pdf_test.py::test_jpg_2000_rgba8[internal]"' 61 | 62 | Using debbisect cache 63 | --------------------- 64 | 65 | $ mmdebstrap --variant=apt --aptopt='Acquire::Check-Valid-Until "false"' \ 66 | --include=git,ca-certificates,python3,ghostscript,imagemagick \ 67 | 
--include=mupdf-tools,poppler-utils,python3-pil,python3-pytest \ 68 | --include=python3-numpy,python3-scipy,python3-pikepdf \ 69 | --hook-dir=/usr/share/mmdebstrap/hooks/file-mirror-automount \ 70 | --setup-hook='mkdir -p "$1/home/josch/git/devscripts/cache/pool/"' \ 71 | --setup-hook='mount -o ro,bind /home/josch/git/devscripts/cache/pool/ "$1/home/josch/git/devscripts/cache/pool/"' \ 72 | --chrooted-customize-hook=bash 73 | unstable /dev/null 74 | file:///home/josch/git/devscripts/cache/archive/debian/20231022T090139Z/ 75 | 76 | Bisecting imagemagick 77 | --------------------- 78 | 79 | $ git clean -fdx && git reset --hard 80 | $ ./configure --prefix=$(pwd)/prefix 81 | $ make -j$(nproc) 82 | $ make install 83 | $ LD_LIBRARY_PATH=$(pwd)/prefix/lib prefix/bin/compare ... 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 
24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 
64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 
98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include test_comp.sh 3 | include test.sh 4 | include magick.py 5 | include CHANGES.rst 6 | include LICENSE 7 | recursive-include src *.jpg 8 | recursive-include src *.pdf 9 | recursive-include src *.png 10 | recursive-include src *.tif 11 | recursive-include src *.gif 12 | recursive-include src *.py 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Travis Status](https://travis-ci.com/josch/img2pdf.svg?branch=main)](https://app.travis-ci.com/josch/img2pdf) 2 | [![Appveyor Status](https://ci.appveyor.com/api/projects/status/2kws3wkqvi526llj/branch/main?svg=true)](https://ci.appveyor.com/project/josch/img2pdf/branch/main) 3 | 4 | img2pdf 5 | ======= 6 | 7 | Lossless conversion of raster images to PDF. You should use img2pdf if your 8 | priorities are (in this order): 9 | 10 | 1. **always lossless**: the image embedded in the PDF will always have the 11 | exact same color information for every pixel as the input 12 | 2. **small**: if possible, the difference in filesize between the input image 13 | and the output PDF will only be the overhead of the PDF container itself 14 | 3. **fast**: if possible, the input image is just pasted into the PDF document 15 | as-is without any CPU hungry re-encoding of the pixel data 16 | 17 | Conventional conversion software (like ImageMagick) would either: 18 | 19 | 1. 
not be lossless because lossy re-encoding to JPEG 20 | 2. not be small because using wasteful flate encoding of raw pixel data 21 | 3. not be fast because input data gets re-encoded 22 | 23 | Another advantage of not having to re-encode the input (in most common 24 | situations) is, that img2pdf is able to handle much larger input than other 25 | software, because the raw pixel data never has to be loaded into memory. 26 | 27 | The following table shows how img2pdf handles different input depending on the 28 | input file format and image color space. 29 | 30 | | Format | Colorspace | Result | 31 | | ------------------------------------- | ------------------------------------ | ------------- | 32 | | JPEG | any | direct | 33 | | JPEG2000 | any | direct | 34 | | PNG (non-interlaced, no transparency) | any | direct | 35 | | TIFF (CCITT Group 4) | 1-bit monochrome | direct | 36 | | JBIG2 (single-page generic coding) | 1-bit monochrome | direct | 37 | | any | any except CMYK and 1-bit monochrome | PNG Paeth | 38 | | any | 1-bit monochrome | CCITT Group 4 | 39 | | any | CMYK | flate | 40 | 41 | For JPEG, JPEG2000, non-interlaced PNG, TIFF images with CCITT Group 4 42 | encoded data, and JBIG2 with single-page generic coding (e.g. using `jbig2enc`), 43 | img2pdf directly embeds the image data into the PDF without 44 | re-encoding it. It thus treats the PDF format merely as a container format for 45 | the image data. In these cases, img2pdf only increases the filesize by the size 46 | of the PDF container (typically around 500 to 700 bytes). Since data is only 47 | copied and not re-encoded, img2pdf is also typically faster than other 48 | solutions for these input formats. 49 | 50 | For all other input types, img2pdf first has to transform the pixel data to 51 | make it compatible with PDF. In most cases, the PNG Paeth filter is applied to 52 | the pixel data. For 1-bit monochrome input, CCITT Group 4 is used instead. 
Only for 53 | CMYK input no filter is applied before finally applying flate compression. 54 | 55 | Usage 56 | ----- 57 | 58 | The images must be provided as files because img2pdf needs to seek in the file 59 | descriptor. 60 | 61 | If no output file is specified with the `-o`/`--output` option, output will be 62 | done to stdout. A typical invocation is: 63 | 64 | $ img2pdf img1.png img2.jpg -o out.pdf 65 | 66 | The detailed documentation can be accessed by running: 67 | 68 | $ img2pdf --help 69 | 70 | With no command line arguments supplied, img2pdf will read a single image from 71 | standard input and write the resulting PDF to standard output. Here is an 72 | example for how to scan directly to PDF using scanimage(1) from SANE: 73 | 74 | $ scanimage --mode=Color --resolution=300 | pnmtojpeg -quality 90 | img2pdf > scan.pdf 75 | 76 | Bugs 77 | ---- 78 | 79 | - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, 80 | when embedded into the PDF cannot be read by the Adobe Acrobat Reader, 81 | please contact me. 82 | 83 | - An error is produced if the input image is broken. This commonly happens if 84 | the input image has an invalid EXIF Orientation value of zero. Even though 85 | only eight different values from 1 to 8 are permitted, Android phones and 86 | Canon DSLR cameras produce JPEG images with the invalid value of zero. 87 | Either fix your input images with `exiftool` or similar software before 88 | passing the JPEG to `img2pdf` or run `img2pdf` with `--rotation=ifvalid` 89 | (if you run img2pdf from the commandline) or by passing 90 | `rotation=img2pdf.Rotation.ifvalid` as an argument to `convert()` when using 91 | img2pdf as a library. 92 | 93 | - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the 94 | input if necessary. To prevent decompression bomb denial of service attacks, 95 | Pillow limits the maximum number of pixels an input image is allowed to 96 | have.
If you are sure that you know what you are doing, then you can disable 97 | this safeguard by passing the `--pillow-limit-break` option to img2pdf. This 98 | allows one to process even very large input images. 99 | 100 | Installation 101 | ------------ 102 | 103 | On Debian- and Ubuntu-based systems, img2pdf can be installed from the 104 | official repositories: 105 | 106 | $ apt install img2pdf 107 | 108 | If you want to install it using pip, you can run: 109 | 110 | $ pip3 install img2pdf 111 | 112 | If you prefer to install from source code use: 113 | 114 | $ cd img2pdf/ 115 | $ pip3 install . 116 | 117 | To test the console script without installing the package on your system, 118 | use virtualenv: 119 | 120 | $ cd img2pdf/ 121 | $ virtualenv ve 122 | $ ve/bin/pip3 install . 123 | 124 | You can then test the converter using: 125 | 126 | $ ve/bin/img2pdf -o test.pdf src/tests/input/normal.jpg 127 | 128 | If you don't want to set up Python on Windows, then head to the 129 | [releases](https://gitlab.mister-muffin.de/josch/img2pdf/releases) section and download the latest 130 | `img2pdf.exe`. 131 | 132 | GUI 133 | --- 134 | 135 | There exists an experimental GUI with all settings currently disabled. You can 136 | directly convert images to PDF but you cannot set any options via the GUI yet. 137 | If you are interested in adding more features to the GUI, please submit a merge 138 | request. The GUI is based on tkinter and works on Linux, Windows and MacOS.
139 | 140 | ![](screenshot.png) 141 | 142 | Library 143 | ------- 144 | 145 | The package can also be used as a library: 146 | 147 | import img2pdf 148 | 149 | # opening from filename 150 | with open("name.pdf","wb") as f: 151 | f.write(img2pdf.convert('test.jpg')) 152 | 153 | # opening from file handle 154 | with open("name.pdf","wb") as f1, open("test.jpg", "rb") as f2: 155 | f1.write(img2pdf.convert(f2)) 156 | 157 | # opening using pathlib 158 | with open("name.pdf","wb") as f: 159 | f.write(img2pdf.convert(pathlib.Path('test.jpg'))) 160 | 161 | # using in-memory image data 162 | with open("name.pdf","wb") as f: 163 | f.write(img2pdf.convert(b"\x89PNG...")) 164 | 165 | # multiple inputs (variant 1) 166 | with open("name.pdf","wb") as f: 167 | f.write(img2pdf.convert("test1.jpg", "test2.png")) 168 | 169 | # multiple inputs (variant 2) 170 | with open("name.pdf","wb") as f: 171 | f.write(img2pdf.convert(["test1.jpg", "test2.png"])) 172 | 173 | # convert all files ending in .jpg inside a directory 174 | dirname = "/path/to/images" 175 | imgs = [] 176 | for fname in os.listdir(dirname): 177 | if not fname.endswith(".jpg"): 178 | continue 179 | path = os.path.join(dirname, fname) 180 | if os.path.isdir(path): 181 | continue 182 | imgs.append(path) 183 | with open("name.pdf","wb") as f: 184 | f.write(img2pdf.convert(imgs)) 185 | 186 | # convert all files ending in .jpg in a directory and its subdirectories 187 | dirname = "/path/to/images" 188 | imgs = [] 189 | for r, _, f in os.walk(dirname): 190 | for fname in f: 191 | if not fname.endswith(".jpg"): 192 | continue 193 | imgs.append(os.path.join(r, fname)) 194 | with open("name.pdf","wb") as f: 195 | f.write(img2pdf.convert(imgs)) 196 | 197 | 198 | # convert all files matching a glob 199 | import glob 200 | with open("name.pdf","wb") as f: 201 | f.write(img2pdf.convert(glob.glob("/path/to/*.jpg"))) 202 | 203 | # convert all files matching a glob using pathlib.Path 204 | from pathlib import Path 205 | with
open("name.pdf","wb") as f: 206 | f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg"))) 207 | 208 | # ignore invalid rotation values in the input images 209 | with open("name.pdf","wb") as f: 210 | f.write(img2pdf.convert('test.jpg', rotation=img2pdf.Rotation.ifvalid)) 211 | 212 | # writing to file descriptor 213 | with open("name.pdf","wb") as f1, open("test.jpg", "rb") as f2: 214 | img2pdf.convert(f2, outputstream=f1) 215 | 216 | # specify paper size (A4) 217 | a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) 218 | layout_fun = img2pdf.get_layout_fun(a4inpt) 219 | with open("name.pdf","wb") as f: 220 | f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) 221 | 222 | # use a fixed dpi of 300 instead of reading it from the image 223 | dpix = dpiy = 300 224 | layout_fun = img2pdf.get_fixed_dpi_layout_fun((dpix, dpiy)) 225 | with open("name.pdf","wb") as f: 226 | f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) 227 | 228 | # create a PDF/A-1b compliant document by passing an ICC profile 229 | with open("name.pdf","wb") as f: 230 | f.write(img2pdf.convert('test.jpg', pdfa="/usr/share/color/icc/sRGB.icc")) 231 | 232 | Comparison to ImageMagick 233 | ------------------------- 234 | 235 | Create a large test image: 236 | 237 | $ convert logo: -resize 8000x original.jpg 238 | 239 | Convert it into PDF using ImageMagick and img2pdf: 240 | 241 | $ time img2pdf original.jpg -o img2pdf.pdf 242 | $ time convert original.jpg imagemagick.pdf 243 | 244 | Notice how ImageMagick took an order of magnitude longer to do the conversion 245 | than img2pdf. It also used twice the memory.
246 | 247 | Now extract the image data from both PDF documents and compare it to the 248 | original: 249 | 250 | $ pdfimages -all img2pdf.pdf tmp 251 | $ compare -metric AE original.jpg tmp-000.jpg null: 252 | 0 253 | $ pdfimages -all imagemagick.pdf tmp 254 | $ compare -metric AE original.jpg tmp-000.jpg null: 255 | 118716 256 | 257 | To get lossless output with ImageMagick we can use Zip compression but that 258 | unnecessarily increases the size of the output: 259 | 260 | $ convert original.jpg -compress Zip imagemagick.pdf 261 | $ pdfimages -all imagemagick.pdf tmp 262 | $ compare -metric AE original.jpg tmp-000.png null: 263 | 0 264 | $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf 265 | 1535837 original.jpg 266 | 1536683 img2pdf.pdf 267 | 9397809 imagemagick.pdf 268 | 269 | Comparison to pdfLaTeX 270 | ---------------------- 271 | 272 | pdfLaTeX performs a lossless conversion from included images to PDF by default. 273 | If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same 274 | way as img2pdf does it. But for other image formats it uses flate compression 275 | of the plain pixel data and thus needlessly increases the output file size: 276 | 277 | $ convert logo: -resize 8000x original.png 278 | $ cat << END > pdflatex.tex 279 | \documentclass{article} 280 | \usepackage{graphicx} 281 | \begin{document} 282 | \includegraphics{original.png} 283 | \end{document} 284 | END 285 | $ pdflatex pdflatex.tex 286 | $ stat --format="%s %n" original.png pdflatex.pdf 287 | 4500182 original.png 288 | 9318120 pdflatex.pdf 289 | 290 | Comparison to podofoimg2pdf 291 | --------------------------- 292 | 293 | Like pdfLaTeX, podofoimg2pdf is able to perform a lossless conversion from JPEG 294 | to PDF by plainly embedding the JPEG data into the pdf container. But just like 295 | pdfLaTeX it uses flate compression for all other file formats, thus sometimes 296 | resulting in larger files than necessary. 
297 | 298 | $ convert logo: -resize 8000x original.png 299 | $ podofoimg2pdf out.pdf original.png 300 | $ stat --format="%s %n" original.png out.pdf 301 | 4500181 original.png 302 | 9335629 out.pdf 303 | 304 | It also only supports JPEG, PNG and TIF as input and lacks many of the 305 | convenience features of img2pdf like page sizes, borders, rotation and 306 | metadata. 307 | 308 | Comparison to Tesseract OCR 309 | --------------------------- 310 | 311 | Tesseract OCR comes closest to the functionality img2pdf provides. It is able 312 | to convert JPEG and PNG input to PDF without needlessly increasing the filesize 313 | and is at the same time lossless. So if your input is JPEG and PNG images, then 314 | you should safely be able to use Tesseract instead of img2pdf. For other input, 315 | Tesseract might not do a lossless conversion. For example it converts CMYK 316 | input to RGB and removes the alpha channel from images with transparency. For 317 | multipage TIFF or animated GIF, it will only convert the first frame. 318 | 319 | Comparison to econvert from ExactImage 320 | -------------------------------------- 321 | 322 | Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF 323 | directly without re-encoding but when given other file formats, it stores them 324 | just using flate compression, which unnecessarily increases the filesize. 325 | Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK 326 | jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying 327 | to feed it 16bit files, it errors out with Unhandled bps/spp combination. It 328 | also seems to choose JPEG encoding when using it on some file types (like 329 | palette images) making it again not lossless for that input as well. 
330 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | # For Python versions available on Appveyor, see 3 | # https://www.appveyor.com/docs/windows-images-software/#python 4 | matrix: 5 | # - PYTHON: "C:\\Python27" 6 | # - PYTHON: "C:\\Python33" 7 | # - PYTHON: "C:\\Python34" 8 | # - PYTHON: "C:\\Python35" 9 | # - PYTHON: "C:\\Python36" 10 | # - PYTHON: "C:\\Python37" 11 | # - PYTHON: "C:\\Python27-x64" 12 | # - PYTHON: "C:\\Python33-x64" 13 | # - PYTHON: "C:\\Python34-x64" 14 | # - PYTHON: "C:\\Python35-x64" 15 | # - PYTHON: "C:\\Python36-x64" 16 | - PYTHON: "C:\\Python37-x64" 17 | 18 | install: 19 | - "%PYTHON%\\python.exe -m pip install tox wheel pyinstaller Pillow" 20 | 21 | build: off 22 | 23 | # don't run tests on windows because we don't have imagemagick 24 | #test_script: 25 | # - "%PYTHON%\\python.exe -m tox" 26 | 27 | after_test: 28 | - "%PYTHON%\\python.exe setup.py bdist_wheel" 29 | - "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --console --nowindowed --name img2pdf src/img2pdf.py" 30 | #- "%PYTHON%\\python.exe -m PyInstaller --clean --onefile --noconsole --windowed --name img2pdf_windowed src/img2pdf.py" 31 | 32 | artifacts: 33 | - path: dist\* 34 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/screenshot.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup 3 | 4 | VERSION = "0.6.1" 5 | 6 | INSTALL_REQUIRES = ( 7 | "Pillow", 8 | "pikepdf", 9 | ) 10 | 11 | setup( 12 | name="img2pdf", 13 | version=VERSION, 14 | 
author="Johannes Schauer Marin Rodrigues", 15 | author_email="josch@mister-muffin.de", 16 | description="Convert images to PDF via direct JPEG inclusion.", 17 | long_description=open("README.md").read(), 18 | long_description_content_type="text/markdown", 19 | license="LGPL", 20 | keywords="jpeg pdf converter", 21 | classifiers=[ 22 | "Development Status :: 5 - Production/Stable", 23 | "Intended Audience :: Developers", 24 | "Intended Audience :: Other Audience", 25 | "Environment :: Console", 26 | "Programming Language :: Python", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.5", 29 | "Programming Language :: Python :: Implementation :: CPython", 30 | "Programming Language :: Python :: Implementation :: PyPy", 31 | "License :: OSI Approved :: GNU Lesser General Public License v3 " "(LGPLv3)", 32 | "Natural Language :: English", 33 | "Operating System :: OS Independent", 34 | ], 35 | url="https://gitlab.mister-muffin.de/josch/img2pdf", 36 | download_url="https://gitlab.mister-muffin.de/josch/img2pdf/repository/" 37 | "archive.tar.gz?ref=" + VERSION, 38 | package_dir={"": "src"}, 39 | py_modules=["img2pdf", "jp2"], 40 | include_package_data=True, 41 | zip_safe=True, 42 | install_requires=INSTALL_REQUIRES, 43 | extras_require={ 44 | "gui": ("tkinter"), 45 | }, 46 | entry_points={ 47 | "setuptools.installation": ["eggsecutable = img2pdf:main"], 48 | "console_scripts": ["img2pdf = img2pdf:main"], 49 | "gui_scripts": ["img2pdf-gui = img2pdf:gui"], 50 | }, 51 | ) 52 | -------------------------------------------------------------------------------- /src/jp2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (C) 2013 Johannes Schauer Marin Rodrigues 4 | # 5 | # this module is heavily based upon jpylyzer which is 6 | # KB / National Library of the Netherlands, Open Planets Foundation 7 | # and released under the same license conditions 8 | # 9 | # This 
program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU Lesser General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU Lesser General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU Lesser General Public License 20 | # along with this program. If not, see . 21 | 22 | import struct 23 | 24 | 25 | def getBox(data, byteStart, noBytes): 26 | boxLengthValue = struct.unpack(">I", data[byteStart : byteStart + 4])[0] 27 | boxType = data[byteStart + 4 : byteStart + 8] 28 | contentsStartOffset = 8 29 | if boxLengthValue == 1: 30 | boxLengthValue = struct.unpack(">Q", data[byteStart + 8 : byteStart + 16])[0] 31 | contentsStartOffset = 16 32 | if boxLengthValue == 0: 33 | boxLengthValue = noBytes - byteStart 34 | byteEnd = byteStart + boxLengthValue 35 | boxContents = data[byteStart + contentsStartOffset : byteEnd] 36 | return (boxLengthValue, boxType, byteEnd, boxContents) 37 | 38 | 39 | def parse_ihdr(data): 40 | height, width, channels, bpp = struct.unpack(">IIHB", data[:11]) 41 | return width, height, channels, bpp + 1 42 | 43 | 44 | def parse_colr(data): 45 | meth = struct.unpack(">B", data[0:1])[0] 46 | if meth != 1: 47 | raise Exception("only enumerated color method supported") 48 | enumCS = struct.unpack(">I", data[3:])[0] 49 | if enumCS == 16: 50 | return "RGB" 51 | elif enumCS == 17: 52 | return "L" 53 | else: 54 | raise Exception( 55 | "only sRGB and greyscale color space is supported, " "got %d" % enumCS 56 | ) 57 | 58 | 59 | def parse_resc(data): 60 | hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data) 61 | hdpi = ((hnum / hden) * (10**hexp) * 
100) / 2.54 62 | vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54 63 | return hdpi, vdpi 64 | 65 | 66 | def parse_res(data): 67 | hdpi, vdpi = None, None 68 | noBytes = len(data) 69 | byteStart = 0 70 | boxLengthValue = 1 # dummy value for while loop condition 71 | while byteStart < noBytes and boxLengthValue != 0: 72 | boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes) 73 | if boxType == b"resc": 74 | hdpi, vdpi = parse_resc(boxContents) 75 | break 76 | return hdpi, vdpi 77 | 78 | 79 | def parse_jp2h(data): 80 | width, height, colorspace, hdpi, vdpi = None, None, None, None, None 81 | noBytes = len(data) 82 | byteStart = 0 83 | boxLengthValue = 1 # dummy value for while loop condition 84 | while byteStart < noBytes and boxLengthValue != 0: 85 | boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes) 86 | if boxType == b"ihdr": 87 | width, height, channels, bpp = parse_ihdr(boxContents) 88 | elif boxType == b"colr": 89 | colorspace = parse_colr(boxContents) 90 | elif boxType == b"res ": 91 | hdpi, vdpi = parse_res(boxContents) 92 | byteStart = byteEnd 93 | return (width, height, colorspace, hdpi, vdpi, channels, bpp) 94 | 95 | 96 | def parsejp2(data): 97 | noBytes = len(data) 98 | byteStart = 0 99 | boxLengthValue = 1 # dummy value for while loop condition 100 | width, height, colorspace, hdpi, vdpi = None, None, None, None, None 101 | while byteStart < noBytes and boxLengthValue != 0: 102 | boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes) 103 | if boxType == b"jp2h": 104 | width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h( 105 | boxContents 106 | ) 107 | break 108 | byteStart = byteEnd 109 | if not width: 110 | raise Exception("no width in jp2 header") 111 | if not height: 112 | raise Exception("no height in jp2 header") 113 | if not colorspace: 114 | raise Exception("no colorspace in jp2 header") 115 | # retrieving the dpi is optional so we do not error out if 
not present 116 | return (width, height, colorspace, hdpi, vdpi, channels, bpp) 117 | 118 | 119 | def parsej2k(data): 120 | lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack( 121 | ">HHIIIIIIIIH", data[4:42] 122 | ) 123 | ssiz = [None] * csiz 124 | xrsiz = [None] * csiz 125 | yrsiz = [None] * csiz 126 | for i in range(csiz): 127 | ssiz[i], xrsiz[i], yrsiz[i] = struct.unpack( 128 | "BBB", data[42 + 3 * i : 42 + 3 * (i + 1)] 129 | ) 130 | assert ssiz == [7, 7, 7] 131 | return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8 132 | 133 | 134 | def parse(data): 135 | if data[:4] == b"\xff\x4f\xff\x51": 136 | return parsej2k(data) 137 | else: 138 | return parsejp2(data) 139 | 140 | 141 | if __name__ == "__main__": 142 | import sys 143 | 144 | width, height, colorspace, hdpi, vdpi, channels, bpp = parse( 145 | open(sys.argv[1], "rb").read() 146 | ) 147 | print("width = %d" % width) 148 | print("height = %d" % height) 149 | print("colorspace = %s" % colorspace) 150 | print("hdpi = %s" % hdpi) 151 | print("vdpi = %s" % vdpi) 152 | print("channels = %s" % channels) 153 | print("bpp = %s" % bpp) 154 | -------------------------------------------------------------------------------- /src/tests/input/CMYK.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/CMYK.jpg -------------------------------------------------------------------------------- /src/tests/input/CMYK.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/CMYK.tif -------------------------------------------------------------------------------- /src/tests/input/animation.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/animation.gif -------------------------------------------------------------------------------- /src/tests/input/gray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/gray.png -------------------------------------------------------------------------------- /src/tests/input/mono.jb2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/mono.jb2 -------------------------------------------------------------------------------- /src/tests/input/mono.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/mono.png -------------------------------------------------------------------------------- /src/tests/input/mono.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/mono.tif -------------------------------------------------------------------------------- /src/tests/input/normal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/normal.jpg -------------------------------------------------------------------------------- /src/tests/input/normal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/input/normal.png 
-------------------------------------------------------------------------------- /src/tests/output/CMYK.jpg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/CMYK.jpg.pdf -------------------------------------------------------------------------------- /src/tests/output/CMYK.tif.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/CMYK.tif.pdf -------------------------------------------------------------------------------- /src/tests/output/animation.gif.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/animation.gif.pdf -------------------------------------------------------------------------------- /src/tests/output/gray.png.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/gray.png.pdf -------------------------------------------------------------------------------- /src/tests/output/mono.jb2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/mono.jb2.pdf -------------------------------------------------------------------------------- /src/tests/output/mono.png.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/mono.png.pdf -------------------------------------------------------------------------------- 
/src/tests/output/mono.tif.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/mono.tif.pdf -------------------------------------------------------------------------------- /src/tests/output/normal.jpg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/normal.jpg.pdf -------------------------------------------------------------------------------- /src/tests/output/normal.png.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josch/img2pdf/bb188a3eaf7d956b82f7f9a18bbda774301c586f/src/tests/output/normal.png.pdf -------------------------------------------------------------------------------- /test_comp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ $# -ne 1 ]; then 4 | echo "usage: $0 image" 5 | exit 6 | fi 7 | 8 | echo "converting image to pdf, trying all compressions imagemagick has to offer" 9 | echo "if, as a result, Zip/FlateDecode should NOT be the lossless compression with the lowest size ratio, contact me j [dot] schauer [at] email [dot] de" 10 | echo "also, send me the image in question" 11 | echo 12 | 13 | imsize=`stat -c "%s" "$1"` 14 | 15 | for a in `convert -list compress`; do 16 | echo "encode:\t$a" 17 | convert "$1" -compress $a "`basename $1 .jpg`.pdf" 18 | pdfimages "`basename $1 .jpg`.pdf" "`basename $1 .jpg`" 19 | printf "diff:\t" 20 | diff=`compare -metric AE "$1" "\`basename $1 .jpg\`-000.ppm" null: 2>&1` 21 | if [ "$diff" != "0" ]; then 22 | echo "lossy" 23 | else 24 | echo "lossless" 25 | fi 26 | printf "size:\t" 27 | pdfsize=`stat -c "%s" "\`basename $1 .jpg\`.pdf"` 28 | echo "scale=1;$pdfsize/$imsize" | bc 29 | printf "pdf:\t" 30 | 
grep --max-count=1 --text /Filter "`basename $1 .jpg`.pdf" 31 | echo 32 | done 33 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # tox (https://tox.readthedocs.io/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | 6 | [tox] 7 | envlist = py37, py38, py39, py310 8 | skip_missing_interpreters = true 9 | 10 | [testenv] 11 | deps = 12 | pdfrw 13 | pytest 14 | pikepdf 15 | numpy 16 | scipy 17 | commands = 18 | python -m pytest -vv 19 | --------------------------------------------------------------------------------