├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── general-purpose.md ├── .gitignore ├── .vs ├── ProjectSettings.json ├── PyMuPDF │ └── v15 │ │ ├── .suo │ │ └── Browse.VC.db ├── VSWorkspaceState.json └── slnx.sqlite ├── COPYING ├── GNU AFFERO GPL V3 ├── PKG-INFO ├── README.md ├── debian ├── changelog ├── control ├── copyright ├── patches │ ├── docs │ ├── fiximport │ ├── libs │ └── series ├── python3-fitz.lintian-overrides ├── rules ├── salsa-ci.yml ├── source │ └── format ├── tests │ └── control └── watch ├── demo └── pymupdf.jpg ├── docs ├── PyMuPDF.ico ├── algebra.rst ├── annot.rst ├── app1.rst ├── app2.rst ├── app3.rst ├── app4.rst ├── changes.rst ├── classes.rst ├── colors.rst ├── colorspace.rst ├── conf.py ├── coop_low.rst ├── device.rst ├── displaylist.rst ├── document.rst ├── faq.rst ├── font.rst ├── functions.rst ├── glossary.rst ├── identity.rst ├── images │ ├── img-4up.png │ ├── img-7edges.png │ ├── img-a-is--1.png │ ├── img-adobe.png │ ├── img-alpha-0.png │ ├── img-alpha-1.png │ ├── img-annots.jpg │ ├── img-attach-result.jpg │ ├── img-b-is-0.5.png │ ├── img-binsetupdirs.png │ ├── img-breadth.png │ ├── img-c-is-0.5.png │ ├── img-cake.png │ ├── img-caret-annot.jpg │ ├── img-circle.png │ ├── img-clip.jpg │ ├── img-colordb.png │ ├── img-copy-speed-1.png │ ├── img-copy-speed-2.png │ ├── img-d-is--1.png │ ├── img-drawBezier.png │ ├── img-drawCurve.png │ ├── img-drawSector1.png │ ├── img-drawSector2.png │ ├── img-drawcircle.jpg │ ├── img-drawquad.jpg │ ├── img-e-is-100.png │ ├── img-embed-progress.jpg │ ├── img-encoding.jpg │ ├── img-encrypting.jpg │ ├── img-even-odd.png │ ├── img-extract-imga.jpg │ ├── img-extract-imgb.jpg │ ├── img-f-is-100.png │ ├── img-filesizes.png │ ├── img-freetext.jpg │ ├── img-import-progress.jpg │ ├── img-inkannot.jpg │ ├── img-inserttext.jpg │ ├── img-markedpdf.jpg │ ├── img-markers.jpg │ ├── img-matrix.png │ ├── img-opacity.jpg │ ├── img-original.png │ ├── img-pdfjoiner.jpg │ ├── img-pdftext.jpg │ ├── 
img-planish.png │ ├── img-point-unit.jpg │ ├── img-polyline.png │ ├── img-posterize.png │ ├── img-pymupdf.jpg │ ├── img-quads.jpg │ ├── img-redact.jpg │ ├── img-render-speed.png │ ├── img-rendermode.jpg │ ├── img-rot+morph.png │ ├── img-rot-60.png │ ├── img-rotate.png │ ├── img-showpdfpage.jpg │ ├── img-sierpinski.png │ ├── img-squiggly.png │ ├── img-stampannot.jpg │ ├── img-stencil.jpg │ ├── img-symbols.jpg │ ├── img-target.png │ ├── img-textbox.jpg │ ├── img-textboxtract.png │ ├── img-textmarker.jpg │ ├── img-textmethods.png │ ├── img-textpage-char.png │ ├── img-textpage.png │ ├── img-textperformance.png │ ├── img-timings.png │ ├── img-writeimage.png │ └── mupdf-icons.jpg ├── index.rst ├── installation.rst ├── intro.rst ├── irect.rst ├── kerning.style ├── link.rst ├── linkdest.rst ├── lowlevel.rst ├── make-bold.py ├── matrix.rst ├── module.rst ├── multiprocess-gui.py ├── multiprocess-render.py ├── new-annots.py ├── outline.rst ├── page.rst ├── pixmap.rst ├── point.rst ├── pymupdf-logo.jpg ├── quad.rst ├── rect.rst ├── replace-fonts.py ├── shape.rst ├── text-lister.py ├── textpage.rst ├── textwriter.rst ├── tools.rst ├── tutorial.rst ├── vars.rst ├── version.rst ├── wheelnames.txt └── widget.rst ├── fitz ├── __init__.py ├── __main__.py ├── fitz.i ├── helper-annot.i ├── helper-convert.i ├── helper-defines.i ├── helper-fields.i ├── helper-geo-c.i ├── helper-geo-py.i ├── helper-other.i ├── helper-pdfinfo.i ├── helper-pixmap.i ├── helper-portfolio.i ├── helper-python.i ├── helper-select.i ├── helper-stext.i ├── helper-xobject.i ├── utils.py └── version.i ├── installation ├── .DS_Store ├── centos │ └── centos_pymupdf.sh ├── freebsd │ └── freebsd_pymupdf.sh └── ubuntu │ └── ubuntu_pymupdf.sh └── setup.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: JorjMcKie 7 | 8 | --- 9 | 10 | 
_**Please provide all mandatory information!**_ 11 | 12 | ## Describe the bug (mandatory) 13 | A clear and concise description of what the bug is. 14 | 15 | ## To Reproduce (mandatory) 16 | Explain the steps to reproduce the behavior, For example, include a minimal code snippet, example files, etc. 17 | 18 | ## Expected behavior (optional) 19 | Describe what you expected to happen (if not obvious). 20 | 21 | ## Screenshots (optional) 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | ## Your configuration (mandatory) 25 | - Operating system, potentially version and bitness 26 | - Python version, bitness 27 | - PyMuPDF version, installation method (**wheel** or **generated** from source). 28 | 29 | For example, the output of `print(sys.version, "\n", sys.platform, "\n", fitz.__doc__)` would be sufficient (for the first two bullets). 30 | 31 | ## Additional context (optional) 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: JorjMcKie 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Potentially add an issue reference. 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | Are there several options for how your request could be met? 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general-purpose.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General Purpose 3 | about: Use this form for questions, comments, etc. 4 | title: 'Question / Comment:' 5 | labels: question 6 | assignees: JorjMcKie 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.so 3 | *.o 4 | *.swp 5 | build/ 6 | demo/README.rst 7 | -------------------------------------------------------------------------------- /.vs/ProjectSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "CurrentProjectSetting": "Keine Konfigurationen" 3 | } -------------------------------------------------------------------------------- /.vs/PyMuPDF/v15/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/.vs/PyMuPDF/v15/.suo -------------------------------------------------------------------------------- /.vs/PyMuPDF/v15/Browse.VC.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/.vs/PyMuPDF/v15/Browse.VC.db -------------------------------------------------------------------------------- /.vs/VSWorkspaceState.json: -------------------------------------------------------------------------------- 1 | { 2 | "ExpandedNodes": [ 3 | "" 4 | ], 5 | "SelectedNode": "\\README.md", 6 | "PreviewInSolutionExplorer": false 7 | } -------------------------------------------------------------------------------- /.vs/slnx.sqlite: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/.vs/slnx.sqlite -------------------------------------------------------------------------------- /PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.1 2 | Name: PyMuPDF 3 | Version: 1.17.4 4 | Author: Ruikai Liu 5 | Author-email: lrk700@gmail.com 6 | Maintainer: Jorj X. McKie 7 | Maintainer-email: jorj.x.mckie@outlook.de 8 | Home-page: https://github.com/pymupdf/PyMuPDF 9 | Download-url: https://github.com/pymupdf/PyMuPDF 10 | Summary: PyMuPDF is a Python binding for the PDF rendering library MuPDF 11 | Description: 12 | Release date: July 31, 2020 13 | 14 | Authors 15 | ======= 16 | 17 | * Jorj X. McKie 18 | * Ruikai Liu 19 | 20 | Introduction 21 | ============ 22 | 23 | This is **version 1.17.4 of PyMuPDF**, a Python binding for `MuPDF `_ - "a lightweight PDF and XPS viewer". 24 | 25 | MuPDF can access files in PDF, XPS, OpenXPS, epub, comic and fiction book formats, and it is known for both, its top performance and high rendering quality. 26 | 27 | With PyMuPDF you therefore can access files with extensions ``*.pdf``, ``*.xps``, ``*.oxps``, ``*.epub``, ``*.cbz`` or ``*.fb2`` from your Python scripts. A number of popular image formats is supported as well, including multi-page TIFF images. 28 | 29 | PyMuPDF should run on all platforms that are supported by both, MuPDF and Python. These include, but are not limited to, Windows (XP/SP2 and up), Mac OSX and Linux, 32-bit or 64-bit. If you can generate MuPDF on a Python supported platform, then also PyMuPDF can be used there. 30 | 31 | PyMuPDF is hosted on `GitHub `_ where you find up-to-date information of its features, our `issue tracker `_, `Wikis `_ and much more. 
32 | 33 | Installation 34 | ============ 35 | 36 | For all MS Windows versions as well as popular Mac OSX and Linux versions, we are providing Python wheels - see the download section of this site and the current `release directory `_ of our home page. Other platforms need to download and generate the MuPDF library first and then set up PyMuPDF. Do visit our GitHub home, which has more details on this, including latest bugfixes, pre-releases, etc. 37 | 38 | Usage and Documentation 39 | ======================== 40 | 41 | For all document types you can render pages in raster (PNG) or vector (SVG) formats, extract text and access meta information, links, annotations and bookmarks, as well as decrypt the document. For PDF files, these objects can also be created, modified or deleted. Plus you can rotate, re-arrange, duplicate, create, or delete pages and join or split documents. 42 | 43 | Starting with version 1.16.0, PDF password protection is **fully supported**: passwords, encryption methods and permission levels can be set, changed or removed. 44 | 45 | Specifically for PDF files, PyMuPDF provides update access to low-level structure information, supports handling of embedded files and modification of page contents (like inserting images, fonts, text, annotations and drawings). 46 | 47 | Other features include embedding vector images (SVG, PDF) such as logos or watermarks, joining or splitting single PDF pages (including things like posterizing and 2-up / 4-up processing). 48 | 49 | You can also create **PDF Form fields** with support for text, checkbox, listbox and combobox widgets. 50 | 51 | Our home page provides many examples and How-Tos for all of this. At a minimum, read the tutorial and the recipes sections of our documentation. 52 | 53 | Written using **Sphinx**, documentation is available here: 54 | 55 | * View it online at `Read The Docs `_. For **best quality downloads**, use the following links. 
56 | 57 | * `HTML `_ 58 | 59 | * `Windows CHM `_ 60 | 61 | * `PDF `_ 62 | 63 | 64 | Classifier: Development Status :: 5 - Production/Stable 65 | Classifier: Environment :: Console 66 | Classifier: Intended Audience :: Developers 67 | Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+) 68 | Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) 69 | Classifier: Operating System :: MacOS 70 | Classifier: Operating System :: Microsoft :: Windows 71 | Classifier: Operating System :: POSIX :: Linux 72 | Classifier: Programming Language :: C 73 | Classifier: Programming Language :: Python :: 2.7 74 | Classifier: Programming Language :: Python :: 3 75 | Classifier: Programming Language :: Python :: 3.4 76 | Classifier: Programming Language :: Python :: 3.5 77 | Classifier: Programming Language :: Python :: 3.6 78 | Classifier: Programming Language :: Python :: 3.7 79 | Classifier: Programming Language :: Python :: 3.8 80 | Classifier: Topic :: Utilities 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyMuPDF 1.17.4 2 | 3 | ![logo](https://github.com/pymupdf/PyMuPDF/blob/master/demo/pymupdf.jpg) 4 | 5 | Release date: July 31, 2020 6 | 7 | **Travis-CI:** [![Build Status](https://travis-ci.org/JorjMcKie/py-mupdf.svg?branch=master)](https://travis-ci.org/JorjMcKie/py-mupdf) 8 | 9 | On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![](https://pepy.tech/badge/pymupdf)](https://pepy.tech/project/pymupdf) 10 | 11 | # Authors 12 | * [Jorj X. McKie](mailto:jorj.x.mckie@outlook.de) 13 | * [Ruikai Liu](mailto:lrk700@gmail.com) 14 | 15 | # Introduction 16 | 17 | This is **version 1.17.4 of PyMuPDF**, a Python binding with support for [MuPDF 1.17.*](http://mupdf.com/) - "a lightweight PDF, XPS, and E-book viewer". 
18 | 19 | MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality. 20 | 21 | With PyMuPDF you can access files with extensions like ".pdf", ".xps", ".oxps", ".cbz", ".fb2" or ".epub". In addition, about 10 popular image formats can also be opened and handled like documents. 22 | 23 | 24 | # Usage and Documentation 25 | For all supported document types (i.e. **_including images_**) you can 26 | * decrypt the document 27 | * access meta information, links and bookmarks 28 | * render pages in raster formats (PNG and some others), or the vector format SVG 29 | * search for text 30 | * extract text and images 31 | * convert to other formats: PDF, (X)HTML, XML, JSON, text 32 | 33 | > To some degree, PyMuPDF can therefore be used as an [image converter](https://github.com/pymupdf/PyMuPDF/wiki/How-to-Convert-Images): it can read a range of input formats and can produce **Portable Network Graphics (PNG)**, **Portable Anymaps** (**PNM**, etc.), **Portable Arbitrary Maps (PAM)**, **Adobe Postscript** and **Adobe Photoshop** documents, making the use of other graphics packages obsolete in these cases. But interfacing with e.g. PIL/Pillow for image input and output is easy as well. 34 | 35 | **PDF documents** can be created, joined or split up. Pages can be inserted, deleted, re-arranged or modified in many ways (including annotations and form fields). 36 | 37 | * Images and fonts can be extracted or inserted. 38 | * Embedded files are fully supported. 39 | * PDFs can be reformatted to support double-sided printing, posterizing, applying logos or watermarks 40 | * Password protection is fully supported: decryption, encryption, encryption method selection, permission level and user / owner password setting. 41 | * Low-level PDF structures can be accessed and modified. 42 | * PyMuPDF can also be used as a **module in the command line** using ``"python -m fitz ..."``. 
This is a versatile utility, which we will further develop going forward. It currently supports PDF document 43 | 44 | - **encryption / decryption / optimization** 45 | - creating **sub-documents** 46 | - document **joining** 47 | - **image / font extraction** 48 | - full support of **embedded files**. 49 | 50 | 51 | Have a look at the basic [demos](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/demo), the [examples](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/examples) (which contain complete, working programs), and the **recipes** section of our [Wiki](https://github.com/pymupdf/PyMuPDF/wiki) sidebar, which contains more than a dozen How-To style guides. 52 | 53 | Our **documentation**, written using Sphinx, is available in various formats from the following sources. It currently is a combination of a reference guide and a user manual. For a **quick start** look at the [tutorial](https://pymupdf.readthedocs.io/en/latest/tutorial/) and the [recipes](https://pymupdf.readthedocs.io/en/latest/faq/) chapters. 54 | 55 | * You can view it online at [Read the Docs](https://readthedocs.org/projects/pymupdf/). This site also provides download options for zipped HTML and PDF. 56 | * Find a Windows help file [here](https://github.com/pymupdf/PyMuPDF-optional-material/tree/master/doc/PyMuPDF.chm). 57 | 58 | 59 | # Installation 60 | 61 | For the major **Windows** and (thanks to our user **@jbarlow83**!) **Mac OSX** or **Linux** versions we offer wheels in the [download section of PyPI](https://pypi.org/project/PyMuPDF/#files). This includes Python 2.7 and Python versions 3.5 through 3.8. 62 | 63 | For other Python versions or operating systems you need to generate PyMuPDF yourself as follows. This should work for all platforms which support Python and MuPDF. In any case you need the development version of Python. 64 | 65 | To do this, you must download and generate MuPDF. This process depends very much on your system. 
For most platforms, the MuPDF source contains prepared procedures for achieving this. Please observe the following general steps: 66 | 67 | * Be sure to download the official MuPDF source release from [here](https://mupdf.com/downloads/archive). 68 | 69 | * Do **not use** MuPDF's [GitHub repo](https://github.com/ArtifexSoftware/mupdf). It contains their current **development source**, which is **not compatible** with this PyMuPDF version. 70 | 71 | * This repo's `fitz` folder contains one or more files whose names start with a single underscore `"_"`. These files contain configuration data and hotfixes. Each one must be copy-renamed to its correct target location **inside the MuPDF source** that you have downloaded, **before you generate MuPDF**. Currently, these files are: 72 | - fitz configuration file `_config.h` copy-replace to: `mupdf/include/mupdf/fitz/config.h`. It contains configuration data such as which fonts to support. 73 | 74 | - Now MuPDF can be generated. 75 | 76 | * Since PyMuPDF v1.14.17, the sources provided in this repository **no longer contain** the interface files ``fitz.py`` and ``fitz.wrap.c`` - they are instead generated **"on the fly"** by ``setup.py`` using the interface generator [SWIG](http://www.swig.org/). So you need SWIG to be installed on your system. Please refer to issue #312 for some background. 77 | - PyMuPDF wheels have been generated using **SWIG v4.0.1**. 78 | 79 | 80 | * If you do **not use SWIG**, please download the **sources from PyPI** - they continue to contain those generated files, so installation should work like any other Python extension generation on your system. 81 | 82 | Once this is done, adjust directories in ``setup.py`` and run ``python setup.py install``. 83 | 84 | The following sections contain further comments for some platforms. 85 | 86 | ## Ubuntu 87 | Our users (thanks to **@gileadslostson** and **@jbarlow83**!) 
have documented their MuPDF installation experiences from sources in this [Wiki page](https://github.com/pymupdf/PyMuPDF/wiki/Ubuntu-Installation-Experience). 88 | 89 | ## OSX 90 | First, install the MuPDF headers and libraries, which are provided by mupdf-tools: ``brew install mupdf-tools``. 91 | 92 | Then you might need to ``export ARCHFLAGS='-arch x86_64'``, since ``libmupdf.a`` is for x86_64 only. 93 | 94 | Finally, please double check ``setup.py`` before building. Update ``include_dirs`` and ``library_dirs`` if necessary. 95 | 96 | ## MS Windows 97 | If you are looking to make your own binary, consult this [Wiki page](https://github.com/pymupdf/PyMuPDF/wiki/Windows-Binaries-Generation). It explains how to use Visual Studio for generating MuPDF in quite some detail. 98 | 99 | # Earlier Versions 100 | Earlier versions are available in the [releases](https://github.com/pymupdf/PyMuPDF/releases) directory. 101 | 102 | # License 103 | PyMuPDF is distributed under GNU GPL V3. Because you will implicitly also be using MuPDF, its license GNU AFFERO GPL V3 applies as well. Copies of both are included in this repository. 104 | 105 | # Contact 106 | Please submit questions, comments or issues [here](https://github.com/pymupdf/PyMuPDF/issues), or directly contact the authors via their e-mail addresses. 107 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | pymupdf (1.17.4+ds1-1~np1) unstable; urgency=medium 2 | 3 | [ Bastian Germann ] 4 | * Use debhelper provided python3:Provides 5 | * Set Built-Using according to Policy 7.8 6 | * Update upstream copyright info 7 | * New upstream version 1.16.17+ds1 8 | * Refresh docs patch 9 | 10 | [ Norbert Preining ] 11 | * New upstream releases. 
12 | * Bump B-D of libmupdf-dev 13 | 14 | -- Norbert Preining Wed, 29 Jul 2020 12:48:09 +0900 15 | 16 | pymupdf (1.16.11-1) unstable; urgency=medium 17 | 18 | [ Johannes 'josch' Schauer ] 19 | * New upstream version 1.16.11 (closes: #950639) 20 | * Bump Standards-Version to 4.5.0 21 | * add debian/salsa-ci.yml 22 | * debian/control: b-d on libpython3-all-dev instead of libpython3-dev 23 | * debian/copyright: remove unused files from Files-Excluded 24 | * add autopkgtest 25 | * add patch fiximport 26 | * debian/control: add Rules-Requires-Root: no 27 | * debian/tests/control: chdir to / to not use fitz module from unpacked 28 | sources 29 | * add debian/python3-fitz.lintian-overrides 30 | * debian/watch: add repacksuffix 31 | * debian/watch: don't run uupdate 32 | 33 | [ Bastian Germann ] 34 | * Drop non-existing examples 35 | * Exclude 1.16.11 files 36 | * Add docs patch 37 | 38 | -- Johannes 'josch' Schauer Sun, 23 Feb 2020 21:05:36 +0100 39 | 40 | pymupdf (1.14.16-1) unstable; urgency=medium 41 | 42 | * Initial release. 
(Closes: #930761) 43 | 44 | -- Johannes 'josch' Schauer Sat, 22 Jun 2019 04:02:32 +0200 45 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: pymupdf 2 | Section: python 3 | Priority: optional 4 | Maintainer: Debian Python Modules Team 5 | Uploaders: Johannes 'josch' Schauer 6 | Homepage: https://github.com/pymupdf/PyMuPDF 7 | Vcs-Browser: https://salsa.debian.org/python-team/modules/pymupdf 8 | Vcs-Git: https://salsa.debian.org/python-team/modules/pymupdf.git 9 | Standards-Version: 4.5.0 10 | Build-Depends: debhelper-compat (= 12), dh-python, python3-setuptools, python3-all, libpython3-all-dev, libmupdf-dev (>= 1.17.0), libjbig2dec-dev, libjpeg-dev, libfreetype6-dev, libpng-dev, libopenjp2-7-dev, libharfbuzz-dev, swig, libmujs-dev 11 | Rules-Requires-Root: no 12 | 13 | Package: python3-fitz 14 | Architecture: any 15 | Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends} 16 | Provides: ${python3:Provides} 17 | Built-Using: ${Built-Using} 18 | Description: Python binding for MuPDF 19 | Allows one to access files in PDF, XPS, OpenXPS, CBZ, EPUB, and FB2 (e-books) 20 | formats, and it is known for its top performance and high rendering quality. 21 | . 22 | PDF manipulation and generation functions are available, including metadata 23 | and bookmark maintenance, document restructuring, annotation / link handling 24 | and document or page creation. 25 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: PyMuPDF 3 | Upstream-Contact: Jorj X. 
McKie 4 | Source: https://github.com/pymupdf/PyMuPDF/ 5 | Files-Excluded: docs/_static 6 | fitz/_config.h 7 | 8 | # upstream's clarification that indeed all material is GPL-3+ can be 9 | # found in this comment and the ones that follow: 10 | # https://github.com/pymupdf/PyMuPDF/issues/312#issuecomment-504641426 11 | Files: * 12 | Copyright: 2012-2018 Ruikai Liu 13 | 2015-2020 Jorj X. McKie 14 | License: GPL-3+ 15 | 16 | Files: debian/* 17 | Copyright: 2019 Johannes 'josch' Schauer 18 | License: GPL-3+ 19 | 20 | License: GPL-3+ 21 | This program is free software; you can redistribute it 22 | and/or modify it under the terms of the GNU General Public 23 | License as published by the Free Software Foundation; either 24 | version 3 of the License, or (at your option) any later 25 | version. 26 | . 27 | This program is distributed in the hope that it will be 28 | useful, but WITHOUT ANY WARRANTY; without even the implied 29 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 30 | PURPOSE. See the GNU General Public License for more 31 | details. 32 | . 33 | You should have received a copy of the GNU General Public 34 | License along with this package; if not, write to the Free 35 | Software Foundation, Inc., 51 Franklin St, Fifth Floor, 36 | Boston, MA 02110-1301 USA 37 | . 38 | On Debian systems, the full text of the GNU General Public 39 | License version 3 can be found in the file 40 | `/usr/share/common-licenses/GPL-3'. 41 | -------------------------------------------------------------------------------- /debian/patches/docs: -------------------------------------------------------------------------------- 1 | Description: [PATCH] Prevent docs build warnings 2 | 3 | diff --git a/docs/conf.py b/docs/conf.py 4 | index 1175edf..e726f8d 100644 5 | --- a/docs/conf.py 6 | +++ b/docs/conf.py 7 | @@ -128,12 +128,12 @@ html_theme_options = { 8 | # The name of an image file (within the static path) to use as favicon of the 9 | # docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 10 | # pixels large. 11 | -html_favicon = "Pymupdf.ico" 12 | +html_favicon = "PyMuPDF.ico" 13 | 14 | # Add any paths that contain custom static files (such as style sheets) here, 15 | # relative to this directory. They are copied after the builtin static files, 16 | # so a file named "default.css" will overwrite the builtin "default.css". 17 | -html_static_path = ["_static"] 18 | +html_static_path = [] 19 | 20 | # Add any extra paths that contain custom files (such as robots.txt or 21 | # .htaccess) here, relative to this directory. These files are copied 22 | -------------------------------------------------------------------------------- /debian/patches/fiximport: -------------------------------------------------------------------------------- 1 | Author: Johannes 'josch' Schauer 2 | Description: add additional import statement as otherwise you'd get: 3 | Traceback (most recent call last): 4 | File "", line 1, in 5 | File "[...]/src/fitz/__init__.py", line 3, in 6 | from fitz.fitz import * 7 | ModuleNotFoundError: No module named 'fitz.fitz' 8 | 9 | --- a/fitz/__init__.py 10 | +++ b/fitz/__init__.py 11 | @@ -1,5 +1,6 @@ 12 | from __future__ import absolute_import, print_function 13 | import sys 14 | +import fitz.fitz as fitz 15 | from fitz.fitz import * 16 | 17 | # define the supported colorspaces for convenience 18 | -------------------------------------------------------------------------------- /debian/patches/libs: -------------------------------------------------------------------------------- 1 | Description: Link shared library with additional libs 2 | 3 | --- 4 | setup.py | 3 ++- 5 | 1 file changed, 2 insertions(+), 1 deletion(-) 6 | 7 | --- a/setup.py 8 | +++ b/setup.py 9 | @@ -25,7 +25,8 @@ if sys.platform.startswith("linux"): 10 | "mupdf", 11 | #'crypto', #openssl is required by mupdf on archlinux 12 | #'jbig2dec', 'openjp2', 'jpeg', 'freetype', 13 | - "mupdf-third", 14 | + 
#"mupdf-third", 15 | + "harfbuzz", "jbig2dec", "jpeg", "freetype", "png16", "openjp2", "mujs", 16 | ], # the libraries to link with 17 | ) 18 | elif sys.platform.startswith(("darwin", "freebsd")): 19 | -------------------------------------------------------------------------------- /debian/patches/series: -------------------------------------------------------------------------------- 1 | docs 2 | libs 3 | fiximport 4 | -------------------------------------------------------------------------------- /debian/python3-fitz.lintian-overrides: -------------------------------------------------------------------------------- 1 | # all false positives 2 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so SyLES Styles 3 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so Yau You 4 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so moR more 5 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so pres press 6 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | export DEB_BUILD_MAINT_OPTIONS = hardening=+all 4 | DPKG_EXPORT_BUILDFLAGS = 1 5 | include /usr/share/dpkg/buildflags.mk 6 | 7 | override_dh_gencontrol: 8 | dh_gencontrol -- -VBuilt-Using="$(shell dpkg-query -f '$${source:Package} (= $${source:Version}), ' -W libmupdf-dev)" 9 | 10 | %: 11 | dh $@ --buildsystem=pybuild --with python3 12 | -------------------------------------------------------------------------------- /debian/salsa-ci.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml 3 | - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml 4 | 
-------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /debian/tests/control: -------------------------------------------------------------------------------- 1 | Test-Command: env --chdir=/ python3 -c "import fitz" 2 | Restrictions: allow-stderr 3 | Depends: python3-fitz 4 | Features: test-name=python3-fitz 5 | -------------------------------------------------------------------------------- /debian/watch: -------------------------------------------------------------------------------- 1 | version=4 2 | opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%PyMuPDF-$1.tar.gz%,dversionmangle=s/\+ds\d*$//,repacksuffix=+ds1" \ 3 | https://github.com/pymupdf/PyMuPDF/tags \ 4 | (?:.*?/)?v?(\d[\d.]*)\.tar\.gz 5 | -------------------------------------------------------------------------------- /demo/pymupdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/demo/pymupdf.jpg -------------------------------------------------------------------------------- /docs/PyMuPDF.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/PyMuPDF.ico -------------------------------------------------------------------------------- /docs/algebra.rst: -------------------------------------------------------------------------------- 1 | .. _Algebra: 2 | 3 | Operator Algebra for Geometry Objects 4 | ====================================== 5 | 6 | .. highlight:: python 7 | 8 | Instances of classes :ref:`Point`, :ref:`IRect`, :ref:`Rect` and :ref:`Matrix` are collectively also called "geometry" objects. 
9 | 10 | They all are special cases of Python sequences, see :ref:`SequenceTypes` for more background. 11 | 12 | We have defined operators for these classes that allow dealing with them (almost) like ordinary numbers in terms of addition, subtraction, multiplication, division, and some others. 13 | 14 | This chapter is a synopsis of what is possible. 15 | 16 | General Remarks 17 | ----------------- 18 | 1. Operators can be either **binary** (i.e. involving two objects) or **unary**. 19 | 20 | 2. The resulting type of **binary** operations is either a **new object of the left operand's class** or a bool. 21 | 22 | 3. The result of **unary** operations is either a **new object** of the same class, a bool or a float. 23 | 24 | 4. The binary operators *+, -, *, /* are defined for all classes. They *roughly* do what you would expect -- **except, that the second operand ...** 25 | 26 | - may always be a number which then performs the operation on every component of the first one, 27 | - may always be a numeric sequence of the same length (2, 4 or 6) -- we call such sequences :data:`point_like`, :data:`rect_like` or :data:`matrix_like`, respectively. 28 | 29 | 5. Rectangles support additional binary operations: **intersection** (operator *"&"*), **union** (operator *"|"*) and **containment** checking. 30 | 31 | 6. Binary operators fully support in-place operations, so expressions like *"a /= b"* are valid if b is numeric or "a_like". 32 | 33 | 34 | Unary Operations 35 | ------------------ 36 | 37 | =========== =================================================================== 38 | Oper. 
Result 39 | =========== =================================================================== 40 | bool(OBJ) is false exactly if all components of OBJ are zero 41 | abs(OBJ) the rectangle area -- equal to norm(OBJ) for the other types 42 | norm(OBJ) square root of the sum of the component squares (Euclidean norm) 43 | +OBJ new copy of OBJ 44 | -OBJ new copy of OBJ with negated components 45 | ~m inverse of matrix "m", or the null matrix if not invertible 46 | =========== =================================================================== 47 | 48 | 49 | Binary Operations 50 | ------------------ 51 | For every geometry object "a" and every number "b", the operations "a ° b" and "a °= b" are always defined for the operators *+, -, *, /*. The respective operation is simply executed for each component of "a". If the **second operand is not a number**, then the following is defined: 52 | 53 | ========= ======================================================================= 54 | Oper. Result 55 | ========= ======================================================================= 56 | a+b, a-b component-wise execution, "b" must be "a-like". 57 | a*m, a/m "a" can be a point, rectangle or matrix, but "m" must be 58 | :data:`matrix_like`. *"a/m"* is treated as *"a*~m"* (see note below 59 | for non-invertible matrices). If "a" is a **point** or a **rectangle**, 60 | then *"a.transform(m)"* is executed. If "a" is a matrix, then 61 | matrix concatenation takes place. 62 | a&b **intersection rectangle:** "a" must be a rectangle and 63 | "b" :data:`rect_like`. Delivers the **largest rectangle** 64 | contained in both operands. 65 | a|b **union rectangle:** "a" must be a rectangle, and "b" may be 66 | :data:`point_like` or :data:`rect_like`. 67 | Delivers the **smallest rectangle** containing both operands. 68 | b in a if "b" is a number, then *"b in tuple(a)"* is returned. 69 | If "b" is :data:`point_like` or :data:`rect_like`, then "a" 70 | must be a rectangle, and *"a.contains(b)"* is returned. 
71 | a == b *True* if *bool(a-b)* is *False* ("b" may be "a-like"). 72 | ========= ======================================================================= 73 | 74 | 75 | .. note:: Please note an important difference from ordinary arithmetic: 76 | 77 | Matrix multiplication is **not commutative**, i.e. in general we have *m*n != n*m* for two matrices. Also, there are non-zero matrices which have no inverse, for example *m = Matrix(1, 0, 1, 0, 1, 0)*. If you try to divide by any of these you will receive a *ZeroDivisionError* exception using operator *"/"*, e.g. for *fitz.Identity / m*. But if you formulate *fitz.Identity * ~m*, the result will be *fitz.Matrix()* (the null matrix). 78 | 79 | Admittedly, this represents an inconsistency, and we are considering removing it. For the time being, you can choose to avoid an exception and check whether ~m is the null matrix, or accept a potential *ZeroDivisionError* by using *fitz.Identity / m*. 80 | 81 | 82 | Some Examples 83 | -------------- 84 | 85 | Manipulation with numbers 86 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 87 | For the usual arithmetic operations, numbers are always allowed as second operand. In addition, you can formulate *"x in OBJ"*, where x is a number. 
It is implemented as *"x in tuple(OBJ)"*:: 88 | 89 | >>> fitz.Rect(1, 2, 3, 4) + 5 90 | fitz.Rect(6.0, 7.0, 8.0, 9.0) 91 | >>> 3 in fitz.Rect(1, 2, 3, 4) 92 | True 93 | >>> 94 | 95 | The following will create the upper left quarter of a document page rectangle:: 96 | 97 | >>> page.rect 98 | Rect(0.0, 0.0, 595.0, 842.0) 99 | >>> page.rect / 2 100 | Rect(0.0, 0.0, 297.5, 421.0) 101 | >>> 102 | 103 | The following will deliver the **middle point of a line** connecting two points **p1** and **p2**:: 104 | 105 | >>> p1 = fitz.Point(1, 2) 106 | >>> p2 = fitz.Point(4711, 3141) 107 | >>> mp = p1 + (p2 - p1) / 2 108 | >>> mp 109 | Point(2356.0, 1571.5) 110 | >>> 111 | 112 | Manipulation with "like" Objects 113 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 114 | 115 | The second operand of a binary operation can always be "like" the left operand. "Like" in this context means "a sequence of numbers of the same length". With the above examples:: 116 | 117 | >>> p1 + p2 118 | Point(4712.0, 3143.0) 119 | >>> p1 + (4711, 3141) 120 | Point(4712.0, 3143.0) 121 | >>> p1 += (4711, 3141) 122 | >>> p1 123 | Point(4712.0, 3143.0) 124 | >>> 125 | 126 | To shift a rectangle for 5 pixels to the right, do this:: 127 | 128 | >>> fitz.Rect(100, 100, 200, 200) + (5, 0, 5, 0) # add 5 to the x coordinates 129 | Rect(105.0, 100.0, 205.0, 200.0) 130 | >>> 131 | 132 | Points, rectangles and matrices can be *transformed* with matrices. In PyMuPDF, we treat this like a **"multiplication"** (or resp. **"division"**), where the second operand may be "like" a matrix. 
Division in this context means "multiplication with the inverted matrix":: 133 | 134 | >>> m = fitz.Matrix(1, 2, 3, 4, 5, 6) 135 | >>> n = fitz.Matrix(6, 5, 4, 3, 2, 1) 136 | >>> p = fitz.Point(1, 2) 137 | >>> p * m 138 | Point(12.0, 16.0) 139 | >>> p * (1, 2, 3, 4, 5, 6) 140 | Point(12.0, 16.0) 141 | >>> p / m 142 | Point(2.0, -2.0) 143 | >>> p / (1, 2, 3, 4, 5, 6) 144 | Point(2.0, -2.0) 145 | >>> 146 | >>> m * n # matrix multiplication 147 | Matrix(14.0, 11.0, 34.0, 27.0, 56.0, 44.0) 148 | >>> m / n # matrix division 149 | Matrix(2.5, -3.5, 3.5, -4.5, 5.5, -7.5) 150 | >>> 151 | >>> m / m # result is equal to the Identity matrix 152 | Matrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0) 153 | >>> 154 | >>> # look at this non-invertible matrix: 155 | >>> m = fitz.Matrix(1, 0, 1, 0, 1, 0) 156 | >>> ~m 157 | Matrix(0.0, 0.0, 0.0, 0.0, 0.0, 0.0) 158 | >>> # we try dividing by it in two ways: 159 | >>> p = fitz.Point(1, 2) 160 | >>> p * ~m # this delivers point (0, 0): 161 | Point(0.0, 0.0) 162 | >>> p / m # but this is an exception: 163 | Traceback (most recent call last): 164 | File "", line 1, in 165 | p / m 166 | File "... 
/site-packages/fitz/fitz.py", line 869, in __truediv__ 167 | raise ZeroDivisionError("matrix not invertible") 168 | ZeroDivisionError: matrix not invertible 169 | >>> 170 | 171 | 172 | As a specialty, rectangles support additional binary operations: 173 | 174 | * **intersection** -- the common area of rectangle-likes, operator *"&"* 175 | * **inclusion** -- enlarge to include a point-like or rect-like, operator *"|"* 176 | * **containment** check -- whether a point-like or rect-like is inside 177 | 178 | Here is an example for creating the smallest rectangle enclosing given points:: 179 | 180 | >>> # first define some point-likes 181 | >>> points = [] 182 | >>> for i in range(10): 183 | for j in range(10): 184 | points.append((i, j)) 185 | >>> 186 | >>> # now create a rectangle containing all these 100 points 187 | >>> # start with an empty rectangle 188 | >>> r = fitz.Rect(points[0], points[0]) 189 | >>> for p in points[1:]: # and include remaining points one by one 190 | r |= p 191 | >>> r # here is the to be expected result: 192 | Rect(0.0, 0.0, 9.0, 9.0) 193 | >>> (4, 5) in r # this point-like lies inside the rectangle 194 | True 195 | >>> # and this rect-like is also inside 196 | >>> (4, 4, 5, 5) in r 197 | True 198 | >>> 199 | 200 | -------------------------------------------------------------------------------- /docs/app1.rst: -------------------------------------------------------------------------------- 1 | =============================== 2 | Appendix 1: Performance 3 | =============================== 4 | 5 | We have tried to get an impression on PyMuPDF's performance. While we know this is very hard and a fair comparison is almost impossible, we feel that we at least should provide some quantitative information to justify our bold comments on MuPDF's **top performance**. 
6 | 7 | Following are three sections that deal with different aspects of performance: 8 | 9 | * document parsing 10 | * text extraction 11 | * image rendering 12 | 13 | In each section, the same fixed set of PDF files is being processed by a set of tools. The set of tools varies -- for reasons we will explain in the section. 14 | 15 | .. |fsizes| image:: images/img-filesizes.png 16 | 17 | Here is the list of files we are using. Each file name is accompanied by further information: **size** in bytes, number of **pages**, number of bookmarks (**toc** entries), number of **links**, **text** size as a percentage of file size, **KB** per page, PDF **version** and remarks. **text %** and **KB index** are indicators for whether a file is text or graphics oriented. 18 | |fsizes| 19 | E.g. *Adobe.pdf* and *PyMuPDF.pdf* are clearly text oriented, all other files contain many more images. 20 | 21 | 22 | 23 | Part 1: Parsing 24 | ~~~~~~~~~~~~~~~~ 25 | 26 | How fast is a PDF file read and its content parsed for further processing? The sheer parsing performance cannot directly be compared, because batch utilities always execute a requested task completely, in one go, front to end. *pdfrw* too, has a *lazy* strategy for parsing, meaning it only parses those parts of a document that are required in any moment. 27 | 28 | To yet find an answer to the question, we therefore measure the time to copy a PDF file to an output file with each tool, and doing nothing else. 29 | 30 | **These were the tools** 31 | 32 | All tools are either platform independent, or at least can run both, on Windows and Unix / Linux (pdftk). 33 | 34 | **Poppler** is missing here, because it specifically is a Linux tool set, although we know there exist Windows ports (created with considerable effort apparently). Technically, it is a C/C++ library, for which a Python binding exists -- in so far somewhat comparable to PyMuPDF. But Poppler in contrast is tightly coupled to **Qt** and **Cairo**. 
We may still include it in future, when a more handy Windows installation is available. We have seen however some `analysis `_, that hints at a much lower performance than MuPDF. Our comparison of text extraction speeds also show a much lower performance of Poppler's PDF code base **Xpdf**. 35 | 36 | Image rendering of MuPDF also is about three times faster than the one of Xpdf when comparing the command line tools *mudraw* of MuPDF and *pdftopng* of Xpdf -- see part 3 of this chapter. 37 | 38 | ========= ========================================================================== 39 | Tool Description 40 | ========= ========================================================================== 41 | PyMuPDF tool of this manual, appearing as "fitz" in reports 42 | pdfrw a pure Python tool, is being used by rst2pdf, has interface to ReportLab 43 | PyPDF2 a pure Python tool with a very complete function set 44 | pdftk a command line utility with numerous functions 45 | ========= ========================================================================== 46 | 47 | This is how each of the tools was used: 48 | 49 | **PyMuPDF**: 50 | :: 51 | doc = fitz.open("input.pdf") 52 | doc.save("output.pdf") 53 | 54 | **pdfrw**: 55 | :: 56 | doc = PdfReader("input.pdf") 57 | writer = PdfWriter() 58 | writer.trailer = doc 59 | writer.write("output.pdf") 60 | 61 | **PyPDF2**: 62 | :: 63 | pdfmerge = PyPDF2.PdfFileMerger() 64 | pdfmerge.append("input.pdf") 65 | pdfmerge.write("output.pdf") 66 | pdfmerge.close() 67 | 68 | **pdftk**: 69 | :: 70 | pdftk input.pdf output output.pdf 71 | 72 | 73 | **Observations** 74 | 75 | .. |cpyspeed1| image:: images/img-copy-speed-1.png 76 | .. 
|cpyspeed2| image:: images/img-copy-speed-2.png 77 | 78 | These are our run time findings (in **seconds**, please note the European number convention: meaning of decimal point and comma is reversed): 79 | 80 | |cpyspeed1| 81 | 82 | If we leave out the Adobe manual, this table looks like 83 | 84 | |cpyspeed2| 85 | 86 | PyMuPDF is by far the fastest: on average 4.5 times faster than the second best (the pure Python tool pdfrw, **chapeau pdfrw!**), and almost 20 times faster than the command line tool pdftk. 87 | 88 | Whereas PyMuPDF requires less than 13 seconds to process all files, pdftk takes almost 4 minutes. 89 | 90 | By far the slowest tool is PyPDF2 -- it is more than 66 times slower than PyMuPDF and 15 times slower than pdfrw! The main reason for PyPDF2's poor showing is the Adobe manual. It obviously is slowed down by the linear file structure and the immense number of bookmarks of this file. If we take out this special case, then PyPDF2 is only 21.5 times slower than PyMuPDF, 4.5 times slower than pdfrw and 1.2 times slower than pdftk. 91 | 92 | If we look at the output PDFs, there is one surprise: 93 | 94 | Each tool created a PDF of similar size to the original. Apart from the Adobe case, PyMuPDF always created the smallest output. 95 | 96 | Adobe's manual is an exception: The pure Python tools pdfrw and PyPDF2 **reduced** its size by more than 20% (and yielded a document which is no longer linearized)! 97 | 98 | PyMuPDF and pdftk in contrast **drastically increased** the size by 40% to about 50 MB (also no longer linearized). 99 | 100 | So far, we have no explanation of what is happening here. 101 | 102 | 103 | Part 2: Text Extraction 104 | ~~~~~~~~~~~~~~~~~~~~~~~~ 105 | We also have compared text extraction speed with other tools. 106 | 107 | The following table shows a run time comparison. PyMuPDF's methods appear as "fitz (TEXT)" and "fitz (JSON)" respectively. The tool *pdftotext.exe* of the `Xpdf `_ toolset appears as "xpdf". 
108 | 109 | * **extractText():** basic text extraction without layout re-arrangement (using *GetText(..., output = "text")*) 110 | * **pdftotext:** a command line tool of the **Xpdf** toolset (which also is the basis of `Poppler's library `_) 111 | * **extractJSON():** text extraction with layout information (using *GetText(..., output = "json")*) 112 | * **pdfminer:** a pure Python PDF tool specialized in text extraction tasks 113 | 114 | All tools have been used with their most basic, no-frills functionality -- no layout re-arrangements, etc. 115 | 116 | For demonstration purposes, we have included a version of *GetText(doc, output = "json")*, that also re-arranges the output according to occurrence on the page. 117 | 118 | .. |textperf| image:: images/img-textperformance.png 119 | 120 | Here are the results using the same test files as above (again: decimal point and comma reversed): 121 | 122 | |textperf| 123 | 124 | Again, (Py-) MuPDF is the fastest around. It is 2.3 to 2.6 times faster than xpdf. 125 | 126 | *pdfminer*, as a pure Python solution, of course is comparatively slow: MuPDF is 50 to 60 times faster and xpdf is 23 times faster. These observations in order of magnitude coincide with the statements on this `web site `_. 127 | 128 | Part 3: Image Rendering 129 | ~~~~~~~~~~~~~~~~~~~~~~~~ 130 | We have tested rendering speed of MuPDF against *pdftopng.exe*, a command line tool of the **Xpdf** toolset (the PDF code basis of **Poppler**). 
131 | 132 | **MuPDF invocation using a resolution of 150 pixels (Xpdf default):** 133 | :: 134 | mutool draw -o t%d.png -r 150 file.pdf 135 | 136 | **PyMuPDF invocation:** 137 | :: 138 | zoom = 150.0 / 72.0 139 | mat = fitz.Matrix(zoom, zoom) 140 | def ProcessFile(datei): 141 | print "processing:", datei 142 | doc=fitz.open(datei) 143 | for p in fitz.Pages(doc): 144 | pix = p.getPixmap(matrix=mat, alpha = False) 145 | pix.writePNG("t-%s.png" % p.number) 146 | pix = None 147 | doc.close() 148 | return 149 | 150 | **Xpdf invocation:** 151 | :: 152 | pdftopng.exe file.pdf ./ 153 | 154 | .. |renderspeed| image:: images/img-render-speed.png 155 | 156 | The resulting runtimes can be found here (again: meaning of decimal point and comma reversed): 157 | 158 | |renderspeed| 159 | 160 | * MuPDF and PyMuPDF are both about 3 times faster than Xpdf. 161 | 162 | * The 2% speed difference between MuPDF (a utility written in C) and PyMuPDF is the Python overhead. 163 | -------------------------------------------------------------------------------- /docs/app3.rst: -------------------------------------------------------------------------------- 1 | .. _Appendix 3: 2 | 3 | ================================================ 4 | Appendix 3: Considerations on Embedded Files 5 | ================================================ 6 | This chapter provides some background on embedded files support in PyMuPDF. 7 | 8 | General 9 | ---------- 10 | Starting with version 1.4, PDF supports embedding arbitrary files as part ("Embedded File Streams") of a PDF document file (see chapter 3.10.3, pp. 184 of the :ref:`AdobeManual`). 11 | 12 | In many aspects, this is comparable to concepts also found in ZIP files or the OLE technique in MS Windows. PDF embedded files do, however, *not* support directory structures as does the ZIP format. An embedded file can in turn contain embedded files itself. 
13 | 14 | Advantages of this concept are that embedded files are under the PDF umbrella, benefitting from its permissions / password protection and integrity aspects: all data, which a PDF may reference or even may be dependent on, can be bundled into it and so form a single, consistent unit of information. 15 | 16 | In addition to embedded files, PDF 1.7 adds *collections* to its support range. This is an advanced way of storing and presenting meta information (i.e. arbitrary and extensible properties) of embedded files. 17 | 18 | MuPDF Support 19 | -------------- 20 | After adding initial support for collections (portfolios) and */EmbeddedFiles* in MuPDF version 1.11, this support was dropped again in version 1.15. 21 | 22 | As a consequence, the cli utility *mutool* no longer offers access to embedded files. 23 | 24 | PyMuPDF -- having implemented an */EmbeddedFiles* API in response in its version 1.11.0 -- was therefore forced to change gears starting with its version 1.16.0 (we never published a MuPDF v1.15.x compatible PyMuPDF). 25 | 26 | We are now maintaining our own code basis supporting embedded files. This code makes use of basic MuPDF dictionary and array functions only. 27 | 28 | PyMuPDF Support 29 | ------------------ 30 | We continue to support the full old API with respect to embedded files -- with only minor, cosmetic changes. 31 | 32 | There is even a new function, which delivers a list of all names under which embedded data are registered in a PDF, :meth:`Document.embeddedFileNames`. 33 | -------------------------------------------------------------------------------- /docs/classes.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Classes 3 | ============ 4 | 5 | .. 
toctree:: 6 | :maxdepth: 2 7 | 8 | annot 9 | colorspace 10 | displaylist 11 | document 12 | font 13 | identity 14 | irect 15 | link 16 | linkdest 17 | matrix 18 | outline 19 | page 20 | pixmap 21 | point 22 | quad 23 | rect 24 | shape 25 | textpage 26 | textwriter 27 | tools 28 | widget 29 | -------------------------------------------------------------------------------- /docs/colors.rst: -------------------------------------------------------------------------------- 1 | .. _ColorDatabase: 2 | 3 | ================ 4 | Color Database 5 | ================ 6 | Since the introduction of methods involving colors (like :meth:`Page.drawCircle`), a requirement may be to have access to predefined colors. 7 | 8 | The fabulous GUI package `wxPython `_ has a database of over 540 predefined RGB colors, which are given more or less memorizable names. Among them are not only standard names like "green" or "blue", but also "turquoise", "skyblue", and 100 (not only 50 ...) shades of "gray", etc. 9 | 10 | We have taken the liberty to copy this database (a list of tuples) modified into PyMuPDF and make its colors available as PDF compatible float triples: for wxPython's *("WHITE", 255, 255, 255)* we return *(1, 1, 1)*, which can be directly used in *color* and *fill* parameters. We also accept any mixed case of "wHiTe" to find a color. 11 | 12 | Function *getColor()* 13 | ------------------------ 14 | As the color database may not be needed very often, one additional import statement seems acceptable to get access to it:: 15 | 16 | >>> # "getColor" is the only method you really need 17 | >>> from fitz.utils import getColor 18 | >>> getColor("aliceblue") 19 | (0.9411764705882353, 0.9725490196078431, 1.0) 20 | >>> # 21 | >>> # to get a list of all existing names 22 | >>> from fitz.utils import getColorList 23 | >>> cl = getColorList() 24 | >>> cl 25 | ['ALICEBLUE', 'ANTIQUEWHITE', 'ANTIQUEWHITE1', 'ANTIQUEWHITE2', 'ANTIQUEWHITE3', 26 | 'ANTIQUEWHITE4', 'AQUAMARINE', 'AQUAMARINE1'] ... 
27 | >>> # 28 | >>> # to see the full integer color coding 29 | >>> from fitz.utils import getColorInfoList 30 | >>> il = getColorInfoList() 31 | >>> il 32 | [('ALICEBLUE', 240, 248, 255), ('ANTIQUEWHITE', 250, 235, 215), 33 | ('ANTIQUEWHITE1', 255, 239, 219), ('ANTIQUEWHITE2', 238, 223, 204), 34 | ('ANTIQUEWHITE3', 205, 192, 176), ('ANTIQUEWHITE4', 139, 131, 120), 35 | ('AQUAMARINE', 127, 255, 212), ('AQUAMARINE1', 127, 255, 212)] ... 36 | 37 | 38 | Printing the Color Database 39 | ---------------------------- 40 | If you want to actually see how the many available colors look like, use scripts `colordbRGB.py `_ or `colordbHSV.py `_ in the examples directory. They create PDFs (already existing in the same directory) with all these colors. Their only difference is sorting order: one takes the RGB values, the other one the Hue-Saturation-Values as sort criteria. 41 | This is a screen print of what these files look like. 42 | 43 | .. image:: images/img-colordb.png 44 | -------------------------------------------------------------------------------- /docs/colorspace.rst: -------------------------------------------------------------------------------- 1 | .. _Colorspace: 2 | 3 | ================ 4 | Colorspace 5 | ================ 6 | 7 | Represents the color space of a :ref:`Pixmap`. 8 | 9 | 10 | **Class API** 11 | 12 | .. class:: Colorspace 13 | 14 | .. method:: __init__(self, n) 15 | 16 | Constructor 17 | 18 | :arg int n: A number identifying the colorspace. Possible values are :data:`CS_RGB`, :data:`CS_GRAY` and :data:`CS_CMYK`. 19 | 20 | .. attribute:: name 21 | 22 | The name identifying the colorspace. Example: *fitz.csCMYK.name = 'DeviceCMYK'*. 23 | 24 | :type: str 25 | 26 | .. attribute:: n 27 | 28 | The number of bytes required to define the color of one pixel. Example: *fitz.csCMYK.n == 4*. 
29 | 30 | :type: int 31 | 32 | 33 | **Predefined Colorspaces** 34 | 35 | For saving some typing effort, there exist predefined colorspace objects for the three available cases. 36 | 37 | * :data:`csRGB` = *fitz.Colorspace(fitz.CS_RGB)* 38 | * :data:`csGRAY` = *fitz.Colorspace(fitz.CS_GRAY)* 39 | * :data:`csCMYK` = *fitz.Colorspace(fitz.CS_CMYK)* 40 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | import sys 4 | import os 5 | import sphinx_rtd_theme 6 | 7 | # If extensions (or modules to document with autodoc) are in another directory, 8 | # add these directories to sys.path here. If the directory is relative to the 9 | # documentation root, use os.path.abspath to make it absolute, like shown here. 10 | # sys.path.insert(0, os.path.abspath('.')) 11 | 12 | # -- General configuration ------------------------------------------------ 13 | 14 | # If your documentation needs a minimal Sphinx version, state it here. 15 | # needs_sphinx = "3.1" 16 | 17 | # Add any Sphinx extension module names here, as strings. They can be 18 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 19 | # ones. 20 | extensions = [ 21 | "sphinx.ext.autodoc", 22 | # "sphinx.ext.todo", 23 | "sphinx.ext.coverage", 24 | "sphinx.ext.ifconfig", 25 | # "sphinx.ext.imgmath", 26 | ] 27 | 28 | # Add any paths that contain templates here, relative to this directory. 29 | templates_path = ["_templates"] 30 | 31 | # The suffix of source filenames. 32 | # source_suffix = ".rst" 33 | 34 | # The encoding of source files. 35 | # source_encoding = 'utf-8-sig' 36 | 37 | # The master toctree document. 38 | master_doc = "index" 39 | 40 | # General information about the project. 41 | project = "PyMuPDF" 42 | copyright = "2015-2020, Jorj X. 
McKie" 43 | 44 | # The version info for the project you're documenting, acts as replacement for 45 | # |version| and |release|, also used in various other places throughout the 46 | # built documents. 47 | # 48 | # The full version, including alpha/beta/rc tags. 49 | release = "1.17.4" 50 | 51 | # The short X.Y version 52 | version = release 53 | 54 | # The language for content autogenerated by Sphinx. Refer to documentation 55 | # for a list of supported languages. 56 | # language = None 57 | 58 | # There are two options for replacing |today|: either, you set today to some 59 | # non-false value, then it is used: 60 | # today = '' 61 | # Else, today_fmt is used as the format for a strftime call. 62 | # today_fmt = '%B %d, %Y' 63 | 64 | # List of patterns, relative to source directory, that match files and 65 | # directories to ignore when looking for source files. 66 | exclude_patterns = ["_build"] 67 | 68 | # The reST default role (used for this markup: `text`) to use for all 69 | # documents. 70 | default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = "sphinx" 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | modindex_common_prefix = [] 88 | 89 | # If true, keep warnings as "system message" paragraphs in the built documents. 90 | keep_warnings = False 91 | 92 | 93 | # -- Options for HTML output ---------------------------------------------- 94 | 95 | # The theme to use for HTML and HTML Help pages. See the documentation for 96 | # a list of builtin themes. 
97 | # html_theme = "agogo" 98 | # html_theme = "sphinxdoc" 99 | # html_theme = "python_docs_theme" 100 | html_theme = "sphinx_rtd_theme" 101 | # html_theme = "classic" 102 | 103 | # Theme options are theme-specific and customize the look and feel of a theme 104 | # further. For a list of options available for each theme, see the 105 | # documentation. 106 | html_theme_options = { 107 | # "root_name": "", 108 | # "root_url": "", 109 | # "root_icon": "pymupdf.ico", 110 | # "sidebarbgcolor": "gray", 111 | } 112 | 113 | # Add any paths that contain custom themes here, relative to this directory. 114 | # html_theme_path = [] 115 | # html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 116 | 117 | # The name for this set of Sphinx documents. If None, it defaults to 118 | # " v documentation". 119 | # html_title = None 120 | 121 | # A shorter title for the navigation bar. Default is the same as html_title. 122 | # html_short_title = None 123 | 124 | # The name of an image file (relative to this directory) to place at the top 125 | # of the sidebar. 126 | # html_logo = "images/img-pymupdf.jpg" 127 | 128 | # The name of an image file (within the static path) to use as favicon of the 129 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 130 | # pixels large. 131 | html_favicon = "Pymupdf.ico" 132 | 133 | # Add any paths that contain custom static files (such as style sheets) here, 134 | # relative to this directory. They are copied after the builtin static files, 135 | # so a file named "default.css" will overwrite the builtin "default.css". 136 | html_static_path = ["_static"] 137 | 138 | # Add any extra paths that contain custom files (such as robots.txt or 139 | # .htaccess) here, relative to this directory. These files are copied 140 | # directly to the root of the documentation. 141 | # html_extra_path = [] 142 | 143 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 144 | # using the given strftime format. 
145 | html_last_updated_fmt = "%d. %b %Y" 146 | 147 | # If true, SmartyPants will be used to convert quotes and dashes to 148 | # typographically correct entities. 149 | # html_use_smartypants = False 150 | 151 | # Custom sidebar templates, maps document names to template names. 152 | # html_sidebars = {} 153 | 154 | # Additional templates that should be rendered to pages, maps page names to 155 | # template names. 156 | html_additional_pages = {} 157 | 158 | # If false, no module index is generated. 159 | html_domain_indices = True 160 | 161 | # If false, no index is generated. 162 | html_use_index = True 163 | 164 | # If true, the index is split into individual pages for each letter. 165 | html_split_index = True 166 | 167 | # If true, links to the reST sources are added to the pages. 168 | html_show_sourcelink = True 169 | html_sourcelink_suffix = ".rst" 170 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 171 | html_show_sphinx = True 172 | 173 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 174 | html_show_copyright = True 175 | 176 | # If true, an OpenSearch description file will be output, and all pages will 177 | # contain a tag referring to it. The value of this option must be the 178 | # base URL from which the finished HTML is served. 179 | # html_use_opensearch = "https://pymupdf.readthedocs.io/en/latest" 180 | 181 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 182 | # html_file_suffix = ".html" 183 | 184 | # Output file base name for HTML help builder. 185 | htmlhelp_basename = "PyMuPDF" 186 | 187 | 188 | # -- Options for LaTeX output --------------------------------------------- 189 | latex_elements = { 190 | # "fontpkg": r"\usepackage[sfdefault]{ClearSans} \usepackage[T1]{fontenc}" 191 | } 192 | # Grouping the document tree into LaTeX files. List of tuples 193 | # (source start file, target name, title, 194 | # author, documentclass [howto, manual, or own class]). 
195 | latex_documents = [ 196 | ("index", "PyMuPDF.tex", u"PyMuPDF Documentation", u"Jorj X. McKie", "manual") 197 | ] 198 | # The name of an image file (relative to this directory) to place at the top of 199 | # the title page. 200 | latex_logo = "images/img-pymupdf.jpg" 201 | 202 | # For "manual" documents, if this is true, then toplevel headings are parts, 203 | # not chapters. 204 | # latex_use_parts = False 205 | 206 | # If true, show page references after internal links. 207 | latex_show_pagerefs = False 208 | 209 | # If true, show URL addresses after external links. 210 | # latex_show_urls = True 211 | # latex_use_xindy = True 212 | # Documents to append as an appendix to all manuals. 213 | # latex_appendices = [] 214 | 215 | # If false, no module index is generated. 216 | latex_domain_indices = True 217 | 218 | # -- Options for PDF output -------------------------------------------------- 219 | # Grouping the document tree into PDF files. List of tuples 220 | # (source start file, target name, title, author). 221 | 222 | pdf_documents = [("index", "PyMuPDF", "PyMuPDF Manual", "Jorj McKie")] 223 | 224 | # A comma-separated list of custom stylesheets. Example: 225 | pdf_stylesheets = ["sphinx", "bahnschrift"] 226 | 227 | # Create a compressed PDF 228 | pdf_compressed = True 229 | 230 | # A colon-separated list of folders to search for fonts. Example: 231 | # pdf_font_path=['/usr/share/fonts', '/usr/share/texmf-dist/fonts/'] 232 | 233 | # Language to be used for hyphenation support 234 | pdf_language = "en_US" 235 | 236 | # If false, no index is generated. 237 | pdf_use_index = True 238 | 239 | # If false, no modindex is generated. 240 | pdf_use_modindex = True 241 | 242 | # If false, no coverpage is generated. 
243 | pdf_use_coverpage = True 244 | 245 | pdf_break_level = 2 246 | 247 | pdf_verbosity = 0 248 | pdf_invariant = True 249 | -------------------------------------------------------------------------------- /docs/coop_low.rst: -------------------------------------------------------------------------------- 1 | 2 | .. _cooperation: 3 | 4 | =============================================================== 5 | Working together: DisplayList and TextPage 6 | =============================================================== 7 | Here are some instructions on how to use these classes together. 8 | 9 | In some situations, performance improvements may be achievable, when you fall back to the detail level explained here. 10 | 11 | Create a DisplayList 12 | --------------------- 13 | A :ref:`DisplayList` represents an interpreted document page. Methods for pixmap creation, text extraction and text search are -- behind the curtain -- all using the page's display list to perform their tasks. If a page must be rendered several times (e.g. because of changed zoom levels), or if text search and text extraction should both be performed, overhead can be saved, if the display list is created only once and then used for all other tasks. 14 | 15 | >>> dl = page.getDisplayList() # create the display list 16 | 17 | You can also create display lists for many pages "on stack" (in a list), maybe during document open, during idling times, or you store it when a page is visited for the first time (e.g. in GUI scripts). 18 | 19 | Note that for everything that follows, only the display list is needed -- the corresponding :ref:`Page` object could have been deleted. 20 | 21 | Generate Pixmap 22 | ------------------ 23 | The following creates a Pixmap from a :ref:`DisplayList`. Parameters are the same as for :meth:`Page.getPixmap`. 24 | 25 | >>> pix = dl.getPixmap() # create the page's pixmap 26 | 27 | The execution time of this statement may be up to 50% shorter than that of :meth:`Page.getPixmap`. 
28 | 29 | Perform Text Search 30 | --------------------- 31 | With the display list from above, we can also search for text. 32 | 33 | For this we need to create a :ref:`TextPage`. 34 | 35 | >>> tp = dl.getTextPage() # display list from above 36 | >>> rlist = tp.search("needle") # look up "needle" locations 37 | >>> for r in rlist: # work with the found locations, e.g. 38 | pix.invertIRect(r.irect) # invert colors in the rectangles 39 | 40 | Extract Text 41 | ---------------- 42 | With the same :ref:`TextPage` object from above, we can now immediately use any or all of the 5 text extraction methods. 43 | 44 | .. note:: Above, we have created our text page without an argument. This leads to a default argument of 3 (ligatures and white-space are preserved), in other words, images will **not** be extracted -- see below. 45 | 46 | >>> txt = tp.extractText() # plain text format 47 | >>> json = tp.extractJSON() # json format 48 | >>> html = tp.extractHTML() # HTML format 49 | >>> xml = tp.extractXML() # XML format 50 | >>> xhtml = tp.extractXHTML() # XHTML format 51 | 52 | Further Performance improvements 53 | --------------------------------- 54 | Pixmap 55 | ~~~~~~~ 56 | As explained in the :ref:`Page` chapter: 57 | 58 | If you do not need transparency, set *alpha = 0* when creating pixmaps. This will save 25% memory (if RGB, the most common case) and possibly 5% execution time (depending on the GUI software). 59 | 60 | TextPage 61 | ~~~~~~~~~ 62 | If you do not need images extracted alongside the text of a page, you can set the following option: 63 | 64 | >>> flags = fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE 65 | >>> tp = dl.getTextPage(flags) 66 | 67 | This will save ca. 25% overall execution time for the HTML, XHTML and JSON text extractions and **hugely** reduce the amount of storage (both, memory and disk space) if the document is graphics oriented. 
68 | 69 | If, however, you do need images, use a value of 7 for flags: 70 | 71 | >>> flags = fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_IMAGES 72 | -------------------------------------------------------------------------------- /docs/device.rst: -------------------------------------------------------------------------------- 1 | .. _Device: 2 | 3 | ================ 4 | Device 5 | ================ 6 | 7 | The different format handlers (pdf, xps, etc.) interpret pages to a "device". Devices are the basis for everything that can be done with a page: rendering, text extraction and searching. The device type is determined by the selected construction method. 8 | 9 | **Class API** 10 | 11 | .. class:: Device 12 | 13 | .. method:: __init__(self, object, clip) 14 | 15 | Constructor for either a pixel map or a display list device. 16 | 17 | :arg object: either a *Pixmap* or a *DisplayList*. 18 | :type object: :ref:`Pixmap` or :ref:`DisplayList` 19 | 20 | :arg clip: An optional `IRect` for *Pixmap* devices to restrict rendering to a certain area of the page. If the complete page is required, specify *None*. For display list devices, this parameter must be omitted. 21 | :type clip: :ref:`IRect` 22 | 23 | .. method:: __init__(self, textpage, flags=0) 24 | 25 | Constructor for a text page device. 26 | 27 | :arg textpage: *TextPage* object 28 | :type textpage: :ref:`TextPage` 29 | 30 | :arg int flags: control how text is parsed into the text page. Currently 3 options can be coded into this parameter, see :ref:`TextPreserve`. To set these options use something like *flags=0 | TEXT_PRESERVE_LIGATURES | ...*. 31 | 32 | .. note:: In higher level code (:meth:`Page.getText`, :meth:`Document.getPageText`), the following decisions for creating text devices have been implemented: (1) *TEXT_PRESERVE_LIGATURES* and *TEXT_PRESERVE_WHITESPACE* are always set, (2) *TEXT_PRESERVE_IMAGES* is set for JSON and HTML, otherwise off. 
33 | 34 | -------------------------------------------------------------------------------- /docs/displaylist.rst: -------------------------------------------------------------------------------- 1 | .. _DisplayList: 2 | 3 | ================ 4 | DisplayList 5 | ================ 6 | 7 | DisplayList is a list containing drawing commands (text, images, etc.). The intent is two-fold: 8 | 9 | 1. as a caching-mechanism to reduce parsing of a page 10 | 2. as a data structure in multi-threading setups, where one thread parses the page and another one renders pages. This aspect is currently not supported by PyMuPDF. 11 | 12 | A display list is populated with objects from a page, usually by executing :meth:`Page.getDisplayList`. There also exists an independent constructor. 13 | 14 | "Replay" the list (once or many times) by invoking one of its methods :meth:`~DisplayList.run`, :meth:`~DisplayList.getPixmap` or :meth:`~DisplayList.getTextPage`. 15 | 16 | 17 | ================================= ============================================ 18 | **Method** **Short Description** 19 | ================================= ============================================ 20 | :meth:`~DisplayList.run` Run a display list through a device. 21 | :meth:`~DisplayList.getPixmap` generate a pixmap 22 | :meth:`~DisplayList.getTextPage` generate a text page 23 | :attr:`~DisplayList.rect` mediabox of the display list 24 | ================================= ============================================ 25 | 26 | 27 | **Class API** 28 | 29 | .. class:: DisplayList 30 | 31 | .. method:: __init__(self, mediabox) 32 | 33 | Create a new display list. 34 | 35 | :arg mediabox: The page's rectangle. 36 | :type mediabox: :ref:`Rect` 37 | 38 | :rtype: *DisplayList* 39 | 40 | .. method:: run(device, matrix, area) 41 | 42 | Run the display list through a device. The device will populate the display list with its "commands" (i.e. text extraction or image creation). 
The display list can later be used to "read" a page many times without having to re-interpret it from the document file. 43 | 44 | You will most probably instead use one of the specialized run methods below -- :meth:`getPixmap` or :meth:`getTextPage`. 45 | 46 | :arg device: Device 47 | :type device: :ref:`Device` 48 | 49 | :arg matrix: Transformation matrix to apply to the display list contents. 50 | :type matrix: :ref:`Matrix` 51 | 52 | :arg area: Only the part visible within this area will be considered when the list is run through the device. 53 | :type area: :ref:`Rect` 54 | 55 | .. index:: 56 | pair: matrix; getPixmap 57 | pair: colorspace; getPixmap 58 | pair: clip; getPixmap 59 | pair: alpha; getPixmap 60 | 61 | .. method:: getPixmap(matrix=fitz.Identity, colorspace=fitz.csRGB, alpha=0, clip=None) 62 | 63 | Run the display list through a draw device and return a pixmap. 64 | 65 | :arg matrix: matrix to use. Default is the identity matrix. 66 | :type matrix: :ref:`Matrix` 67 | 68 | :arg colorspace: the desired colorspace. Default is RGB. 69 | :type colorspace: :ref:`Colorspace` 70 | 71 | :arg int alpha: determine whether or not (0, default) to include a transparency channel. 72 | 73 | :arg clip: an area of the full mediabox to which the pixmap should be restricted. 74 | :type clip: :ref:`IRect` or :ref:`Rect` 75 | 76 | :rtype: :ref:`Pixmap` 77 | :returns: pixmap of the display list. 78 | 79 | .. method:: getTextPage(flags) 80 | 81 | Run the display list through a text device and return a text page. 82 | 83 | :arg int flags: control which information is parsed into a text page. Default value in PyMuPDF is **3 = TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE**, i.e. ligatures are **passed through**, white spaces are **passed through** (not translated to spaces), and images are **not included**. See :ref:`TextPreserve`. 84 | 85 | :rtype: :ref:`TextPage` 86 | :returns: text page of the display list. 87 | 88 | .. 
attribute:: rect 89 | 90 | Contains the display list's mediabox. This will equal the page's rectangle if it was created via :meth:`Page.getDisplayList`. 91 | 92 | :type: :ref:`Rect` 93 | -------------------------------------------------------------------------------- /docs/font.rst: -------------------------------------------------------------------------------- 1 | .. _Font: 2 | 3 | ================ 4 | Font 5 | ================ 6 | 7 | *(New in v1.16.18)* This class represents a font as defined in MuPDF (*fz_font_s* structure). It is required for the new class :ref:`TextWriter` and the new :meth:`Page.writeText`. Currently, it has no connection to how fonts are used in methods ``insertText`` or ``insertTextbox``, respectively. 8 | 9 | A Font object also contains useful general information, like the font bbox, the number of defined glyphs, glyph names or the bbox of a single glyph. 10 | 11 | **Class API** 12 | 13 | .. class:: Font 14 | 15 | .. method:: __init__(self, fontname=None, fontfile=None, 16 | fontbuffer=None, script=0, language=None, ordering=-1, is_bold=0, 17 | is_italic=0, is_serif=0) 18 | 19 | Font constructor. The large number of parameters is used to locate the font which most closely resembles the requirements. Not all parameters are ever required -- see the pseudo code below, which explains how the parameters are evaluated. 20 | 21 | :arg str fontname: one of the :ref:`Base-14-Fonts` or CJK fontnames. Also possible are a select few of other names like (watch the correct spelling): "Arial", "Times", "Times Roman". 22 | 23 | *(Changed in v1.17.4)* 24 | 25 | If you have installed `pymupdf-fonts <https://pypi.org/project/pymupdf-fonts/>`_, you can also use the following new "reserved" fontnames: "figo", "figbo", "figit", "figbi", "fimo", and "fimbo". This will provide one of the "FiraGO" or resp. "FiraMono" fonts, created by Mozilla.org. 26 | 27 | :arg str fontfile: the filename of a fontfile somewhere on your system [#f1]_. 
28 | :arg bytes,bytearray,io.BytesIO fontbuffer: a fontfile loaded in memory [#f1]_. 29 | :arg int script: the number of a UCDN script. Currently supported in PyMuPDF are numbers 24, and 32 through 35. 30 | :arg str language: one of the values "zh-Hant" (traditional Chinese), "zh-Hans" (simplified Chinese), "ja" (Japanese) and "ko" (Korean). Otherwise, all ISO 639 codes from the subsets 1, 2, 3 and 5 are also possible, but are currently documentary only. 31 | :arg int ordering: an alternative selector for one of the CJK fonts. 32 | :arg bool is_bold: look for a bold font. 33 | :arg bool is_italic: look for an italic font. 34 | :arg bool is_serif: look for a serifed font. 35 | 36 | :returns: a MuPDF font if successful. This is the overall logic of how an appropriate font is located:: 37 | 38 | if fontfile: 39 | create font from it ignoring other arguments 40 | if not successful -> exception 41 | if fontbuffer: 42 | create font from it ignoring other arguments 43 | if not successful -> exception 44 | if ordering >= 0: 45 | load **"universal"** font ignoring other parameters 46 | # this will always be successful 47 | if fontname: 48 | create a Base14 font, or resp. **"universal"** font, ignoring other parameters 49 | # note: values "Arial", "Times", "Times Roman" are also possible 50 | if not successful -> exception 51 | Finally try to load a "NOTO" font using *script* and *language* parameters. 52 | if not successful: 53 | look for fallback font 54 | 55 | .. note:: 56 | 57 | With the usual abbreviations "helv", "tiro", etc., you will create fonts with the expected names "Helvetica", "Times-Roman" and so on. 58 | 59 | Using *ordering >= 0*, or fontnames starting with "china", "japan" or "korea" will always create the same **"universal"** font **"Droid Sans Fallback Regular"**. This font supports **all CJK and all Latin characters**. 60 | 61 | Actually, you would rarely ever need another font than **"Droid Sans Fallback Regular"**. 
**Except** that this font file is relatively large and adds about 1.65 MB (compressed) to your PDF file size. If you do not need CJK support, stick with specifying "helv", "tiro" etc., and you will get away with about 35 KB compressed. 62 | 63 | If you **know** you have a mixture of CJK and Latin text, consider just using ``Font(ordering=0)`` because this supports everything and also significantly (by a factor of two to three) speeds up execution: MuPDF will always find any character in this single font and need not check fallbacks. 64 | 65 | But if you do specify a Base-14 fontname, you will still be able to also write CJK characters! MuPDF automatically detects this situation and silently falls back to the universal font (which will then of course also be embedded in your PDF). 66 | 67 | *(New in v1.17.4)* Optionally, a set of new "reserved" fontnames becomes available if you install `pymupdf-fonts <https://pypi.org/project/pymupdf-fonts/>`_. The currently available fonts are from the Fira fonts family created by Mozilla. "Fira Mono" is a nice mono-spaced sans font set and FiraGO is another non-serifed "universal" font set, which supports all European languages (including Cyrillic and Greek) plus Thai, Arabic, Hebrew and Devanagari -- however none of the CJK languages. The size of a FiraGO font is only a quarter of the "Droid Sans Fallback" size (compressed 400 KB vs. 1.65 MB) -- and the style variants bold and italic are available. The following table maps a fontname to the corresponding font: 68 | 69 | =========== ======================================= 70 | Fontname Font 71 | =========== ======================================= 72 | figo FiraGO Regular 73 | figbo FiraGO Bold 74 | figit FiraGO Italic 75 | figbi FiraGO Bold Italic 76 | fimo Fira Mono Regular 77 | fimbo Fira Mono Bold 78 | =========== ======================================= 79 | 80 | **All fonts mentioned here** also support Greek and Cyrillic letters. 81 | 82 | .. 
method:: has_glyph(chr, language=None, script=0) 83 | 84 | Check whether the unicode *chr* exists in the font or some fallback. May be used to check whether any "TOFU" symbols will appear on output. 85 | 86 | :arg int chr: the unicode of the character (i.e. *ord()*). 87 | :arg str language: the language -- currently unused. 88 | :arg int script: the UCDN script number. 89 | :returns: *True* or *False*. 90 | 91 | .. method:: glyph_advance(chr, language=None, script=0, wmode=0) 92 | 93 | Calculate the "width" of the character's glyph (visual representation). 94 | 95 | :arg int chr: the unicode number of the character. Use ``ord(c)``, not the character itself. Again, this should normally work even if a character is not supported by that font, because fallback fonts will be checked where necessary. 96 | 97 | The other parameters are not in use currently. This especially means that only horizontal text writing is supported. 98 | 99 | :returns: a float representing the glyph's width relative to **fontsize 1**. 100 | 101 | .. method:: glyph_name_to_unicode(name) 102 | 103 | Return the unicode for a given glyph name. Use it in conjunction with ``chr()`` if you want to output e.g. a certain symbol. 104 | 105 | :arg str name: The name of the glyph. 106 | 107 | :returns: The unicode integer, or 65533 = 0xFFFD if the name is unknown. Examples: ``font.glyph_name_to_unicode("Sigma") = 931``, ``font.glyph_name_to_unicode("sigma") = 963``. Refer to e.g. `this <https://github.com/adobe-type-tools/agl-aglfn/blob/master/glyphlist.txt>`_ publication for a list of glyph names and their unicode numbers. 108 | 109 | .. method:: unicode_to_glyph_name(chr, language=None, script=0, wmode=0) 110 | 111 | Show the name of the character's glyph. 112 | 113 | :arg int chr: the unicode number of the character. Use ``ord(c)``, not the character itself. 114 | 115 | :returns: a string representing the glyph's name. E.g. ``font.unicode_to_glyph_name(ord("#")) = "numbersign"``. Depending on how this font was built, the string may be empty, ".notfound" or some generated name. 
116 | 117 | .. method:: text_length(text, fontsize=11) 118 | 119 | Calculate the length of a unicode string. 120 | 121 | :arg str text: a text string -- UTF-8 encoded. For Python 2, you must use unicode here. 122 | 123 | :arg float fontsize: the fontsize. 124 | 125 | :returns: a float representing the length of the string when stored in the PDF. Internally :meth:`glyph_advance` is used on a by-character level. If the font does not have a character, it will automatically be looked up in a fallback font. 126 | 127 | .. attribute:: flags 128 | 129 | A dictionary with various font properties, each represented as a bool. 130 | 131 | .. attribute:: name 132 | 133 | Name of the font. May be "" or "(null)". 134 | 135 | .. attribute:: glyph_count 136 | 137 | The number of glyphs defined in the font. 138 | 139 | .. rubric:: Footnotes 140 | 141 | .. [#f1] MuPDF does not support all fontfiles with this feature and will raise exceptions like *"mupdf: FT_New_Memory_Face((null)): unknown file format"*, if it encounters issues. 142 | -------------------------------------------------------------------------------- /docs/glossary.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Glossary 3 | ============== 4 | 5 | .. data:: matrix_like 6 | 7 | A Python sequence of 6 numbers. 8 | 9 | .. data:: rect_like 10 | 11 | A Python sequence of 4 numbers. 12 | 13 | .. data:: irect_like 14 | 15 | A Python sequence of 4 integers. 16 | 17 | .. data:: point_like 18 | 19 | A Python sequence of 2 numbers. 20 | 21 | .. data:: quad_like 22 | 23 | A Python sequence of 4 :data:`point_like` items. 24 | 25 | .. data:: inheritable 26 | 27 | A number of values in a PDF can be specified once and then be inherited by objects further down in a parent-child relationship. The mediabox (physical size) of pages can for example be specified in nodes of the :data:`pagetree` and will then be taken as value for all *kids*, which do not specify their own value. 
28 | 29 | .. data:: MediaBox 30 | 31 | A PDF array of 4 floats specifying a physical page size (:data:`inheritable`). 32 | 33 | .. data:: CropBox 34 | 35 | A PDF array of 4 floats specifying a page's visible area (:data:`inheritable`). This value is **not affected** if the page is rotated. 36 | 37 | 38 | .. data:: catalog 39 | 40 | A central PDF :data:`dictionary` -- also called "root" -- containing pointers to many other information. 41 | 42 | .. data:: contents 43 | 44 | "A **content stream** is a PDF :data:`stream` :data:`object` whose data consists of a sequence of instructions describing the graphical elements to be painted on a page." (:ref:`AdobeManual` p. 151). For an overview of the mini-language used in these streams see chapter "Operator Summary" on page 985 of the :ref:`AdobeManual`. A PDF :data:`page` can have none to many contents objects. If it has none, the page is empty (but still may show annotations). If it has several, they will be interpreted in sequence as if their instructions had been present in one such object (i.e. like in a concatenated string). It should be noted that there are more stream object types which use the same syntax: e.g. appearance dictionaries associated with annotations and Form XObjects. 45 | 46 | .. data:: resources 47 | 48 | A :data:`dictionary` containing references to any resources (like images or fonts) required by a PDF :data:`page` (required, inheritable, :ref:`AdobeManual` p. 145) and certain other objects (Form XObjects). This dictionary appears as a sub-dictionary in the object definition under the key */Resources*. Being an inheritable object type, there may exist "parent" resources for all pages or certain subsets of pages. 49 | 50 | .. data:: dictionary 51 | 52 | A PDF :data:`object` type, which is somewhat comparable to the same-named Python notion: "A dictionary object is an associative table containing pairs of objects, known as the dictionary's entries. 
The first element of each entry is the key and the second element is the value. The key must be a name (...). The value can be any kind of object, including another dictionary. A dictionary entry whose value is null (...) is equivalent to an absent entry." (:ref:`AdobeManual` p. 59). 53 | 54 | Dictionaries are the most important :data:`object` type in PDF. Here is an example (describing a :data:`page`):: 55 | 56 | << 57 | /Contents 40 0 R % value: an indirect object 58 | /Type/Page % value: a name object 59 | /MediaBox[0 0 595.32 841.92] % value: an array object 60 | /Rotate 0 % value: a number object 61 | /Parent 12 0 R % value: an indirect object 62 | /Resources<< % value: a dictionary object 63 | /ExtGState<</R7 26 0 R>> 64 | /Font<< 65 | /R8 27 0 R/R10 21 0 R/R12 24 0 R/R14 15 0 R 66 | /R17 4 0 R/R20 30 0 R/R23 7 0 R /R27 20 0 R 67 | >> 68 | /ProcSet[/PDF/Text] % value: array of two name objects 69 | >> 70 | /Annots[55 0 R] % value: array, one entry (indirect object) 71 | >> 72 | 73 | *Contents*, *Type*, *MediaBox*, etc. are **keys**, *40 0 R*, *Page*, *[0 0 595.32 841.92]*, etc. are the respective **values**. The strings *"<<"* and *">>"* are used to enclose object definitions. 74 | 75 | This example also shows the syntax of **nested** dictionary values: *Resources* has an object as its value, which in turn is a dictionary with keys like *ExtGState* (with the value *<</R7 26 0 R>>*, which is another dictionary), etc. 76 | 77 | .. data:: page 78 | 79 | A PDF page is a :data:`dictionary` object which defines one page in a PDF, see :ref:`AdobeManual` p. 145. 80 | 81 | .. data:: pagetree 82 | 83 | "The pages of a document are accessed through a structure known as the page tree, which defines the ordering of pages in the document. The tree structure allows PDF consumer applications, using only limited memory, to quickly open a document containing thousands of pages. The tree contains nodes of two types: intermediate nodes, called page tree nodes, and leaf nodes, called page objects." 
(:ref:`AdobeManual` p. 143). 84 | 85 | While it is possible to list all page references in just one array, PDFs with many pages are often created using *balanced tree* structures ("page trees") for faster access to any single page. In relation to the total number of pages, this can reduce the average page access time by page number from a linear to some logarithmic order of magnitude. 86 | 87 | For fast page access, MuPDF can use its own array in memory -- independently from what may or may not be present in the document file. This array is indexed by page number and therefore much faster than even the access via a perfectly balanced page tree. 88 | 89 | .. data:: object 90 | 91 | Similar to Python, PDF supports the notion *object*, which can come in eight basic types: boolean values, integer and real numbers, strings, names, arrays, dictionaries, streams, and the null object (:ref:`AdobeManual` p. 51). Objects can be made identifiable by assigning a label. Such a labeled object is then called an *indirect* object. PyMuPDF supports retrieving definitions of indirect objects by their cross reference number via :meth:`Document.xrefObject`. 92 | 93 | .. data:: stream 94 | 95 | A PDF :data:`object` type which is a sequence of bytes, similar to a string. "However, a PDF application can read a stream incrementally, while a string must be read in its entirety. Furthermore, a stream can be of unlimited length, whereas a string is subject to an implementation limit. For this reason, objects with potentially large amounts of data, such as images and page descriptions, are represented as streams." "A stream consists of a :data:`dictionary` followed by zero or more bytes bracketed between the keywords *stream* and *endstream*":: 96 | 97 | nnn 0 obj 98 | << 99 | dictionary definition 100 | >> 101 | stream 102 | (zero or more bytes) 103 | endstream 104 | endobj 105 | 106 | See :ref:`AdobeManual` p. 60. PyMuPDF supports retrieving stream content via :meth:`Document.xrefStream`. 
Use :meth:`Document.isStream` to determine whether an object is of stream type. 107 | 108 | .. data:: unitvector 109 | 110 | A mathematical notion meaning a vector of norm ("length") 1 -- usually the Euclidean norm is implied. In PyMuPDF, this term is restricted to :ref:`Point` objects, see :attr:`Point.unit`. 111 | 112 | .. data:: xref 113 | 114 | Abbreviation for cross-reference number: this is a unique integer identification for objects in a PDF. There exists a cross-reference table (which may physically consist of several separate segments) in each PDF, which stores the relative position of each object for quick lookup. The cross-reference table is one entry longer than the number of existing objects: item zero is reserved and must not be used in any way. Many PyMuPDF classes have an *xref* attribute (which is zero for non-PDFs), and one can find out the total number of objects in a PDF via :meth:`Document.xrefLength` *- 1*. 115 | 116 | .. data:: resolution 117 | 118 | Images and :ref:`Pixmap` objects may contain resolution information provided as "dots per inch", dpi, in each direction (horizontal and vertical). When MuPDF reads an image from a file or from a PDF object, it will parse this information and put it in :attr:`Pixmap.xres`, :attr:`Pixmap.yres`, respectively. When it finds no meaningful information in the input (like non-positive values or values exceeding 4800), it will use "sane" defaults instead. The usual default value is 96, but it may also be 72 in some cases (e.g. 72 for JPX images). 119 | -------------------------------------------------------------------------------- /docs/identity.rst: -------------------------------------------------------------------------------- 1 | .. _Identity: 2 | 3 | ============ 4 | Identity 5 | ============ 6 | 7 | Identity is a :ref:`Matrix` that performs no action -- to be used whenever the syntax requires a matrix, but no actual transformation should take place. It has the form *fitz.Matrix(1, 0, 0, 1, 0, 0)*. 
8 | 9 | Identity is a constant, an "immutable" object. So, all of its matrix properties are read-only and its methods are disabled. 10 | 11 | If you need a **mutable** identity matrix as a starting point, use one of the following statements:: 12 | 13 | >>> m = fitz.Matrix(1, 0, 0, 1, 0, 0) # specify the values 14 | >>> m = fitz.Matrix(1, 1) # use scaling by factor 1 15 | >>> m = fitz.Matrix(0) # use rotation by zero degrees 16 | >>> m = fitz.Matrix(fitz.Identity) # make a copy of Identity 17 | -------------------------------------------------------------------------------- /docs/images/img-4up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-4up.png -------------------------------------------------------------------------------- /docs/images/img-7edges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-7edges.png -------------------------------------------------------------------------------- /docs/images/img-a-is--1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-a-is--1.png -------------------------------------------------------------------------------- /docs/images/img-adobe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-adobe.png -------------------------------------------------------------------------------- /docs/images/img-alpha-0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-alpha-0.png -------------------------------------------------------------------------------- /docs/images/img-alpha-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-alpha-1.png -------------------------------------------------------------------------------- /docs/images/img-annots.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-annots.jpg -------------------------------------------------------------------------------- /docs/images/img-attach-result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-attach-result.jpg -------------------------------------------------------------------------------- /docs/images/img-b-is-0.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-b-is-0.5.png -------------------------------------------------------------------------------- /docs/images/img-binsetupdirs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-binsetupdirs.png -------------------------------------------------------------------------------- /docs/images/img-breadth.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-breadth.png -------------------------------------------------------------------------------- /docs/images/img-c-is-0.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-c-is-0.5.png -------------------------------------------------------------------------------- /docs/images/img-cake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-cake.png -------------------------------------------------------------------------------- /docs/images/img-caret-annot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-caret-annot.jpg -------------------------------------------------------------------------------- /docs/images/img-circle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-circle.png -------------------------------------------------------------------------------- /docs/images/img-clip.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-clip.jpg -------------------------------------------------------------------------------- /docs/images/img-colordb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-colordb.png -------------------------------------------------------------------------------- /docs/images/img-copy-speed-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-copy-speed-1.png -------------------------------------------------------------------------------- /docs/images/img-copy-speed-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-copy-speed-2.png -------------------------------------------------------------------------------- /docs/images/img-d-is--1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-d-is--1.png -------------------------------------------------------------------------------- /docs/images/img-drawBezier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawBezier.png -------------------------------------------------------------------------------- /docs/images/img-drawCurve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawCurve.png -------------------------------------------------------------------------------- /docs/images/img-drawSector1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawSector1.png -------------------------------------------------------------------------------- /docs/images/img-drawSector2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawSector2.png -------------------------------------------------------------------------------- /docs/images/img-drawcircle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawcircle.jpg -------------------------------------------------------------------------------- /docs/images/img-drawquad.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawquad.jpg -------------------------------------------------------------------------------- /docs/images/img-e-is-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-e-is-100.png -------------------------------------------------------------------------------- /docs/images/img-embed-progress.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-embed-progress.jpg -------------------------------------------------------------------------------- /docs/images/img-encoding.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-encoding.jpg -------------------------------------------------------------------------------- /docs/images/img-encrypting.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-encrypting.jpg -------------------------------------------------------------------------------- /docs/images/img-even-odd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-even-odd.png -------------------------------------------------------------------------------- /docs/images/img-extract-imga.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-extract-imga.jpg -------------------------------------------------------------------------------- /docs/images/img-extract-imgb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-extract-imgb.jpg -------------------------------------------------------------------------------- /docs/images/img-f-is-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-f-is-100.png -------------------------------------------------------------------------------- /docs/images/img-filesizes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-filesizes.png -------------------------------------------------------------------------------- /docs/images/img-freetext.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-freetext.jpg -------------------------------------------------------------------------------- /docs/images/img-import-progress.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-import-progress.jpg -------------------------------------------------------------------------------- /docs/images/img-inkannot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-inkannot.jpg -------------------------------------------------------------------------------- /docs/images/img-inserttext.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-inserttext.jpg -------------------------------------------------------------------------------- /docs/images/img-markedpdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-markedpdf.jpg -------------------------------------------------------------------------------- /docs/images/img-markers.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-markers.jpg -------------------------------------------------------------------------------- /docs/images/img-matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-matrix.png -------------------------------------------------------------------------------- /docs/images/img-opacity.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-opacity.jpg -------------------------------------------------------------------------------- /docs/images/img-original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-original.png -------------------------------------------------------------------------------- /docs/images/img-pdfjoiner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-pdfjoiner.jpg -------------------------------------------------------------------------------- /docs/images/img-pdftext.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-pdftext.jpg -------------------------------------------------------------------------------- /docs/images/img-planish.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-planish.png -------------------------------------------------------------------------------- /docs/images/img-point-unit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-point-unit.jpg -------------------------------------------------------------------------------- /docs/images/img-polyline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-polyline.png -------------------------------------------------------------------------------- /docs/images/img-posterize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-posterize.png -------------------------------------------------------------------------------- /docs/images/img-pymupdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-pymupdf.jpg -------------------------------------------------------------------------------- /docs/images/img-quads.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-quads.jpg -------------------------------------------------------------------------------- /docs/images/img-redact.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-redact.jpg -------------------------------------------------------------------------------- /docs/images/img-render-speed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-render-speed.png -------------------------------------------------------------------------------- /docs/images/img-rendermode.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rendermode.jpg -------------------------------------------------------------------------------- /docs/images/img-rot+morph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rot+morph.png -------------------------------------------------------------------------------- /docs/images/img-rot-60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rot-60.png -------------------------------------------------------------------------------- /docs/images/img-rotate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rotate.png -------------------------------------------------------------------------------- /docs/images/img-showpdfpage.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-showpdfpage.jpg -------------------------------------------------------------------------------- /docs/images/img-sierpinski.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-sierpinski.png -------------------------------------------------------------------------------- /docs/images/img-squiggly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-squiggly.png -------------------------------------------------------------------------------- /docs/images/img-stampannot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-stampannot.jpg -------------------------------------------------------------------------------- /docs/images/img-stencil.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-stencil.jpg -------------------------------------------------------------------------------- /docs/images/img-symbols.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-symbols.jpg -------------------------------------------------------------------------------- /docs/images/img-target.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-target.png -------------------------------------------------------------------------------- /docs/images/img-textbox.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textbox.jpg -------------------------------------------------------------------------------- /docs/images/img-textboxtract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textboxtract.png -------------------------------------------------------------------------------- /docs/images/img-textmarker.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textmarker.jpg -------------------------------------------------------------------------------- /docs/images/img-textmethods.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textmethods.png -------------------------------------------------------------------------------- /docs/images/img-textpage-char.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textpage-char.png -------------------------------------------------------------------------------- /docs/images/img-textpage.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textpage.png -------------------------------------------------------------------------------- /docs/images/img-textperformance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textperformance.png -------------------------------------------------------------------------------- /docs/images/img-timings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-timings.png -------------------------------------------------------------------------------- /docs/images/img-writeimage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-writeimage.png -------------------------------------------------------------------------------- /docs/images/mupdf-icons.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/mupdf-icons.jpg -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | **PyMuPDF Documentation** 2 | ================================= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | intro 8 | installation 9 | tutorial 10 | faq 11 | module 12 | classes 13 | algebra 14 | lowlevel 15 | glossary 16 | vars 17 | colors 18 | app1 19 | app2 20 | app3 21 | app4 22 | changes 23 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============= 3 | PyMuPDF can be installed from sources as follows or from wheels, see :ref:`InstallBinary`. 4 | 5 | .. _InstallSource: 6 | 7 | Option 1: Install from Sources 8 | ------------------------------- 9 | This is a three-step process. 10 | 11 | Step 1: Download PyMuPDF 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | Download the sources from https://pypi.org/project/PyMuPDF/#files and decompress them. 14 | 15 | Step 2: Download and Generate MuPDF 16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 17 | Download *mupdf-x.xx.x-source.tar.gz* from `Mupdf `_ and unzip / decompress it. Make sure to download the (sub-) version for which PyMuPDF has stated its compatibility. 18 | 19 | .. note:: The latest MuPDF **development sources** are available on https://github.com/ArtifexSoftware/mupdf -- this is **not** what you want here. 20 | 21 | 22 | **Applying any Changes and Hot Fixes to MuPDF Sources** 23 | 24 | On occasion, vital hot fixes or functional enhancements must be applied to the MuPDF sources before MuPDF is generated. 25 | 26 | Any such files are contained in the *fitz* directory of the `PyMuPDF homepage `_ -- their names all start with an underscore *"_"*. Currently (v1.16.x), these files and their copy destinations are the following: 27 | 28 | * *_config.h* -- PyMuPDF's configuration to control the binary file size and the inclusion of MuPDF features, see next section. This file must be renamed and replace the MuPDF file */include/mupdf/fitz/config.h*. This file controls the size of the PyMuPDF binary by cutting away unneeded fonts from MuPDF.
29 | 30 | **Generate MuPDF** 31 | 32 | The MuPDF source includes generation procedures / makefiles for numerous platforms. For Windows platforms, Visual Studio solution and project definitions are provided. 33 | 34 | PyMuPDF's `homepage `_ contains additional details and hints. 35 | 36 | Step 3: Build / Setup PyMuPDF 37 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 38 | Adjust the setup.py script as necessary. E.g. make sure that: 39 | 40 | * the include directory is correctly set in sync with your directory structure 41 | * the object code libraries are correctly defined 42 | 43 | Now perform a *python setup.py install*. 44 | 45 | .. note:: You can also install from the sources of the Github repository. These **do not contain** the pre-generated files *fitz.py* or *fitz_wrap.c*, which instead are generated by the installation script *setup.py*. To use it, `SWIG `_ must be installed on your system. 46 | 47 | 48 | .. _InstallBinary: 49 | 50 | Option 2: Install from Binaries 51 | -------------------------------- 52 | You can install PyMuPDF from Python wheels. The wheels are *self-contained*, i.e. you will **not need any other software** nor download / install MuPDF to run PyMuPDF scripts. 53 | This installation option is available for all MS Windows and the most **popular 64-bit** Mac OSX and Linux platforms for Python versions 2.7 and 3.5 through 3.8. 54 | Windows binaries are provided for Python **32-bit and 64-bit** versions. 55 | 56 | **Overview of wheel names (PyMuPDF version is x.xx.xx):** 57 | 58 | .. literalinclude:: wheelnames.txt 59 | 60 | 61 | Older versions can be found in the releases directory of our home page https://github.com/pymupdf/PyMuPDF/releases. 62 | 63 | If you unexpectedly run into problems installing the wheel for your system, please make sure you have updated your PIP to the current version. 
64 | 65 | -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============== 3 | 4 | .. image:: images/img-pymupdf.jpg 5 | :align: center 6 | 7 | **PyMuPDF** is a Python binding for `MuPDF `_ -- "a lightweight PDF and XPS viewer". 8 | 9 | MuPDF can access files in PDF, XPS, OpenXPS, CBZ (comic book archive), FB2 and EPUB (e-book) formats. 10 | 11 | These are files with extensions *.pdf*, *.xps*, *.oxps*, *.cbz*, *.fb2* or *.epub* (so you can develop **e-book viewers in Python** ...). 12 | 13 | PyMuPDF provides access to many important functions of MuPDF from within a Python environment, and we are continuously seeking to expand this function set. 14 | 15 | MuPDF stands out among all similar products for its top rendering capability and unsurpassed processing speed. At the same time, its "light weight" makes it an excellent choice for platforms where resources are typically limited, like smartphones. 16 | 17 | Check this out yourself and compare the various free PDF-viewers. In terms of speed and rendering quality `SumatraPDF `_ ranks at the top (apart from MuPDF's own standalone viewer) -- since it has changed its library basis to MuPDF! 18 | 19 | While PyMuPDF has been available for several years for an earlier version of MuPDF (v1.2, called **fitz-python** then), it was not until mid May 2015 that its creator and a few co-workers decided to elevate it to support current releases of MuPDF. 20 | 21 | PyMuPDF runs and has been tested on Mac, Linux, Windows XP SP2 and up, Python 2.7 through Python 3.7 (note that Python supports Windows XP only up to v3.4), 32bit and 64bit versions. Other platforms should work too, as long as MuPDF and Python support them. 22 | 23 | PyMuPDF is hosted on `GitHub `_. We are also registered on `PyPI `_.
24 | 25 | For MS Windows and popular Python versions on Mac OSX and Linux we have created wheels. So installation should be convenient enough for hopefully most of our users: just issue 26 | 27 | *pip install --upgrade pymupdf* 28 | 29 | If your platform is not among those supported with a wheel, your installation consists of two separate steps: 30 | 31 | 1. Installation of MuPDF: this involves downloading the source from their website and then compiling it on your machine. Adjust *setup.py* to point to the right directories (next step), before you try generating PyMuPDF. 32 | 33 | 2. Installation of PyMuPDF: this step is normal Python procedure. Usually you will have to adapt the *setup.py* to point to correct *include* and *lib* directories of your generated MuPDF. 34 | 35 | For installation details check out the respective chapter. 36 | 37 | There exist several `demo `_ and `example `_ programs in the main repository, ranging from simple code snippets to full-featured utilities, like text extraction, PDF joiners and bookmark maintenance. 38 | 39 | Interesting **PDF manipulation and generation** functions have been added over time, including metadata and bookmark maintenance, document restructuring, annotation / link handling and document or page creation. 40 | 41 | Note on the Name *fitz* 42 | -------------------------- 43 | The standard Python import statement for this library is *import fitz*. This has a historical reason: 44 | 45 | The original rendering library for MuPDF was called *Libart*. 46 | 47 | *"After Artifex Software acquired the MuPDF project, the development focus shifted on writing a new modern graphics library called *Fitz*. Fitz was originally intended as an R&D project to replace the aging Ghostscript graphics library, but has instead become the rendering engine powering MuPDF."* (Quoted from `Wikipedia `_). 48 | 49 | License 50 | -------- 51 | PyMuPDF is distributed under GNU GPL V3 (or later, at your choice). 
52 | 53 | MuPDF is distributed under a separate license, the **GNU AFFERO GPL V3**. 54 | 55 | Both licenses apply when you use PyMuPDF. 56 | 57 | .. note:: Version 3 of the GNU AFFERO GPL is a lot less restrictive than its earlier versions used to be. It basically is an open source freeware license that obliges your software to also be open source and freeware. Consult `this website `_, if you want to create a commercial product with PyMuPDF. 58 | 59 | .. include:: version.rst 60 | -------------------------------------------------------------------------------- /docs/irect.rst: -------------------------------------------------------------------------------- 1 | .. _IRect: 2 | 3 | ========== 4 | IRect 5 | ========== 6 | 7 | IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corner coordinates are integers. IRect is used to specify an area of pixels, e.g. to receive image data during rendering. Otherwise, many similarities exist, e.g. considerations concerning emptiness and finiteness of rectangles also apply to this class.
8 | 9 | ============================== =========================================== 10 | **Attribute / Method** **Short Description** 11 | ============================== =========================================== 12 | :meth:`IRect.contains` checks containment of another object 13 | :meth:`IRect.getArea` calculate rectangle area 14 | :meth:`IRect.getRect` return a :ref:`Rect` with same coordinates 15 | :meth:`IRect.getRectArea` calculate rectangle area 16 | :meth:`IRect.intersect` common part with another rectangle 17 | :meth:`IRect.intersects` checks for non-empty intersection 18 | :meth:`IRect.morph` transform with a point and a matrix 19 | :meth:`IRect.norm` the Euclidean norm 20 | :meth:`IRect.normalize` makes a rectangle finite 21 | :attr:`IRect.bottom_left` bottom left point, synonym *bl* 22 | :attr:`IRect.bottom_right` bottom right point, synonym *br* 23 | :attr:`IRect.height` height of the rectangle 24 | :attr:`IRect.isEmpty` whether rectangle is empty 25 | :attr:`IRect.isInfinite` whether rectangle is infinite 26 | :attr:`IRect.rect` equals result of method *getRect()* 27 | :attr:`IRect.top_left` top left point, synonym *tl* 28 | :attr:`IRect.top_right` top right point, synonym *tr* 29 | :attr:`IRect.quad` :ref:`Quad` made from rectangle corners 30 | :attr:`IRect.width` width of the rectangle 31 | :attr:`IRect.x0` X-coordinate of the top left corner 32 | :attr:`IRect.x1` X-coordinate of the bottom right corner 33 | :attr:`IRect.y0` Y-coordinate of the top left corner 34 | :attr:`IRect.y1` Y-coordinate of the bottom right corner 35 | ============================== =========================================== 36 | 37 | **Class API** 38 | 39 | .. class:: IRect 40 | 41 | .. method:: __init__(self) 42 | 43 | .. method:: __init__(self, x0, y0, x1, y1) 44 | 45 | .. method:: __init__(self, irect) 46 | 47 | .. method:: __init__(self, sequence) 48 | 49 | Overloaded constructors. Also see examples below and those for the :ref:`Rect` class.
50 | 51 | If another irect is specified, a **new copy** will be made. 52 | 53 | If sequence is specified, it must be a Python sequence type of 4 numbers (see :ref:`SequenceTypes`). Non-integer numbers will be truncated, non-numeric entries will raise an exception. 54 | 55 | The other parameters mean integer coordinates. 56 | 57 | .. method:: getRect() 58 | 59 | A convenience function returning a :ref:`Rect` with the same coordinates. Also available as attribute *rect*. 60 | 61 | :rtype: :ref:`Rect` 62 | 63 | .. method:: getRectArea([unit]) 64 | 65 | .. method:: getArea([unit]) 66 | 67 | Calculates the area of the rectangle and, with no parameter, equals *abs(IRect)*. Like an empty rectangle, the area of an infinite rectangle is also zero. 68 | 69 | :arg str unit: Specify required unit: respective squares of "px" (pixels, default), "in" (inches), "cm" (centimeters), or "mm" (millimeters). 70 | 71 | :rtype: float 72 | 73 | .. method:: intersect(ir) 74 | 75 | The intersection (common rectangular area) of the current rectangle and *ir* is calculated and replaces the current rectangle. If either rectangle is empty, the result is also empty. If either rectangle is infinite, the other one is taken as the result -- and hence also infinite if both rectangles were infinite. 76 | 77 | :arg rect_like ir: Second rectangle. 78 | 79 | .. method:: contains(x) 80 | 81 | Checks whether *x* is contained in the rectangle. It may be :data:`rect_like`, :data:`point_like` or a number. If *x* is an empty rectangle, this is always true. Conversely, if the rectangle is empty this is always *False*, if *x* is not an empty rectangle and not a number. If *x* is a number, it will be checked to be one of the four components. *x in irect* and *irect.contains(x)* are equivalent. 82 | 83 | :arg x: the object to check. 84 | :type x: :ref:`IRect` or :ref:`Rect` or :ref:`Point` or int 85 | 86 | :rtype: bool 87 | 88 | .. 
method:: intersects(r) 89 | 90 | Checks whether the rectangle and the :data:`rect_like` "r" contain a common non-empty :ref:`IRect`. This will always be *False* if either is infinite or empty. 91 | 92 | :arg rect_like r: the rectangle to check. 93 | 94 | :rtype: bool 95 | 96 | .. method:: morph(fixpoint, matrix) 97 | 98 | *(New in version 1.17.0)* 99 | 100 | Return a new quad after applying a matrix to the rectangle using a fixed point. 101 | 102 | :arg point_like fixpoint: the fixed point. 103 | :arg matrix_like matrix: the matrix. 104 | :returns: a new :ref:`Quad`. This is a wrapper of the same-named quad method. 105 | 106 | .. method:: norm() 107 | 108 | *(New in version 1.16.0)* 109 | 110 | Return the Euclidean norm of the rectangle treated as a vector of four numbers. 111 | 112 | .. method:: normalize() 113 | 114 | Make the rectangle finite. This is done by shuffling rectangle corners. After this, the bottom right corner will indeed be south-eastern to the top left one. See :ref:`Rect` for more details. 115 | 116 | .. attribute:: top_left 117 | 118 | .. attribute:: tl 119 | 120 | Equals *Point(x0, y0)*. 121 | 122 | :type: :ref:`Point` 123 | 124 | .. attribute:: top_right 125 | 126 | .. attribute:: tr 127 | 128 | Equals *Point(x1, y0)*. 129 | 130 | :type: :ref:`Point` 131 | 132 | .. attribute:: bottom_left 133 | 134 | .. attribute:: bl 135 | 136 | Equals *Point(x0, y1)*. 137 | 138 | :type: :ref:`Point` 139 | 140 | .. attribute:: bottom_right 141 | 142 | .. attribute:: br 143 | 144 | Equals *Point(x1, y1)*. 145 | 146 | :type: :ref:`Point` 147 | 148 | .. attribute:: quad 149 | 150 | The quadrilateral *Quad(irect.tl, irect.tr, irect.bl, irect.br)*. 151 | 152 | :type: :ref:`Quad` 153 | 154 | .. attribute:: width 155 | 156 | Contains the width of the bounding box. Equals *abs(x1 - x0)*. 157 | 158 | :type: int 159 | 160 | .. attribute:: height 161 | 162 | Contains the height of the bounding box. Equals *abs(y1 - y0)*. 163 | 164 | :type: int 165 | 166 | ..
attribute:: x0 167 | 168 | X-coordinate of the left corners. 169 | 170 | :type: int 171 | 172 | .. attribute:: y0 173 | 174 | Y-coordinate of the top corners. 175 | 176 | :type: int 177 | 178 | .. attribute:: x1 179 | 180 | X-coordinate of the right corners. 181 | 182 | :type: int 183 | 184 | .. attribute:: y1 185 | 186 | Y-coordinate of the bottom corners. 187 | 188 | :type: int 189 | 190 | .. attribute:: isInfinite 191 | 192 | *True* if rectangle is infinite, *False* otherwise. 193 | 194 | :type: bool 195 | 196 | .. attribute:: isEmpty 197 | 198 | *True* if rectangle is empty, *False* otherwise. 199 | 200 | :type: bool 201 | 202 | 203 | .. note:: 204 | 205 | * This class adheres to the Python sequence protocol, so components can be accessed via their index, too. Also refer to :ref:`SequenceTypes`. 206 | * Rectangles can be used with arithmetic operators -- see chapter :ref:`Algebra`. 207 | 208 | -------------------------------------------------------------------------------- /docs/kerning.style: -------------------------------------------------------------------------------- 1 | fontsAlias: 2 | stdBold: DejaVu Sans-Bold 3 | stdBoldItalic: DejaVu Sans-BoldOblique 4 | stdFont: DejaVu Sans 5 | stdItalic: DejaVu Sans-Oblique 6 | stdMono: Courier New 7 | stdMonoBold: DejaVu Sans Mono-Bold 8 | stdMonoBoldItalic: DejaVu Sans Mono-BoldOblique 9 | stdMonoItalic: DejaVu Sans Mono-Oblique 10 | stdSans: DejaVu Sans 11 | stdSansBold: DejaVu Sans-Bold 12 | stdSansBoldItalic: DejaVu Sans-BoldOblique 13 | stdSansItalic: DejaVu Sans-Oblique 14 | stdSerif: DejaVu Serif 15 | 16 | styles: base: kerning: true 17 | 18 | styles: bodytext: alignment: left 19 | -------------------------------------------------------------------------------- /docs/link.rst: -------------------------------------------------------------------------------- 1 | .. _Link: 2 | 3 | ================ 4 | Link 5 | ================ 6 | Represents a pointer to somewhere (this document, other documents, the internet). 
Links exist per document page, and they are forward-chained to each other, starting from an initial link which is accessible by the :attr:`Page.firstLink` property. 7 | 8 | There is a parent-child relationship between a link and its page. If the page object becomes unusable (closed document, any document structure change, etc.), then so do all of its existing link objects -- an exception is raised saying that the object is "orphaned", whenever a link property or method is accessed. 9 | 10 | ========================= ============================================ 11 | **Attribute** **Short Description** 12 | ========================= ============================================ 13 | :meth:`Link.setBorder` modify border properties 14 | :meth:`Link.setColors` modify color properties 15 | :attr:`Link.border` border characteristics 16 | :attr:`Link.colors` border line color 17 | :attr:`Link.dest` points to link destination details 18 | :attr:`Link.isExternal` external link destination? 19 | :attr:`Link.next` points to next link 20 | :attr:`Link.rect` clickable area in untransformed coordinates. 21 | :attr:`Link.uri` link destination 22 | :attr:`Link.xref` :data:`xref` number of the entry 23 | ========================= ============================================ 24 | 25 | **Class API** 26 | 27 | .. class:: Link 28 | 29 | .. method:: setBorder(border=None, width=0, style=None, dashes=None) 30 | 31 | PDF only: Change border width and dashing properties. 32 | 33 | *(Changed in version 1.16.9)* Allow specification without using a dictionary. The direct parameters are used if *border* is not a dictionary. 34 | 35 | :arg dict border: a dictionary as returned by the :attr:`border` property, with keys *"width"* (*float*), *"style"* (*str*) and *"dashes"* (*sequence*). Omitted keys will leave the resp. property unchanged. To e.g. remove dashing use: *"dashes": []*. If dashes is not an empty sequence, "style" will automatically be set to "D" (dashed). 
36 | 37 | :arg float width: see above. 38 | :arg str style: see above. 39 | :arg sequence dashes: see above. 40 | 41 | .. method:: setColors(colors=None, stroke=None, fill=None) 42 | 43 | Changes the "stroke" and "fill" colors. 44 | 45 | *(Changed in version 1.16.9)* Allow colors to be directly set. These parameters are used if *colors* is not a dictionary. 46 | 47 | :arg dict colors: a dictionary containing color specifications. For accepted dictionary keys and values see below. The most practical way should be to first make a copy of the *colors* property and then modify this dictionary as required. 48 | :arg sequence stroke: see above. 49 | :arg sequence fill: see above. 50 | 51 | 52 | .. attribute:: colors 53 | 54 | Meaningful for PDF only: A dictionary of two lists of floats in range *0 <= float <= 1* specifying the *stroke* and the interior (*fill*) colors. If not a PDF, *None* is returned. The stroke color is used for borders and everything that is actively painted or written ("stroked"). The lengths of these lists implicitly determine the colorspaces used: 1 = GRAY, 3 = RGB, 4 = CMYK. So *[1.0, 0.0, 0.0]* stands for RGB color red. Both lists can be *[]* if no color is specified. The value of each float *f* is mapped to the integer value *i* in range 0 to 255 via the computation *f = i / 255*. 55 | 56 | :rtype: dict 57 | 58 | .. attribute:: border 59 | 60 | Meaningful for PDF only: A dictionary containing border characteristics. It will be *None* for non-PDFs and an empty dictionary if no border information exists. The following keys can occur: 61 | 62 | * *width* -- a float indicating the border thickness in points. The value is -1.0 if no width is specified. 63 | 64 | * *dashes* -- a sequence of integers specifying a line dash pattern. *[]* means no dashes, *[n]* means equal on-off lengths of *n* points, longer lists will be interpreted as specifying alternating on-off length values. See the :ref:`AdobeManual` page 217 for more details. 
65 | 66 | * *style* -- 1-byte border style: *S* (Solid) = solid rectangle surrounding the annotation, *D* (Dashed) = dashed rectangle surrounding the link, the dash pattern is specified by the *dashes* entry, *B* (Beveled) = a simulated embossed rectangle that appears to be raised above the surface of the page, *I* (Inset) = a simulated engraved rectangle that appears to be recessed below the surface of the page, *U* (Underline) = a single line along the bottom of the annotation rectangle. 67 | 68 | :rtype: dict 69 | 70 | .. attribute:: rect 71 | 72 | The area that can be clicked in untransformed coordinates. 73 | 74 | :type: :ref:`Rect` 75 | 76 | .. attribute:: isExternal 77 | 78 | A bool specifying whether the link target is outside of the current document. 79 | 80 | :type: bool 81 | 82 | .. attribute:: uri 83 | 84 | A string specifying the link target. The meaning of this property should be evaluated in conjunction with property *isExternal*. The value may be *None*, in which case *isExternal == False*. If *uri* starts with *file://*, *mailto:*, or an internet resource name, *isExternal* is *True*. In all other cases *isExternal == False* and *uri* points to an internal location. In case of PDF documents, this should either be *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named location. The format varies for other document types, e.g. *uri = '../FixedDoc.fdoc#PG_2_LNK_1'* for page number 2 (1-based) in an XPS document. 85 | 86 | :type: str 87 | 88 | .. attribute:: xref 89 | 90 | An integer specifying the PDF :data:`xref`. Zero if not a PDF. 91 | 92 | :type: int 93 | 94 | .. attribute:: next 95 | 96 | The next link or *None*. 97 | 98 | :type: *Link* 99 | 100 | .. attribute:: dest 101 | 102 | The link destination details object. 
103 | 104 | :type: :ref:`linkDest` 105 | -------------------------------------------------------------------------------- /docs/linkdest.rst: -------------------------------------------------------------------------------- 1 | .. _linkDest: 2 | 3 | ================ 4 | linkDest 5 | ================ 6 | Class representing the `dest` property of an outline entry or a link. Describes the destination to which such entries point. 7 | 8 | =========================== ==================================== 9 | **Attribute** **Short Description** 10 | =========================== ==================================== 11 | :attr:`linkDest.dest` destination 12 | :attr:`linkDest.fileSpec` file specification (path, filename) 13 | :attr:`linkDest.flags` descriptive flags 14 | :attr:`linkDest.isMap` is this a MAP? 15 | :attr:`linkDest.isUri` is this a URI? 16 | :attr:`linkDest.kind` kind of destination 17 | :attr:`linkDest.lt` top left coordinates 18 | :attr:`linkDest.named` name if named destination 19 | :attr:`linkDest.newWindow` name of new window 20 | :attr:`linkDest.page` page number 21 | :attr:`linkDest.rb` bottom right coordinates 22 | :attr:`linkDest.uri` URI 23 | =========================== ==================================== 24 | 25 | **Class API** 26 | 27 | .. class:: linkDest 28 | 29 | .. attribute:: dest 30 | 31 | Target destination name if :attr:`linkDest.kind` is :data:`LINK_GOTOR` and :attr:`linkDest.page` is *-1*. 32 | 33 | :type: str 34 | 35 | .. attribute:: fileSpec 36 | 37 | Contains the filename and path this link points to, if :attr:`linkDest.kind` is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`. 38 | 39 | :type: str 40 | 41 | .. attribute:: flags 42 | 43 | A bitfield describing the validity and meaning of the different aspects of the destination. As far as possible, link destinations are constructed such that e.g. :attr:`linkDest.lt` and :attr:`linkDest.rb` can be treated as defining a bounding box. 
But the flags indicate which of the values were actually specified, see :ref:`linkDest Flags`. 44 | 45 | :type: int 46 | 47 | .. attribute:: isMap 48 | 49 | This flag specifies whether to track the mouse position when the URI is resolved. Default value: False. 50 | 51 | :type: bool 52 | 53 | .. attribute:: isUri 54 | 55 | Specifies whether this destination is an internet resource (as opposed to e.g. a local file specification in URI format). 56 | 57 | :type: bool 58 | 59 | .. attribute:: kind 60 | 61 | Indicates the type of this destination, like a place in this document, a URI, a file launch, an action or a place in another file. Look at :ref:`linkDest Kinds` to see the names and numerical values. 62 | 63 | :type: int 64 | 65 | .. attribute:: lt 66 | 67 | The top left :ref:`Point` of the destination. 68 | 69 | :type: :ref:`Point` 70 | 71 | .. attribute:: named 72 | 73 | This destination refers to some named action to perform (e.g. a javascript, see :ref:`AdobeManual`). Standard actions provided are *NextPage*, *PrevPage*, *FirstPage*, and *LastPage*. 74 | 75 | :type: str 76 | 77 | .. attribute:: newWindow 78 | 79 | If true, the destination should be launched in a new window. 80 | 81 | :type: bool 82 | 83 | .. attribute:: page 84 | 85 | The page number (in this or the target document) this destination points to. Only set if :attr:`linkDest.kind` is :data:`LINK_GOTOR` or :data:`LINK_GOTO`. May be *-1* if :attr:`linkDest.kind` is :data:`LINK_GOTOR`. In this case :attr:`linkDest.dest` contains the **name** of a destination in the target document. 86 | 87 | :type: int 88 | 89 | .. attribute:: rb 90 | 91 | The bottom right :ref:`Point` of this destination. 92 | 93 | :type: :ref:`Point` 94 | 95 | .. attribute:: uri 96 | 97 | The name of the URI this destination points to. 
98 | 99 | :type: str 100 | -------------------------------------------------------------------------------- /docs/lowlevel.rst: -------------------------------------------------------------------------------- 1 | ================================= 2 | Low Level Functions and Classes 3 | ================================= 4 | Contains a number of functions and classes for the experienced user. To be used for special needs or performance requirements. 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | functions 10 | device 11 | coop_low 12 | -------------------------------------------------------------------------------- /docs/make-bold.py: -------------------------------------------------------------------------------- 1 | """ 2 | Problem: Since MuPDF v1.16 a 'Freetext' annotation font is restricted to the 3 | "normal" versions (no bold, no italics) of Times-Roman, Helvetica, Courier. 4 | It is impossible to use PyMuPDF to modify this. 5 | 6 | Solution: Using Adobe's JavaScript API, it is possible to manipulate properties 7 | of Freetext annotations. Check out these references: 8 | https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/js_api_reference.pdf, 9 | or https://www.adobe.com/devnet/acrobat/documentation.html. 10 | 11 | Function 'this.getAnnots()' will return all annotations as an array. We loop 12 | over this array to set the properties of the text through the 'richContents' 13 | attribute. 14 | There is no explicit property to set text to bold, but it is possible to set 15 | fontWeight=800 (400 is the normal size) of richContents. 16 | Other attributes, like color, italics, etc. can also be set via richContents. 17 | 18 | If we have 'FreeText' annotations created with PyMuPDF, we can make use of this 19 | JavaScript feature to modify the font - thus circumventing the above restriction. 20 | 21 | Use PyMuPDF v1.16.12 to create a push button that executes a Javascript 22 | containing the desired code. This is what this program does. 
"""
Problem: Since MuPDF v1.16 a 'Freetext' annotation font is restricted to the
"normal" versions (no bold, no italics) of Times-Roman, Helvetica, Courier.
It is impossible to use PyMuPDF to modify this.

Solution: Using Adobe's JavaScript API, it is possible to manipulate properties
of Freetext annotations. Check out these references:
https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/js_api_reference.pdf,
or https://www.adobe.com/devnet/acrobat/documentation.html.

Function 'this.getAnnots()' will return all annotations as an array. We loop
over this array to set the properties of the text through the 'richContents'
attribute.
There is no explicit property to set text to bold, but it is possible to set
fontWeight=800 (400 is the normal size) of richContents.
Other attributes, like color, italics, etc. can also be set via richContents.

If we have 'FreeText' annotations created with PyMuPDF, we can make use of this
JavaScript feature to modify the font - thus circumventing the above restriction.

Use PyMuPDF v1.16.12 to create a push button that executes a Javascript
containing the desired code. This is what this program does.
Then open the resulting file with Adobe reader (!).
After clicking on the button, all Freetext annotations will be bold, and the
file can be saved.
If desired, the button can be removed again, using free tools like PyMuPDF or
PDF XChange editor.

Note / Caution:
---------------
The JavaScript will **only** work if the file is opened with Adobe Acrobat reader!
When using other PDF viewers, the reaction is unforeseeable.
"""
import os
import sys

import fitz

# this JavaScript will execute when the button is clicked:
jscript = """
var annt = this.getAnnots();
annt.forEach(function (item, index) {
    try {
        var span = item.richContents;
        span.forEach(function (it, dx) {
            it.fontWeight = 800;
        })
        item.richContents = span;
    } catch (err) {}
});
app.alert('Done');
"""

# Fail with a readable message instead of an IndexError when no file is given.
if len(sys.argv) < 2:
    sys.exit("Usage: %s input.pdf" % os.path.basename(sys.argv[0]))

i_fn = sys.argv[1]  # input file name

# Prefix only the file name, not the whole path: "dir/x.pdf" must become
# "dir/bold-x.pdf" rather than the non-existent "bold-dir/x.pdf".
# For a bare file name this yields "bold-" + i_fn, exactly as before.
i_dir, i_base = os.path.split(i_fn)
o_fn = os.path.join(i_dir, "bold-" + i_base)  # output filename

doc = fitz.open(i_fn)  # open input
page = doc[0]  # get desired page

# ------------------------------------------------
# make a push button for invoking the JavaScript
# ------------------------------------------------

widget = fitz.Widget()  # create widget

# make it a 'PushButton'
widget.field_type = fitz.PDF_WIDGET_TYPE_BUTTON
widget.field_flags = fitz.PDF_BTN_FIELD_IS_PUSHBUTTON

widget.rect = fitz.Rect(5, 5, 20, 20)  # button position

widget.script = jscript  # fill in JavaScript source text
widget.field_name = "Make bold"  # arbitrary name
widget.field_value = "Off"  # arbitrary value
widget.fill_color = (0, 0, 1)  # make button visible

annot = page.addWidget(widget)  # add the widget to the page
doc.save(o_fn)  # output the file
"""
Created on 2019-05-01

@author: yinkaisheng@live.com
@copyright: 2019 yinkaisheng@live.com
@license: GNU GPL 3.0+

Demonstrate the use of multiprocessing with PyMuPDF
-----------------------------------------------------
This example shows some more advanced use of multiprocessing.
The main process shows a Qt GUI and establishes a 2-way communication with
another process, which accesses a supported document.
"""
import os
import sys
import time
import multiprocessing as mp
import queue
import fitz
from PyQt5 import QtCore, QtGui, QtWidgets


class DocForm(QtWidgets.QWidget):
    """Main window: shows document pages rendered by a worker process.

    Communication with the worker uses two queues:

    * ``queNum`` (GUI -> worker): page numbers to render; ``-1`` asks the
      worker to exit.
    * ``queDoc`` (worker -> GUI): first the page count (an ``int``), then
      one tuple of pixmap data per rendered page.
    """

    def __init__(self):
        super().__init__()
        self.process = None  # worker process, created in openDoc()
        self.queNum = mp.Queue()
        self.queDoc = mp.Queue()
        self.pageCount = 0
        self.curPageNum = 0
        self.lastDir = ""  # directory of the most recently opened file
        # timerSend: requests the next page while "playing"
        self.timerSend = QtCore.QTimer(self)
        self.timerSend.timeout.connect(self.onTimerSendPageNum)
        # timerGet: polls the worker's result queue
        self.timerGet = QtCore.QTimer(self)
        self.timerGet.timeout.connect(self.onTimerGetPage)
        # timerWaiting: updates the "Loading ..." text until the page count arrives
        self.timerWaiting = QtCore.QTimer(self)
        self.timerWaiting.timeout.connect(self.onTimerWaiting)
        self.initUI()

    def initUI(self):
        """Build the button row, the page counter label and the image label."""
        vbox = QtWidgets.QVBoxLayout()
        self.setLayout(vbox)

        hbox = QtWidgets.QHBoxLayout()
        self.btnOpen = QtWidgets.QPushButton("OpenDocument", self)
        self.btnOpen.clicked.connect(self.openDoc)
        hbox.addWidget(self.btnOpen)

        self.btnPlay = QtWidgets.QPushButton("PlayDocument", self)
        self.btnPlay.clicked.connect(self.playDoc)
        hbox.addWidget(self.btnPlay)

        self.btnStop = QtWidgets.QPushButton("Stop", self)
        self.btnStop.clicked.connect(self.stopPlay)
        hbox.addWidget(self.btnStop)

        self.label = QtWidgets.QLabel("0/0", self)
        self.label.setFont(QtGui.QFont("Verdana", 20))
        hbox.addWidget(self.label)

        vbox.addLayout(hbox)

        self.labelImg = QtWidgets.QLabel("Document", self)
        sizePolicy = QtWidgets.QSizePolicy(
            QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Expanding
        )
        self.labelImg.setSizePolicy(sizePolicy)
        vbox.addWidget(self.labelImg)

        self.setGeometry(100, 100, 400, 600)
        self.setWindowTitle("PyMuPDF Document Player")
        self.show()

    def openDoc(self):
        """Let the user pick a file and start a worker process for it."""
        path, _ = QtWidgets.QFileDialog.getOpenFileName(
            self,
            "Open Document",
            self.lastDir,
            "All Supported Files (*.pdf;*.epub;*.xps;*.oxps;*.cbz;*.fb2);;PDF Files (*.pdf);;EPUB Files (*.epub);;XPS Files (*.xps);;OpenXPS Files (*.oxps);;CBZ Files (*.cbz);;FB2 Files (*.fb2)",
            options=QtWidgets.QFileDialog.Options(),
        )
        if path:
            self.lastDir, self.file = os.path.split(path)
            if self.process:
                self.queNum.put(-1)  # use -1 to notify the process to exit
            self.timerSend.stop()
            self.curPageNum = 0
            self.pageCount = 0
            self.process = mp.Process(
                target=openDocInProcess, args=(path, self.queNum, self.queDoc)
            )
            self.process.start()
            self.timerGet.start(40)
            self.label.setText("0/0")
            self.queNum.put(0)  # request the first page right away
            self.startTime = time.perf_counter()
            self.timerWaiting.start(40)

    def playDoc(self):
        """Start requesting the next page every 500 ms."""
        self.timerSend.start(500)

    def stopPlay(self):
        """Stop the automatic page advance."""
        self.timerSend.stop()

    def onTimerSendPageNum(self):
        """Request the page after the current one, or stop at the last page."""
        if self.curPageNum < self.pageCount - 1:
            self.queNum.put(self.curPageNum + 1)
        else:
            self.timerSend.stop()

    def onTimerGetPage(self):
        """Poll the result queue without blocking and display what arrived."""
        try:
            ret = self.queDoc.get(False)
            if isinstance(ret, int):
                # first message from the worker: the page count
                self.timerWaiting.stop()
                self.pageCount = ret
                self.label.setText("{}/{}".format(self.curPageNum + 1, self.pageCount))
            else:  # tuple, pixmap info
                num, samples, width, height, stride, alpha = ret
                self.curPageNum = num
                self.label.setText("{}/{}".format(self.curPageNum + 1, self.pageCount))
                fmt = (
                    QtGui.QImage.Format_RGBA8888
                    if alpha
                    else QtGui.QImage.Format_RGB888
                )
                qimg = QtGui.QImage(samples, width, height, stride, fmt)
                self.labelImg.setPixmap(QtGui.QPixmap.fromImage(qimg))
        except queue.Empty:
            # nothing available yet - try again on the next timer tick
            pass

    def onTimerWaiting(self):
        """Show the elapsed loading time while waiting for the worker."""
        self.labelImg.setText(
            'Loading "{}", {:.2f}s'.format(
                self.file, time.perf_counter() - self.startTime
            )
        )

    def closeEvent(self, event):
        """Tell the worker to exit when the window is closed."""
        self.queNum.put(-1)
        event.accept()


def openDocInProcess(path, queNum, quePageInfo):
    """Worker process: open the document and render pages on request.

    Puts the page count on *quePageInfo* first. Then, for every page number
    read from *queNum*, puts a tuple
    ``(num, samples, width, height, stride, alpha)`` on *quePageInfo*.
    A negative page number ends the loop.

    Args:
        path: file name of the document to open.
        queNum: queue delivering the page numbers to render.
        quePageInfo: queue receiving the page count and the pixmap tuples.
    """
    doc = fitz.open(path)
    quePageInfo.put(doc.pageCount)
    while True:
        num = queNum.get()  # blocks until the GUI requests something
        if num < 0:
            break
        page = doc.loadPage(num)
        pix = page.getPixmap()
        quePageInfo.put(
            (num, pix.samples, pix.width, pix.height, pix.stride, pix.alpha)
        )
    doc.close()
    print("process exit")


if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    form = DocForm()
    sys.exit(app.exec_())
11 | twice as fast) or better can be expected. 12 | """ 13 | from __future__ import print_function, division 14 | import sys 15 | import os 16 | import time 17 | from multiprocessing import Pool, cpu_count 18 | import fitz 19 | 20 | # choose a version specific timer function (bytes == str in Python 2) 21 | mytime = time.clock if str is bytes else time.perf_counter 22 | 23 | 24 | def render_page(vector): 25 | """ Render a page range of a document. 26 | 27 | Notes: 28 | The PyMuPDF document cannot be part of the argument, because that 29 | cannot be pickled. So we are being passed in just its filename. 30 | This is no performance issue, because we are a separate process and 31 | need to open the document anyway. 32 | Any page-specific function can be processed here - rendering is just 33 | an example - text extraction might be another. 34 | The work must however be self-contained: no inter-process communication 35 | or synchronization is possible with this design. 36 | Care must also be taken with which parameters are contained in the 37 | argument, because it will be passed in via pickling by the Pool class. 38 | So any large objects will increase the overall duration. 39 | Args: 40 | vector: a list containing required parameters. 41 | """ 42 | # recreate the arguments 43 | idx = vector[0] # this is the segment number we have to process 44 | cpu = vector[1] # number of CPUs 45 | filename = vector[2] # document filename 46 | mat = vector[3] # the matrix for rendering 47 | doc = fitz.open(filename) # open the document 48 | num_pages = len(doc) # get number of pages 49 | 50 | # pages per segment: make sure that cpu * seg_size >= num_pages! 
51 | seg_size = int(num_pages / cpu + 1) 52 | seg_from = idx * seg_size # our first page number 53 | seg_to = min(seg_from + seg_size, num_pages) # last page number 54 | 55 | for i in range(seg_from, seg_to): # work through our page segment 56 | page = doc[i] 57 | # page.getText("rawdict") # use any page-related type of work here, eg 58 | pix = page.getPixmap(alpha=False, matrix=mat) 59 | # store away the result somewhere ... 60 | # pix.writePNG("p-%i.png" % i) 61 | print("Processed page numbers %i through %i" % (seg_from, seg_to - 1)) 62 | 63 | 64 | if __name__ == "__main__": 65 | t0 = mytime() # start a timer 66 | filename = sys.argv[1] 67 | mat = fitz.Matrix(0.2, 0.2) # the rendering matrix: scale down to 20% 68 | cpu = cpu_count() 69 | 70 | # make vectors of arguments for the processes 71 | vectors = [(i, cpu, filename, mat) for i in range(cpu)] 72 | print("Starting %i processes for '%s'." % (cpu, filename)) 73 | 74 | pool = Pool() # make pool of 'cpu_count()' processes 75 | pool.map(render_page, vectors, 1) # start processes passing each a vector 76 | 77 | t1 = mytime() # stop the timer 78 | print("Total time %g seconds" % round(t1 - t0, 2)) 79 | 80 | -------------------------------------------------------------------------------- /docs/new-annots.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------------------------------------- 4 | Demo script showing how annotations can be added to a PDF using PyMuPDF. 5 | 6 | It contains the following annotation types: 7 | Caret, Text, FreeText, text markers (underline, strike-out, highlight, 8 | squiggle), Circle, Square, Line, PolyLine, Polygon, FileAttachment, Stamp 9 | and Redaction. 10 | There is some effort to vary appearances by adding colors, line ends, 11 | opacity, rotation, dashed lines, etc. 
12 | 13 | Dependencies 14 | ------------ 15 | PyMuPDF v1.17.0 16 | ------------------------------------------------------------------------------- 17 | """ 18 | from __future__ import print_function 19 | 20 | import gc 21 | import os 22 | import sys 23 | 24 | import fitz 25 | 26 | print(fitz.__doc__) 27 | if fitz.VersionBind.split(".") < ["1", "17", "0"]: 28 | sys.exit("PyMuPDF v1.17.0+ is needed.") 29 | 30 | gc.set_debug(gc.DEBUG_UNCOLLECTABLE) 31 | 32 | highlight = "this text is highlighted" 33 | underline = "this text is underlined" 34 | strikeout = "this text is striked out" 35 | squiggled = "this text is zigzag-underlined" 36 | red = (1, 0, 0) 37 | blue = (0, 0, 1) 38 | gold = (1, 1, 0) 39 | green = (0, 1, 0) 40 | 41 | displ = fitz.Rect(0, 50, 0, 50) 42 | r = fitz.Rect(72, 72, 220, 100) 43 | t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!" 44 | 45 | 46 | def print_descr(annot): 47 | """Print a short description to the right of each annot rect.""" 48 | annot.parent.insertText( 49 | annot.rect.br + (10, -5), "%s annotation" % annot.type[1], color=red 50 | ) 51 | 52 | 53 | doc = fitz.open() 54 | page = doc.newPage() 55 | 56 | page.setRotation(0) 57 | 58 | annot = page.addCaretAnnot(r.tl) 59 | print_descr(annot) 60 | 61 | r = r + displ 62 | annot = page.addFreetextAnnot( 63 | r, 64 | t1, 65 | fontsize=10, 66 | rotate=90, 67 | text_color=blue, 68 | fill_color=gold, 69 | align=fitz.TEXT_ALIGN_CENTER, 70 | ) 71 | annot.setBorder(width=0.3, dashes=[2]) 72 | annot.update(text_color=blue, fill_color=gold) 73 | 74 | print_descr(annot) 75 | r = annot.rect + displ 76 | 77 | annot = page.addTextAnnot(r.tl, t1) 78 | print_descr(annot) 79 | 80 | # Adding text marker annotations: 81 | # first insert a unique text, then search for it, then mark it 82 | pos = annot.rect.tl + displ.tl 83 | page.insertText( 84 | pos, # insertion point 85 | highlight, # inserted text 86 | morph=(pos, fitz.Matrix(-5)), # rotate around insertion point 87 | ) 88 | rl = 
page.searchFor(highlight, quads=True) # need a quad b/o tilted text 89 | annot = page.addHighlightAnnot(rl[0]) 90 | print_descr(annot) 91 | pos = annot.rect.bl # next insertion point 92 | 93 | page.insertText(pos, underline, morph=(pos, fitz.Matrix(-10))) 94 | rl = page.searchFor(underline, quads=True) 95 | annot = page.addUnderlineAnnot(rl[0]) 96 | print_descr(annot) 97 | pos = annot.rect.bl 98 | 99 | page.insertText(pos, strikeout, morph=(pos, fitz.Matrix(-15))) 100 | rl = page.searchFor(strikeout, quads=True) 101 | annot = page.addStrikeoutAnnot(rl[0]) 102 | print_descr(annot) 103 | pos = annot.rect.bl 104 | 105 | page.insertText(pos, squiggled, morph=(pos, fitz.Matrix(-20))) 106 | rl = page.searchFor(squiggled, quads=True) 107 | annot = page.addSquigglyAnnot(rl[0]) 108 | print_descr(annot) 109 | pos = annot.rect.bl 110 | 111 | r = fitz.Rect(pos, pos.x + 75, pos.y + 35) + (0, 20, 0, 20) 112 | annot = page.addPolylineAnnot([r.bl, r.tr, r.br, r.tl]) # 'Polyline' 113 | annot.setBorder(width=0.3, dashes=[2]) 114 | annot.setColors(stroke=blue, fill=green) 115 | annot.setLineEnds(fitz.PDF_ANNOT_LE_CLOSED_ARROW, fitz.PDF_ANNOT_LE_R_CLOSED_ARROW) 116 | annot.update(fill_color=(1, 1, 0)) 117 | print_descr(annot) 118 | 119 | r += displ 120 | annot = page.addPolygonAnnot([r.bl, r.tr, r.br, r.tl]) # 'Polygon' 121 | annot.setBorder(width=0.3, dashes=[2]) 122 | annot.setColors(stroke=blue, fill=gold) 123 | annot.setLineEnds(fitz.PDF_ANNOT_LE_DIAMOND, fitz.PDF_ANNOT_LE_CIRCLE) 124 | annot.update() 125 | print_descr(annot) 126 | 127 | r += displ 128 | annot = page.addLineAnnot(r.tr, r.bl) # 'Line' 129 | annot.setBorder(width=0.3, dashes=[2]) 130 | annot.setColors(stroke=blue, fill=gold) 131 | annot.setLineEnds(fitz.PDF_ANNOT_LE_DIAMOND, fitz.PDF_ANNOT_LE_CIRCLE) 132 | annot.update() 133 | print_descr(annot) 134 | 135 | r += displ 136 | annot = page.addRectAnnot(r) # 'Square' 137 | annot.setBorder(width=1, dashes=[1, 2]) 138 | annot.setColors(stroke=blue, fill=gold) 139 | 
annot.update(opacity=0.5) 140 | print_descr(annot) 141 | 142 | r += displ 143 | annot = page.addCircleAnnot(r) # 'Circle' 144 | annot.setBorder(width=0.3, dashes=[2]) 145 | annot.setColors(stroke=blue, fill=gold) 146 | annot.update() 147 | print_descr(annot) 148 | 149 | r += displ 150 | annot = page.addFileAnnot( 151 | r.tl, b"just anything for testing", "testdata.txt" # 'FileAttachment' 152 | ) 153 | print_descr(annot) # annot.rect 154 | 155 | r += displ 156 | annot = page.addStampAnnot(r, stamp=10) # 'Stamp' 157 | annot.setColors(stroke=green) 158 | annot.update() 159 | print_descr(annot) 160 | 161 | r += displ + (0, 0, 50, 10) 162 | rc = page.insertTextbox( 163 | r, 164 | "This content will be removed upon applying the redaction.", 165 | color=blue, 166 | align=fitz.TEXT_ALIGN_CENTER, 167 | ) 168 | annot = page.addRedactAnnot(r) 169 | print_descr(annot) 170 | 171 | outfile = os.path.abspath(__file__).replace(".py", "-%i.pdf" % page.rotation) 172 | doc.save(outfile, deflate=True) 173 | -------------------------------------------------------------------------------- /docs/outline.rst: -------------------------------------------------------------------------------- 1 | .. _Outline: 2 | 3 | ================ 4 | Outline 5 | ================ 6 | 7 | *outline* (or "bookmark"), is a property of *Document*. If not *None*, it stands for the first outline item of the document. Its properties in turn define the characteristics of this item and also point to other outline items in "horizontal" or downward direction. The full tree of all outline items for e.g. a conventional table of contents (TOC) can be recovered by following these "pointers". 
8 | 9 | ============================ ================================================== 10 | **Method / Attribute** **Short Description** 11 | ============================ ================================================== 12 | :attr:`Outline.down` next item downwards 13 | :attr:`Outline.next` next item same level 14 | :attr:`Outline.page` page number (0-based) 15 | :attr:`Outline.title` title 16 | :attr:`Outline.uri` string further specifying the outline target 17 | :attr:`Outline.isExternal` target is outside this document 18 | :attr:`Outline.is_open` whether sub-outlines are open or collapsed 19 | :attr:`Outline.isOpen` whether sub-outlines are open or collapsed 20 | :attr:`Outline.dest` points to link destination details 21 | ============================ ================================================== 22 | 23 | **Class API** 24 | 25 | .. class:: Outline 26 | 27 | .. attribute:: down 28 | 29 | The next outline item on the next level down. Is *None* if the item has no kids. 30 | 31 | :type: :ref:`Outline` 32 | 33 | .. attribute:: next 34 | 35 | The next outline item at the same level as this item. Is *None* if this is the last one in its level. 36 | 37 | :type: `Outline` 38 | 39 | .. attribute:: page 40 | 41 | The page number (0-based) this bookmark points to. 42 | 43 | :type: int 44 | 45 | .. attribute:: title 46 | 47 | The item's title as a string or *None*. 48 | 49 | :type: str 50 | 51 | .. attribute:: is_open 52 | 53 | Or *isOpen* -- an indicator showing whether any sub-outlines should be expanded (*True*) or be collapsed (*False*). This information should be interpreted by PDF display software accordingly. 54 | 55 | :type: bool 56 | 57 | .. attribute:: isExternal 58 | 59 | A bool specifying whether the target is outside (*True*) of the current document. 60 | 61 | :type: bool 62 | 63 | .. attribute:: uri 64 | 65 | A string specifying the link target. The meaning of this property should be evaluated in conjunction with *isExternal*. 
The value may be *None*, in which case *isExternal == False*. If *uri* starts with *file://*, *mailto:*, or an internet resource name, *isExternal* is *True*. In all other cases *isExternal == False* and *uri* points to an internal location. In case of PDF documents, this should either be *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named location. The format varies for other document types, e.g. *uri = '../FixedDoc.fdoc#PG_21_LNK_84'* for page number 21 (1-based) in an XPS document. 66 | 67 | :type: str 68 | 69 | .. attribute:: dest 70 | 71 | The link destination details object. 72 | 73 | :type: :ref:`linkDest` 74 | -------------------------------------------------------------------------------- /docs/point.rst: -------------------------------------------------------------------------------- 1 | .. _Point: 2 | 3 | ================ 4 | Point 5 | ================ 6 | 7 | *Point* represents a point in the plane, defined by its x and y coordinates. 8 | 9 | ============================ ============================================ 10 | **Attribute / Method** **Description** 11 | ============================ ============================================ 12 | :meth:`Point.distance_to` calculate distance to point or rect 13 | :meth:`Point.norm` the Euclidean norm 14 | :meth:`Point.transform` transform point with a matrix 15 | :attr:`Point.abs_unit` same as unit, but positive coordinates 16 | :attr:`Point.unit` point coordinates divided by *abs(point)* 17 | :attr:`Point.x` the X-coordinate 18 | :attr:`Point.y` the Y-coordinate 19 | ============================ ============================================ 20 | 21 | **Class API** 22 | 23 | .. class:: Point 24 | 25 | .. method:: __init__(self) 26 | 27 | .. method:: __init__(self, x, y) 28 | 29 | .. method:: __init__(self, point) 30 | 31 | .. method:: __init__(self, sequence) 32 | 33 | Overloaded constructors. 34 | 35 | Without parameters, *Point(0, 0)* will be created. 
36 | 37 | With another point specified, a **new copy** will be created. "sequence" is a Python sequence of 2 numbers (see :ref:`SequenceTypes`). 38 | 39 | :arg float x: x coordinate of the point 40 | 41 | :arg float y: y coordinate of the point 42 | 43 | .. method:: distance_to(x [, unit]) 44 | 45 | Calculate the distance to *x*, which may be :data:`point_like` or :data:`rect_like`. The distance is given in units of either pixels (default), inches, centimeters or millimeters. 46 | 47 | :arg point_like,rect_like x: to which to compute the distance. 48 | 49 | :arg str unit: the unit to be measured in. One of "px", "in", "cm", "mm". 50 | 51 | :rtype: float 52 | :returns: the distance to *x*. If this is :data:`rect_like`, then the distance 53 | 54 | * is the length of the shortest line connecting to one of the rectangle sides 55 | * is calculated to the **finite version** of it 56 | * is zero if it **contains** the point 57 | 58 | .. method:: norm() 59 | 60 | *(New in version 1.16.0)* 61 | 62 | Return the Euclidean norm (the length) of the point as a vector. Equals result of function *abs()*. 63 | 64 | .. method:: transform(m) 65 | 66 | Apply a matrix to the point and replace it with the result. 67 | 68 | :arg matrix_like m: The matrix to be applied. 69 | 70 | :rtype: :ref:`Point` 71 | 72 | .. attribute:: unit 73 | 74 | Result of dividing each coordinate by *norm(point)*, the distance of the point to (0,0). This is a vector of length 1 pointing in the same direction as the point does. Its x, resp. y values are equal to the cosine, resp. sine of the angle this vector (and the point itself) has with the x axis. 75 | 76 | .. image:: images/img-point-unit.jpg 77 | 78 | :type: :ref:`Point` 79 | 80 | .. attribute:: abs_unit 81 | 82 | Same as :attr:`unit` above, replacing the coordinates with their absolute values. 83 | 84 | :type: :ref:`Point` 85 | 86 | .. attribute:: x 87 | 88 | The x coordinate 89 | 90 | :type: float 91 | 92 | ..
attribute:: y 93 | 94 | The y coordinate 95 | 96 | :type: float 97 | 98 | .. note:: 99 | 100 | * This class adheres to the Python sequence protocol, so components can be accessed via their index, too. Also refer to :ref:`SequenceTypes`. 101 | * Points can be used with arithmetic operators -- see chapter :ref:`Algebra`. 102 | 103 | -------------------------------------------------------------------------------- /docs/pymupdf-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/pymupdf-logo.jpg -------------------------------------------------------------------------------- /docs/quad.rst: -------------------------------------------------------------------------------- 1 | .. _Quad: 2 | 3 | ========== 4 | Quad 5 | ========== 6 | 7 | Represents a four-sided mathematical shape (also called "quadrilateral" or "tetragon") in the plane, defined as a sequence of four :ref:`Point` objects ul, ur, ll, lr (conveniently called upper left, upper right, lower left, lower right). 8 | 9 | Quads can **be obtained** as results of text search methods (:meth:`Page.searchFor`), and they **are used** to define text marker annotations (see e.g. :meth:`Page.addSquigglyAnnot` and friends), and in several draw methods (like :meth:`Page.drawQuad` / :meth:`Shape.drawQuad`, :meth:`Page.drawOval` / :meth:`Shape.drawOval`). 10 | 11 | .. note:: 12 | 13 | * If the corners of a rectangle are transformed with a **rotation**, **scale** or **translation** :ref:`Matrix`, then the resulting quad is **rectangular**, i.e. its corners again enclose angles of 90 degrees. Property :attr:`Quad.isRectangular` checks whether a quad can be thought of being the result of such an operation. This is not true for all matrices: e.g. shear matrices produce parallelograms, and non-invertible matrices deliver "degenerate" tetragons like triangles or lines.
14 | 15 | * Attribute :attr:`Quad.rect` obtains the enveloping rectangle. Vice versa, rectangles now have attributes :attr:`Rect.quad`, resp. :attr:`IRect.quad` to obtain their respective tetragon versions. 16 | 17 | 18 | ============================= ======================================================= 19 | **Methods / Attributes** **Short Description** 20 | ============================= ======================================================= 21 | :meth:`Quad.transform` transform with a matrix 22 | :meth:`Quad.morph` transform with a point and matrix 23 | :attr:`Quad.ul` upper left point 24 | :attr:`Quad.ur` upper right point 25 | :attr:`Quad.ll` lower left point 26 | :attr:`Quad.lr` lower right point 27 | :attr:`Quad.isConvex` true if quad is a convex set 28 | :attr:`Quad.isEmpty` true if quad is an empty set 29 | :attr:`Quad.isRectangular` true if quad is a (rotated) rectangle 30 | :attr:`Quad.rect` smallest containing :ref:`Rect` 31 | :attr:`Quad.width` the longest width value 32 | :attr:`Quad.height` the longest height value 33 | ============================= ======================================================= 34 | 35 | **Class API** 36 | 37 | .. class:: Quad 38 | 39 | .. method:: __init__(self) 40 | 41 | .. method:: __init__(self, ul, ur, ll, lr) 42 | 43 | .. method:: __init__(self, quad) 44 | 45 | .. method:: __init__(self, sequence) 46 | 47 | Overloaded constructors: "ul", "ur", "ll", "lr" stand for :data:`point_like` objects (the four corners), "sequence" is a Python sequence with four :data:`point_like` objects. 48 | 49 | If "quad" is specified, the constructor creates a **new copy** of it. 50 | 51 | Without parameters, a quad consisting of 4 copies of *Point(0, 0)* is created. 52 | 53 | 54 | .. method:: transform(matrix) 55 | 56 | Modify the quadrilateral by transforming each of its corners with a matrix. 57 | 58 | :arg matrix_like matrix: the matrix. 59 | 60 | ..
method:: morph(fixpoint, matrix) 61 | 62 | *(New in version 1.17.0)* "Morph" the quad with a matrix-like using a point-like as fixed point. 63 | 64 | :arg point_like fixpoint: the point. 65 | :arg matrix_like matrix: the matrix. 66 | :returns: a new quad. The effect is achieved by using the following code:: 67 | 68 | >>> T = fitz.Matrix(1, 1).preTranslate(fixpoint.x, fixpoint.y) 69 | >>> result = self * ~T * matrix * T 70 | 71 | So the quad is translated such, that fixpoint becomes the origin (0, 0), then the matrix is applied to it, and finally a reverse translation is done. 72 | 73 | Typical uses include rotating the quad around a desired point. 74 | 75 | .. attribute:: rect 76 | 77 | The smallest rectangle containing the quad, represented by the blue area in the following picture. 78 | 79 | .. image:: images/img-quads.jpg 80 | 81 | :type: :ref:`Rect` 82 | 83 | .. attribute:: ul 84 | 85 | Upper left point. 86 | 87 | :type: :ref:`Point` 88 | 89 | .. attribute:: ur 90 | 91 | Upper right point. 92 | 93 | :type: :ref:`Point` 94 | 95 | .. attribute:: ll 96 | 97 | Lower left point. 98 | 99 | :type: :ref:`Point` 100 | 101 | .. attribute:: lr 102 | 103 | Lower right point. 104 | 105 | :type: :ref:`Point` 106 | 107 | .. attribute:: isConvex 108 | 109 | *(New in version 1.16.1)* 110 | 111 | True if every line connecting two points of the quad is inside the quad. We in addition also make sure here, that the quad is not "degenerate", i.e. not all corners are on the same line (which would still qualify as convexity in the mathematical sense). 112 | 113 | :type: bool 114 | 115 | .. attribute:: isEmpty 116 | 117 | True if enclosed area is zero, which means that at least three of the four corners are on the same line. If this is false, the quad may still be degenerate or not look like a tetragon at all (triangles, parallelograms, trapezoids, ...). 118 | 119 | :type: bool 120 | 121 | .. attribute:: isRectangular 122 | 123 | True if all corner angles are 90 degrees. 
def pdf_color(srgb):
    """Create a PDF color triple from a given sRGB color integer.

    The integer is interpreted as ``0xRRGGBB``. Each 8-bit channel is
    extracted with integer shifts and masks and then scaled to the range
    0..1 used for PDF colors.

    The previous implementation used true division (``srgb /= 256``) to
    shift the value. Under Python 3 this turns ``srgb`` into a float, so the
    fractional remainder of the lower channels leaked into the green and red
    components (e.g. ``0xFFFFFF`` produced a green value slightly above 1).

    Args:
        srgb: the color as an integer of the form ``0xRRGGBB``. Bits above
            the lowest 24 are ignored.

    Returns:
        A tuple ``(r, g, b)`` of floats, each in the closed interval [0, 1].
    """
    r = (srgb >> 16) & 0xFF
    g = (srgb >> 8) & 0xFF
    b = srgb & 0xFF
    # divide by a float so the result is fractional on Python 2 as well
    return (r / 255.0, g / 255.0, b / 255.0)
34 | return f[1] 35 | return "helv" # default: Helvetica 36 | 37 | 38 | for page in doc: 39 | if page.number % 10 == 0: # just entertainment messages every 10 pages 40 | print("Processed %i pages" % page.number) 41 | if not page._isWrapped: # check if input page geometry is dubious 42 | page._wrapContents() 43 | # for each input page create an output with same dimensions 44 | outpage = out.newPage(width=page.rect.width, height=page.rect.height) 45 | 46 | # create a shape to write the output text to. 47 | shape = outpage.newShape() 48 | text_blocks = [] 49 | image_blocks = [] 50 | for block in page.getText("dict")["blocks"]: 51 | if block["type"] == 0: 52 | text_blocks.append(block) 53 | else: 54 | image_blocks.append(block) 55 | 56 | # insert the images first, so any text appears in foreground 57 | for block in image_blocks: 58 | outpage.insertImage(block["bbox"], stream=block["image"]) 59 | print("Inserted an image on page", page.number) 60 | 61 | for block in text_blocks: # read text blocks 62 | shape.drawRect(block["bbox"]) # draw all text on white background, 63 | # because images may cover same area 64 | 65 | for line in block["lines"]: # for each line in the block ... 66 | for span in line["spans"]: # for each span in the line ... 67 | fontname = get_font(span["font"]) # get replacing fontname 68 | fontsize = span["size"] 69 | text = span["text"] 70 | bbox = fitz.Rect(span["bbox"]) # text rectangle on input 71 | text_size = fitz.getTextlength( # measure text length on output 72 | text, fontname=fontname, fontsize=fontsize 73 | ) 74 | 75 | # adjust fontsize if text is too long with new the font 76 | if text_size > bbox.width: 77 | fontsize *= bbox.width / text_size 78 | try: 79 | shape.insertText( # copy text to output page 80 | bbox.bl, # insertion point on output page 81 | text, # the text to insert 82 | fontsize=fontsize, # fontsize 83 | # decide on output font here: the place for sophistication! 
def flags_decomposer(flags):
    """Translate a font flags bit field into a readable description.

    Bits 0, 1 and 4 each add a word only when set ("superscript", "italic",
    "bold"). Bits 2 and 3 always add a word: one when set, its counterpart
    when clear ("serifed" / "sans", "monospaced" / "proportional").

    Returns the words joined by ", " in ascending bit order.
    """
    # (bit mask, word when the bit is set, word when it is clear or None)
    bit_words = (
        (1, "superscript", None),
        (2, "italic", None),
        (4, "serifed", "sans"),
        (8, "monospaced", "proportional"),
        (16, "bold", None),
    )
    words = []
    for mask, when_set, when_clear in bit_words:
        word = when_set if flags & mask else when_clear
        if word is not None:
            words.append(word)
    return ", ".join(words)
-------------------------------------------------------------------------------- /docs/textwriter.rst: -------------------------------------------------------------------------------- 1 | .. _TextWriter: 2 | 3 | ================ 4 | TextWriter 5 | ================ 6 | 7 | *(New in v1.16.18)* This class represents a MuPDF *text* object. It can be thought of as a collection of text *"spans"*. Each span has its own starting position, font and font size. It is an elegant alternative for writing text to PDF pages, when compared with methods :meth:`Page.insertText` and friends: 8 | 9 | * **Improved text positioning:** Choose any point where insertion of a text span should start. Storing a text span returns the coordinates of the *last character* of the span. 10 | * **Free font choice:** Each text span has its own font and fontsize. This lets you easily switch between font and font characteristics when composing a larger text. 11 | * **Automatic fallback fonts:** If a character is not represented by the chosen font, alternative fonts are automatically searched. This significantly reduces the risk of seeing unprintable symbols in the output ("TOFUs"). PyMuPDF now also comes with the **universal font "Droid Sans Fallback Regular"**, which supports **all Latin** characters (incuding Cyrillic and Greek), and **all CJK** characters (Chinese, Japanese, Korean). 12 | * **Cyrillic and Greek Support:** The :ref:`Base-14-fonts` have integrated support of Cyrillic and Greek characters **without specifying encoding.** If your text is a mixture of Latin, Greek and Cyrillic, it will be shown correctly if you just use e.g. font "Helvetica". 13 | * **Transparency support:** Parameter *opacity* is supported. This offers a handy way to create watermark-style text. 14 | * **Justified text:** Supported for any font -- not just simple fonts as in :meth:`Page.insertText`. 15 | * **Reusability:** A TextWriter object exists independent from any page. 
It can be written multiple times, either to the same or to other pages, in the same or in different PDFs, choosing different colors or transparency. 16 | 17 | Using this object entails three steps: 18 | 19 | 1. When **created**, a TextWriter requires a fixed **page rectangle** in relation to which it calculates text span positions. Text can be written to a page if and only if its size equals that of the TextWriter. 20 | 2. Store text in the TextWriter using methods :meth:`TextWriter.append` and :meth:`TextWriter.fillTextbox` as often as desired. 21 | 3. Output the TextWriter object on some PDF page with a compatible size. 22 | 23 | .. note:: Starting with version 1.17.0, TextWriters **do support** text rotation via the *morph* parameter of :meth:`TextWriter.writeText`. 24 | 25 | There also exists :meth:`Page.writeText` which lets you combine one or more TextWriters and jointly write them to a given rectangle and with a given rotation angle -- much like :meth:`Page.showPDFpage`. 26 | 27 | **Class API** 28 | 29 | .. class:: TextWriter 30 | 31 | .. method:: __init__(self, rect, opacity=1, color=None) 32 | 33 | :arg rect-like rect: rectangle internally used for text positioning computations. 34 | :arg float opacity: sets the transparency for the text to store here. Values outside the interval ``[0, 1)`` will be ignored. A value of e.g. 0.5 means 50% transparency. 35 | :arg float,sequ color: the color of the text. All colors are specified as floats *0 <= color <= 1*. A single float represents some gray level, a sequence implies the colorspace via its length. 36 | 37 | 38 | .. method:: append(pos, text, font=None, fontsize=11, language=None) 39 | 40 | Add new text, usually (but not necessarily) representing a text span. 41 | 42 | :arg point_like pos: start position of the text, the bottom left point of the first character. 43 | :arg str text: a string (Python 2: unicode is mandatory!) of arbitrary length. It will be written starting at position "pos". 
44 | :arg font: a :ref:`Font`. If omitted, ``fitz.Font("helv")`` will be used. 45 | :arg float fontsize: the fontsize, a positive number, default 11. 46 | :arg str language: the language to use, e.g. "en" for English. Meaningful values should be compliant with the ISO 639 standards 1, 2, 3 or 5. Reserved for future use: currently has no effect as far as we know. 47 | 48 | :returns: :attr:`textRect` and :attr:`lastPoint`. 49 | 50 | .. method:: fillTextbox(rect, text, pos=None, font=None, fontsize=11, align=0, warn=True) 51 | 52 | Fill a given rectangle with text. This is a convenience method to use as an alternative to :meth:`append`. 53 | 54 | :arg rect_like rect: the area to fill. No part of the text will appear outside of this. 55 | :arg str,sequ text: the text. Can be specified as a (UTF-8) string or a list / tuple of strings. A string will first be converted to a list using *splitlines()*. Every list item will begin on a new line (forced line breaks). 56 | :arg point_like pos: *(new in v1.17.3)* start storing at this point. Default is a point near rectangle top-left. 57 | :arg font: the :ref:`Font`, default `fitz.Font("helv")`. 58 | :arg float fontsize: the fontsize. 59 | :arg int align: text alignment. Use one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT or TEXT_ALIGN_JUSTIFY. 60 | :arg bool warn: warn on text overflow (default), or raise an exception. In any case, text not fitting will not be written. 61 | 62 | .. note:: Use these methods as often as is required -- there is no technical limit (except memory constraints of your system). You can also mix appends and text boxes and have multiple of both. Text positioning is controlled by the insertion point. There is no need to adhere to any order. 63 | 64 | 65 | .. method:: writeText(page, opacity=None, color=None, morph=None, overlay=True) 66 | 67 | Write the TextWriter text to a page. 68 | 69 | :arg page: write to this :ref:`Page`. 
70 | :arg float opacity: override the value of the TextWriter for this output. 71 | :arg sequ color: override the value of the TextWriter for this output. 72 | :arg sequ morph: modify the text appearance by applying a matrix to it. If provided, this must be a sequence *(fixpoint, matrix)* with a point-like *fixpoint* and a matrix-like *matrix*. A typical example is rotating the text around *fixpoint*. 73 | :arg bool overlay: put in foreground (default) or background. 74 | 75 | 76 | .. attribute:: textRect 77 | 78 | The :ref:`Rect` currently occupied. This value changes when more text is added. 79 | 80 | .. attribute:: lastPoint 81 | 82 | The "cursor position" -- a :ref:`Point` -- after the last written character (its bottom-right). 83 | 84 | .. attribute:: opacity 85 | 86 | The text opacity (modifiable). 87 | 88 | .. attribute:: color 89 | 90 | The text color (modifiable). 91 | 92 | .. attribute:: rect 93 | 94 | The page rectangle for which this TextWriter was created. Must not be modified. 95 | 96 | 97 | To see some demo scripts dealing with TextWriter, have a look at `this `_ repository. 98 | 99 | 100 | .. note:: 101 | 102 | 1. Opacity and color apply to **all the text** in this object. 103 | 2. If you need different colors / transparency, you must create a separate TextWriter. Whenever you determine the color should change, simply append the text to the respective TextWriter using the previously returned :attr:`lastPoint` as position for the new text span. 104 | 3. Appending items or text boxes can occur in arbitrary order: only the position parameter controls where text appears. 105 | 4. Font and fontsize can freely vary within the same TextWriter. This can be used to let text with different properties appear on the same displayed line: just specify *pos* accordingly, and e.g. set it to :attr:`lastPoint` of the previously added item. 106 | 5.
You can use the *pos* argument of :meth:`TextWriter.fillTextbox` to indent the first line, so its text may continue any preceeding one in a continuous manner. 107 | -------------------------------------------------------------------------------- /docs/version.rst: -------------------------------------------------------------------------------- 1 | Covered Version 2 | -------------------- 3 | 4 | This documentation covers PyMuPDF v1.17.4 features as of **2020-07-20 18:09:40**. 5 | 6 | .. note:: The major and minor versions of **PyMuPDF** and **MuPDF** will always be the same. Only the third qualifier (patch level) may deviate from that of MuPDF. -------------------------------------------------------------------------------- /docs/wheelnames.txt: -------------------------------------------------------------------------------- 1 | PyMuPDF-x.xx.xx-cp27-cp27m-macosx_10_9_x86_64.whl 2 | PyMuPDF-x.xx.xx-cp27-cp27m-manylinux2010_x86_64.whl 3 | PyMuPDF-x.xx.xx-cp27-cp27m-win32.whl 4 | PyMuPDF-x.xx.xx-cp27-cp27m-win_amd64.whl 5 | PyMuPDF-x.xx.xx-cp27-cp27mu-manylinux2010_x86_64.whl 6 | PyMuPDF-x.xx.xx-cp35-cp35m-macosx_10_9_x86_64.whl 7 | PyMuPDF-x.xx.xx-cp35-cp35m-manylinux2010_x86_64.whl 8 | PyMuPDF-x.xx.xx-cp35-cp35m-win32.whl 9 | PyMuPDF-x.xx.xx-cp35-cp35m-win_amd64.whl 10 | PyMuPDF-x.xx.xx-cp36-cp36m-macosx_10_9_x86_64.whl 11 | PyMuPDF-x.xx.xx-cp36-cp36m-manylinux2010_x86_64.whl 12 | PyMuPDF-x.xx.xx-cp36-cp36m-win32.whl 13 | PyMuPDF-x.xx.xx-cp36-cp36m-win_amd64.whl 14 | PyMuPDF-x.xx.xx-cp37-cp37m-macosx_10_9_x86_64.whl 15 | PyMuPDF-x.xx.xx-cp37-cp37m-manylinux2010_x86_64.whl 16 | PyMuPDF-x.xx.xx-cp37-cp37m-win32.whl 17 | PyMuPDF-x.xx.xx-cp37-cp37m-win_amd64.whl 18 | PyMuPDF-x.xx.xx-cp38-cp38-macosx_10_9_x86_64.whl 19 | PyMuPDF-x.xx.xx-cp38-cp38-manylinux2010_x86_64.whl 20 | PyMuPDF-x.xx.xx-cp38-cp38-win32.whl 21 | PyMuPDF-x.xx.xx-cp38-cp38-win_amd64.whl 22 | -------------------------------------------------------------------------------- /docs/widget.rst: 
-------------------------------------------------------------------------------- 1 | .. _Widget: 2 | 3 | ================ 4 | Widget 5 | ================ 6 | 7 | This class represents a PDF Form field, also called "widget". Fields are a special case of annotations, which allow users with limited permissions to enter information in a PDF. This is primarily used for filling out forms. 8 | 9 | Like annotations, widgets live on PDF pages. Similar to annotations, the first widget on a page is accessible via :attr:`Page.firstWidget` and subsequent widgets can be accessed via the :attr:`Widget.next` property. 10 | 11 | *(Changed in version 1.16.0)* MuPDF no longer treats widgets as a subset of general annotations. Consequently, :attr:`Page.firstAnnot` and :meth:`Annot.next` will deliver non-widget annotations exclusively, and be *None* if only form fields exist on a page. Vice versa, :attr:`Page.firstWidget` and :meth:`Widget.next` will only show widgets. This design decision is purely internal to MuPDF; technically, links, annotations and fields have a lot in common and also continue to share the better part of their code within (Py-) MuPDF. 12 | 13 | 14 | **Class API** 15 | 16 | .. class:: Widget 17 | 18 | .. method:: update 19 | 20 | After any changes to a widget, this method **must be used** to store them in the PDF [#f1]_. 21 | 22 | .. method:: reset 23 | 24 | Reset the field's value to its default -- if defined -- or remove it. Do not forget to issue :meth:`update` afterwards. 25 | 26 | .. attribute:: next 27 | 28 | Point to the next form field on the page. 29 | 30 | .. attribute:: border_color 31 | 32 | A list of up to 4 floats defining the field's border. Default value is *None* which causes border style and border width to be ignored. 33 | 34 | .. attribute:: border_style 35 | 36 | A string defining the line style of the field's border. See :attr:`Annot.border`. Default is "s" ("Solid") -- a continuous line. 
Only the first character (upper or lower case) will be regarded when creating a widget. 37 | 38 | .. attribute:: border_width 39 | 40 | A float defining the width of the border line. Default is 1. 41 | 42 | .. attribute:: border_dashes 43 | 44 | A list/tuple of integers defining the dash properties of the border line. This is only meaningful if *border_style == "D"* and :attr:`border_color` is provided. 45 | 46 | .. attribute:: choice_values 47 | 48 | Python sequence of strings defining the valid choices of list boxes and combo boxes. For these widgets, this property is mandatory and must contain at least two items. Ignored for other types. 49 | 50 | .. attribute:: field_name 51 | 52 | A mandatory string defining the field's name. No checking for duplicates takes place. 53 | 54 | .. attribute:: field_label 55 | 56 | An optional string containing an "alternate" field name. Typically used for any notes, help on field usage, etc. Default is the field name. 57 | 58 | .. attribute:: field_value 59 | 60 | The value of the field. 61 | 62 | .. attribute:: field_flags 63 | 64 | An integer defining a large number of properties of a field. Handle this attribute with care. 65 | 66 | .. attribute:: field_type 67 | 68 | A mandatory integer defining the field type. This is a value in the range of 0 to 6. It cannot be changed when updating the widget. 69 | 70 | .. attribute:: field_type_string 71 | 72 | A string describing (and derived from) the field type. 73 | 74 | .. attribute:: fill_color 75 | 76 | A list of up to 4 floats defining the field's background color. 77 | 78 | .. attribute:: button_caption 79 | 80 | The caption string of a button-type field. 81 | 82 | .. attribute:: is_signed 83 | 84 | A bool indicating the status of a signature field, else *None*. 85 | 86 | .. attribute:: rect 87 | 88 | The rectangle containing the field. 89 | 90 | .. attribute:: text_color 91 | 92 | A list of **1, 3 or 4 floats** defining the text color. Default value is black (`[0, 0, 0]`).
93 | 94 | .. attribute:: text_font 95 | 96 | A string defining the font to be used. Default and replacement for invalid values is *"Helv"*. For valid font reference names see the table below. 97 | 98 | .. attribute:: text_fontsize 99 | 100 | A float defining the text fontsize. Default value is zero, which causes PDF viewer software to dynamically choose a size suitable for the annotation's rectangle and text amount. 101 | 102 | .. attribute:: text_maxlen 103 | 104 | An integer defining the maximum number of text characters. PDF viewers will (should) not accept a longer text. 105 | 106 | .. attribute:: text_type 107 | 108 | An integer defining acceptable text types (e.g. numeric, date, time, etc.). For reference only for the time being -- will be ignored when creating or updating widgets. 109 | 110 | .. attribute:: xref 111 | 112 | The PDF :data:`xref` of the widget. 113 | 114 | .. attribute:: script 115 | 116 | *(New in version 1.16.12)* JavaScript text (unicode) for an action associated with the widget, or *None*. This is the only script action supported for **button type** widgets. 117 | 118 | .. attribute:: script_stroke 119 | 120 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed when the user types a key-stroke into a text field or combo box or modifies the selection in a scrollable list box. This action can check the keystroke for validity and reject or modify it. *None* if not present. 121 | 122 | .. attribute:: script_format 123 | 124 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed before the field is formatted to display its current value. This action can modify the field’s value before formatting. *None* if not present. 125 | 126 | .. attribute:: script_change 127 | 128 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed when the field’s value is changed. This action can check the new value for validity. *None* if not present. 129 | 130 | .. 
attribute:: script_calc 131 | 132 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed to recalculate the value of this field when that of another field changes. *None* if not present. 133 | 134 | .. note:: 135 | 1. For **adding** or **changing** one of the above scripts, just put the appropriate JavaScript source code in the widget attribute. To **remove** a script, set the respective attribute to *None*. 136 | 2. Button fields only support :attr:`script`. Other script entries will automatically be set to *None*. 137 | 138 | 139 | Standard Fonts for Widgets 140 | ---------------------------------- 141 | Widgets use their own resources object */DR*. A widget resources object must at least contain a */Font* object. Widget fonts are independent from page fonts. We currently support the 14 PDF base fonts using the following fixed reference names, or any name of an already existing field font. When specifying a text font for new or changed widgets, **either** choose one in the first table column (upper and lower case supported), **or** one of the already existing form fonts. In the latter case, spelling must exactly match. 142 | 143 | To find out already existing field fonts, inspect the list :attr:`Document.FormFonts`. 144 | 145 | ============= ======================= 146 | **Reference** **Base14 Fontname** 147 | ============= ======================= 148 | CoBI Courier-BoldOblique 149 | CoBo Courier-Bold 150 | CoIt Courier-Oblique 151 | Cour Courier 152 | HeBI Helvetica-BoldOblique 153 | HeBo Helvetica-Bold 154 | HeIt Helvetica-Oblique 155 | Helv Helvetica **(default)** 156 | Symb Symbol 157 | TiBI Times-BoldItalic 158 | TiBo Times-Bold 159 | TiIt Times-Italic 160 | TiRo Times-Roman 161 | ZaDb ZapfDingbats 162 | ============= ======================= 163 | 164 | You are generally free to use any font for every widget. 
However, we recommend using *ZaDb* ("ZapfDingbats") and fontsize 0 for check boxes: typical viewers will put a correctly sized tickmark in the field's rectangle, when it is clicked. 165 | 166 | .. rubric:: Footnotes 167 | 168 | .. [#f1] If you intend to re-access a new or updated field (e.g. for making a pixmap), make sure to reload the page first. Either close and re-open the document, or load another page first, or simply do ``page = doc.reload_page(page)``. 169 | -------------------------------------------------------------------------------- /fitz/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import sys 3 | from fitz.fitz import * 4 | 5 | # define the supported colorspaces for convenience 6 | fitz.csRGB = fitz.Colorspace(fitz.CS_RGB) 7 | fitz.csGRAY = fitz.Colorspace(fitz.CS_GRAY) 8 | fitz.csCMYK = fitz.Colorspace(fitz.CS_CMYK) 9 | csRGB = fitz.csRGB 10 | csGRAY = fitz.csGRAY 11 | csCMYK = fitz.csCMYK 12 | 13 | # create the TOOLS object 14 | TOOLS = fitz.Tools() 15 | fitz.TOOLS = TOOLS 16 | 17 | if fitz.VersionFitz != fitz.TOOLS.mupdf_version(): 18 | v1 = fitz.VersionFitz.split(".") 19 | v2 = fitz.TOOLS.mupdf_version().split(".") 20 | if v1[:-1] != v2[:-1]: 21 | raise ValueError( 22 | "MuPDF library mismatch %s <> %s" 23 | % (fitz.VersionFitz, fitz.TOOLS.mupdf_version()) 24 | ) 25 | 26 | 27 | # copy functions to their respective fitz classes 28 | import fitz.utils 29 | 30 | # ------------------------------------------------------------------------------ 31 | # Document 32 | # ------------------------------------------------------------------------------ 33 | fitz.open = fitz.Document 34 | fitz.Document.getToC = fitz.utils.getToC 35 | fitz.Document._do_links = fitz.utils.do_links 36 | fitz.Document.getPagePixmap = fitz.utils.getPagePixmap 37 | fitz.Document.getPageText = fitz.utils.getPageText 38 | fitz.Document.setMetadata = fitz.utils.setMetadata 39 | 
# Remaining Document bindings: each utility function from fitz.utils is
# attached to the class as an attribute so that it is callable as a method.
fitz.Document.setToC = fitz.utils.setToC
fitz.Document.searchPageFor = fitz.utils.searchPageFor
fitz.Document.newPage = fitz.utils.newPage
fitz.Document.insertPage = fitz.utils.insertPage
fitz.Document.getCharWidths = fitz.utils.getCharWidths
fitz.Document.scrub = fitz.utils.scrub

# ------------------------------------------------------------------------------
# Page
# ------------------------------------------------------------------------------
fitz.Page.apply_redactions = fitz.utils.apply_redactions
fitz.Page.drawBezier = fitz.utils.drawBezier
fitz.Page.drawCircle = fitz.utils.drawCircle
fitz.Page.drawCurve = fitz.utils.drawCurve
fitz.Page.drawLine = fitz.utils.drawLine
fitz.Page.drawOval = fitz.utils.drawOval
fitz.Page.drawPolyline = fitz.utils.drawPolyline
fitz.Page.drawQuad = fitz.utils.drawQuad
fitz.Page.drawRect = fitz.utils.drawRect
fitz.Page.drawSector = fitz.utils.drawSector
fitz.Page.drawSquiggle = fitz.utils.drawSquiggle
fitz.Page.drawZigzag = fitz.utils.drawZigzag
fitz.Page.getLinks = fitz.utils.getLinks
fitz.Page.getPixmap = fitz.utils.getPixmap
fitz.Page.getText = fitz.utils.getText
fitz.Page.getTextBlocks = fitz.utils.getTextBlocks
fitz.Page.getTextWords = fitz.utils.getTextWords
fitz.Page.insertImage = fitz.utils.insertImage
fitz.Page.insertLink = fitz.utils.insertLink
fitz.Page.insertText = fitz.utils.insertText
fitz.Page.insertTextbox = fitz.utils.insertTextbox
# Shape is a class in fitz.utils; the lambda makes page.newShape() construct
# a Shape for the page it is called on.
fitz.Page.newShape = lambda x: fitz.utils.Shape(x)
fitz.Page.searchFor = fitz.utils.searchFor
fitz.Page.showPDFpage = fitz.utils.showPDFpage
fitz.Page.updateLink = fitz.utils.updateLink
fitz.Page.writeText = fitz.utils.writeText
# ------------------------------------------------------------------------------
# Rect
# ------------------------------------------------------------------------------
# getArea is an alias of getRectArea (both names bind the same function).
fitz.Rect.getRectArea = fitz.utils.getRectArea
fitz.Rect.getArea = fitz.utils.getRectArea

# ------------------------------------------------------------------------------
# IRect
# ------------------------------------------------------------------------------
fitz.IRect.getRectArea = fitz.utils.getRectArea
fitz.IRect.getArea = fitz.utils.getRectArea

# ------------------------------------------------------------------------------
# TextWriter
# ------------------------------------------------------------------------------
fitz.TextWriter.fillTextbox = fitz.utils.fillTextbox


# Module docstring: binding version, MuPDF version, build date, and the
# Python version / platform / pointer width of the running interpreter.
fitz.__doc__ = """
PyMuPDF %s: Python bindings for the MuPDF %s library.
Version date: %s.
Built for Python %i.%i on %s (%i-bit).
""" % (
    fitz.VersionBind,
    fitz.VersionFitz,
    fitz.VersionDate,
    sys.version_info[0],
    sys.version_info[1],
    sys.platform,
    64 if sys.maxsize > 2 ** 32 else 32,
)
range 14 | } 15 | fz_rect mediabox; 16 | int rot = JM_norm_rotation(rotate); 17 | fz_device *dev = NULL; 18 | fz_buffer *contents = NULL; 19 | pdf_obj *resources = NULL; 20 | fz_page *page; 21 | fz_var(dev); 22 | fz_var(contents); 23 | fz_var(resources); 24 | fz_var(page); 25 | for (i = fp; INRANGE(i, s, e); i += incr) { // interpret & write document pages as PDF pages 26 | fz_try(ctx) { 27 | page = fz_load_page(ctx, doc, i); 28 | mediabox = fz_bound_page(ctx, page); 29 | dev = pdf_page_write(ctx, pdfout, mediabox, &resources, &contents); 30 | fz_run_page(ctx, page, dev, fz_identity, NULL); 31 | fz_close_device(ctx, dev); 32 | fz_drop_device(ctx, dev); 33 | dev = NULL; 34 | pdf_obj *page_obj = pdf_add_page(ctx, pdfout, mediabox, rot, resources, contents); 35 | pdf_insert_page(ctx, pdfout, -1, page_obj); 36 | pdf_drop_obj(ctx, page_obj); 37 | } 38 | fz_always(ctx) { 39 | pdf_drop_obj(ctx, resources); 40 | fz_drop_buffer(ctx, contents); 41 | fz_drop_device(ctx, dev); 42 | fz_drop_page(ctx, page); 43 | } 44 | fz_catch(ctx) { 45 | fz_rethrow(ctx); 46 | } 47 | } 48 | // PDF created - now write it to Python bytearray 49 | PyObject *r = NULL; 50 | fz_output *out = NULL; 51 | fz_buffer *res = NULL; 52 | // prepare write options structure 53 | pdf_write_options opts = { 0 }; 54 | opts.do_garbage = 4; 55 | opts.do_compress = 1; 56 | opts.do_compress_images = 1; 57 | opts.do_compress_fonts = 1; 58 | opts.do_sanitize = 1; 59 | opts.do_incremental = 0; 60 | opts.do_ascii = 0; 61 | opts.do_decompress = 0; 62 | opts.do_linear = 0; 63 | opts.do_clean = 1; 64 | opts.do_pretty = 0; 65 | 66 | fz_try(ctx) { 67 | res = fz_new_buffer(ctx, 8192); 68 | out = fz_new_output_with_buffer(ctx, res); 69 | pdf_write_document(ctx, pdfout, out, &opts); 70 | unsigned char *c = NULL; 71 | size_t len = fz_buffer_storage(gctx, res, &c); 72 | r = PyBytes_FromStringAndSize((const char *) c, (Py_ssize_t) len); 73 | } 74 | fz_always(ctx) { 75 | pdf_drop_document(ctx, pdfout); 76 | fz_drop_output(ctx, 
out); 77 | fz_drop_buffer(ctx, res); 78 | } 79 | fz_catch(ctx) { 80 | fz_rethrow(ctx); 81 | } 82 | return r; 83 | } 84 | %} 85 | -------------------------------------------------------------------------------- /fitz/helper-geo-c.i: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | //----------------------------------------------------------------------------- 4 | // Functions converting betwenn PySequences and fitz geometry objects 5 | //----------------------------------------------------------------------------- 6 | static int 7 | JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result) 8 | { 9 | PyObject *temp = PySequence_ITEM(obj, idx); 10 | if (!temp) return 1; 11 | *result = (int) PyLong_AsLong(temp); 12 | Py_DECREF(temp); 13 | if (PyErr_Occurred()) { 14 | PyErr_Clear(); 15 | return 1; 16 | } 17 | return 0; 18 | } 19 | 20 | static int 21 | JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, float *result) 22 | { 23 | PyObject *temp = PySequence_ITEM(obj, idx); 24 | if (!temp) return 1; 25 | *result = (float) PyFloat_AsDouble(temp); 26 | Py_DECREF(temp); 27 | if (PyErr_Occurred()) { 28 | PyErr_Clear(); 29 | return 1; 30 | } 31 | return 0; 32 | } 33 | 34 | //----------------------------------------------------------------------------- 35 | // PySequence to fz_rect. 
Default: infinite rect 36 | //----------------------------------------------------------------------------- 37 | static fz_rect 38 | JM_rect_from_py(PyObject *r) 39 | { 40 | if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) 41 | return fz_infinite_rect; 42 | Py_ssize_t i; 43 | float f[4]; 44 | 45 | for (i = 0; i < 4; i++) 46 | if (JM_FLOAT_ITEM(r, i, &f[i]) == 1) return fz_infinite_rect; 47 | 48 | return fz_make_rect(f[0], f[1], f[2], f[3]); 49 | } 50 | 51 | //----------------------------------------------------------------------------- 52 | // PySequence from fz_rect 53 | //----------------------------------------------------------------------------- 54 | static PyObject * 55 | JM_py_from_rect(fz_rect r) 56 | { 57 | return Py_BuildValue("ffff", r.x0, r.y0, r.x1, r.y1); 58 | } 59 | 60 | //----------------------------------------------------------------------------- 61 | // PySequence to fz_irect. Default: infinite irect 62 | //----------------------------------------------------------------------------- 63 | static fz_irect 64 | JM_irect_from_py(PyObject *r) 65 | { 66 | if (!PySequence_Check(r) || PySequence_Size(r) != 4) 67 | return fz_infinite_irect; 68 | int x[4]; 69 | Py_ssize_t i; 70 | 71 | for (i = 0; i < 4; i++) 72 | if (JM_INT_ITEM(r, i, &x[i]) == 1) return fz_infinite_irect; 73 | 74 | return fz_make_irect(x[0], x[1], x[2], x[3]); 75 | } 76 | 77 | //----------------------------------------------------------------------------- 78 | // PySequence from fz_irect 79 | //----------------------------------------------------------------------------- 80 | static PyObject * 81 | JM_py_from_irect(fz_irect r) 82 | { 83 | return Py_BuildValue("iiii", r.x0, r.y0, r.x1, r.y1); 84 | } 85 | 86 | 87 | //----------------------------------------------------------------------------- 88 | // PySequence to fz_point. 
Default: (0, 0) 89 | //----------------------------------------------------------------------------- 90 | static fz_point 91 | JM_point_from_py(PyObject *p) 92 | { 93 | fz_point p0 = fz_make_point(0, 0); 94 | float x, y; 95 | 96 | if (!p || !PySequence_Check(p) || PySequence_Size(p) != 2) 97 | return p0; 98 | 99 | if (JM_FLOAT_ITEM(p, 0, &x) == 1) return p0; 100 | if (JM_FLOAT_ITEM(p, 1, &y) == 1) return p0; 101 | 102 | return fz_make_point(x, y); 103 | } 104 | 105 | //----------------------------------------------------------------------------- 106 | // PySequence from fz_point 107 | //----------------------------------------------------------------------------- 108 | static PyObject * 109 | JM_py_from_point(fz_point p) 110 | { 111 | return Py_BuildValue("ff", p.x, p.y); 112 | } 113 | 114 | 115 | //----------------------------------------------------------------------------- 116 | // PySequence to fz_matrix. Default: fz_identity 117 | //----------------------------------------------------------------------------- 118 | static fz_matrix 119 | JM_matrix_from_py(PyObject *m) 120 | { 121 | Py_ssize_t i; 122 | float a[6]; 123 | 124 | if (!m || !PySequence_Check(m) || PySequence_Size(m) != 6) 125 | return fz_identity; 126 | 127 | for (i = 0; i < 6; i++) 128 | if (JM_FLOAT_ITEM(m, i, &a[i]) == 1) return fz_identity; 129 | 130 | return fz_make_matrix(a[0], a[1], a[2], a[3], a[4], a[5]); 131 | } 132 | 133 | //----------------------------------------------------------------------------- 134 | // PySequence from fz_matrix 135 | //----------------------------------------------------------------------------- 136 | static PyObject * 137 | JM_py_from_matrix(fz_matrix m) 138 | { 139 | return Py_BuildValue("ffffff", m.a, m.b, m.c, m.d, m.e, m.f); 140 | } 141 | 142 | //----------------------------------------------------------------------------- 143 | // fz_quad from PySequence. Four floats are treated as rect. 144 | // Else must be four pairs of floats. 
145 | //----------------------------------------------------------------------------- 146 | static fz_quad 147 | JM_quad_from_py(PyObject *r) 148 | { 149 | fz_quad q = fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0); 150 | fz_point p[4]; 151 | float test; 152 | Py_ssize_t i; 153 | PyObject *obj = NULL; 154 | 155 | if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4) 156 | return q; 157 | 158 | if (JM_FLOAT_ITEM(r, 0, &test) == 0) 159 | return fz_quad_from_rect(JM_rect_from_py(r)); 160 | 161 | for (i = 0; i < 4; i++) { 162 | obj = PySequence_ITEM(r, i); // next point item 163 | if (!obj || !PySequence_Check(obj) || PySequence_Size(obj) != 2) 164 | goto exit_result; // invalid: cancel the rest 165 | 166 | if (JM_FLOAT_ITEM(obj, 0, &p[i].x) == 1) goto exit_result; 167 | if (JM_FLOAT_ITEM(obj, 1, &p[i].y) == 1) goto exit_result; 168 | 169 | Py_CLEAR(obj); 170 | } 171 | q.ul = p[0]; 172 | q.ur = p[1]; 173 | q.ll = p[2]; 174 | q.lr = p[3]; 175 | return q; 176 | 177 | exit_result:; 178 | Py_CLEAR(obj); 179 | return q; 180 | } 181 | 182 | //----------------------------------------------------------------------------- 183 | // PySequence from fz_quad. 
184 | //----------------------------------------------------------------------------- 185 | static PyObject * 186 | JM_py_from_quad(fz_quad quad) 187 | { 188 | PyObject *pquad = PyTuple_New(4); 189 | PyTuple_SET_ITEM(pquad, 0, JM_py_from_point(quad.ul)); 190 | PyTuple_SET_ITEM(pquad, 1, JM_py_from_point(quad.ur)); 191 | PyTuple_SET_ITEM(pquad, 2, JM_py_from_point(quad.ll)); 192 | PyTuple_SET_ITEM(pquad, 3, JM_py_from_point(quad.lr)); 193 | return pquad; 194 | } 195 | 196 | %} 197 | -------------------------------------------------------------------------------- /fitz/helper-portfolio.i: -------------------------------------------------------------------------------- 1 | %{ 2 | //----------------------------------------------------------------------------- 3 | // perform some cleaning if we have /EmbeddedFiles: 4 | // (1) remove any /Limits if /Names exists 5 | // (2) remove any empty /Collection 6 | // (3) set /PageMode/UseAttachments 7 | //----------------------------------------------------------------------------- 8 | void JM_embedded_clean(fz_context *ctx, pdf_document *pdf) 9 | { 10 | pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, pdf), PDF_NAME(Root)); 11 | 12 | // remove any empty /Collection entry 13 | pdf_obj *coll = pdf_dict_get(ctx, root, PDF_NAME(Collection)); 14 | if (coll && pdf_dict_len(ctx, coll) == 0) 15 | pdf_dict_del(ctx, root, PDF_NAME(Collection)); 16 | 17 | pdf_obj *efiles = pdf_dict_getl(ctx, root, 18 | PDF_NAME(Names), 19 | PDF_NAME(EmbeddedFiles), 20 | PDF_NAME(Names), 21 | NULL); 22 | if (efiles) { 23 | pdf_dict_put_name(ctx, root, PDF_NAME(PageMode), "UseAttachments"); 24 | } 25 | return; 26 | } 27 | 28 | //----------------------------------------------------------------------------- 29 | // embed a new file in a PDF (not only /EmbeddedFiles entries) 30 | //----------------------------------------------------------------------------- 31 | pdf_obj *JM_embed_file(fz_context *ctx, 32 | pdf_document *pdf, 33 | fz_buffer *buf, 34 | char 
*filename, 35 | char *ufilename, 36 | char *desc, 37 | int compress) 38 | { 39 | size_t len = 0; 40 | pdf_obj *ef, *f, *params, *val = NULL; 41 | fz_var(val); 42 | fz_try(ctx) { 43 | val = pdf_new_dict(ctx, pdf, 6); 44 | pdf_dict_put_dict(ctx, val, PDF_NAME(CI), 4); 45 | ef = pdf_dict_put_dict(ctx, val, PDF_NAME(EF), 4); 46 | pdf_dict_put_text_string(ctx, val, PDF_NAME(F), filename); 47 | pdf_dict_put_text_string(ctx, val, PDF_NAME(UF), ufilename); 48 | pdf_dict_put_text_string(ctx, val, PDF_NAME(Desc), desc); 49 | pdf_dict_put(ctx, val, PDF_NAME(Type), PDF_NAME(Filespec)); 50 | f = pdf_add_stream(ctx, pdf, 51 | fz_new_buffer_from_copied_data(ctx, " ", 1), 52 | NULL, 0); 53 | pdf_dict_put_drop(ctx, ef, PDF_NAME(F), f); 54 | JM_update_stream(ctx, pdf, f, buf, compress); 55 | len = fz_buffer_storage(ctx, buf, NULL); 56 | pdf_dict_put_int(ctx, f, PDF_NAME(DL), len); 57 | pdf_dict_put_int(ctx, f, PDF_NAME(Length), len); 58 | params = pdf_dict_put_dict(ctx, f, PDF_NAME(Params), 4); 59 | pdf_dict_put_int(ctx, params, PDF_NAME(Size), len); 60 | } 61 | fz_catch(ctx) { 62 | fz_rethrow(ctx); 63 | } 64 | return val; 65 | } 66 | %} 67 | -------------------------------------------------------------------------------- /fitz/helper-xobject.i: -------------------------------------------------------------------------------- 1 | %{ 2 | //----------------------------------------------------------------------------- 3 | // Read and concatenate a PDF page's /Conents object(s) in a buffer 4 | //----------------------------------------------------------------------------- 5 | fz_buffer *JM_read_contents(fz_context * ctx, pdf_obj * pageref) 6 | { 7 | fz_buffer *res = NULL, *nres = NULL; 8 | int i; 9 | fz_try(ctx) { 10 | pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents)); 11 | if (pdf_is_array(ctx, contents)) { 12 | res = fz_new_buffer(ctx, 1024); 13 | for (i = 0; i < pdf_array_len(ctx, contents); i++) { 14 | nres = pdf_load_stream(ctx, pdf_array_get(ctx, contents, i)); 15 
| fz_append_buffer(ctx, res, nres); 16 | fz_drop_buffer(ctx, nres); 17 | } 18 | } 19 | else if (contents) { 20 | res = pdf_load_stream(ctx, contents); 21 | } 22 | } 23 | fz_catch(ctx) { 24 | fz_rethrow(ctx); 25 | } 26 | return res; 27 | } 28 | 29 | //----------------------------------------------------------------------------- 30 | // Make an XObject from a PDF page 31 | // For a positive xref assume that that object can be used instead 32 | //----------------------------------------------------------------------------- 33 | pdf_obj *JM_xobject_from_page(fz_context * ctx, pdf_document * pdfout, fz_page * fsrcpage, int xref, pdf_graft_map *gmap) 34 | { 35 | fz_buffer *res = NULL; 36 | pdf_obj *xobj1, *resources = NULL, *o, *spageref; 37 | fz_rect mediabox; 38 | 39 | fz_try(ctx) { 40 | pdf_page *srcpage = pdf_page_from_fz_page(ctx, fsrcpage); 41 | spageref = srcpage->obj; 42 | mediabox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(MediaBox))); 43 | 44 | if (xref > 0) { 45 | xobj1 = pdf_new_indirect(ctx, pdfout, xref, 0); 46 | } 47 | else { 48 | // Deep-copy resources object of source page 49 | o = pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(Resources)); 50 | if (gmap) // use graftmap when possible 51 | resources = pdf_graft_mapped_object(ctx, gmap, o); 52 | else 53 | resources = pdf_graft_object(ctx, pdfout, o); 54 | 55 | // get spgage contents source 56 | res = JM_read_contents(ctx, spageref); 57 | 58 | //------------------------------------------------------------- 59 | // create XObject representing the source page 60 | //------------------------------------------------------------- 61 | xobj1 = pdf_new_xobject(ctx, pdfout, mediabox, fz_identity, NULL, res); 62 | // store spage contents 63 | JM_update_stream(ctx, pdfout, xobj1, res, 1); 64 | fz_drop_buffer(ctx, res); 65 | 66 | // store spage resources 67 | pdf_dict_put_drop(ctx, xobj1, PDF_NAME(Resources), resources); 68 | } 69 | } 70 | fz_catch(ctx) { 71 | fz_rethrow(ctx); 72 | } 73 | 
return xobj1; 74 | } 75 | 76 | //----------------------------------------------------------------------------- 77 | // Insert a buffer as a new separate /Contents object of a page. 78 | // 1. Create a new stream object from buffer 'newcont' 79 | // 2. If /Contents already is an array, then just prepend or append this object 80 | // 3. Else, create new array and put old content obj and this object into it. 81 | // If the page had no /Contents before, just create a 1-item array. 82 | //----------------------------------------------------------------------------- 83 | int JM_insert_contents(fz_context * ctx, pdf_document * pdf, 84 | pdf_obj * pageref, fz_buffer * newcont, int overlay) 85 | { 86 | int xref = 0; 87 | fz_try(ctx) { 88 | pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents)); 89 | pdf_obj *newconts = pdf_add_stream(ctx, pdf, newcont, NULL, 0); 90 | xref = pdf_to_num(ctx, newconts); 91 | if (pdf_is_array(ctx, contents)) { 92 | if (overlay) // append new object 93 | pdf_array_push(ctx, contents, newconts); 94 | else // prepend new object 95 | pdf_array_insert(ctx, contents, newconts, 0); 96 | } 97 | else { 98 | pdf_obj *carr = pdf_new_array(ctx, pdf, 5); 99 | if (overlay) { 100 | if (contents) 101 | pdf_array_push(ctx, carr, contents); 102 | pdf_array_push(ctx, carr, newconts); 103 | } 104 | else { 105 | pdf_array_push_drop(ctx, carr, newconts); 106 | if (contents) 107 | pdf_array_push(ctx, carr, contents); 108 | } 109 | pdf_dict_put(ctx, pageref, PDF_NAME(Contents), carr); 110 | } 111 | } 112 | fz_catch(ctx) { 113 | fz_rethrow(ctx); 114 | } 115 | return xref; 116 | } 117 | 118 | static PyObject *img_info = NULL; 119 | 120 | static fz_image * 121 | JM_image_filter(fz_context * ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image) 122 | { 123 | fz_quad q = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm); 124 | PyObject *q_py = JM_py_from_quad(q); 125 | PyList_Append(img_info, Py_BuildValue("sO", name, q_py)); 126 | 
Py_DECREF(q_py); 127 | return NULL; 128 | } 129 | 130 | void 131 | JM_filter_content_stream( 132 | fz_context * ctx, 133 | pdf_document * doc, 134 | pdf_obj * in_stm, 135 | pdf_obj * in_res, 136 | fz_matrix transform, 137 | pdf_filter_options * filter, 138 | int struct_parents, 139 | fz_buffer **out_buf, 140 | pdf_obj **out_res) 141 | { 142 | pdf_processor *proc_buffer = NULL; 143 | pdf_processor *proc_filter = NULL; 144 | 145 | fz_var(proc_buffer); 146 | fz_var(proc_filter); 147 | 148 | *out_buf = NULL; 149 | *out_res = NULL; 150 | 151 | fz_try(ctx) { 152 | *out_buf = fz_new_buffer(ctx, 1024); 153 | proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, filter->ascii); 154 | if (filter->sanitize) { 155 | *out_res = pdf_new_dict(ctx, doc, 1); 156 | proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, in_res, *out_res, struct_parents, transform, filter); 157 | pdf_process_contents(ctx, proc_filter, doc, in_res, in_stm, NULL); 158 | pdf_close_processor(ctx, proc_filter); 159 | } 160 | else { 161 | *out_res = pdf_keep_obj(ctx, in_res); 162 | pdf_process_contents(ctx, proc_buffer, doc, in_res, in_stm, NULL); 163 | } 164 | pdf_close_processor(ctx, proc_buffer); 165 | } 166 | fz_always(ctx) { 167 | pdf_drop_processor(ctx, proc_filter); 168 | pdf_drop_processor(ctx, proc_buffer); 169 | } 170 | fz_catch(ctx) { 171 | fz_drop_buffer(ctx, *out_buf); 172 | *out_buf = NULL; 173 | pdf_drop_obj(ctx, *out_res); 174 | *out_res = NULL; 175 | fz_rethrow(ctx); 176 | } 177 | } 178 | 179 | PyObject * 180 | JM_image_reporter(fz_context *ctx, pdf_page *page) 181 | { 182 | pdf_document *doc = page->doc; 183 | pdf_filter_options filter; 184 | memset(&filter, 0, sizeof filter); 185 | filter.opaque = page; 186 | filter.text_filter = NULL; 187 | filter.image_filter = JM_image_filter; 188 | filter.end_page = NULL; 189 | filter.recurse = 0; 190 | filter.instance_forms = 1; 191 | filter.sanitize = 1; 192 | filter.ascii = 1; 193 | 194 | pdf_obj *contents, *old_res; 195 | pdf_obj 
*struct_parents_obj; 196 | pdf_obj *new_res; 197 | fz_buffer *buffer; 198 | int struct_parents; 199 | 200 | struct_parents_obj = pdf_dict_get(ctx, page->obj, PDF_NAME(StructParents)); 201 | struct_parents = -1; 202 | if (pdf_is_number(ctx, struct_parents_obj)) 203 | struct_parents = pdf_to_int(ctx, struct_parents_obj); 204 | 205 | contents = pdf_page_contents(ctx, page); 206 | old_res = pdf_page_resources(ctx, page); 207 | img_info = PyList_New(0); 208 | JM_filter_content_stream(ctx, doc, contents, old_res, fz_identity, &filter, struct_parents, &buffer, &new_res); 209 | fz_drop_buffer(ctx, buffer); 210 | pdf_drop_obj(ctx, new_res); 211 | PyObject *rc = PySequence_Tuple(img_info); 212 | Py_DECREF(img_info); 213 | img_info = NULL; 214 | return rc; 215 | } 216 | 217 | %} 218 | -------------------------------------------------------------------------------- /fitz/version.i: -------------------------------------------------------------------------------- 1 | %pythoncode %{ 2 | VersionFitz = "1.17.0" 3 | VersionBind = "1.17.4" 4 | VersionDate = "2020-07-20 18:09:40" 5 | version = (VersionBind, VersionFitz, "20200720180940") 6 | %} -------------------------------------------------------------------------------- /installation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/installation/.DS_Store -------------------------------------------------------------------------------- /installation/centos/centos_pymupdf.sh: -------------------------------------------------------------------------------- 1 | wget https://mupdf.com/downloads/mupdf-1.17.0-source.tar.gz 2 | tar -zxvf mupdf-1.17.0-source.tar.gz 3 | 4 | cd mupdf-1.17.0-source 5 | export CFLAGS="-fPIC -std=gnu99" 6 | 7 | make HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local 8 | sudo make HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local install 9 | 10 | cd .. 
# Build MuPDF 1.17.0 from source and then build/install PyMuPDF on FreeBSD.
# NOTE(review): 'setenv' is csh/tcsh syntax although the file is named *.sh;
# presumably the script is meant to be run from (t)csh - under sh use
# 'export CFLAGS=-fPIC' instead. Confirm the intended shell.
setenv CFLAGS -fPIC

# install the pre-required tool
pkg install swig30

# Ensure we have a build of the current version
wget https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz
tar -zxvf mupdf-1.17.0-source.tar.gz

# always start from a fresh PyMuPDF checkout
rm -rf PyMuPDF
git clone https://github.com/pymupdf/PyMuPDF.git

cd mupdf-1.17.0-source
# replace files in mupdf source
# (PyMuPDF ships its own MuPDF build configuration header)
cp ../PyMuPDF/fitz/_config.h include/mupdf/fitz/config.h

# build and install MuPDF without the X11 / OpenGL based viewers
gmake HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local
gmake HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local install

# build and install the Python binding against the MuPDF just installed
cd ../PyMuPDF
python setup.py build
python setup.py install
"""Build script for PyMuPDF: compiles the SWIG interface against MuPDF."""
from distutils.core import setup, Extension
from distutils.command.build_py import build_py as build_py_orig
import sys
import os


# custom build_py command which runs build_ext first
# this is necessary because build_py needs the fitz.py which is only generated
# by SWIG in the build_ext step
class build_ext_first(build_py_orig):
    def run(self):
        self.run_command("build_ext")
        return super().run()


# check the platform
if sys.platform.startswith("linux"):
    module = Extension(
        "fitz._fitz",  # name of the module
        ["fitz/fitz.i"],
        include_dirs=[  # we need the path of the MuPDF headers
            "/usr/include/mupdf",
            "/usr/local/include/mupdf",
        ],
        # library_dirs=[''],
        libraries=[
            "mupdf",
            #'crypto', #openssl is required by mupdf on archlinux
            #'jbig2dec', 'openjp2', 'jpeg', 'freetype',
            "mupdf-third",
        ],  # the libraries to link with
    )
elif sys.platform.startswith(("darwin", "freebsd")):
    module = Extension(
        "fitz._fitz",  # name of the module
        ["fitz/fitz.i"],
        # directories containing mupdf's header files
        include_dirs=["/usr/local/include/mupdf", "/usr/local/include"],
        # libraries should already be linked here by brew
        library_dirs=["/usr/local/lib"],
        # library_dirs=['/usr/local/Cellar/mupdf-tools/1.8/lib/',
        #'/usr/local/Cellar/openssl/1.0.2g/lib/',
        #'/usr/local/Cellar/jpeg/8d/lib/',
        #'/usr/local/Cellar/freetype/2.6.3/lib/',
        #'/usr/local/Cellar/jbig2dec/0.12/lib/'
        # ],
        libraries=["mupdf", "mupdf-third"],
    )

else:
    # ===============================================================================
    # Build / set up PyMuPDF under Windows
    # ===============================================================================
    module = Extension(
        "fitz._fitz",
        ["fitz/fitz.i"],
        include_dirs=[  # we need the path of the MuPDF's headers
            "./mupdf/include",
            "./mupdf/include/mupdf",
        ],
        libraries=[  # these are needed in Windows
            "libmupdf",
            "libresources",
            "libthirdparty",
        ],
        extra_link_args=["/NODEFAULTLIB:MSVCRT"],
        # x86 dir of libmupdf.lib etc.
        library_dirs=["./mupdf/platform/win32/Release"],
        # x64 dir of libmupdf.lib etc.
        # library_dirs=['./mupdf/platform/win32/x64/Release'],
    )

# Derive the trove classifiers and the long description from PKG-INFO:
# "Classifier: " lines supply the classifiers, lines starting with a blank
# are collected (stripped) as the long description.
with open("PKG-INFO") as pkg_info:  # make sure the handle is closed again
    pkg_tab = pkg_info.read().split("\n")
long_dtab = []
classifier = []
for line in pkg_tab:
    if line.startswith("Classifier: "):
        classifier.append(line[12:])  # text after the "Classifier: " prefix
        continue
    if line.startswith(" "):
        long_dtab.append(line.strip())
long_desc = "\n".join(long_dtab)

setup(
    name="PyMuPDF",
    version="1.17.4",
    description="Python bindings for the PDF rendering library MuPDF",
    long_description=long_desc,
    classifiers=classifier,
    url="https://github.com/pymupdf/PyMuPDF",
    author="Jorj McKie, Ruikai Liu",
    author_email="jorj.x.mckie@outlook.de",
    cmdclass={"build_py": build_ext_first},
    ext_modules=[module],
    py_modules=["fitz.fitz", "fitz.utils", "fitz.__main__"],
)