├── .github
└── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── feature_request.md
│ └── general-purpose.md
├── .gitignore
├── .vs
├── ProjectSettings.json
├── PyMuPDF
│ └── v15
│ │ ├── .suo
│ │ └── Browse.VC.db
├── VSWorkspaceState.json
└── slnx.sqlite
├── COPYING
├── GNU AFFERO GPL V3
├── PKG-INFO
├── README.md
├── debian
├── changelog
├── control
├── copyright
├── patches
│ ├── docs
│ ├── fiximport
│ ├── libs
│ └── series
├── python3-fitz.lintian-overrides
├── rules
├── salsa-ci.yml
├── source
│ └── format
├── tests
│ └── control
└── watch
├── demo
└── pymupdf.jpg
├── docs
├── PyMuPDF.ico
├── algebra.rst
├── annot.rst
├── app1.rst
├── app2.rst
├── app3.rst
├── app4.rst
├── changes.rst
├── classes.rst
├── colors.rst
├── colorspace.rst
├── conf.py
├── coop_low.rst
├── device.rst
├── displaylist.rst
├── document.rst
├── faq.rst
├── font.rst
├── functions.rst
├── glossary.rst
├── identity.rst
├── images
│ ├── img-4up.png
│ ├── img-7edges.png
│ ├── img-a-is--1.png
│ ├── img-adobe.png
│ ├── img-alpha-0.png
│ ├── img-alpha-1.png
│ ├── img-annots.jpg
│ ├── img-attach-result.jpg
│ ├── img-b-is-0.5.png
│ ├── img-binsetupdirs.png
│ ├── img-breadth.png
│ ├── img-c-is-0.5.png
│ ├── img-cake.png
│ ├── img-caret-annot.jpg
│ ├── img-circle.png
│ ├── img-clip.jpg
│ ├── img-colordb.png
│ ├── img-copy-speed-1.png
│ ├── img-copy-speed-2.png
│ ├── img-d-is--1.png
│ ├── img-drawBezier.png
│ ├── img-drawCurve.png
│ ├── img-drawSector1.png
│ ├── img-drawSector2.png
│ ├── img-drawcircle.jpg
│ ├── img-drawquad.jpg
│ ├── img-e-is-100.png
│ ├── img-embed-progress.jpg
│ ├── img-encoding.jpg
│ ├── img-encrypting.jpg
│ ├── img-even-odd.png
│ ├── img-extract-imga.jpg
│ ├── img-extract-imgb.jpg
│ ├── img-f-is-100.png
│ ├── img-filesizes.png
│ ├── img-freetext.jpg
│ ├── img-import-progress.jpg
│ ├── img-inkannot.jpg
│ ├── img-inserttext.jpg
│ ├── img-markedpdf.jpg
│ ├── img-markers.jpg
│ ├── img-matrix.png
│ ├── img-opacity.jpg
│ ├── img-original.png
│ ├── img-pdfjoiner.jpg
│ ├── img-pdftext.jpg
│ ├── img-planish.png
│ ├── img-point-unit.jpg
│ ├── img-polyline.png
│ ├── img-posterize.png
│ ├── img-pymupdf.jpg
│ ├── img-quads.jpg
│ ├── img-redact.jpg
│ ├── img-render-speed.png
│ ├── img-rendermode.jpg
│ ├── img-rot+morph.png
│ ├── img-rot-60.png
│ ├── img-rotate.png
│ ├── img-showpdfpage.jpg
│ ├── img-sierpinski.png
│ ├── img-squiggly.png
│ ├── img-stampannot.jpg
│ ├── img-stencil.jpg
│ ├── img-symbols.jpg
│ ├── img-target.png
│ ├── img-textbox.jpg
│ ├── img-textboxtract.png
│ ├── img-textmarker.jpg
│ ├── img-textmethods.png
│ ├── img-textpage-char.png
│ ├── img-textpage.png
│ ├── img-textperformance.png
│ ├── img-timings.png
│ ├── img-writeimage.png
│ └── mupdf-icons.jpg
├── index.rst
├── installation.rst
├── intro.rst
├── irect.rst
├── kerning.style
├── link.rst
├── linkdest.rst
├── lowlevel.rst
├── make-bold.py
├── matrix.rst
├── module.rst
├── multiprocess-gui.py
├── multiprocess-render.py
├── new-annots.py
├── outline.rst
├── page.rst
├── pixmap.rst
├── point.rst
├── pymupdf-logo.jpg
├── quad.rst
├── rect.rst
├── replace-fonts.py
├── shape.rst
├── text-lister.py
├── textpage.rst
├── textwriter.rst
├── tools.rst
├── tutorial.rst
├── vars.rst
├── version.rst
├── wheelnames.txt
└── widget.rst
├── fitz
├── __init__.py
├── __main__.py
├── fitz.i
├── helper-annot.i
├── helper-convert.i
├── helper-defines.i
├── helper-fields.i
├── helper-geo-c.i
├── helper-geo-py.i
├── helper-other.i
├── helper-pdfinfo.i
├── helper-pixmap.i
├── helper-portfolio.i
├── helper-python.i
├── helper-select.i
├── helper-stext.i
├── helper-xobject.i
├── utils.py
└── version.i
├── installation
├── .DS_Store
├── centos
│ └── centos_pymupdf.sh
├── freebsd
│ └── freebsd_pymupdf.sh
└── ubuntu
│ └── ubuntu_pymupdf.sh
└── setup.py
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: JorjMcKie
7 |
8 | ---
9 |
10 | _**Please provide all mandatory information!**_
11 |
12 | ## Describe the bug (mandatory)
13 | A clear and concise description of what the bug is.
14 |
15 | ## To Reproduce (mandatory)
16 | Explain the steps to reproduce the behavior. For example, include a minimal code snippet, example files, etc.
17 |
18 | ## Expected behavior (optional)
19 | Describe what you expected to happen (if not obvious).
20 |
21 | ## Screenshots (optional)
22 | If applicable, add screenshots to help explain your problem.
23 |
24 | ## Your configuration (mandatory)
25 | - Operating system, potentially version and bitness
26 | - Python version, bitness
27 | - PyMuPDF version, installation method (**wheel** or **generated** from source).
28 |
29 | For example, the output of `print(sys.version, "\n", sys.platform, "\n", fitz.__doc__)` would be sufficient (for the first two bullets).
30 |
31 | ## Additional context (optional)
32 | Add any other context about the problem here.
33 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: enhancement
6 | assignees: JorjMcKie
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Potentially add an issue reference.
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | Are there several options for how your request could be met?
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/general-purpose.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: General Purpose
3 | about: Use this form for questions, comments, etc.
4 | title: 'Question / Comment:'
5 | labels: question
6 | assignees: JorjMcKie
7 |
8 | ---
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.so
3 | *.o
4 | *.swp
5 | build/
6 | demo/README.rst
7 |
--------------------------------------------------------------------------------
/.vs/ProjectSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "CurrentProjectSetting": "Keine Konfigurationen"
3 | }
--------------------------------------------------------------------------------
/.vs/PyMuPDF/v15/.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/.vs/PyMuPDF/v15/.suo
--------------------------------------------------------------------------------
/.vs/PyMuPDF/v15/Browse.VC.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/.vs/PyMuPDF/v15/Browse.VC.db
--------------------------------------------------------------------------------
/.vs/VSWorkspaceState.json:
--------------------------------------------------------------------------------
1 | {
2 | "ExpandedNodes": [
3 | ""
4 | ],
5 | "SelectedNode": "\\README.md",
6 | "PreviewInSolutionExplorer": false
7 | }
--------------------------------------------------------------------------------
/.vs/slnx.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/.vs/slnx.sqlite
--------------------------------------------------------------------------------
/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.1
2 | Name: PyMuPDF
3 | Version: 1.17.4
4 | Author: Ruikai Liu
5 | Author-email: lrk700@gmail.com
6 | Maintainer: Jorj X. McKie
7 | Maintainer-email: jorj.x.mckie@outlook.de
8 | Home-page: https://github.com/pymupdf/PyMuPDF
9 | Download-url: https://github.com/pymupdf/PyMuPDF
10 | Summary: PyMuPDF is a Python binding for the PDF rendering library MuPDF
11 | Description:
12 | Release date: July 31, 2020
13 |
14 | Authors
15 | =======
16 |
17 | * Jorj X. McKie
18 | * Ruikai Liu
19 |
20 | Introduction
21 | ============
22 |
23 | This is **version 1.17.4 of PyMuPDF**, a Python binding for `MuPDF `_ - "a lightweight PDF and XPS viewer".
24 |
25 | MuPDF can access files in PDF, XPS, OpenXPS, epub, comic and fiction book formats, and it is known for both, its top performance and high rendering quality.
26 |
27 | With PyMuPDF you therefore can access files with extensions ``*.pdf``, ``*.xps``, ``*.oxps``, ``*.epub``, ``*.cbz`` or ``*.fb2`` from your Python scripts. A number of popular image formats is supported as well, including multi-page TIFF images.
28 |
29 | PyMuPDF should run on all platforms that are supported by both, MuPDF and Python. These include, but are not limited to, Windows (XP/SP2 and up), Mac OSX and Linux, 32-bit or 64-bit. If you can generate MuPDF on a Python supported platform, then also PyMuPDF can be used there.
30 |
31 | PyMuPDF is hosted on `GitHub `_ where you find up-to-date information of its features, our `issue tracker `_, `Wikis `_ and much more.
32 |
33 | Installation
34 | ============
35 |
36 | For all MS Windows versions as well as popular Mac OSX and Linux versions, we are providing Python wheels - see the download section of this site and the current `release directory `_ of our home page. Other platforms need to download and generate the MuPDF library first and then set up PyMuPDF. Do visit our GitHub home, which has more details on this, including latest bugfixes, pre-releases, etc.
37 |
38 | Usage and Documentation
39 | ========================
40 |
41 | For all document types you can render pages in raster (PNG) or vector (SVG) formats, extract text and access meta information, links, annotations and bookmarks, as well as decrypt the document. For PDF files, these objects can also be created, modified or deleted. Plus you can rotate, re-arrange, duplicate, create, or delete pages and join or split documents.
42 |
43 | Starting with version 1.16.0, PDF password protection is **fully supported**: passwords, encryption methods and permission levels can be set, changed or removed.
44 |
45 | Specifically for PDF files, PyMuPDF provides update access to low-level structure information, supports handling of embedded files and modification of page contents (like inserting images, fonts, text, annotations and drawings).
46 |
47 | Other features include embedding vector images (SVG, PDF) such as logos or watermarks, joining or splitting single PDF pages (including things like posterizing and 2-up / 4-up processing).
48 |
49 | You can also create **PDF Form fields** with support for text, checkbox, listbox and combobox widgets.
50 |
51 | Our home page provides many examples and How-Tos for all of this. At a minimum, read the tutorial and the recipes sections of our documentation.
52 |
53 | Written using **Sphinx**, documentation is available here:
54 |
55 | * View it online at `Read The Docs `_. For **best quality downloads**, use the following links.
56 |
57 | * `HTML `_
58 |
59 | * `Windows CHM `_
60 |
61 | * `PDF `_
62 |
63 |
64 | Classifier: Development Status :: 5 - Production/Stable
65 | Classifier: Environment :: Console
66 | Classifier: Intended Audience :: Developers
67 | Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
68 | Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
69 | Classifier: Operating System :: MacOS
70 | Classifier: Operating System :: Microsoft :: Windows
71 | Classifier: Operating System :: POSIX :: Linux
72 | Classifier: Programming Language :: C
73 | Classifier: Programming Language :: Python :: 2.7
74 | Classifier: Programming Language :: Python :: 3
75 | Classifier: Programming Language :: Python :: 3.4
76 | Classifier: Programming Language :: Python :: 3.5
77 | Classifier: Programming Language :: Python :: 3.6
78 | Classifier: Programming Language :: Python :: 3.7
79 | Classifier: Programming Language :: Python :: 3.8
80 | Classifier: Topic :: Utilities
81 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyMuPDF 1.17.4
2 |
3 | 
4 |
5 | Release date: July 31, 2020
6 |
7 | **Travis-CI:** [](https://travis-ci.org/JorjMcKie/py-mupdf)
8 |
9 | On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [](https://pepy.tech/project/pymupdf)
10 |
11 | # Authors
12 | * [Jorj X. McKie](mailto:jorj.x.mckie@outlook.de)
13 | * [Ruikai Liu](mailto:lrk700@gmail.com)
14 |
15 | # Introduction
16 |
17 | This is **version 1.17.4 of PyMuPDF**, a Python binding with support for [MuPDF 1.17.*](http://mupdf.com/) - "a lightweight PDF, XPS, and E-book viewer".
18 |
19 | MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality.
20 |
21 | With PyMuPDF you can access files with extensions like ".pdf", ".xps", ".oxps", ".cbz", ".fb2" or ".epub". In addition, about 10 popular image formats can also be opened and handled like documents.
22 |
23 |
24 | # Usage and Documentation
25 | For all supported document types (i.e. **_including images_**) you can
26 | * decrypt the document
27 | * access meta information, links and bookmarks
28 | * render pages in raster formats (PNG and some others), or the vector format SVG
29 | * search for text
30 | * extract text and images
31 | * convert to other formats: PDF, (X)HTML, XML, JSON, text
32 |
33 | > To some degree, PyMuPDF can therefore be used as an [image converter](https://github.com/pymupdf/PyMuPDF/wiki/How-to-Convert-Images): it can read a range of input formats and can produce **Portable Network Graphics (PNG)**, **Portable Anymaps** (**PNM**, etc.), **Portable Arbitrary Maps (PAM)**, **Adobe Postscript** and **Adobe Photoshop** documents, making the use of other graphics packages obsolete in these cases. But interfacing with e.g. PIL/Pillow for image input and output is easy as well.
34 |
35 | **PDF documents** can be created, joined or split up. Pages can be inserted, deleted, re-arranged or modified in many ways (including annotations and form fields).
36 |
37 | * Images and fonts can be extracted or inserted.
38 | * Embedded files are fully supported.
39 | * PDFs can be reformatted to support double-sided printing, posterizing, applying logos or watermarks
40 | * Password protection is fully supported: decryption, encryption, encryption method selection, permission level and user / owner password setting.
41 | * Low-level PDF structures can be accessed and modified.
42 | * PyMuPDF can also be used as a **module in the command line** using ``"python -m fitz ..."``. This is a versatile utility, which we will further develop going forward. It currently supports PDF document
43 |
44 | - **encryption / decryption / optimization**
45 | - creating **sub-documents**
46 | - document **joining**
47 | - **image / font extraction**
48 | - full support of **embedded files**.
49 |
50 |
51 | Have a look at the basic [demos](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/demo), the [examples](https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/examples) (which contain complete, working programs), and the **recipes** section of our [Wiki](https://github.com/pymupdf/PyMuPDF/wiki) sidebar, which contains more than a dozen guides in How-To style.
52 |
53 | Our **documentation**, written using Sphinx, is available in various formats from the following sources. It currently is a combination of a reference guide and a user manual. For a **quick start** look at the [tutorial](https://pymupdf.readthedocs.io/en/latest/tutorial/) and the [recipes](https://pymupdf.readthedocs.io/en/latest/faq/) chapters.
54 |
55 | * You can view it online at [Read the Docs](https://readthedocs.org/projects/pymupdf/). This site also provides download options for zipped HTML and PDF.
56 | * Find a Windows help file [here](https://github.com/pymupdf/PyMuPDF-optional-material/tree/master/doc/PyMuPDF.chm).
57 |
58 |
59 | # Installation
60 |
61 | For the major **Windows** and (thanks to our user **@jbarlow83**!) **Mac OSX** or **Linux** versions we offer wheels in the [download section of PyPI](https://pypi.org/project/PyMuPDF/#files). This includes Python 2.7 and Python versions 3.5 through 3.8.
62 |
63 | For other Python versions or operating systems you need to generate PyMuPDF yourself as follows. This should work for all platforms which support Python and MuPDF. In any case you need the development version of Python.
64 |
65 | To do this, you must download and generate MuPDF. This process depends very much on your system. For most platforms, the MuPDF source contains prepared procedures for achieving this. Please observe the following general steps:
66 |
67 | * Be sure to download the official MuPDF source release from [here](https://mupdf.com/downloads/archive).
68 |
69 | * Do **not use** MuPDF's [GitHub repo](https://github.com/ArtifexSoftware/mupdf). It contains their current **development source**, which is **not compatible** with this PyMuPDF version.
70 |
71 | * This repo's `fitz` folder contains one or more files whose names start with a single underscore `"_"`. These files contain configuration data and hotfixes. Each one must be copy-renamed to its correct target location **inside the MuPDF source** that you have downloaded, **before you generate MuPDF**. Currently, these files are:
72 | - fitz configuration file `_config.h` copy-replace to: `mupdf/include/mupdf/fitz/config.h`. It contains configuration data like e.g. which fonts to support.
73 |
74 | - Now MuPDF can be generated.
75 |
76 | * Since PyMuPDF v1.14.17, the sources provided in this repository **no longer contain** the interface files ``fitz.py`` and ``fitz.wrap.c`` - they are instead generated **"on the fly"** by ``setup.py`` using the interface generator [SWIG](http://www.swig.org/). So you need SWIG to be installed on your system. Please refer to issue #312 for some background.
77 | - PyMuPDF wheels have been generated using **SWIG v4.0.1**.
78 |
79 |
80 | * If you do **not use SWIG**, please download the **sources from PyPI** - they continue to contain those generated files, so installation should work like any other Python extension generation on your system.
81 |
82 | Once this is done, adjust directories in ``setup.py`` and run ``python setup.py install``.
83 |
84 | The following sections contain further comments for some platforms.
85 |
86 | ## Ubuntu
87 | Our users (thanks to **@gileadslostson** and **@jbarlow83**!) have documented their MuPDF installation experiences from sources in this [Wiki page](https://github.com/pymupdf/PyMuPDF/wiki/Ubuntu-Installation-Experience).
88 |
89 | ## OSX
90 | First, install the MuPDF headers and libraries, which are provided by mupdf-tools: ``brew install mupdf-tools``.
91 |
92 | Then you might need to ``export ARCHFLAGS='-arch x86_64'``, since ``libmupdf.a`` is for x86_64 only.
93 |
94 | Finally, please double check ``setup.py`` before building. Update ``include_dirs`` and ``library_dirs`` if necessary.
95 |
96 | ## MS Windows
97 | If you are looking to make your own binary, consult this [Wiki page](https://github.com/pymupdf/PyMuPDF/wiki/Windows-Binaries-Generation). It explains how to use Visual Studio for generating MuPDF in quite some detail.
98 |
99 | # Earlier Versions
100 | Earlier versions are available in the [releases](https://github.com/pymupdf/PyMuPDF/releases) directory.
101 |
102 | # License
103 | PyMuPDF is distributed under GNU GPL V3. Because you will implicitly also be using MuPDF, its license GNU AFFERO GPL V3 applies as well. Copies of both are included in this repository.
104 |
105 | # Contact
106 | Please submit questions, comments or issues [here](https://github.com/pymupdf/PyMuPDF/issues), or directly contact the authors via their e-mail addresses.
107 |
--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | pymupdf (1.17.4+ds1-1~np1) unstable; urgency=medium
2 |
3 | [ Bastian Germann ]
4 | * Use debhelper provided python3:Provides
5 | * Set Built-Using according to Policy 7.8
6 | * Update upstream copyright info
7 | * New upstream version 1.16.17+ds1
8 | * Refresh docs patch
9 |
10 | [ Norbert Preining ]
11 | * New upstream releases.
12 | * Bump B-D of libmupdf-dev
13 |
14 | -- Norbert Preining Wed, 29 Jul 2020 12:48:09 +0900
15 |
16 | pymupdf (1.16.11-1) unstable; urgency=medium
17 |
18 | [ Johannes 'josch' Schauer ]
19 | * New upstream version 1.16.11 (closes: #950639)
20 | * Bump Standards-Version to 4.5.0
21 | * add debian/salsa-ci.yml
22 | * debian/control: b-d on libpython3-all-dev instead of libpython3-dev
23 | * debian/copyright: remove unused files from Files-Excluded
24 | * add autopkgtest
25 | * add patch fiximport
26 | * debian/control: add Rules-Requires-Root: no
27 | * debian/tests/control: chdir to / to not use fitz module from unpacked
28 | sources
29 | * add debian/python3-fitz.lintian-overrides
30 | * debian/watch: add repacksuffix
31 | * debian/watch: don't run uupdate
32 |
33 | [ Bastian Germann ]
34 | * Drop non-existing examples
35 | * Exclude 1.16.11 files
36 | * Add docs patch
37 |
38 | -- Johannes 'josch' Schauer Sun, 23 Feb 2020 21:05:36 +0100
39 |
40 | pymupdf (1.14.16-1) unstable; urgency=medium
41 |
42 | * Initial release. (Closes: #930761)
43 |
44 | -- Johannes 'josch' Schauer Sat, 22 Jun 2019 04:02:32 +0200
45 |
--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
1 | Source: pymupdf
2 | Section: python
3 | Priority: optional
4 | Maintainer: Debian Python Modules Team
5 | Uploaders: Johannes 'josch' Schauer
6 | Homepage: https://github.com/pymupdf/PyMuPDF
7 | Vcs-Browser: https://salsa.debian.org/python-team/modules/pymupdf
8 | Vcs-Git: https://salsa.debian.org/python-team/modules/pymupdf.git
9 | Standards-Version: 4.5.0
10 | Build-Depends: debhelper-compat (= 12), dh-python, python3-setuptools, python3-all, libpython3-all-dev, libmupdf-dev (>= 1.17.0), libjbig2dec-dev, libjpeg-dev, libfreetype6-dev, libpng-dev, libopenjp2-7-dev, libharfbuzz-dev, swig, libmujs-dev
11 | Rules-Requires-Root: no
12 |
13 | Package: python3-fitz
14 | Architecture: any
15 | Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}
16 | Provides: ${python3:Provides}
17 | Built-Using: ${Built-Using}
18 | Description: Python binding for MuPDF
19 | Allows one to access files in PDF, XPS, OpenXPS, CBZ, EPUB, and FB2 (e-books)
20 | formats, and it is known for its top performance and high rendering quality.
21 | .
22 | PDF manipulation and generation functions are available, including metadata
23 | and bookmark maintenance, document restructuring, annotation / link handling
24 | and document or page creation.
25 |
--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
2 | Upstream-Name: PyMuPDF
3 | Upstream-Contact: Jorj X. McKie
4 | Source: https://github.com/pymupdf/PyMuPDF/
5 | Files-Excluded: docs/_static
6 | fitz/_config.h
7 |
8 | # upstream's clarification that indeed all material is GPL-3+ can be
9 | # found in this comment and the ones that follow:
10 | # https://github.com/pymupdf/PyMuPDF/issues/312#issuecomment-504641426
11 | Files: *
12 | Copyright: 2012-2018 Ruikai Liu
13 | 2015-2020 Jorj X. McKie
14 | License: GPL-3+
15 |
16 | Files: debian/*
17 | Copyright: 2019 Johannes 'josch' Schauer
18 | License: GPL-3+
19 |
20 | License: GPL-3+
21 | This program is free software; you can redistribute it
22 | and/or modify it under the terms of the GNU General Public
23 | License as published by the Free Software Foundation; either
24 | version 3 of the License, or (at your option) any later
25 | version.
26 | .
27 | This program is distributed in the hope that it will be
28 | useful, but WITHOUT ANY WARRANTY; without even the implied
29 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
30 | PURPOSE. See the GNU General Public License for more
31 | details.
32 | .
33 | You should have received a copy of the GNU General Public
34 | License along with this package; if not, write to the Free
35 | Software Foundation, Inc., 51 Franklin St, Fifth Floor,
36 | Boston, MA 02110-1301 USA
37 | .
38 | On Debian systems, the full text of the GNU General Public
39 | License version 3 can be found in the file
40 | `/usr/share/common-licenses/GPL-3'.
41 |
--------------------------------------------------------------------------------
/debian/patches/docs:
--------------------------------------------------------------------------------
1 | Description: [PATCH] Prevent docs build warnings
2 |
3 | diff --git a/docs/conf.py b/docs/conf.py
4 | index 1175edf..e726f8d 100644
5 | --- a/docs/conf.py
6 | +++ b/docs/conf.py
7 | @@ -128,12 +128,12 @@ html_theme_options = {
8 | # The name of an image file (within the static path) to use as favicon of the
9 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
10 | # pixels large.
11 | -html_favicon = "Pymupdf.ico"
12 | +html_favicon = "PyMuPDF.ico"
13 |
14 | # Add any paths that contain custom static files (such as style sheets) here,
15 | # relative to this directory. They are copied after the builtin static files,
16 | # so a file named "default.css" will overwrite the builtin "default.css".
17 | -html_static_path = ["_static"]
18 | +html_static_path = []
19 |
20 | # Add any extra paths that contain custom files (such as robots.txt or
21 | # .htaccess) here, relative to this directory. These files are copied
22 |
--------------------------------------------------------------------------------
/debian/patches/fiximport:
--------------------------------------------------------------------------------
1 | Author: Johannes 'josch' Schauer
2 | Description: add additional import statement as otherwise you'd get:
3 | Traceback (most recent call last):
4 | File "", line 1, in
5 | File "[...]/src/fitz/__init__.py", line 3, in
6 | from fitz.fitz import *
7 | ModuleNotFoundError: No module named 'fitz.fitz'
8 |
9 | --- a/fitz/__init__.py
10 | +++ b/fitz/__init__.py
11 | @@ -1,5 +1,6 @@
12 | from __future__ import absolute_import, print_function
13 | import sys
14 | +import fitz.fitz as fitz
15 | from fitz.fitz import *
16 |
17 | # define the supported colorspaces for convenience
18 |
--------------------------------------------------------------------------------
/debian/patches/libs:
--------------------------------------------------------------------------------
1 | Description: Link shared library with additional libs
2 |
3 | ---
4 | setup.py | 3 ++-
5 | 1 file changed, 2 insertions(+), 1 deletion(-)
6 |
7 | --- a/setup.py
8 | +++ b/setup.py
9 | @@ -25,7 +25,8 @@ if sys.platform.startswith("linux"):
10 | "mupdf",
11 | #'crypto', #openssl is required by mupdf on archlinux
12 | #'jbig2dec', 'openjp2', 'jpeg', 'freetype',
13 | - "mupdf-third",
14 | + #"mupdf-third",
15 | + "harfbuzz", "jbig2dec", "jpeg", "freetype", "png16", "openjp2", "mujs",
16 | ], # the libraries to link with
17 | )
18 | elif sys.platform.startswith(("darwin", "freebsd")):
19 |
--------------------------------------------------------------------------------
/debian/patches/series:
--------------------------------------------------------------------------------
1 | docs
2 | libs
3 | fiximport
4 |
--------------------------------------------------------------------------------
/debian/python3-fitz.lintian-overrides:
--------------------------------------------------------------------------------
1 | # all false positives
2 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so SyLES Styles
3 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so Yau You
4 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so moR more
5 | python3-fitz: spelling-error-in-binary usr/lib/python3/dist-packages/fitz/_fitz.cpython-*.so pres press
6 |
--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | export DEB_BUILD_MAINT_OPTIONS = hardening=+all
4 | DPKG_EXPORT_BUILDFLAGS = 1
5 | include /usr/share/dpkg/buildflags.mk
6 |
7 | override_dh_gencontrol:
8 | dh_gencontrol -- -VBuilt-Using="$(shell dpkg-query -f '$${source:Package} (= $${source:Version}), ' -W libmupdf-dev)"
9 |
10 | %:
11 | dh $@ --buildsystem=pybuild --with python3
12 |
--------------------------------------------------------------------------------
/debian/salsa-ci.yml:
--------------------------------------------------------------------------------
1 | include:
2 | - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
3 | - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
4 |
--------------------------------------------------------------------------------
/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (quilt)
2 |
--------------------------------------------------------------------------------
/debian/tests/control:
--------------------------------------------------------------------------------
1 | Test-Command: env --chdir=/ python3 -c "import fitz"
2 | Restrictions: allow-stderr
3 | Depends: python3-fitz
4 | Features: test-name=python3-fitz
5 |
--------------------------------------------------------------------------------
/debian/watch:
--------------------------------------------------------------------------------
1 | version=4
2 | opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%PyMuPDF-$1.tar.gz%,dversionmangle=s/\+ds\d*$//,repacksuffix=+ds1" \
3 | https://github.com/pymupdf/PyMuPDF/tags \
4 | (?:.*?/)?v?(\d[\d.]*)\.tar\.gz
5 |
--------------------------------------------------------------------------------
/demo/pymupdf.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/demo/pymupdf.jpg
--------------------------------------------------------------------------------
/docs/PyMuPDF.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/PyMuPDF.ico
--------------------------------------------------------------------------------
/docs/algebra.rst:
--------------------------------------------------------------------------------
1 | .. _Algebra:
2 |
3 | Operator Algebra for Geometry Objects
4 | ======================================
5 |
6 | .. highlight:: python
7 |
8 | Instances of classes :ref:`Point`, :ref:`IRect`, :ref:`Rect` and :ref:`Matrix` are collectively also called "geometry" objects.
9 |
10 | They all are special cases of Python sequences, see :ref:`SequenceTypes` for more background.
11 |
12 | We have defined operators for these classes that allow dealing with them (almost) like ordinary numbers in terms of addition, subtraction, multiplication, division, and some others.
13 |
14 | This chapter is a synopsis of what is possible.
15 |
16 | General Remarks
17 | -----------------
18 | 1. Operators can be either **binary** (i.e. involving two objects) or **unary**.
19 |
20 | 2. The resulting type of **binary** operations is either a **new object of the left operand's class** or a bool.
21 |
22 | 3. The result of **unary** operations is either a **new object** of the same class, a bool or a float.
23 |
24 | 4. The binary operators *+, -, *, /* are defined for all classes. They *roughly* do what you would expect -- **except, that the second operand ...**
25 |
26 | - may always be a number which then performs the operation on every component of the first one,
27 | - may always be a numeric sequence of the same length (2, 4 or 6) -- we call such sequences :data:`point_like`, :data:`rect_like` or :data:`matrix_like`, respectively.
28 |
29 | 5. Rectangles support additional binary operations: **intersection** (operator *"&"*), **union** (operator *"|"*) and **containment** checking.
30 |
31 | 6. Binary operators fully support in-place operations, so expressions like *"a /= b"* are valid if b is numeric or "a_like".
32 |
33 |
34 | Unary Operations
35 | ------------------
36 |
37 | =========== ===================================================================
38 | Oper. Result
39 | =========== ===================================================================
40 | bool(OBJ) is false exactly if all components of OBJ are zero
41 | abs(OBJ) the rectangle area -- equal to norm(OBJ) for the other types
42 | norm(OBJ) square root of the sum of the component squares (Euclidean norm)
43 | +OBJ new copy of OBJ
44 | -OBJ new copy of OBJ with negated components
45 | ~m inverse of matrix "m", or the null matrix if not invertible
46 | =========== ===================================================================
47 |
48 |
49 | Binary Operations
50 | ------------------
51 | For every geometry object "a" and every number "b", the operations "a ° b" and "a °= b" are always defined for the operators *+, -, *, /*. The respective operation is simply executed for each component of "a". If the **second operand is not a number**, then the following is defined:
52 |
53 | ========= =======================================================================
54 | Oper. Result
55 | ========= =======================================================================
56 | a+b, a-b component-wise execution, "b" must be "a-like".
57 | a*m, a/m "a" can be a point, rectangle or matrix, but "m" must be
58 | :data:`matrix_like`. *"a/m"* is treated as *"a*~m"* (see note below
59 | for non-invertible matrices). If "a" is a **point** or a **rectangle**,
60 | then *"a.transform(m)"* is executed. If "a" is a matrix, then
61 | matrix concatenation takes place.
62 | a&b **intersection rectangle:** "a" must be a rectangle and
63 | "b" :data:`rect_like`. Delivers the **largest rectangle**
64 | contained in both operands.
65 | a|b **union rectangle:** "a" must be a rectangle, and "b" may be
66 | :data:`point_like` or :data:`rect_like`.
67 | Delivers the **smallest rectangle** containing both operands.
68 | b in a if "b" is a number, then *"b in tuple(a)"* is returned.
69 | If "b" is :data:`point_like` or :data:`rect_like`, then "a"
70 | must be a rectangle, and *"a.contains(b)"* is returned.
71 | a == b *True* if *bool(a-b)* is *False* ("b" may be "a-like").
72 | ========= =======================================================================
73 |
74 |
75 | .. note:: Please note an important difference from ordinary arithmetic:
76 |
77 | Matrix multiplication is **not commutative**, i.e. in general we have *m*n != n*m* for two matrices. Also, there are non-zero matrices which have no inverse, for example *m = Matrix(1, 0, 1, 0, 1, 0)*. If you try to divide by any of these you will receive a *ZeroDivisionError* exception using operator *"/"*, e.g. for *fitz.Identity / m*. But if you formulate *fitz.Identity * ~m*, the result will be *fitz.Matrix()* (the null matrix).
78 |
79 | Admittedly, this represents an inconsistency, and we are considering removing it. For the time being, you can choose to avoid an exception and check whether ~m is the null matrix, or accept a potential *ZeroDivisionError* by using *fitz.Identity / m*.
80 |
81 |
82 | Some Examples
83 | --------------
84 |
85 | Manipulation with numbers
86 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
87 | For the usual arithmetic operations, numbers are always allowed as second operand. In addition, you can formulate *"x in OBJ"*, where x is a number. It is implemented as *"x in tuple(OBJ)"*::
88 |
89 | >>> fitz.Rect(1, 2, 3, 4) + 5
90 | fitz.Rect(6.0, 7.0, 8.0, 9.0)
91 | >>> 3 in fitz.Rect(1, 2, 3, 4)
92 | True
93 | >>>
94 |
95 | The following will create the upper left quarter of a document page rectangle::
96 |
97 | >>> page.rect
98 | Rect(0.0, 0.0, 595.0, 842.0)
99 | >>> page.rect / 2
100 | Rect(0.0, 0.0, 297.5, 421.0)
101 | >>>
102 |
103 | The following will deliver the **middle point of a line** connecting two points **p1** and **p2**::
104 |
105 | >>> p1 = fitz.Point(1, 2)
106 | >>> p2 = fitz.Point(4711, 3141)
107 | >>> mp = p1 + (p2 - p1) / 2
108 | >>> mp
109 | Point(2356.0, 1571.5)
110 | >>>
111 |
112 | Manipulation with "like" Objects
113 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
114 |
115 | The second operand of a binary operation can always be "like" the left operand. "Like" in this context means "a sequence of numbers of the same length". With the above examples::
116 |
117 | >>> p1 + p2
118 | Point(4712.0, 3143.0)
119 | >>> p1 + (4711, 3141)
120 | Point(4712.0, 3143.0)
121 | >>> p1 += (4711, 3141)
122 | >>> p1
123 | Point(4712.0, 3143.0)
124 | >>>
125 |
126 | To shift a rectangle by 5 pixels to the right, do this::
127 |
128 | >>> fitz.Rect(100, 100, 200, 200) + (5, 0, 5, 0) # add 5 to the x coordinates
129 | Rect(105.0, 100.0, 205.0, 200.0)
130 | >>>
131 |
132 | Points, rectangles and matrices can be *transformed* with matrices. In PyMuPDF, we treat this like a **"multiplication"** (or resp. **"division"**), where the second operand may be "like" a matrix. Division in this context means "multiplication with the inverted matrix"::
133 |
134 | >>> m = fitz.Matrix(1, 2, 3, 4, 5, 6)
135 | >>> n = fitz.Matrix(6, 5, 4, 3, 2, 1)
136 | >>> p = fitz.Point(1, 2)
137 | >>> p * m
138 | Point(12.0, 16.0)
139 | >>> p * (1, 2, 3, 4, 5, 6)
140 | Point(12.0, 16.0)
141 | >>> p / m
142 | Point(2.0, -2.0)
143 | >>> p / (1, 2, 3, 4, 5, 6)
144 | Point(2.0, -2.0)
145 | >>>
146 | >>> m * n # matrix multiplication
147 | Matrix(14.0, 11.0, 34.0, 27.0, 56.0, 44.0)
148 | >>> m / n # matrix division
149 | Matrix(2.5, -3.5, 3.5, -4.5, 5.5, -7.5)
150 | >>>
151 | >>> m / m # result is equal to the Identity matrix
152 | Matrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)
153 | >>>
154 | >>> # look at this non-invertible matrix:
155 | >>> m = fitz.Matrix(1, 0, 1, 0, 1, 0)
156 | >>> ~m
157 | Matrix(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
158 | >>> # we try dividing by it in two ways:
159 | >>> p = fitz.Point(1, 2)
160 | >>> p * ~m # this delivers point (0, 0):
161 | Point(0.0, 0.0)
162 | >>> p / m # but this is an exception:
163 | Traceback (most recent call last):
164 | File "<stdin>", line 1, in <module>
165 | p / m
166 | File "... /site-packages/fitz/fitz.py", line 869, in __truediv__
167 | raise ZeroDivisionError("matrix not invertible")
168 | ZeroDivisionError: matrix not invertible
169 | >>>
170 |
171 |
172 | As a specialty, rectangles support additional binary operations:
173 |
174 | * **intersection** -- the common area of rectangle-likes, operator *"&"*
175 | * **inclusion** -- enlarge to include a point-like or rect-like, operator *"|"*
176 | * **containment** check -- whether a point-like or rect-like is inside
177 |
178 | Here is an example for creating the smallest rectangle enclosing given points::
179 |
180 | >>> # first define some point-likes
181 | >>> points = []
182 | >>> for i in range(10):
183 | for j in range(10):
184 | points.append((i, j))
185 | >>>
186 | >>> # now create a rectangle containing all these 100 points
187 | >>> # start with an empty rectangle
188 | >>> r = fitz.Rect(points[0], points[0])
189 | >>> for p in points[1:]: # and include remaining points one by one
190 | r |= p
191 | >>> r # here is the to be expected result:
192 | Rect(0.0, 0.0, 9.0, 9.0)
193 | >>> (4, 5) in r # this point-like lies inside the rectangle
194 | True
195 | >>> # and this rect-like is also inside
196 | >>> (4, 4, 5, 5) in r
197 | True
198 | >>>
199 |
200 |
--------------------------------------------------------------------------------
/docs/app1.rst:
--------------------------------------------------------------------------------
1 | ===============================
2 | Appendix 1: Performance
3 | ===============================
4 |
5 | We have tried to get an impression of PyMuPDF's performance. While we know this is very hard and a fair comparison is almost impossible, we feel that we at least should provide some quantitative information to justify our bold comments on MuPDF's **top performance**.
6 |
7 | Following are three sections that deal with different aspects of performance:
8 |
9 | * document parsing
10 | * text extraction
11 | * image rendering
12 |
13 | In each section, the same fixed set of PDF files is being processed by a set of tools. The set of tools varies -- for reasons we will explain in the section.
14 |
15 | .. |fsizes| image:: images/img-filesizes.png
16 |
17 | Here is the list of files we are using. Each file name is accompanied by further information: **size** in bytes, number of **pages**, number of bookmarks (**toc** entries), number of **links**, **text** size as a percentage of file size, **KB** per page, PDF **version** and remarks. **text %** and **KB index** are indicators for whether a file is text or graphics oriented.
18 | |fsizes|
19 | E.g. *Adobe.pdf* and *PyMuPDF.pdf* are clearly text oriented, all other files contain many more images.
20 |
21 |
22 |
23 | Part 1: Parsing
24 | ~~~~~~~~~~~~~~~~
25 |
26 | How fast is a PDF file read and its content parsed for further processing? The sheer parsing performance cannot directly be compared, because batch utilities always execute a requested task completely, in one go, front to end. *pdfrw*, too, has a *lazy* parsing strategy, meaning it only parses those parts of a document that are required at any given moment.
27 |
28 | To still find an answer to this question, we therefore measure the time each tool needs to copy a PDF file to an output file, doing nothing else.
29 |
30 | **These were the tools**
31 |
32 | All tools are either platform independent, or can at least run on both Windows and Unix / Linux (pdftk).
33 |
34 | **Poppler** is missing here, because it specifically is a Linux tool set, although we know there exist Windows ports (created with considerable effort apparently). Technically, it is a C/C++ library, for which a Python binding exists -- in so far somewhat comparable to PyMuPDF. But Poppler in contrast is tightly coupled to **Qt** and **Cairo**. We may still include it in future, when a more handy Windows installation is available. We have seen however some `analysis `_, that hints at a much lower performance than MuPDF. Our comparison of text extraction speeds also shows a much lower performance of Poppler's PDF code base **Xpdf**.
35 |
36 | Image rendering of MuPDF also is about three times faster than the one of Xpdf when comparing the command line tools *mudraw* of MuPDF and *pdftopng* of Xpdf -- see part 3 of this chapter.
37 |
38 | ========= ==========================================================================
39 | Tool Description
40 | ========= ==========================================================================
41 | PyMuPDF tool of this manual, appearing as "fitz" in reports
42 | pdfrw a pure Python tool, is being used by rst2pdf, has interface to ReportLab
43 | PyPDF2 a pure Python tool with a very complete function set
44 | pdftk a command line utility with numerous functions
45 | ========= ==========================================================================
46 |
47 | This is how each of the tools was used:
48 |
49 | **PyMuPDF**:
50 | ::
51 | doc = fitz.open("input.pdf")
52 | doc.save("output.pdf")
53 |
54 | **pdfrw**:
55 | ::
56 | doc = PdfReader("input.pdf")
57 | writer = PdfWriter()
58 | writer.trailer = doc
59 | writer.write("output.pdf")
60 |
61 | **PyPDF2**:
62 | ::
63 | pdfmerge = PyPDF2.PdfFileMerger()
64 | pdfmerge.append("input.pdf")
65 | pdfmerge.write("output.pdf")
66 | pdfmerge.close()
67 |
68 | **pdftk**:
69 | ::
70 | pdftk input.pdf output output.pdf
71 |
72 |
73 | **Observations**
74 |
75 | .. |cpyspeed1| image:: images/img-copy-speed-1.png
76 | .. |cpyspeed2| image:: images/img-copy-speed-2.png
77 |
78 | These are our run time findings (in **seconds**, please note the European number convention: meaning of decimal point and comma is reversed):
79 |
80 | |cpyspeed1|
81 |
82 | If we leave out the Adobe manual, this table looks like
83 |
84 | |cpyspeed2|
85 |
86 | PyMuPDF is by far the fastest: on average 4.5 times faster than the second best (the pure Python tool pdfrw, **chapeau pdfrw!**), and almost 20 times faster than the command line tool pdftk.
87 |
88 | Where PyMuPDF requires less than 13 seconds to process all files, pdftk takes almost 4 minutes.
89 |
90 | By far the slowest tool is PyPDF2 -- it is more than 66 times slower than PyMuPDF and 15 times slower than pdfrw! The main reason for PyPDF2's poor showing is the Adobe manual. It obviously is slowed down by the linear file structure and the immense number of bookmarks of this file. If we take out this special case, then PyPDF2 is only 21.5 times slower than PyMuPDF, 4.5 times slower than pdfrw and 1.2 times slower than pdftk.
91 |
92 | If we look at the output PDFs, there is one surprise:
93 |
94 | Each tool created a PDF of similar size as the original. Apart from the Adobe case, PyMuPDF always created the smallest output.
95 |
96 | Adobe's manual is an exception: The pure Python tools pdfrw and PyPDF2 **reduced** its size by more than 20% (and yielded a document which is no longer linearized)!
97 |
98 | PyMuPDF and pdftk in contrast **drastically increased** the size by 40% to about 50 MB (also no longer linearized).
99 |
100 | So far, we have no explanation of what is happening here.
101 |
102 |
103 | Part 2: Text Extraction
104 | ~~~~~~~~~~~~~~~~~~~~~~~~
105 | We also have compared text extraction speed with other tools.
106 |
107 | The following table shows a run time comparison. PyMuPDF's methods appear as "fitz (TEXT)" and "fitz (JSON)" respectively. The tool *pdftotext.exe* of the `Xpdf `_ toolset appears as "xpdf".
108 |
109 | * **extractText():** basic text extraction without layout re-arrangement (using *GetText(..., output = "text")*)
110 | * **pdftotext:** a command line tool of the **Xpdf** toolset (which also is the basis of `Poppler's library `_)
111 | * **extractJSON():** text extraction with layout information (using *GetText(..., output = "json")*)
112 | * **pdfminer:** a pure Python PDF tool specialized on text extraction tasks
113 |
114 | All tools have been used with their most basic, fanciless functionality -- no layout re-arrangements, etc.
115 |
116 | For demonstration purposes, we have included a version of *GetText(doc, output = "json")*, that also re-arranges the output according to occurrence on the page.
117 |
118 | .. |textperf| image:: images/img-textperformance.png
119 |
120 | Here are the results using the same test files as above (again: decimal point and comma reversed):
121 |
122 | |textperf|
123 |
124 | Again, (Py-) MuPDF is the fastest around. It is 2.3 to 2.6 times faster than xpdf.
125 |
126 | *pdfminer*, as a pure Python solution, of course is comparatively slow: MuPDF is 50 to 60 times faster and xpdf is 23 times faster. These observations in order of magnitude coincide with the statements on this `web site `_.
127 |
128 | Part 3: Image Rendering
129 | ~~~~~~~~~~~~~~~~~~~~~~~~
130 | We have tested rendering speed of MuPDF against *pdftopng.exe*, a command line tool of the **Xpdf** toolset (the PDF code basis of **Poppler**).
131 |
132 | **MuPDF invocation using a resolution of 150 dpi (Xpdf default):**
133 | ::
134 | mutool draw -o t%d.png -r 150 file.pdf
135 |
136 | **PyMuPDF invocation:**
137 | ::
138 | zoom = 150.0 / 72.0
139 | mat = fitz.Matrix(zoom, zoom)
140 | def ProcessFile(datei):
141 | print "processing:", datei
142 | doc=fitz.open(datei)
143 | for p in fitz.Pages(doc):
144 | pix = p.getPixmap(matrix=mat, alpha = False)
145 | pix.writePNG("t-%s.png" % p.number)
146 | pix = None
147 | doc.close()
148 | return
149 |
150 | **Xpdf invocation:**
151 | ::
152 | pdftopng.exe file.pdf ./
153 |
154 | .. |renderspeed| image:: images/img-render-speed.png
155 |
156 | The resulting runtimes can be found here (again: meaning of decimal point and comma reversed):
157 |
158 | |renderspeed|
159 |
160 | * MuPDF and PyMuPDF are both about 3 times faster than Xpdf.
161 |
162 | * The 2% speed difference between MuPDF (a utility written in C) and PyMuPDF is the Python overhead.
163 |
--------------------------------------------------------------------------------
/docs/app3.rst:
--------------------------------------------------------------------------------
1 | .. _Appendix 3:
2 |
3 | ================================================
4 | Appendix 3: Considerations on Embedded Files
5 | ================================================
6 | This chapter provides some background on embedded files support in PyMuPDF.
7 |
8 | General
9 | ----------
10 | Starting with version 1.4, PDF supports embedding arbitrary files as part ("Embedded File Streams") of a PDF document file (see chapter 3.10.3, pp. 184 of the :ref:`AdobeManual`).
11 |
12 | In many aspects, this is comparable to concepts also found in ZIP files or the OLE technique in MS Windows. PDF embedded files do, however, *not* support directory structures as does the ZIP format. An embedded file can in turn contain embedded files itself.
13 |
14 | Advantages of this concept are that embedded files are under the PDF umbrella, benefitting from its permissions / password protection and integrity aspects: all data, which a PDF may reference or even may be dependent on, can be bundled into it and so form a single, consistent unit of information.
15 |
16 | In addition to embedded files, PDF 1.7 adds *collections* to its support range. This is an advanced way of storing and presenting meta information (i.e. arbitrary and extensible properties) of embedded files.
17 |
18 | MuPDF Support
19 | --------------
20 | After adding initial support for collections (portfolios) and */EmbeddedFiles* in MuPDF version 1.11, this support was dropped again in version 1.15.
21 |
22 | As a consequence, the cli utility *mutool* no longer offers access to embedded files.
23 |
24 | PyMuPDF -- having implemented an */EmbeddedFiles* API in response in its version 1.11.0 -- was therefore forced to change gears starting with its version 1.16.0 (we never published a MuPDF v1.15.x compatible PyMuPDF).
25 |
26 | We are now maintaining our own code basis supporting embedded files. This code makes use of basic MuPDF dictionary and array functions only.
27 |
28 | PyMuPDF Support
29 | ------------------
30 | We continue to support the full old API with respect to embedded files -- with only minor, cosmetic changes.
31 |
32 | There is also a new function, :meth:`Document.embeddedFileNames`, which delivers a list of all names under which embedded data are registered in a PDF.
33 |
--------------------------------------------------------------------------------
/docs/classes.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Classes
3 | ============
4 |
5 | .. toctree::
6 | :maxdepth: 2
7 |
8 | annot
9 | colorspace
10 | displaylist
11 | document
12 | font
13 | identity
14 | irect
15 | link
16 | linkdest
17 | matrix
18 | outline
19 | page
20 | pixmap
21 | point
22 | quad
23 | rect
24 | shape
25 | textpage
26 | textwriter
27 | tools
28 | widget
29 |
--------------------------------------------------------------------------------
/docs/colors.rst:
--------------------------------------------------------------------------------
1 | .. _ColorDatabase:
2 |
3 | ================
4 | Color Database
5 | ================
6 | Since the introduction of methods involving colors (like :meth:`Page.drawCircle`), a requirement may be to have access to predefined colors.
7 |
8 | The fabulous GUI package `wxPython `_ has a database of over 540 predefined RGB colors, which are given more or less memorizable names. Among them are not only standard names like "green" or "blue", but also "turquoise", "skyblue", and 100 (not only 50 ...) shades of "gray", etc.
9 |
10 | We have taken the liberty to copy a modified version of this database (a list of tuples) into PyMuPDF and make its colors available as PDF compatible float triples: for wxPython's *("WHITE", 255, 255, 255)* we return *(1, 1, 1)*, which can be directly used in *color* and *fill* parameters. Color names are matched case-insensitively, so a spelling like "wHiTe" is accepted as well.
11 |
12 | Function *getColor()*
13 | ------------------------
14 | As the color database may not be needed very often, one additional import statement seems acceptable to get access to it::
15 |
16 | >>> # "getColor" is the only method you really need
17 | >>> from fitz.utils import getColor
18 | >>> getColor("aliceblue")
19 | (0.9411764705882353, 0.9725490196078431, 1.0)
20 | >>> #
21 | >>> # to get a list of all existing names
22 | >>> from fitz.utils import getColorList
23 | >>> cl = getColorList()
24 | >>> cl
25 | ['ALICEBLUE', 'ANTIQUEWHITE', 'ANTIQUEWHITE1', 'ANTIQUEWHITE2', 'ANTIQUEWHITE3',
26 | 'ANTIQUEWHITE4', 'AQUAMARINE', 'AQUAMARINE1'] ...
27 | >>> #
28 | >>> # to see the full integer color coding
29 | >>> from fitz.utils import getColorInfoList
30 | >>> il = getColorInfoList()
31 | >>> il
32 | [('ALICEBLUE', 240, 248, 255), ('ANTIQUEWHITE', 250, 235, 215),
33 | ('ANTIQUEWHITE1', 255, 239, 219), ('ANTIQUEWHITE2', 238, 223, 204),
34 | ('ANTIQUEWHITE3', 205, 192, 176), ('ANTIQUEWHITE4', 139, 131, 120),
35 | ('AQUAMARINE', 127, 255, 212), ('AQUAMARINE1', 127, 255, 212)] ...
36 |
37 |
38 | Printing the Color Database
39 | ----------------------------
40 | If you want to actually see what the many available colors look like, use scripts `colordbRGB.py `_ or `colordbHSV.py `_ in the examples directory. They create PDFs (already existing in the same directory) with all these colors. Their only difference is sorting order: one takes the RGB values, the other one the Hue-Saturation-Values as sort criteria.
41 | This is a screen print of what these files look like.
42 |
43 | .. image:: images/img-colordb.png
44 |
--------------------------------------------------------------------------------
/docs/colorspace.rst:
--------------------------------------------------------------------------------
1 | .. _Colorspace:
2 |
3 | ================
4 | Colorspace
5 | ================
6 |
7 | Represents the color space of a :ref:`Pixmap`.
8 |
9 |
10 | **Class API**
11 |
12 | .. class:: Colorspace
13 |
14 | .. method:: __init__(self, n)
15 |
16 | Constructor
17 |
18 | :arg int n: A number identifying the colorspace. Possible values are :data:`CS_RGB`, :data:`CS_GRAY` and :data:`CS_CMYK`.
19 |
20 | .. attribute:: name
21 |
22 | The name identifying the colorspace. Example: *fitz.csCMYK.name = 'DeviceCMYK'*.
23 |
24 | :type: str
25 |
26 | .. attribute:: n
27 |
28 | The number of bytes required to define the color of one pixel. Example: *fitz.csCMYK.n == 4*.
29 |
30 | :type: int
31 |
32 |
33 | **Predefined Colorspaces**
34 |
35 | For saving some typing effort, there exist predefined colorspace objects for the three available cases.
36 |
37 | * :data:`csRGB` = *fitz.Colorspace(fitz.CS_RGB)*
38 | * :data:`csGRAY` = *fitz.Colorspace(fitz.CS_GRAY)*
39 | * :data:`csCMYK` = *fitz.Colorspace(fitz.CS_CMYK)*
40 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | import sys
4 | import os
5 | import sphinx_rtd_theme
6 |
7 | # If extensions (or modules to document with autodoc) are in another directory,
8 | # add these directories to sys.path here. If the directory is relative to the
9 | # documentation root, use os.path.abspath to make it absolute, like shown here.
10 | # sys.path.insert(0, os.path.abspath('.'))
11 |
12 | # -- General configuration ------------------------------------------------
13 |
14 | # If your documentation needs a minimal Sphinx version, state it here.
15 | # needs_sphinx = "3.1"
16 |
17 | # Add any Sphinx extension module names here, as strings. They can be
18 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
19 | # ones.
20 | extensions = [
21 | "sphinx.ext.autodoc",
22 | # "sphinx.ext.todo",
23 | "sphinx.ext.coverage",
24 | "sphinx.ext.ifconfig",
25 | # "sphinx.ext.imgmath",
26 | ]
27 |
28 | # Add any paths that contain templates here, relative to this directory.
29 | templates_path = ["_templates"]
30 |
31 | # The suffix of source filenames.
32 | # source_suffix = ".rst"
33 |
34 | # The encoding of source files.
35 | # source_encoding = 'utf-8-sig'
36 |
37 | # The master toctree document.
38 | master_doc = "index"
39 |
40 | # General information about the project.
41 | project = "PyMuPDF"
42 | copyright = "2015-2020, Jorj X. McKie"
43 |
44 | # The version info for the project you're documenting, acts as replacement for
45 | # |version| and |release|, also used in various other places throughout the
46 | # built documents.
47 | #
48 | # The full version, including alpha/beta/rc tags.
49 | release = "1.17.4"
50 |
51 | # The short X.Y version (set here to the full release string)
52 | version = release
53 |
54 | # The language for content autogenerated by Sphinx. Refer to documentation
55 | # for a list of supported languages.
56 | # language = None
57 |
58 | # There are two options for replacing |today|: either, you set today to some
59 | # non-false value, then it is used:
60 | # today = ''
61 | # Else, today_fmt is used as the format for a strftime call.
62 | # today_fmt = '%B %d, %Y'
63 |
64 | # List of patterns, relative to source directory, that match files and
65 | # directories to ignore when looking for source files.
66 | exclude_patterns = ["_build"]
67 |
68 | # The reST default role (used for this markup: `text`) to use for all
69 | # documents.
70 | default_role = None
71 |
72 | # If true, '()' will be appended to :func: etc. cross-reference text.
73 | add_function_parentheses = True
74 |
75 | # If true, the current module name will be prepended to all description
76 | # unit titles (such as .. function::).
77 | add_module_names = True
78 |
79 | # If true, sectionauthor and moduleauthor directives will be shown in the
80 | # output. They are ignored by default.
81 | show_authors = False
82 |
83 | # The name of the Pygments (syntax highlighting) style to use.
84 | pygments_style = "sphinx"
85 |
86 | # A list of ignored prefixes for module index sorting.
87 | modindex_common_prefix = []
88 |
89 | # If true, keep warnings as "system message" paragraphs in the built documents.
90 | keep_warnings = False
91 |
92 |
93 | # -- Options for HTML output ----------------------------------------------
94 |
95 | # The theme to use for HTML and HTML Help pages. See the documentation for
96 | # a list of builtin themes.
97 | # html_theme = "agogo"
98 | # html_theme = "sphinxdoc"
99 | # html_theme = "python_docs_theme"
100 | html_theme = "sphinx_rtd_theme"
101 | # html_theme = "classic"
102 |
103 | # Theme options are theme-specific and customize the look and feel of a theme
104 | # further. For a list of options available for each theme, see the
105 | # documentation.
106 | html_theme_options = {
107 | # "root_name": "",
108 | # "root_url": "",
109 | # "root_icon": "pymupdf.ico",
110 | # "sidebarbgcolor": "gray",
111 | }
112 |
113 | # Add any paths that contain custom themes here, relative to this directory.
114 | # html_theme_path = []
115 | # html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
116 |
117 | # The name for this set of Sphinx documents. If None, it defaults to
118 | # "<project> v<release> documentation".
119 | # html_title = None
120 |
121 | # A shorter title for the navigation bar. Default is the same as html_title.
122 | # html_short_title = None
123 |
124 | # The name of an image file (relative to this directory) to place at the top
125 | # of the sidebar.
126 | # html_logo = "images/img-pymupdf.jpg"
127 |
128 | # The name of an image file (within the static path) to use as favicon of the
129 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
130 | # pixels large.
131 | html_favicon = "PyMuPDF.ico"
132 |
133 | # Add any paths that contain custom static files (such as style sheets) here,
134 | # relative to this directory. They are copied after the builtin static files,
135 | # so a file named "default.css" will overwrite the builtin "default.css".
136 | html_static_path = ["_static"]
137 |
138 | # Add any extra paths that contain custom files (such as robots.txt or
139 | # .htaccess) here, relative to this directory. These files are copied
140 | # directly to the root of the documentation.
141 | # html_extra_path = []
142 |
143 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
144 | # using the given strftime format.
145 | html_last_updated_fmt = "%d. %b %Y"
146 |
147 | # If true, SmartyPants will be used to convert quotes and dashes to
148 | # typographically correct entities.
149 | # html_use_smartypants = False
150 |
151 | # Custom sidebar templates, maps document names to template names.
152 | # html_sidebars = {}
153 |
154 | # Additional templates that should be rendered to pages, maps page names to
155 | # template names.
156 | html_additional_pages = {}
157 |
158 | # If false, no module index is generated.
159 | html_domain_indices = True
160 |
161 | # If false, no index is generated.
162 | html_use_index = True
163 |
164 | # If true, the index is split into individual pages for each letter.
165 | html_split_index = True
166 |
167 | # If true, links to the reST sources are added to the pages.
168 | html_show_sourcelink = True
169 | html_sourcelink_suffix = ".rst"
170 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
171 | html_show_sphinx = True
172 |
173 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
174 | html_show_copyright = True
175 |
176 | # If true, an OpenSearch description file will be output, and all pages will
177 | # contain a <link> tag referring to it. The value of this option must be the
178 | # base URL from which the finished HTML is served.
179 | # html_use_opensearch = "https://pymupdf.readthedocs.io/en/latest"
180 |
181 | # This is the file name suffix for HTML files (e.g. ".xhtml").
182 | # html_file_suffix = ".html"
183 |
184 | # Output file base name for HTML help builder.
185 | htmlhelp_basename = "PyMuPDF"
186 |
187 |
188 | # -- Options for LaTeX output ---------------------------------------------
189 | latex_elements = {
190 | # "fontpkg": r"\usepackage[sfdefault]{ClearSans} \usepackage[T1]{fontenc}"
191 | }
192 | # Grouping the document tree into LaTeX files. List of tuples
193 | # (source start file, target name, title,
194 | # author, documentclass [howto, manual, or own class]).
195 | latex_documents = [
196 | ("index", "PyMuPDF.tex", u"PyMuPDF Documentation", u"Jorj X. McKie", "manual")
197 | ]
198 | # The name of an image file (relative to this directory) to place at the top of
199 | # the title page.
200 | latex_logo = "images/img-pymupdf.jpg"
201 |
202 | # For "manual" documents, if this is true, then toplevel headings are parts,
203 | # not chapters.
204 | # latex_use_parts = False
205 |
206 | # If true, show page references after internal links.
207 | latex_show_pagerefs = False
208 |
209 | # If true, show URL addresses after external links.
210 | # latex_show_urls = True
211 | # latex_use_xindy = True
212 | # Documents to append as an appendix to all manuals.
213 | # latex_appendices = []
214 |
215 | # If false, no module index is generated.
216 | latex_domain_indices = True
217 |
218 | # -- Options for PDF output --------------------------------------------------
219 | # Grouping the document tree into PDF files. List of tuples
220 | # (source start file, target name, title, author).
221 |
222 | pdf_documents = [("index", "PyMuPDF", "PyMuPDF Manual", "Jorj McKie")]
223 |
224 | # A comma-separated list of custom stylesheets. Example:
225 | pdf_stylesheets = ["sphinx", "bahnschrift"]
226 |
227 | # Create a compressed PDF
228 | pdf_compressed = True
229 |
230 | # A colon-separated list of folders to search for fonts. Example:
231 | # pdf_font_path=['/usr/share/fonts', '/usr/share/texmf-dist/fonts/']
232 |
233 | # Language to be used for hyphenation support
234 | pdf_language = "en_US"
235 |
236 | # If false, no index is generated.
237 | pdf_use_index = True
238 |
239 | # If false, no modindex is generated.
240 | pdf_use_modindex = True
241 |
242 | # If false, no coverpage is generated.
243 | pdf_use_coverpage = True
244 |
245 | pdf_break_level = 2
246 |
247 | pdf_verbosity = 0
248 | pdf_invariant = True
249 |
--------------------------------------------------------------------------------
/docs/coop_low.rst:
--------------------------------------------------------------------------------
1 |
2 | .. _cooperation:
3 |
4 | ===============================================================
5 | Working together: DisplayList and TextPage
6 | ===============================================================
7 | Here are some instructions on how to use these classes together.
8 |
9 | In some situations, performance improvements may be achievable, when you fall back to the detail level explained here.
10 |
11 | Create a DisplayList
12 | ---------------------
13 | A :ref:`DisplayList` represents an interpreted document page. Methods for pixmap creation, text extraction and text search are -- behind the curtain -- all using the page's display list to perform their tasks. If a page must be rendered several times (e.g. because of changed zoom levels), or if text search and text extraction should both be performed, overhead can be saved, if the display list is created only once and then used for all other tasks.
14 |
15 | >>> dl = page.getDisplayList() # create the display list
16 |
17 | You can also create display lists for many pages "on stack" (in a list), maybe during document open or during idle times, or you can store one when a page is visited for the first time (e.g. in GUI scripts).
18 |
19 | Note that for everything that follows, only the display list is needed -- the corresponding :ref:`Page` object could have been deleted.
20 |
21 | Generate Pixmap
22 | ------------------
23 | The following creates a Pixmap from a :ref:`DisplayList`. Parameters are the same as for :meth:`Page.getPixmap`.
24 |
25 | >>> pix = dl.getPixmap() # create the page's pixmap
26 |
27 | The execution time of this statement may be up to 50% shorter than that of :meth:`Page.getPixmap`.
28 |
29 | Perform Text Search
30 | ---------------------
31 | With the display list from above, we can also search for text.
32 |
33 | For this we need to create a :ref:`TextPage`.
34 |
35 | >>> tp = dl.getTextPage() # display list from above
36 | >>> rlist = tp.search("needle") # look up "needle" locations
37 | >>> for r in rlist: # work with the found locations, e.g.
38 | pix.invertIRect(r.irect) # invert colors in the rectangles
39 |
40 | Extract Text
41 | ----------------
42 | With the same :ref:`TextPage` object from above, we can now immediately use any or all of the 5 text extraction methods.
43 |
44 | .. note:: Above, we have created our text page without argument. This leads to a default argument of 3 (ligatures and white-space are preserved); in other words, images will **not** be extracted -- see below.
45 |
46 | >>> txt = tp.extractText() # plain text format
47 | >>> json = tp.extractJSON() # json format
48 | >>> html = tp.extractHTML() # HTML format
49 | >>> xml = tp.extractXML() # XML format
50 | >>> xhtml = tp.extractXHTML() # XHTML format
51 |
52 | Further Performance improvements
53 | ---------------------------------
54 | Pixmap
55 | ~~~~~~~
56 | As explained in the :ref:`Page` chapter:
57 |
58 | If you do not need transparency set *alpha = 0* when creating pixmaps. This will save 25% memory (if RGB, the most common case) and possibly 5% execution time (depending on the GUI software).
59 |
60 | TextPage
61 | ~~~~~~~~~
62 | If you do not need images extracted alongside the text of a page, you can set the following option:
63 |
64 | >>> flags = fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE
65 | >>> tp = dl.getTextPage(flags)
66 |
67 | This will save ca. 25% overall execution time for the HTML, XHTML and JSON text extractions and **hugely** reduce the amount of storage (both, memory and disk space) if the document is graphics oriented.
68 |
69 | If you however do need images, use a value of 7 for flags:
70 |
71 | >>> flags = fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_IMAGES
72 |
--------------------------------------------------------------------------------
/docs/device.rst:
--------------------------------------------------------------------------------
1 | .. _Device:
2 |
3 | ================
4 | Device
5 | ================
6 |
7 | The different format handlers (pdf, xps, etc.) interpret pages to a "device". Devices are the basis for everything that can be done with a page: rendering, text extraction and searching. The device type is determined by the selected construction method.
8 |
9 | **Class API**
10 |
11 | .. class:: Device
12 |
13 | .. method:: __init__(self, object, clip)
14 |
15 | Constructor for either a pixel map or a display list device.
16 |
17 | :arg object: either a *Pixmap* or a *DisplayList*.
18 | :type object: :ref:`Pixmap` or :ref:`DisplayList`
19 |
20 | :arg clip: An optional `IRect` for *Pixmap* devices to restrict rendering to a certain area of the page. If the complete page is required, specify *None*. For display list devices, this parameter must be omitted.
21 | :type clip: :ref:`IRect`
22 |
23 | .. method:: __init__(self, textpage, flags=0)
24 |
25 | Constructor for a text page device.
26 |
27 | :arg textpage: *TextPage* object
28 | :type textpage: :ref:`TextPage`
29 |
30 | :arg int flags: control the way how text is parsed into the text page. Currently 3 options can be coded into this parameter, see :ref:`TextPreserve`. To set these options use something like *flags=0 | TEXT_PRESERVE_LIGATURES | ...*.
31 |
32 | .. note:: In higher level code (:meth:`Page.getText`, :meth:`Document.getPageText`), the following decisions for creating text devices have been implemented: (1) *TEXT_PRESERVE_LIGATURES* and *TEXT_PRESERVE_WHITESPACE* are always set, (2) *TEXT_PRESERVE_IMAGES* is set for JSON and HTML, otherwise off.
33 |
34 |
--------------------------------------------------------------------------------
/docs/displaylist.rst:
--------------------------------------------------------------------------------
1 | .. _DisplayList:
2 |
3 | ================
4 | DisplayList
5 | ================
6 |
7 | DisplayList is a list containing drawing commands (text, images, etc.). The intent is two-fold:
8 |
9 | 1. as a caching-mechanism to reduce parsing of a page
10 | 2. as a data structure in multi-threading setups, where one thread parses the page and another one renders pages. This aspect is currently not supported by PyMuPDF.
11 |
12 | A display list is populated with objects from a page, usually by executing :meth:`Page.getDisplayList`. There also exists an independent constructor.
13 |
14 | "Replay" the list (once or many times) by invoking one of its methods :meth:`~DisplayList.run`, :meth:`~DisplayList.getPixmap` or :meth:`~DisplayList.getTextPage`.
15 |
16 |
17 | ================================= ============================================
18 | **Method** **Short Description**
19 | ================================= ============================================
20 | :meth:`~DisplayList.run` Run a display list through a device.
21 | :meth:`~DisplayList.getPixmap` generate a pixmap
22 | :meth:`~DisplayList.getTextPage` generate a text page
23 | :attr:`~DisplayList.rect` mediabox of the display list
24 | ================================= ============================================
25 |
26 |
27 | **Class API**
28 |
29 | .. class:: DisplayList
30 |
31 | .. method:: __init__(self, mediabox)
32 |
33 | Create a new display list.
34 |
35 | :arg mediabox: The page's rectangle.
36 | :type mediabox: :ref:`Rect`
37 |
38 | :rtype: *DisplayList*
39 |
40 | .. method:: run(device, matrix, area)
41 |
42 | Run the display list through a device. The device will populate the display list with its "commands" (i.e. text extraction or image creation). The display list can later be used to "read" a page many times without having to re-interpret it from the document file.
43 |
44 | You will most probably instead use one of the specialized run methods below -- :meth:`getPixmap` or :meth:`getTextPage`.
45 |
46 | :arg device: Device
47 | :type device: :ref:`Device`
48 |
49 | :arg matrix: Transformation matrix to apply to the display list contents.
50 | :type matrix: :ref:`Matrix`
51 |
52 | :arg area: Only the part visible within this area will be considered when the list is run through the device.
53 | :type area: :ref:`Rect`
54 |
55 | .. index::
56 | pair: matrix; getPixmap
57 | pair: colorspace; getPixmap
58 | pair: clip; getPixmap
59 | pair: alpha; getPixmap
60 |
61 | .. method:: getPixmap(matrix=fitz.Identity, colorspace=fitz.csRGB, alpha=0, clip=None)
62 |
63 | Run the display list through a draw device and return a pixmap.
64 |
65 | :arg matrix: matrix to use. Default is the identity matrix.
66 | :type matrix: :ref:`Matrix`
67 |
68 | :arg colorspace: the desired colorspace. Default is RGB.
69 | :type colorspace: :ref:`Colorspace`
70 |
71 | :arg int alpha: determine whether or not (0, default) to include a transparency channel.
72 |
73 | :arg clip: an area of the full mediabox to which the pixmap should be restricted.
74 | :type clip: :ref:`IRect` or :ref:`Rect`
75 |
76 | :rtype: :ref:`Pixmap`
77 | :returns: pixmap of the display list.
78 |
79 | .. method:: getTextPage(flags)
80 |
81 | Run the display list through a text device and return a text page.
82 |
83 | :arg int flags: control which information is parsed into a text page. Default value in PyMuPDF is **3 = TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE**, i.e. ligatures are **passed through**, white spaces are **passed through** (not translated to spaces), and images are **not included**. See :ref:`TextPreserve`.
84 |
85 | :rtype: :ref:`TextPage`
86 | :returns: text page of the display list.
87 |
88 | .. attribute:: rect
89 |
90 | Contains the display list's mediabox. This will equal the page's rectangle if it was created via :meth:`Page.getDisplayList`.
91 |
92 | :type: :ref:`Rect`
93 |
--------------------------------------------------------------------------------
/docs/font.rst:
--------------------------------------------------------------------------------
1 | .. _Font:
2 |
3 | ================
4 | Font
5 | ================
6 |
7 | *(New in v1.16.18)* This class represents a font as defined in MuPDF (*fz_font_s* structure). It is required for the new class :ref:`TextWriter` and the new :meth:`Page.writeText`. Currently, it has no connection to how fonts are used in methods ``insertText`` or ``insertTextbox``, respectively.
8 |
9 | A Font object also contains useful general information, like the font bbox, the number of defined glyphs, glyph names or the bbox of a single glyph.
10 |
11 | **Class API**
12 |
13 | .. class:: Font
14 |
15 | .. method:: __init__(self, fontname=None, fontfile=None,
16 | fontbuffer=None, script=0, language=None, ordering=-1, is_bold=0,
17 | is_italic=0, is_serif=0)
18 |
19 | Font constructor. The large number of parameters is used to locate the font that most closely matches the requirements. Not all parameters are ever required -- see the pseudo code below, which explains how the parameters are evaluated.
20 |
21 | :arg str fontname: one of the :ref:`Base-14-Fonts` or CJK fontnames. Also possible are a select few of other names like (watch the correct spelling): "Arial", "Times", "Times Roman".
22 |
23 | *(Changed in v1.17.4)*
24 |
25 | If you have installed `pymupdf-fonts <https://pypi.org/project/pymupdf-fonts/>`_, you can also use the following new "reserved" fontnames: "figo", "figbo", "figit", "figbi", "fimo", and "fimbo". This will provide one of the "FiraGo" or resp. "FiraMono" fonts, created by Mozilla.org.
26 |
27 | :arg str fontfile: the filename of a fontfile somewhere on your system [#f1]_.
28 | :arg bytes,bytearray,io.BytesIO fontbuffer: a fontfile loaded in memory [#f1]_.
29 | :arg int script: the number of a UCDN script. Currently supported in PyMuPDF are numbers 24, and 32 through 35.
30 | :arg str language: one of the values "zh-Hant" (traditional Chinese), "zh-Hans" (simplified Chinese), "ja" (Japanese) and "ko" (Korean). Otherwise, all ISO 639 codes from the subsets 1, 2, 3 and 5 are also possible, but are currently documentary only.
31 | :arg int ordering: an alternative selector for one of the CJK fonts.
32 | :arg bool is_bold: look for a bold font.
33 | :arg bool is_italic: look for an italic font.
34 | :arg bool is_serif: look for a serifed font.
35 |
36 | :returns: a MuPDF font if successful. This is the overall logic, how an appropriate font is located::
37 |
38 | if fontfile:
39 | create font from it ignoring other arguments
40 | if not successful -> exception
41 | if fontbuffer:
42 | create font from it ignoring other arguments
43 | if not successful -> exception
44 | if ordering >= 0:
45 | load **"universal"** font ignoring other parameters
46 | # this will always be successful
47 | if fontname:
48 | create a Base14 font, or resp. **"universal"** font, ignoring other parameters
49 | # note: values "Arial", "Times", "Times Roman" are also possible
50 | if not successful -> exception
51 | Finally try to load a "NOTO" font using *script* and *language* parameters.
52 | if not successful:
53 | look for fallback font
54 |
55 | .. note::
56 |
57 | With the usual abbreviations "helv", "tiro", etc., you will create fonts with the expected names "Helvetica", "Times-Roman" and so on.
58 |
59 | Using *ordering >= 0*, or fontnames starting with "china", "japan" or "korea" will always create the same **"universal"** font **"Droid Sans Fallback Regular"**. This font supports **all CJK and all Latin characters**.
60 |
61 | Actually, you would rarely ever need another font than **"Droid Sans Fallback Regular"**. **Except** that this font file is relatively large and adds about 1.65 MB (compressed) to your PDF file size. If you do not need CJK support, stick with specifying "helv", "tiro" etc., and you will get away with about 35 KB compressed.
62 |
63 | If you **know** you have a mixture of CJK and Latin text, consider just using ``Font(ordering=0)`` because this supports everything and also significantly (by a factor of two to three) speeds up execution: MuPDF will always find any character in this single font and need not check fallbacks.
64 |
65 | But if you do specify a Base-14 fontname, you will still be able to also write CJK characters! MuPDF automatically detects this situation and silently falls back to the universal font (which will then of course also be embedded in your PDF).
66 |
67 | *(New in v1.17.4)* Optionally, a set of new "reserved" fontnames becomes available if you install `pymupdf-fonts <https://pypi.org/project/pymupdf-fonts/>`_. The currently available fonts are from the Fira fonts family created by Mozilla. "Fira Mono" is a nice mono-spaced sans font set and FiraGO is another non-serifed "universal" font set, which supports all European languages (including Cyrillic and Greek) plus Thai, Arabic, Hebrew and Devanagari -- however none of the CJK languages. The size of a FiraGO font is only a quarter of the "Droid Sans Fallback" size (compressed 400 KB vs. 1.65 MB) -- and the style variants bold and italic are available. The following table maps a fontname to the corresponding font:
68 |
69 | =========== =======================================
70 | Fontname Font
71 | =========== =======================================
72 | figo FiraGO Regular
73 | figbo FiraGO Bold
74 | figit FiraGO Italic
75 | figbi FiraGO Bold Italic
76 | fimo Fira Mono Regular
77 | fimbo Fira Mono Bold
78 | =========== =======================================
79 |
80 | **All fonts mentioned here** also support Greek and Cyrillic letters.
81 |
82 | .. method:: has_glyph(chr, language=None, script=0)
83 |
84 | Check whether the unicode *chr* exists in the font or some fallback. May be used to check whether any "TOFU" symbols will appear on output.
85 |
86 | :arg int chr: the unicode of the character (i.e. *ord()*).
87 | :arg str language: the language -- currently unused.
88 | :arg int script: the UCDN script number.
89 | :returns: *True* or *False*.
90 |
91 | .. method:: glyph_advance(chr, language=None, script=0, wmode=0)
92 |
93 | Calculate the "width" of the character's glyph (visual representation).
94 |
95 | :arg int chr: the unicode number of the character. Use ``ord(c)``, not the character itself. Again, this should normally work even if a character is not supported by that font, because fallback fonts will be checked where necessary.
96 |
97 | The other parameters are not in use currently. This especially means that only horizontal text writing is supported.
98 |
99 | :returns: a float representing the glyph's width relative to **fontsize 1**.
100 |
101 | .. method:: glyph_name_to_unicode(name)
102 |
103 | Return the unicode for a given glyph name. Use it in conjunction with ``chr()`` if you want to output e.g. a certain symbol.
104 |
105 | :arg str name: The name of the glyph.
106 |
107 | :returns: The unicode integer, or 65533 = 0xFFFD if the name is unknown. Examples: ``font.glyph_name_to_unicode("Sigma") = 931``, ``font.glyph_name_to_unicode("sigma") = 963``. Refer to e.g. `this <https://github.com/adobe-type-tools/agl-aglfn/blob/master/glyphlist.txt>`_ publication for a list of glyph names and their unicode numbers.
108 |
109 | .. method:: unicode_to_glyph_name(chr, language=None, script=0, wmode=0)
110 |
111 | Show the name of the character's glyph.
112 |
113 | :arg int chr: the unicode number of the character. Use ``ord(c)``, not the character itself.
114 |
115 | :returns: a string representing the glyph's name. E.g. ``font.unicode_to_glyph_name(ord("#")) = "numbersign"``. Depending on how this font was built, the string may be empty, ".notfound" or some generated name.
116 |
117 | .. method:: text_length(text, fontsize=11)
118 |
119 | Calculate the length of a unicode string.
120 |
121 | :arg str text: a text string -- UTF-8 encoded. For Python 2, you must use unicode here.
122 |
123 | :arg float fontsize: the fontsize.
124 |
125 | :returns: a float representing the length of the string when stored in the PDF. Internally :meth:`glyph_advance` is used on a by-character level. If the font does not have a character, it will automatically be looked up in a fallback font.
126 |
127 | .. attribute:: flags
128 |
129 | A dictionary with various font properties, each represented as bools.
130 |
131 | .. attribute:: name
132 |
133 | Name of the font. May be "" or "(null)".
134 |
135 | .. attribute:: glyph_count
136 |
137 | The number of glyphs defined in the font.
138 |
139 | .. rubric:: Footnotes
140 |
141 | .. [#f1] MuPDF does not support all fontfiles with this feature and will raise exceptions like *"mupdf: FT_New_Memory_Face((null)): unknown file format"*, if it encounters issues.
142 |
--------------------------------------------------------------------------------
/docs/glossary.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Glossary
3 | ==============
4 |
5 | .. data:: matrix_like
6 |
7 | A Python sequence of 6 numbers.
8 |
9 | .. data:: rect_like
10 |
11 | A Python sequence of 4 numbers.
12 |
13 | .. data:: irect_like
14 |
15 | A Python sequence of 4 integers.
16 |
17 | .. data:: point_like
18 |
19 | A Python sequence of 2 numbers.
20 |
21 | .. data:: quad_like
22 |
23 | A Python sequence of 4 :data:`point_like` items.
24 |
25 | .. data:: inheritable
26 |
27 | A number of values in a PDF can be specified once and then be inherited by objects further down in a parent-child relationship. The mediabox (physical size) of pages can for example be specified in nodes of the :data:`pagetree` and will then be taken as value for all *kids*, which do not specify their own value.
28 |
29 | .. data:: MediaBox
30 |
31 | A PDF array of 4 floats specifying a physical page size (:data:`inheritable`).
32 |
33 | .. data:: CropBox
34 |
35 | A PDF array of 4 floats specifying a page's visible area (:data:`inheritable`). This value is **not affected** if the page is rotated.
36 |
37 |
38 | .. data:: catalog
39 |
40 | A central PDF :data:`dictionary` -- also called "root" -- containing pointers to much other information.
41 |
42 | .. data:: contents
43 |
44 | "A **content stream** is a PDF :data:`stream` :data:`object` whose data consists of a sequence of instructions describing the graphical elements to be painted on a page." (:ref:`AdobeManual` p. 151). For an overview of the mini-language used in these streams see chapter "Operator Summary" on page 985 of the :ref:`AdobeManual`. A PDF :data:`page` can have none to many contents objects. If it has none, the page is empty (but still may show annotations). If it has several, they will be interpreted in sequence as if their instructions had been present in one such object (i.e. like in a concatenated string). It should be noted that there are more stream object types which use the same syntax: e.g. appearance dictionaries associated with annotations and Form XObjects.
45 |
46 | .. data:: resources
47 |
48 | A :data:`dictionary` containing references to any resources (like images or fonts) required by a PDF :data:`page` (required, inheritable, :ref:`AdobeManual` p. 145) and certain other objects (Form XObjects). This dictionary appears as a sub-dictionary in the object definition under the key */Resources*. Being an inheritable object type, there may exist "parent" resources for all pages or certain subsets of pages.
49 |
50 | .. data:: dictionary
51 |
52 | A PDF :data:`object` type, which is somewhat comparable to the same-named Python notion: "A dictionary object is an associative table containing pairs of objects, known as the dictionary's entries. The first element of each entry is the key and the second element is the value. The key must be a name (...). The value can be any kind of object, including another dictionary. A dictionary entry whose value is null (...) is equivalent to an absent entry." (:ref:`AdobeManual` p. 59).
53 |
54 | Dictionaries are the most important :data:`object` type in PDF. Here is an example (describing a :data:`page`)::
55 |
56 | <<
57 | /Contents 40 0 R % value: an indirect object
58 | /Type/Page % value: a name object
59 | /MediaBox[0 0 595.32 841.92] % value: an array object
60 | /Rotate 0 % value: a number object
61 | /Parent 12 0 R % value: an indirect object
62 | /Resources<< % value: a dictionary object
63 | /ExtGState<</R7 26 0 R>>
64 | /Font<<
65 | /R8 27 0 R/R10 21 0 R/R12 24 0 R/R14 15 0 R
66 | /R17 4 0 R/R20 30 0 R/R23 7 0 R /R27 20 0 R
67 | >>
68 | /ProcSet[/PDF/Text] % value: array of two name objects
69 | >>
70 | /Annots[55 0 R] % value: array, one entry (indirect object)
71 | >>
72 |
73 | *Contents*, *Type*, *MediaBox*, etc. are **keys**, *40 0 R*, *Page*, *[0 0 595.32 841.92]*, etc. are the respective **values**. The strings *"<<"* and *">>"* are used to enclose object definitions.
74 |
75 | This example also shows the syntax of **nested** dictionary values: *Resources* has an object as its value, which in turn is a dictionary with keys like *ExtGState* (with the value *<</R7 26 0 R>>*, which is another dictionary), etc.
76 |
77 | .. data:: page
78 |
79 | A PDF page is a :data:`dictionary` object which defines one page in a PDF, see :ref:`AdobeManual` p. 145.
80 |
81 | .. data:: pagetree
82 |
83 | "The pages of a document are accessed through a structure known as the page tree, which defines the ordering of pages in the document. The tree structure allows PDF consumer applications, using only limited memory, to quickly open a document containing thousands of pages. The tree contains nodes of two types: intermediate nodes, called page tree nodes, and leaf nodes, called page objects." (:ref:`AdobeManual` p. 143).
84 |
85 | While it is possible to list all page references in just one array, PDFs with many pages are often created using *balanced tree* structures ("page trees") for faster access to any single page. In relation to the total number of pages, this can reduce the average page access time by page number from a linear to some logarithmic order of magnitude.
86 |
87 | For fast page access, MuPDF can use its own array in memory -- independently from what may or may not be present in the document file. This array is indexed by page number and therefore much faster than even the access via a perfectly balanced page tree.
88 |
89 | .. data:: object
90 |
91 | Similar to Python, PDF supports the notion *object*, which can come in eight basic types: boolean values, integer and real numbers, strings, names, arrays, dictionaries, streams, and the null object (:ref:`AdobeManual` p. 51). Objects can be made identifiable by assigning a label. This label is then called *indirect* object. PyMuPDF supports retrieving definitions of indirect objects via their cross reference number via :meth:`Document.xrefObject`.
92 |
93 | .. data:: stream
94 |
95 | A PDF :data:`object` type which is a sequence of bytes, similar to a string. "However, a PDF application can read a stream incrementally, while a string must be read in its entirety. Furthermore, a stream can be of unlimited length, whereas a string is subject to an implementation limit. For this reason, objects with potentially large amounts of data, such as images and page descriptions, are represented as streams." "A stream consists of a :data:`dictionary` followed by zero or more bytes bracketed between the keywords *stream* and *endstream*"::
96 |
97 | nnn 0 obj
98 | <<
99 | dictionary definition
100 | >>
101 | stream
102 | (zero or more bytes)
103 | endstream
104 | endobj
105 |
106 | See :ref:`AdobeManual` p. 60. PyMuPDF supports retrieving stream content via :meth:`Document.xrefStream`. Use :meth:`Document.isStream` to determine whether an object is of stream type.
107 |
108 | .. data:: unitvector
109 |
110 | A mathematical notion meaning a vector of norm ("length") 1 -- usually the Euclidean norm is implied. In PyMuPDF, this term is restricted to :ref:`Point` objects, see :attr:`Point.unit`.
111 |
112 | .. data:: xref
113 |
114 | Abbreviation for cross-reference number: this is an integer unique identification for objects in a PDF. There exists a cross-reference table (which may physically consist of several separate segments) in each PDF, which stores the relative position of each object for quick lookup. The cross-reference table is one entry longer than the number of existing objects: item zero is reserved and must not be used in any way. Many PyMuPDF classes have an *xref* attribute (which is zero for non-PDFs), and one can find out the total number of objects in a PDF via :meth:`Document.xrefLength` *- 1*.
115 |
116 | .. data:: resolution
117 |
118 | Images and :ref:`Pixmap` objects may contain resolution information provided as "dots per inch", dpi, in each direction (horizontal and vertical). When MuPDF reads an image from a file or from a PDF object, it will parse this information and put it in :attr:`Pixmap.xres`, :attr:`Pixmap.yres`, respectively. When it finds no meaningful information in the input (like non-positive values or values exceeding 4800), it will use "sane" defaults instead. The usual default value is 96, but it may also be 72 in some cases (e.g. 72 for JPX images).
119 |
--------------------------------------------------------------------------------
/docs/identity.rst:
--------------------------------------------------------------------------------
1 | .. _Identity:
2 |
3 | ============
4 | Identity
5 | ============
6 |
7 | Identity is a :ref:`Matrix` that performs no action -- to be used whenever the syntax requires a matrix, but no actual transformation should take place. It has the form *fitz.Matrix(1, 0, 0, 1, 0, 0)*.
8 |
9 | Identity is a constant, an "immutable" object. So, all of its matrix properties are read-only and its methods are disabled.
10 |
11 | If you need a **mutable** identity matrix as a starting point, use one of the following statements::
12 |
13 | >>> m = fitz.Matrix(1, 0, 0, 1, 0, 0) # specify the values
14 | >>> m = fitz.Matrix(1, 1) # use scaling by factor 1
15 | >>> m = fitz.Matrix(0) # use rotation by zero degrees
16 | >>> m = fitz.Matrix(fitz.Identity) # make a copy of Identity
17 |
--------------------------------------------------------------------------------
/docs/images/img-4up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-4up.png
--------------------------------------------------------------------------------
/docs/images/img-7edges.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-7edges.png
--------------------------------------------------------------------------------
/docs/images/img-a-is--1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-a-is--1.png
--------------------------------------------------------------------------------
/docs/images/img-adobe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-adobe.png
--------------------------------------------------------------------------------
/docs/images/img-alpha-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-alpha-0.png
--------------------------------------------------------------------------------
/docs/images/img-alpha-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-alpha-1.png
--------------------------------------------------------------------------------
/docs/images/img-annots.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-annots.jpg
--------------------------------------------------------------------------------
/docs/images/img-attach-result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-attach-result.jpg
--------------------------------------------------------------------------------
/docs/images/img-b-is-0.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-b-is-0.5.png
--------------------------------------------------------------------------------
/docs/images/img-binsetupdirs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-binsetupdirs.png
--------------------------------------------------------------------------------
/docs/images/img-breadth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-breadth.png
--------------------------------------------------------------------------------
/docs/images/img-c-is-0.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-c-is-0.5.png
--------------------------------------------------------------------------------
/docs/images/img-cake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-cake.png
--------------------------------------------------------------------------------
/docs/images/img-caret-annot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-caret-annot.jpg
--------------------------------------------------------------------------------
/docs/images/img-circle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-circle.png
--------------------------------------------------------------------------------
/docs/images/img-clip.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-clip.jpg
--------------------------------------------------------------------------------
/docs/images/img-colordb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-colordb.png
--------------------------------------------------------------------------------
/docs/images/img-copy-speed-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-copy-speed-1.png
--------------------------------------------------------------------------------
/docs/images/img-copy-speed-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-copy-speed-2.png
--------------------------------------------------------------------------------
/docs/images/img-d-is--1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-d-is--1.png
--------------------------------------------------------------------------------
/docs/images/img-drawBezier.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawBezier.png
--------------------------------------------------------------------------------
/docs/images/img-drawCurve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawCurve.png
--------------------------------------------------------------------------------
/docs/images/img-drawSector1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawSector1.png
--------------------------------------------------------------------------------
/docs/images/img-drawSector2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawSector2.png
--------------------------------------------------------------------------------
/docs/images/img-drawcircle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawcircle.jpg
--------------------------------------------------------------------------------
/docs/images/img-drawquad.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-drawquad.jpg
--------------------------------------------------------------------------------
/docs/images/img-e-is-100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-e-is-100.png
--------------------------------------------------------------------------------
/docs/images/img-embed-progress.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-embed-progress.jpg
--------------------------------------------------------------------------------
/docs/images/img-encoding.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-encoding.jpg
--------------------------------------------------------------------------------
/docs/images/img-encrypting.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-encrypting.jpg
--------------------------------------------------------------------------------
/docs/images/img-even-odd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-even-odd.png
--------------------------------------------------------------------------------
/docs/images/img-extract-imga.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-extract-imga.jpg
--------------------------------------------------------------------------------
/docs/images/img-extract-imgb.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-extract-imgb.jpg
--------------------------------------------------------------------------------
/docs/images/img-f-is-100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-f-is-100.png
--------------------------------------------------------------------------------
/docs/images/img-filesizes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-filesizes.png
--------------------------------------------------------------------------------
/docs/images/img-freetext.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-freetext.jpg
--------------------------------------------------------------------------------
/docs/images/img-import-progress.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-import-progress.jpg
--------------------------------------------------------------------------------
/docs/images/img-inkannot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-inkannot.jpg
--------------------------------------------------------------------------------
/docs/images/img-inserttext.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-inserttext.jpg
--------------------------------------------------------------------------------
/docs/images/img-markedpdf.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-markedpdf.jpg
--------------------------------------------------------------------------------
/docs/images/img-markers.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-markers.jpg
--------------------------------------------------------------------------------
/docs/images/img-matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-matrix.png
--------------------------------------------------------------------------------
/docs/images/img-opacity.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-opacity.jpg
--------------------------------------------------------------------------------
/docs/images/img-original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-original.png
--------------------------------------------------------------------------------
/docs/images/img-pdfjoiner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-pdfjoiner.jpg
--------------------------------------------------------------------------------
/docs/images/img-pdftext.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-pdftext.jpg
--------------------------------------------------------------------------------
/docs/images/img-planish.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-planish.png
--------------------------------------------------------------------------------
/docs/images/img-point-unit.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-point-unit.jpg
--------------------------------------------------------------------------------
/docs/images/img-polyline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-polyline.png
--------------------------------------------------------------------------------
/docs/images/img-posterize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-posterize.png
--------------------------------------------------------------------------------
/docs/images/img-pymupdf.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-pymupdf.jpg
--------------------------------------------------------------------------------
/docs/images/img-quads.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-quads.jpg
--------------------------------------------------------------------------------
/docs/images/img-redact.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-redact.jpg
--------------------------------------------------------------------------------
/docs/images/img-render-speed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-render-speed.png
--------------------------------------------------------------------------------
/docs/images/img-rendermode.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rendermode.jpg
--------------------------------------------------------------------------------
/docs/images/img-rot+morph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rot+morph.png
--------------------------------------------------------------------------------
/docs/images/img-rot-60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rot-60.png
--------------------------------------------------------------------------------
/docs/images/img-rotate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-rotate.png
--------------------------------------------------------------------------------
/docs/images/img-showpdfpage.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-showpdfpage.jpg
--------------------------------------------------------------------------------
/docs/images/img-sierpinski.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-sierpinski.png
--------------------------------------------------------------------------------
/docs/images/img-squiggly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-squiggly.png
--------------------------------------------------------------------------------
/docs/images/img-stampannot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-stampannot.jpg
--------------------------------------------------------------------------------
/docs/images/img-stencil.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-stencil.jpg
--------------------------------------------------------------------------------
/docs/images/img-symbols.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-symbols.jpg
--------------------------------------------------------------------------------
/docs/images/img-target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-target.png
--------------------------------------------------------------------------------
/docs/images/img-textbox.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textbox.jpg
--------------------------------------------------------------------------------
/docs/images/img-textboxtract.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textboxtract.png
--------------------------------------------------------------------------------
/docs/images/img-textmarker.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textmarker.jpg
--------------------------------------------------------------------------------
/docs/images/img-textmethods.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textmethods.png
--------------------------------------------------------------------------------
/docs/images/img-textpage-char.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textpage-char.png
--------------------------------------------------------------------------------
/docs/images/img-textpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textpage.png
--------------------------------------------------------------------------------
/docs/images/img-textperformance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-textperformance.png
--------------------------------------------------------------------------------
/docs/images/img-timings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-timings.png
--------------------------------------------------------------------------------
/docs/images/img-writeimage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/img-writeimage.png
--------------------------------------------------------------------------------
/docs/images/mupdf-icons.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/images/mupdf-icons.jpg
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | **PyMuPDF Documentation**
2 | =================================
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | intro
8 | installation
9 | tutorial
10 | faq
11 | module
12 | classes
13 | algebra
14 | lowlevel
15 | glossary
16 | vars
17 | colors
18 | app1
19 | app2
20 | app3
21 | app4
22 | changes
23 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | =============
3 | PyMuPDF can be installed from sources as follows or from wheels, see :ref:`InstallBinary`.
4 |
5 | .. _InstallSource:
6 |
7 | Option 1: Install from Sources
8 | -------------------------------
9 | This is a three-step process.
10 |
11 | Step 1: Download PyMuPDF
12 | ~~~~~~~~~~~~~~~~~~~~~~~~~
13 | Download the sources from https://pypi.org/project/PyMuPDF/#files and decompress them.
14 |
15 | Step 2: Download and Generate MuPDF
16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 | Download *mupdf-x.xx.x-source.tar.gz* from `MuPDF <https://mupdf.com/downloads>`_ and unzip / decompress it. Make sure to download the (sub-) version for which PyMuPDF has stated its compatibility.
18 |
19 | .. note:: The latest MuPDF **development sources** are available on https://github.com/ArtifexSoftware/mupdf -- this is **not** what you want here.
20 |
21 |
22 | **Applying any Changes and Hot Fixes to MuPDF Sources**
23 |
24 | On occasion, vital hot fixes or functional enhancements must be applied to MuPDF sources before it is generated.
25 |
26 | Any such files are contained in the *fitz* directory of the `PyMuPDF homepage <https://github.com/pymupdf/PyMuPDF>`_ -- their names all start with an underscore *"_"*. Currently (v1.16.x), these files and their copy destinations are the following:
27 |
28 | * *_config.h* -- PyMuPDF's configuration to control the binary file size and the inclusion of MuPDF features, see next section. This file must be renamed and must replace the MuPDF file */include/mupdf/fitz/config.h*. This file controls the size of the PyMuPDF binary by cutting away unneeded fonts from MuPDF.
29 |
30 | **Generate MuPDF**
31 |
32 | The MuPDF source includes generation procedures / makefiles for numerous platforms. For Windows platforms, Visual Studio solution and project definitions are provided.
33 |
34 | PyMuPDF's `homepage <https://github.com/pymupdf/PyMuPDF>`_ contains additional details and hints.
35 |
36 | Step 3: Build / Setup PyMuPDF
37 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38 | Adjust the setup.py script as necessary. E.g. make sure that:
39 |
40 | * the include directory is correctly set in sync with your directory structure
41 | * the object code libraries are correctly defined
42 |
43 | Now perform a *python setup.py install*.
44 |
45 | .. note:: You can also install from the sources of the Github repository. These **do not contain** the pre-generated files *fitz.py* or *fitz_wrap.c*, which instead are generated by the installation script *setup.py*. To use it, `SWIG <http://www.swig.org/>`_ must be installed on your system.
46 |
47 |
48 | .. _InstallBinary:
49 |
50 | Option 2: Install from Binaries
51 | --------------------------------
52 | You can install PyMuPDF from Python wheels. The wheels are *self-contained*, i.e. you will **not need any other software** nor download / install MuPDF to run PyMuPDF scripts.
53 | This installation option is available for all MS Windows and the most **popular 64-bit** Mac OSX and Linux platforms for Python versions 2.7 and 3.5 through 3.8.
54 | Windows binaries are provided for Python **32-bit and 64-bit** versions.
55 |
56 | **Overview of wheel names (PyMuPDF version is x.xx.xx):**
57 |
58 | .. literalinclude:: wheelnames.txt
59 |
60 |
61 | Older versions can be found in the releases directory of our home page https://github.com/pymupdf/PyMuPDF/releases.
62 |
63 | If you unexpectedly run into problems installing the wheel for your system, please make sure you have updated your PIP to the current version.
64 |
65 |
--------------------------------------------------------------------------------
/docs/intro.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ==============
3 |
4 | .. image:: images/img-pymupdf.jpg
5 | :align: center
6 |
7 | **PyMuPDF** is a Python binding for `MuPDF <https://mupdf.com/>`_ -- "a lightweight PDF and XPS viewer".
8 |
9 | MuPDF can access files in PDF, XPS, OpenXPS, CBZ (comic book archive), FB2 and EPUB (e-book) formats.
10 |
11 | These are files with extensions *.pdf*, *.xps*, *.oxps*, *.cbz*, *.fb2* or *.epub* (so you can develop **e-book viewers in Python** ...).
12 |
13 | PyMuPDF provides access to many important functions of MuPDF from within a Python environment, and we are continuously seeking to expand this function set.
14 |
15 | MuPDF stands out among all similar products for its top rendering capability and unsurpassed processing speed. At the same time, its "light weight" makes it an excellent choice for platforms where resources are typically limited, like smartphones.
16 |
17 | Check this out yourself and compare the various free PDF-viewers. In terms of speed and rendering quality `SumatraPDF <https://www.sumatrapdfreader.org/>`_ ranks at the top (apart from MuPDF's own standalone viewer) -- since it has changed its library basis to MuPDF!
18 |
19 | While PyMuPDF has been available for several years for an earlier version of MuPDF (v1.2, called **fitz-python** then), it was not until mid May 2015 that its creator and a few co-workers decided to elevate it to support current releases of MuPDF.
20 |
21 | PyMuPDF runs and has been tested on Mac, Linux, Windows XP SP2 and up, Python 2.7 through Python 3.7 (note that Python supports Windows XP only up to v3.4), 32bit and 64bit versions. Other platforms should work too, as long as MuPDF and Python support them.
22 |
23 | PyMuPDF is hosted on `GitHub <https://github.com/pymupdf/PyMuPDF>`_. We are also registered on `PyPI <https://pypi.org/project/PyMuPDF/>`_.
24 |
25 | For MS Windows and popular Python versions on Mac OSX and Linux we have created wheels. So installation should be convenient enough for hopefully most of our users: just issue
26 |
27 | *pip install --upgrade pymupdf*
28 |
29 | If your platform is not among those supported with a wheel, your installation consists of two separate steps:
30 |
31 | 1. Installation of MuPDF: this involves downloading the source from their website and then compiling it on your machine. Adjust *setup.py* to point to the right directories (next step), before you try generating PyMuPDF.
32 |
33 | 2. Installation of PyMuPDF: this step is normal Python procedure. Usually you will have to adapt the *setup.py* to point to correct *include* and *lib* directories of your generated MuPDF.
34 |
35 | For installation details check out the respective chapter.
36 |
37 | There exist several `demo `_ and `example `_ programs in the main repository, ranging from simple code snippets to full-featured utilities, like text extraction, PDF joiners and bookmark maintenance.
38 |
39 | Interesting **PDF manipulation and generation** functions have been added over time, including metadata and bookmark maintenance, document restructuring, annotation / link handling and document or page creation.
40 |
41 | Note on the Name *fitz*
42 | --------------------------
43 | The standard Python import statement for this library is *import fitz*. This has a historical reason:
44 |
45 | The original rendering library for MuPDF was called *Libart*.
46 |
47 | *"After Artifex Software acquired the MuPDF project, the development focus shifted on writing a new modern graphics library called *Fitz*. Fitz was originally intended as an R&D project to replace the aging Ghostscript graphics library, but has instead become the rendering engine powering MuPDF."* (Quoted from `Wikipedia <https://en.wikipedia.org/wiki/MuPDF>`_).
48 |
49 | License
50 | --------
51 | PyMuPDF is distributed under GNU GPL V3 (or later, at your choice).
52 |
53 | MuPDF is distributed under a separate license, the **GNU AFFERO GPL V3**.
54 |
55 | Both licenses apply, when you use PyMuPDF.
56 |
57 | .. note:: Version 3 of the GNU AFFERO GPL is a lot less restrictive than its earlier versions used to be. It basically is an open source freeware license, that obliges your software to also be open source and freeware. Consult `this website <https://artifex.com/licensing/>`_, if you want to create a commercial product with PyMuPDF.
58 |
59 | .. include:: version.rst
60 |
--------------------------------------------------------------------------------
/docs/irect.rst:
--------------------------------------------------------------------------------
1 | .. _IRect:
2 |
3 | ==========
4 | IRect
5 | ==========
6 |
7 | IRect is a rectangular bounding box similar to :ref:`Rect`, except that all corner coordinates are integers. IRect is used to specify an area of pixels, e.g. to receive image data during rendering. Otherwise, many similarities exist, e.g. considerations concerning emptiness and finiteness of rectangles also apply to this class.
8 |
9 | ============================== ===========================================
10 | **Attribute / Method** **Short Description**
11 | ============================== ===========================================
12 | :meth:`IRect.contains` checks containment of another object
13 | :meth:`IRect.getArea` calculate rectangle area
14 | :meth:`IRect.getRect` return a :ref:`Rect` with same coordinates
15 | :meth:`IRect.getRectArea` calculate rectangle area
16 | :meth:`IRect.intersect` common part with another rectangle
17 | :meth:`IRect.intersects` checks for non-empty intersection
18 | :meth:`IRect.morph` transform with a point and a matrix
19 | :meth:`IRect.norm` the Euclidean norm
20 | :meth:`IRect.normalize` makes a rectangle finite
21 | :attr:`IRect.bottom_left` bottom left point, synonym *bl*
22 | :attr:`IRect.bottom_right` bottom right point, synonym *br*
23 | :attr:`IRect.height` height of the rectangle
24 | :attr:`IRect.isEmpty` whether rectangle is empty
25 | :attr:`IRect.isInfinite` whether rectangle is infinite
26 | :attr:`IRect.rect` equals result of method *getRect()*
27 | :attr:`IRect.top_left` top left point, synonym *tl*
28 | :attr:`IRect.top_right` top right point, synonym *tr*
29 | :attr:`IRect.quad` :ref:`Quad` made from rectangle corners
30 | :attr:`IRect.width` width of the rectangle
31 | :attr:`IRect.x0` X-coordinate of the top left corner
32 | :attr:`IRect.x1` X-coordinate of the bottom right corner
33 | :attr:`IRect.y0` Y-coordinate of the top left corner
34 | :attr:`IRect.y1` Y-coordinate of the bottom right corner
35 | ============================== ===========================================
36 |
37 | **Class API**
38 |
39 | .. class:: IRect
40 |
41 | .. method:: __init__(self)
42 |
43 | .. method:: __init__(self, x0, y0, x1, y1)
44 |
45 | .. method:: __init__(self, irect)
46 |
47 | .. method:: __init__(self, sequence)
48 |
49 | Overloaded constructors. Also see examples below and those for the :ref:`Rect` class.
50 |
51 | If another irect is specified, a **new copy** will be made.
52 |
53 | If sequence is specified, it must be a Python sequence type of 4 numbers (see :ref:`SequenceTypes`). Non-integer numbers will be truncated, non-numeric entries will raise an exception.
54 |
55 | The other parameters mean integer coordinates.
56 |
57 | .. method:: getRect()
58 |
59 | A convenience function returning a :ref:`Rect` with the same coordinates. Also available as attribute *rect*.
60 |
61 | :rtype: :ref:`Rect`
62 |
63 | .. method:: getRectArea([unit])
64 |
65 | .. method:: getArea([unit])
66 |
67 | Calculates the area of the rectangle and, with no parameter, equals *abs(IRect)*. Like an empty rectangle, the area of an infinite rectangle is also zero.
68 |
69 | :arg str unit: Specify required unit: respective squares of "px" (pixels, default), "in" (inches), "cm" (centimeters), or "mm" (millimeters).
70 |
71 | :rtype: float
72 |
73 | .. method:: intersect(ir)
74 |
75 | The intersection (common rectangular area) of the current rectangle and *ir* is calculated and replaces the current rectangle. If either rectangle is empty, the result is also empty. If either rectangle is infinite, the other one is taken as the result -- and hence also infinite if both rectangles were infinite.
76 |
77 | :arg rect_like ir: Second rectangle.
78 |
79 | .. method:: contains(x)
80 |
81 | Checks whether *x* is contained in the rectangle. It may be :data:`rect_like`, :data:`point_like` or a number. If *x* is an empty rectangle, this is always true. Conversely, if the rectangle is empty this is always *False*, if *x* is not an empty rectangle and not a number. If *x* is a number, it will be checked to be one of the four components. *x in irect* and *irect.contains(x)* are equivalent.
82 |
83 | :arg x: the object to check.
84 | :type x: :ref:`IRect` or :ref:`Rect` or :ref:`Point` or int
85 |
86 | :rtype: bool
87 |
88 | .. method:: intersects(r)
89 |
90 | Checks whether the rectangle and the :data:`rect_like` "r" contain a common non-empty :ref:`IRect`. This will always be *False* if either is infinite or empty.
91 |
92 | :arg rect_like r: the rectangle to check.
93 |
94 | :rtype: bool
95 |
96 | .. method:: morph(fixpoint, matrix)
97 |
98 | *(New in version 1.17.0)*
99 |
100 | Return a new quad after applying a matrix to it using a fixed point.
101 |
102 | :arg point_like fixpoint: the fixed point.
103 | :arg matrix_like matrix: the matrix.
104 | :returns: a new :ref:`Quad`. This is a wrapper for the same-named quad method.
105 |
106 | .. method:: norm()
107 |
108 | *(New in version 1.16.0)*
109 |
110 | Return the Euclidean norm of the rectangle treated as a vector of four numbers.
111 |
112 | .. method:: normalize()
113 |
114 | Make the rectangle finite. This is done by shuffling rectangle corners. After this, the bottom right corner will indeed be south-eastern to the top left one. See :ref:`Rect` for more details.
115 |
116 | .. attribute:: top_left
117 |
118 | .. attribute:: tl
119 |
120 | Equals *Point(x0, y0)*.
121 |
122 | :type: :ref:`Point`
123 |
124 | .. attribute:: top_right
125 |
126 | .. attribute:: tr
127 |
128 | Equals *Point(x1, y0)*.
129 |
130 | :type: :ref:`Point`
131 |
132 | .. attribute:: bottom_left
133 |
134 | .. attribute:: bl
135 |
136 | Equals *Point(x0, y1)*.
137 |
138 | :type: :ref:`Point`
139 |
140 | .. attribute:: bottom_right
141 |
142 | .. attribute:: br
143 |
144 | Equals *Point(x1, y1)*.
145 |
146 | :type: :ref:`Point`
147 |
148 | .. attribute:: quad
149 |
150 | The quadrilateral *Quad(irect.tl, irect.tr, irect.bl, irect.br)*.
151 |
152 | :type: :ref:`Quad`
153 |
154 | .. attribute:: width
155 |
156 | Contains the width of the bounding box. Equals *abs(x1 - x0)*.
157 |
158 | :type: int
159 |
160 | .. attribute:: height
161 |
162 | Contains the height of the bounding box. Equals *abs(y1 - y0)*.
163 |
164 | :type: int
165 |
166 | .. attribute:: x0
167 |
168 | X-coordinate of the left corners.
169 |
170 | :type: int
171 |
172 | .. attribute:: y0
173 |
174 | Y-coordinate of the top corners.
175 |
176 | :type: int
177 |
178 | .. attribute:: x1
179 |
180 | X-coordinate of the right corners.
181 |
182 | :type: int
183 |
184 | .. attribute:: y1
185 |
186 | Y-coordinate of the bottom corners.
187 |
188 | :type: int
189 |
190 | .. attribute:: isInfinite
191 |
192 | *True* if rectangle is infinite, *False* otherwise.
193 |
194 | :type: bool
195 |
196 | .. attribute:: isEmpty
197 |
198 | *True* if rectangle is empty, *False* otherwise.
199 |
200 | :type: bool
201 |
202 |
203 | .. note::
204 |
205 | * This class adheres to the Python sequence protocol, so components can be accessed via their index, too. Also refer to :ref:`SequenceTypes`.
206 | * Rectangles can be used with arithmetic operators -- see chapter :ref:`Algebra`.
207 |
208 |
--------------------------------------------------------------------------------
/docs/kerning.style:
--------------------------------------------------------------------------------
1 | fontsAlias:
2 | stdBold: DejaVu Sans-Bold
3 | stdBoldItalic: DejaVu Sans-BoldOblique
4 | stdFont: DejaVu Sans
5 | stdItalic: DejaVu Sans-Oblique
6 | stdMono: Courier New
7 | stdMonoBold: DejaVu Sans Mono-Bold
8 | stdMonoBoldItalic: DejaVu Sans Mono-BoldOblique
9 | stdMonoItalic: DejaVu Sans Mono-Oblique
10 | stdSans: DejaVu Sans
11 | stdSansBold: DejaVu Sans-Bold
12 | stdSansBoldItalic: DejaVu Sans-BoldOblique
13 | stdSansItalic: DejaVu Sans-Oblique
14 | stdSerif: DejaVu Serif
15 |
16 | styles: base: kerning: true
17 |
18 | styles: bodytext: alignment: left
19 |
--------------------------------------------------------------------------------
/docs/link.rst:
--------------------------------------------------------------------------------
1 | .. _Link:
2 |
3 | ================
4 | Link
5 | ================
6 | Represents a pointer to somewhere (this document, other documents, the internet). Links exist per document page, and they are forward-chained to each other, starting from an initial link which is accessible by the :attr:`Page.firstLink` property.
7 |
8 | There is a parent-child relationship between a link and its page. If the page object becomes unusable (closed document, any document structure change, etc.), then so does every one of its existing link objects -- an exception is raised saying that the object is "orphaned", whenever a link property or method is accessed.
9 |
10 | ========================= ============================================
11 | **Attribute** **Short Description**
12 | ========================= ============================================
13 | :meth:`Link.setBorder` modify border properties
14 | :meth:`Link.setColors` modify color properties
15 | :attr:`Link.border` border characteristics
16 | :attr:`Link.colors` border line color
17 | :attr:`Link.dest` points to link destination details
18 | :attr:`Link.isExternal` external link destination?
19 | :attr:`Link.next` points to next link
20 | :attr:`Link.rect` clickable area in untransformed coordinates.
21 | :attr:`Link.uri` link destination
22 | :attr:`Link.xref` :data:`xref` number of the entry
23 | ========================= ============================================
24 |
25 | **Class API**
26 |
27 | .. class:: Link
28 |
29 | .. method:: setBorder(border=None, width=0, style=None, dashes=None)
30 |
31 | PDF only: Change border width and dashing properties.
32 |
33 | *(Changed in version 1.16.9)* Allow specification without using a dictionary. The direct parameters are used if *border* is not a dictionary.
34 |
35 | :arg dict border: a dictionary as returned by the :attr:`border` property, with keys *"width"* (*float*), *"style"* (*str*) and *"dashes"* (*sequence*). Omitted keys will leave the resp. property unchanged. To e.g. remove dashing use: *"dashes": []*. If dashes is not an empty sequence, "style" will automatically be set to "D" (dashed).
36 |
37 | :arg float width: see above.
38 | :arg str style: see above.
39 | :arg sequence dashes: see above.
40 |
41 | .. method:: setColors(colors=None, stroke=None, fill=None)
42 |
43 | Changes the "stroke" and "fill" colors.
44 |
45 | *(Changed in version 1.16.9)* Allow colors to be directly set. These parameters are used if *colors* is not a dictionary.
46 |
47 | :arg dict colors: a dictionary containing color specifications. For accepted dictionary keys and values see below. The most practical way should be to first make a copy of the *colors* property and then modify this dictionary as required.
48 | :arg sequence stroke: see above.
49 | :arg sequence fill: see above.
50 |
51 |
52 | .. attribute:: colors
53 |
54 | Meaningful for PDF only: A dictionary of two lists of floats in range *0 <= float <= 1* specifying the *stroke* and the interior (*fill*) colors. If not a PDF, *None* is returned. The stroke color is used for borders and everything that is actively painted or written ("stroked"). The lengths of these lists implicitly determine the colorspaces used: 1 = GRAY, 3 = RGB, 4 = CMYK. So *[1.0, 0.0, 0.0]* stands for RGB color red. Both lists can be *[]* if no color is specified. The value of each float *f* is mapped to the integer value *i* in range 0 to 255 via the computation *f = i / 255*.
55 |
56 | :rtype: dict
57 |
58 | .. attribute:: border
59 |
60 | Meaningful for PDF only: A dictionary containing border characteristics. It will be *None* for non-PDFs and an empty dictionary if no border information exists. The following keys can occur:
61 |
62 | * *width* -- a float indicating the border thickness in points. The value is -1.0 if no width is specified.
63 |
64 | * *dashes* -- a sequence of integers specifying a line dash pattern. *[]* means no dashes, *[n]* means equal on-off lengths of *n* points, longer lists will be interpreted as specifying alternating on-off length values. See the :ref:`AdobeManual` page 217 for more details.
65 |
66 | * *style* -- 1-byte border style: *S* (Solid) = solid rectangle surrounding the annotation, *D* (Dashed) = dashed rectangle surrounding the link, the dash pattern is specified by the *dashes* entry, *B* (Beveled) = a simulated embossed rectangle that appears to be raised above the surface of the page, *I* (Inset) = a simulated engraved rectangle that appears to be recessed below the surface of the page, *U* (Underline) = a single line along the bottom of the annotation rectangle.
67 |
68 | :rtype: dict
69 |
70 | .. attribute:: rect
71 |
72 | The area that can be clicked in untransformed coordinates.
73 |
74 | :type: :ref:`Rect`
75 |
76 | .. attribute:: isExternal
77 |
78 | A bool specifying whether the link target is outside of the current document.
79 |
80 | :type: bool
81 |
82 | .. attribute:: uri
83 |
84 | A string specifying the link target. The meaning of this property should be evaluated in conjunction with property *isExternal*. The value may be *None*, in which case *isExternal == False*. If *uri* starts with *file://*, *mailto:*, or an internet resource name, *isExternal* is *True*. In all other cases *isExternal == False* and *uri* points to an internal location. In case of PDF documents, this should either be *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named location. The format varies for other document types, e.g. *uri = '../FixedDoc.fdoc#PG_2_LNK_1'* for page number 2 (1-based) in an XPS document.
85 |
86 | :type: str
87 |
88 | .. attribute:: xref
89 |
90 | An integer specifying the PDF :data:`xref`. Zero if not a PDF.
91 |
92 | :type: int
93 |
94 | .. attribute:: next
95 |
96 | The next link or *None*.
97 |
98 | :type: *Link*
99 |
100 | .. attribute:: dest
101 |
102 | The link destination details object.
103 |
104 | :type: :ref:`linkDest`
105 |
--------------------------------------------------------------------------------
/docs/linkdest.rst:
--------------------------------------------------------------------------------
1 | .. _linkDest:
2 |
3 | ================
4 | linkDest
5 | ================
6 | Class representing the `dest` property of an outline entry or a link. Describes the destination to which such entries point.
7 |
8 | =========================== ====================================
9 | **Attribute** **Short Description**
10 | =========================== ====================================
11 | :attr:`linkDest.dest` destination
12 | :attr:`linkDest.fileSpec` file specification (path, filename)
13 | :attr:`linkDest.flags` descriptive flags
14 | :attr:`linkDest.isMap` is this a MAP?
15 | :attr:`linkDest.isUri` is this a URI?
16 | :attr:`linkDest.kind` kind of destination
17 | :attr:`linkDest.lt` top left coordinates
18 | :attr:`linkDest.named` name if named destination
19 | :attr:`linkDest.newWindow` name of new window
20 | :attr:`linkDest.page` page number
21 | :attr:`linkDest.rb` bottom right coordinates
22 | :attr:`linkDest.uri` URI
23 | =========================== ====================================
24 |
25 | **Class API**
26 |
27 | .. class:: linkDest
28 |
29 | .. attribute:: dest
30 |
31 | Target destination name if :attr:`linkDest.kind` is :data:`LINK_GOTOR` and :attr:`linkDest.page` is *-1*.
32 |
33 | :type: str
34 |
35 | .. attribute:: fileSpec
36 |
37 | Contains the filename and path this link points to, if :attr:`linkDest.kind` is :data:`LINK_GOTOR` or :data:`LINK_LAUNCH`.
38 |
39 | :type: str
40 |
41 | .. attribute:: flags
42 |
43 | A bitfield describing the validity and meaning of the different aspects of the destination. As far as possible, link destinations are constructed such that e.g. :attr:`linkDest.lt` and :attr:`linkDest.rb` can be treated as defining a bounding box. But the flags indicate which of the values were actually specified, see :ref:`linkDest Flags`.
44 |
45 | :type: int
46 |
47 | .. attribute:: isMap
48 |
49 | This flag specifies whether to track the mouse position when the URI is resolved. Default value: False.
50 |
51 | :type: bool
52 |
53 | .. attribute:: isUri
54 |
55 | Specifies whether this destination is an internet resource (as opposed to e.g. a local file specification in URI format).
56 |
57 | :type: bool
58 |
59 | .. attribute:: kind
60 |
61 | Indicates the type of this destination, like a place in this document, a URI, a file launch, an action or a place in another file. Look at :ref:`linkDest Kinds` to see the names and numerical values.
62 |
63 | :type: int
64 |
65 | .. attribute:: lt
66 |
67 | The top left :ref:`Point` of the destination.
68 |
69 | :type: :ref:`Point`
70 |
71 | .. attribute:: named
72 |
73 | This destination refers to some named action to perform (e.g. a javascript, see :ref:`AdobeManual`). Standard actions provided are *NextPage*, *PrevPage*, *FirstPage*, and *LastPage*.
74 |
75 | :type: str
76 |
77 | .. attribute:: newWindow
78 |
79 | If true, the destination should be launched in a new window.
80 |
81 | :type: bool
82 |
83 | .. attribute:: page
84 |
85 | The page number (in this or the target document) this destination points to. Only set if :attr:`linkDest.kind` is :data:`LINK_GOTOR` or :data:`LINK_GOTO`. May be *-1* if :attr:`linkDest.kind` is :data:`LINK_GOTOR`. In this case :attr:`linkDest.dest` contains the **name** of a destination in the target document.
86 |
87 | :type: int
88 |
89 | .. attribute:: rb
90 |
91 | The bottom right :ref:`Point` of this destination.
92 |
93 | :type: :ref:`Point`
94 |
95 | .. attribute:: uri
96 |
97 | The name of the URI this destination points to.
98 |
99 | :type: str
100 |
--------------------------------------------------------------------------------
/docs/lowlevel.rst:
--------------------------------------------------------------------------------
1 | =================================
2 | Low Level Functions and Classes
3 | =================================
4 | Contains a number of functions and classes for the experienced user. To be used for special needs or performance requirements.
5 |
6 | .. toctree::
7 | :maxdepth: 1
8 |
9 | functions
10 | device
11 | coop_low
12 |
--------------------------------------------------------------------------------
/docs/make-bold.py:
--------------------------------------------------------------------------------
1 | """
2 | Problem: Since MuPDF v1.16 a 'Freetext' annotation font is restricted to the
3 | "normal" versions (no bold, no italics) of Times-Roman, Helvetica, Courier.
4 | It is impossible to use PyMuPDF to modify this.
5 |
6 | Solution: Using Adobe's JavaScript API, it is possible to manipulate properties
7 | of Freetext annotations. Check out these references:
8 | https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/js_api_reference.pdf,
9 | or https://www.adobe.com/devnet/acrobat/documentation.html.
10 |
11 | Function 'this.getAnnots()' will return all annotations as an array. We loop
12 | over this array to set the properties of the text through the 'richContents'
13 | attribute.
14 | There is no explicit property to set text to bold, but it is possible to set
15 | fontWeight=800 (400 is the normal size) of richContents.
16 | Other attributes, like color, italics, etc. can also be set via richContents.
17 |
18 | If we have 'FreeText' annotations created with PyMuPDF, we can make use of this
19 | JavaScript feature to modify the font - thus circumventing the above restriction.
20 |
21 | Use PyMuPDF v1.16.12 to create a push button that executes a Javascript
22 | containing the desired code. This is what this program does.
23 | Then open the resulting file with Adobe reader (!).
24 | After clicking on the button, all Freetext annotations will be bold, and the
25 | file can be saved.
26 | If desired, the button can be removed again, using free tools like PyMuPDF or
27 | PDF XChange editor.
28 |
29 | Note / Caution:
30 | ---------------
31 | The JavaScript will **only** work if the file is opened with Adobe Acrobat reader!
32 | When using other PDF viewers, the behavior is unpredictable.
33 | """
34 | import sys
35 |
36 | import fitz
37 |
38 | # this JavaScript will execute when the button is clicked:
39 | jscript = """
40 | var annt = this.getAnnots();
41 | annt.forEach(function (item, index) {
42 | try {
43 | var span = item.richContents;
44 | span.forEach(function (it, dx) {
45 | it.fontWeight = 800;
46 | })
47 | item.richContents = span;
48 | } catch (err) {}
49 | });
50 | app.alert('Done');
51 | """
import os  # needed below to split the input path; imported here to keep this section self-contained

# The input PDF is the only command line argument.
if len(sys.argv) < 2:
    sys.exit("usage: python make-bold.py input.pdf")
i_fn = sys.argv[1]  # input file name

# Prefix only the file name with "bold-". Prepending the prefix to the whole
# path would point into a different directory as soon as the input is given
# with a directory part (e.g. "docs/in.pdf" must become "docs/bold-in.pdf").
in_dir, in_name = os.path.split(i_fn)
o_fn = os.path.join(in_dir, "bold-" + in_name)  # output filename

doc = fitz.open(i_fn)  # open input
page = doc[0]  # get desired page

# ------------------------------------------------
# make a push button for invoking the JavaScript
# ------------------------------------------------

widget = fitz.Widget()  # create widget

# make it a 'PushButton'
widget.field_type = fitz.PDF_WIDGET_TYPE_BUTTON
widget.field_flags = fitz.PDF_BTN_FIELD_IS_PUSHBUTTON

widget.rect = fitz.Rect(5, 5, 20, 20)  # button position

widget.script = jscript  # fill in JavaScript source text
widget.field_name = "Make bold"  # arbitrary name
widget.field_value = "Off"  # arbitrary value
widget.fill_color = (0, 0, 1)  # make button visible

annot = page.addWidget(widget)  # add the widget to the page
doc.save(o_fn)  # output the file
76 |
77 |
--------------------------------------------------------------------------------
/docs/multiprocess-gui.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on 2019-05-01
3 |
4 | @author: yinkaisheng@live.com
5 | @copyright: 2019 yinkaisheng@live.com
6 | @license: GNU GPL 3.0+
7 |
8 | Demonstrate the use of multiprocessing with PyMuPDF
9 | -----------------------------------------------------
10 | This example shows some more advanced use of multiprocessing.
11 | The main process shows a Qt GUI and establishes a 2-way communication with
12 | another process, which accesses a supported document.
13 | """
14 | import os
15 | import sys
16 | import time
17 | import multiprocessing as mp
18 | import queue
19 | import fitz
20 | from PyQt5 import QtCore, QtGui, QtWidgets
21 |
# Choose a timer function that exists in the running interpreter:
# "str is bytes" is only true in Python 2, which has no time.perf_counter.
my_timer = time.clock if str is bytes else time.perf_counter
23 |
24 |
class DocForm(QtWidgets.QWidget):
    """Document player window which is fed by a separate rendering process.

    Page numbers are sent to the worker through ``queNum`` (-1 asks it to
    exit). The worker answers through ``queDoc`` with either the page count
    (an int) or a tuple describing one rendered page. Three timers poll the
    queues, drive the "play" mode and update the "loading" message.
    """

    def __init__(self):
        super().__init__()
        self.process = None
        self.queNum = mp.Queue()
        self.queDoc = mp.Queue()
        self.pageCount = 0
        self.curPageNum = 0
        self.lastDir = ""
        self.timerSend = self._newTimer(self.onTimerSendPageNum)
        self.timerGet = self._newTimer(self.onTimerGetPage)
        self.timerWaiting = self._newTimer(self.onTimerWaiting)
        self.initUI()

    def _newTimer(self, slot):
        """Create a timer owned by this widget and connect *slot* to it."""
        timer = QtCore.QTimer(self)
        timer.timeout.connect(slot)
        return timer

    def _showPageLabel(self):
        """Display "current page / page count" (current page is 1-based)."""
        self.label.setText("{}/{}".format(self.curPageNum + 1, self.pageCount))

    def initUI(self):
        """Build the button row, the page counter and the image area."""
        outer = QtWidgets.QVBoxLayout()
        self.setLayout(outer)

        # button row: attribute name, caption and click handler of each button
        toolbar = QtWidgets.QHBoxLayout()
        for attr, caption, handler in (
            ("btnOpen", "OpenDocument", self.openDoc),
            ("btnPlay", "PlayDocument", self.playDoc),
            ("btnStop", "Stop", self.stopPlay),
        ):
            button = QtWidgets.QPushButton(caption, self)
            button.clicked.connect(handler)
            toolbar.addWidget(button)
            setattr(self, attr, button)

        self.label = QtWidgets.QLabel("0/0", self)
        self.label.setFont(QtGui.QFont("Verdana", 20))
        toolbar.addWidget(self.label)

        outer.addLayout(toolbar)

        # the label below the button row shows the rendered page
        self.labelImg = QtWidgets.QLabel("Document", self)
        self.labelImg.setSizePolicy(
            QtWidgets.QSizePolicy(
                QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Expanding
            )
        )
        outer.addWidget(self.labelImg)

        self.setGeometry(100, 100, 400, 600)
        self.setWindowTitle("PyMuPDF Document Player")
        self.show()

    def openDoc(self):
        """Let the user pick a document and start a worker process for it."""
        path, _ = QtWidgets.QFileDialog.getOpenFileName(
            self,
            "Open Document",
            self.lastDir,
            "All Supported Files (*.pdf;*.epub;*.xps;*.oxps;*.cbz;*.fb2)"
            ";;PDF Files (*.pdf);;EPUB Files (*.epub);;XPS Files (*.xps)"
            ";;OpenXPS Files (*.oxps);;CBZ Files (*.cbz);;FB2 Files (*.fb2)",
            options=QtWidgets.QFileDialog.Options(),
        )
        if not path:  # dialog was cancelled
            return

        self.lastDir, self.file = os.path.split(path)
        if self.process:
            self.queNum.put(-1)  # use -1 to notify the process to exit
        self.timerSend.stop()
        self.curPageNum = 0
        self.pageCount = 0

        worker = mp.Process(
            target=openDocInProcess, args=(path, self.queNum, self.queDoc)
        )
        self.process = worker
        worker.start()

        self.timerGet.start(40)  # poll for answers of the worker
        self.label.setText("0/0")
        self.queNum.put(0)  # request the first page
        self.startTime = time.perf_counter()
        self.timerWaiting.start(40)  # show elapsed loading time meanwhile

    def playDoc(self):
        """Start requesting the next page every 500 ms."""
        self.timerSend.start(500)

    def stopPlay(self):
        """Stop requesting further pages."""
        self.timerSend.stop()

    def onTimerSendPageNum(self):
        """Request the page after the current one, or stop at the last page."""
        nextPage = self.curPageNum + 1
        if nextPage < self.pageCount:
            self.queNum.put(nextPage)
        else:
            self.timerSend.stop()

    def onTimerGetPage(self):
        """Fetch one pending answer of the worker (if any) and display it."""
        try:
            answer = self.queDoc.get_nowait()
        except queue.Empty:
            return  # nothing arrived since the last poll

        if isinstance(answer, int):
            # the page count: the document has been opened
            self.timerWaiting.stop()
            self.pageCount = answer
            self._showPageLabel()
            return

        # tuple, pixmap info
        num, samples, width, height, stride, alpha = answer
        self.curPageNum = num
        self._showPageLabel()
        if alpha:
            fmt = QtGui.QImage.Format_RGBA8888
        else:
            fmt = QtGui.QImage.Format_RGB888
        qimg = QtGui.QImage(samples, width, height, stride, fmt)
        self.labelImg.setPixmap(QtGui.QPixmap.fromImage(qimg))

    def onTimerWaiting(self):
        """Show the file name and the seconds elapsed since loading started."""
        elapsed = time.perf_counter() - self.startTime
        self.labelImg.setText('Loading "{}", {:.2f}s'.format(self.file, elapsed))

    def closeEvent(self, event):
        """Ask the worker to exit (via -1) and accept the close request."""
        self.queNum.put(-1)
        event.accept()
144 |
145 |
def openDocInProcess(path, queNum, quePageInfo):
    """Worker process: render pages of a document on request.

    The document at *path* is opened and its page count is put on
    *quePageInfo*. Then page numbers are read from *queNum*; for each one the
    page is rendered and a tuple ``(num, samples, width, height, stride,
    alpha)`` is put on *quePageInfo*. A negative number ends the loop.

    Args:
        path: file name of the document to open.
        queNum: queue delivering the requested page numbers.
        quePageInfo: queue receiving the page count and the rendered pages.
    """
    doc = fitz.open(path)
    try:
        quePageInfo.put(doc.pageCount)
        while True:
            num = queNum.get()  # blocks until the next request arrives
            if num < 0:  # exit request
                break
            page = doc.loadPage(num)
            pix = page.getPixmap()
            quePageInfo.put(
                (num, pix.samples, pix.width, pix.height, pix.stride, pix.alpha)
            )
    finally:
        # release the document even if loading or rendering a page failed
        doc.close()
    print("process exit")
162 |
163 |
if __name__ == "__main__":
    # create the Qt application, show the player window and run the event loop
    qt_app = QtWidgets.QApplication(sys.argv)
    window = DocForm()  # the name keeps the window object referenced while the loop runs
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
168 |
--------------------------------------------------------------------------------
/docs/multiprocess-render.py:
--------------------------------------------------------------------------------
1 | """
2 | Demonstrate the use of multiprocessing with PyMuPDF.
3 |
4 | Depending on the number of CPUs, the document is divided in page ranges.
5 | Each range is then worked on by one process.
6 | The type of work would typically be text extraction or page rendering. Each
7 | process must know where to put its results, because this processing pattern
8 | does not include inter-process communication or data sharing.
9 |
10 | Compared to sequential processing, speed improvements in range of 100% (ie.
11 | twice as fast) or better can be expected.
12 | """
13 | from __future__ import print_function, division
14 | import sys
15 | import os
16 | import time
17 | from multiprocessing import Pool, cpu_count
18 | import fitz
19 |
20 | # choose a version specific timer function (bytes == str in Python 2)
21 | mytime = time.clock if str is bytes else time.perf_counter
22 |
23 |
def render_page(vector):
    """Render a page range of a document.

    Notes:
        The PyMuPDF document cannot be part of the argument, because that
        cannot be pickled. So we are being passed in just its filename.
        This is no performance issue, because we are a separate process and
        need to open the document anyway.
        Any page-specific function can be processed here - rendering is just
        an example - text extraction might be another.
        The work must however be self-contained: no inter-process communication
        or synchronization is possible with this design.
        Care must also be taken with which parameters are contained in the
        argument, because it will be passed in via pickling by the Pool class.
        So any large objects will increase the overall duration.
    Args:
        vector: a sequence containing the required parameters: segment number,
            number of CPUs, document filename and rendering matrix.
    """
    # recreate the arguments
    idx, cpu, filename, mat = vector[:4]
    doc = fitz.open(filename)  # open the document
    try:
        num_pages = len(doc)  # get number of pages

        # Pages per segment, rounded up so that cpu * seg_size >= num_pages.
        # Ceiling division keeps the segments balanced when the page count is
        # a multiple of the CPU count (no segment is left without work then).
        seg_size = -(-num_pages // cpu)
        seg_from = idx * seg_size  # our first page number
        seg_to = min(seg_from + seg_size, num_pages)  # one past our last page

        for i in range(seg_from, seg_to):  # work through our page segment
            page = doc[i]
            # page.getText("rawdict")  # use any page-related type of work here, eg
            pix = page.getPixmap(alpha=False, matrix=mat)
            # store away the result somewhere ...
            # pix.writePNG("p-%i.png" % i)
    finally:
        doc.close()  # release the document also when rendering failed

    if seg_from < seg_to:
        print("Processed page numbers %i through %i" % (seg_from, seg_to - 1))
    else:
        # more processes than pages: this segment received no pages
        print("Segment %i: no pages to process" % idx)
62 |
63 |
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: python multiprocess-render.py document")

    t0 = mytime()  # start a timer
    filename = sys.argv[1]
    mat = fitz.Matrix(0.2, 0.2)  # the rendering matrix: scale down to 20%
    cpu = cpu_count()

    # make vectors of arguments for the processes
    vectors = [(i, cpu, filename, mat) for i in range(cpu)]
    print("Starting %i processes for '%s'." % (cpu, filename))

    pool = Pool()  # make pool of 'cpu_count()' processes
    try:
        # start processes passing each a vector (chunk size 1: one per task)
        pool.map(render_page, vectors, 1)
    finally:
        # shut the workers down explicitly instead of leaving them to
        # interpreter exit; try/finally also works where Pool is not a
        # context manager (Python 2)
        pool.close()
        pool.join()

    t1 = mytime()  # stop the timer
    print("Total time %g seconds" % round(t1 - t0, 2))
79 |
80 |
--------------------------------------------------------------------------------
/docs/new-annots.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | -------------------------------------------------------------------------------
4 | Demo script showing how annotations can be added to a PDF using PyMuPDF.
5 |
6 | It contains the following annotation types:
7 | Caret, Text, FreeText, text markers (underline, strike-out, highlight,
8 | squiggle), Circle, Square, Line, PolyLine, Polygon, FileAttachment, Stamp
9 | and Redaction.
10 | There is some effort to vary appearances by adding colors, line ends,
11 | opacity, rotation, dashed lines, etc.
12 |
13 | Dependencies
14 | ------------
15 | PyMuPDF v1.17.0
16 | -------------------------------------------------------------------------------
17 | """
18 | from __future__ import print_function
19 |
20 | import gc
21 | import os
22 | import sys
23 |
24 | import fitz
25 |
print(fitz.__doc__)
# Compare the version components as numbers. Comparing them as strings is
# lexicographic, i.e. "9" would rank above "17" and a too old version
# would pass the check.
if tuple(map(int, fitz.VersionBind.split(".")[:3])) < (1, 17, 0):
    sys.exit("PyMuPDF v1.17.0+ is needed.")

gc.set_debug(gc.DEBUG_UNCOLLECTABLE)

# texts which are inserted in the page and then marked by text marker annots
highlight = "this text is highlighted"
underline = "this text is underlined"
strikeout = "this text is striked out"
squiggled = "this text is zigzag-underlined"

# colors as RGB triples
red = (1, 0, 0)
blue = (0, 0, 1)
gold = (1, 1, 0)
green = (0, 1, 0)

displ = fitz.Rect(0, 50, 0, 50)  # added to a rect to displace it vertically
r = fitz.Rect(72, 72, 220, 100)  # rectangle of the first annotation
t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!"
44 |
45 |
def print_descr(annot):
    """Write "<type> annotation" in red next to the annotation rectangle.

    The text is inserted on the annotation's parent page, slightly to the
    right of the bottom right corner of its rectangle.
    """
    page = annot.parent
    anchor = annot.rect.br + (10, -5)  # offset from the bottom right corner
    label = "%s annotation" % annot.type[1]
    page.insertText(anchor, label, color=red)
51 |
52 |
# Demo driver: create a one-page PDF, add one annotation of each kind below
# the previous one, label each with print_descr(), then save the result.
doc = fitz.open()
page = doc.newPage()

# The page rotation also becomes part of the output file name (see the end).
page.setRotation(0)

annot = page.addCaretAnnot(r.tl)
print_descr(annot)

r = r + displ
annot = page.addFreetextAnnot(
    r,
    t1,
    fontsize=10,
    rotate=90,
    text_color=blue,
    fill_color=gold,
    align=fitz.TEXT_ALIGN_CENTER,
)
annot.setBorder(width=0.3, dashes=[2])
annot.update(text_color=blue, fill_color=gold)

print_descr(annot)
r = annot.rect + displ

annot = page.addTextAnnot(r.tl, t1)
print_descr(annot)

# Adding text marker annotations:
# first insert a unique text, then search for it, then mark it
pos = annot.rect.tl + displ.tl
page.insertText(
    pos,  # insertion point
    highlight,  # inserted text
    morph=(pos, fitz.Matrix(-5)),  # rotate around insertion point
)
rl = page.searchFor(highlight, quads=True)  # need a quad b/o tilted text
annot = page.addHighlightAnnot(rl[0])
print_descr(annot)
pos = annot.rect.bl  # next insertion point

page.insertText(pos, underline, morph=(pos, fitz.Matrix(-10)))
rl = page.searchFor(underline, quads=True)
annot = page.addUnderlineAnnot(rl[0])
print_descr(annot)
pos = annot.rect.bl

page.insertText(pos, strikeout, morph=(pos, fitz.Matrix(-15)))
rl = page.searchFor(strikeout, quads=True)
annot = page.addStrikeoutAnnot(rl[0])
print_descr(annot)
pos = annot.rect.bl

page.insertText(pos, squiggled, morph=(pos, fitz.Matrix(-20)))
rl = page.searchFor(squiggled, quads=True)
annot = page.addSquigglyAnnot(rl[0])
print_descr(annot)
pos = annot.rect.bl

# From here on, "r" is the working rectangle; it is moved by "displ"
# before each further annotation.
r = fitz.Rect(pos, pos.x + 75, pos.y + 35) + (0, 20, 0, 20)
annot = page.addPolylineAnnot([r.bl, r.tr, r.br, r.tl])  # 'Polyline'
annot.setBorder(width=0.3, dashes=[2])
annot.setColors(stroke=blue, fill=green)
annot.setLineEnds(fitz.PDF_ANNOT_LE_CLOSED_ARROW, fitz.PDF_ANNOT_LE_R_CLOSED_ARROW)
annot.update(fill_color=(1, 1, 0))
print_descr(annot)

r += displ
annot = page.addPolygonAnnot([r.bl, r.tr, r.br, r.tl])  # 'Polygon'
annot.setBorder(width=0.3, dashes=[2])
annot.setColors(stroke=blue, fill=gold)
annot.setLineEnds(fitz.PDF_ANNOT_LE_DIAMOND, fitz.PDF_ANNOT_LE_CIRCLE)
annot.update()
print_descr(annot)

r += displ
annot = page.addLineAnnot(r.tr, r.bl)  # 'Line'
annot.setBorder(width=0.3, dashes=[2])
annot.setColors(stroke=blue, fill=gold)
annot.setLineEnds(fitz.PDF_ANNOT_LE_DIAMOND, fitz.PDF_ANNOT_LE_CIRCLE)
annot.update()
print_descr(annot)

r += displ
annot = page.addRectAnnot(r)  # 'Square'
annot.setBorder(width=1, dashes=[1, 2])
annot.setColors(stroke=blue, fill=gold)
annot.update(opacity=0.5)
print_descr(annot)

r += displ
annot = page.addCircleAnnot(r)  # 'Circle'
annot.setBorder(width=0.3, dashes=[2])
annot.setColors(stroke=blue, fill=gold)
annot.update()
print_descr(annot)

r += displ
annot = page.addFileAnnot(
    r.tl, b"just anything for testing", "testdata.txt"  # 'FileAttachment'
)
print_descr(annot)  # annot.rect

r += displ
annot = page.addStampAnnot(r, stamp=10)  # 'Stamp'
annot.setColors(stroke=green)
annot.update()
print_descr(annot)

r += displ + (0, 0, 50, 10)
# The return value of insertTextbox is not needed here, so it is not kept.
page.insertTextbox(
    r,
    "This content will be removed upon applying the redaction.",
    color=blue,
    align=fitz.TEXT_ALIGN_CENTER,
)
annot = page.addRedactAnnot(r)
print_descr(annot)

# Derive the output name from this script's path: strip only the final
# extension and append "-<rotation>.pdf". A plain str.replace(".py", ...)
# would also rewrite any ".py" that occurs in a directory name of the path.
script_stem = os.path.splitext(os.path.abspath(__file__))[0]
outfile = "%s-%i.pdf" % (script_stem, page.rotation)
doc.save(outfile, deflate=True)
173 |
--------------------------------------------------------------------------------
/docs/outline.rst:
--------------------------------------------------------------------------------
1 | .. _Outline:
2 |
3 | ================
4 | Outline
5 | ================
6 |
7 | *outline* (or "bookmark"), is a property of *Document*. If not *None*, it stands for the first outline item of the document. Its properties in turn define the characteristics of this item and also point to other outline items in "horizontal" or downward direction. The full tree of all outline items for e.g. a conventional table of contents (TOC) can be recovered by following these "pointers".
8 |
9 | ============================ ==================================================
10 | **Method / Attribute** **Short Description**
11 | ============================ ==================================================
12 | :attr:`Outline.down` next item downwards
13 | :attr:`Outline.next` next item same level
14 | :attr:`Outline.page` page number (0-based)
15 | :attr:`Outline.title` title
16 | :attr:`Outline.uri` string further specifying the outline target
17 | :attr:`Outline.isExternal` target is outside this document
18 | :attr:`Outline.is_open` whether sub-outlines are open or collapsed
19 | :attr:`Outline.isOpen` whether sub-outlines are open or collapsed
20 | :attr:`Outline.dest` points to link destination details
21 | ============================ ==================================================
22 |
23 | **Class API**
24 |
25 | .. class:: Outline
26 |
27 | .. attribute:: down
28 |
29 | The next outline item on the next level down. Is *None* if the item has no kids.
30 |
31 | :type: :ref:`Outline`
32 |
33 | .. attribute:: next
34 |
35 | The next outline item at the same level as this item. Is *None* if this is the last one in its level.
36 |
37 | :type: :ref:`Outline`
38 |
39 | .. attribute:: page
40 |
41 | The page number (0-based) this bookmark points to.
42 |
43 | :type: int
44 |
45 | .. attribute:: title
46 |
47 | The item's title as a string or *None*.
48 |
49 | :type: str
50 |
51 | .. attribute:: is_open
52 |
53 | Or *isOpen* -- an indicator showing whether any sub-outlines should be expanded (*True*) or be collapsed (*False*). This information should be interpreted by PDF display software accordingly.
54 |
55 | :type: bool
56 |
57 | .. attribute:: isExternal
58 |
59 | A bool specifying whether the target is outside (*True*) of the current document.
60 |
61 | :type: bool
62 |
63 | .. attribute:: uri
64 |
65 | A string specifying the link target. The meaning of this property should be evaluated in conjunction with *isExternal*. The value may be *None*, in which case *isExternal == False*. If *uri* starts with *file://*, *mailto:*, or an internet resource name, *isExternal* is *True*. In all other cases *isExternal == False* and *uri* points to an internal location. In case of PDF documents, this should either be *#nnnn* to indicate a 1-based (!) page number *nnnn*, or a named location. The format varies for other document types, e.g. *uri = '../FixedDoc.fdoc#PG_21_LNK_84'* for page number 21 (1-based) in an XPS document.
66 |
67 | :type: str
68 |
69 | .. attribute:: dest
70 |
71 | The link destination details object.
72 |
73 | :type: :ref:`linkDest`
74 |
--------------------------------------------------------------------------------
/docs/point.rst:
--------------------------------------------------------------------------------
1 | .. _Point:
2 |
3 | ================
4 | Point
5 | ================
6 |
7 | *Point* represents a point in the plane, defined by its x and y coordinates.
8 |
9 | ============================ ============================================
10 | **Attribute / Method** **Description**
11 | ============================ ============================================
12 | :meth:`Point.distance_to` calculate distance to point or rect
13 | :meth:`Point.norm` the Euclidean norm
14 | :meth:`Point.transform` transform point with a matrix
15 | :attr:`Point.abs_unit` same as unit, but positive coordinates
16 | :attr:`Point.unit` point coordinates divided by *abs(point)*
17 | :attr:`Point.x` the X-coordinate
18 | :attr:`Point.y` the Y-coordinate
19 | ============================ ============================================
20 |
21 | **Class API**
22 |
23 | .. class:: Point
24 |
25 | .. method:: __init__(self)
26 |
27 | .. method:: __init__(self, x, y)
28 |
29 | .. method:: __init__(self, point)
30 |
31 | .. method:: __init__(self, sequence)
32 |
33 | Overloaded constructors.
34 |
35 | Without parameters, *Point(0, 0)* will be created.
36 |
37 | With another point specified, a **new copy** will be created. "sequence" is a Python sequence of 2 numbers (see :ref:`SequenceTypes`).
38 |
39 | :arg float x: x coordinate of the point
40 |
41 | :arg float y: y coordinate of the point
42 |
43 | .. method:: distance_to(x [, unit])
44 |
45 | Calculate the distance to *x*, which may be :data:`point_like` or :data:`rect_like`. The distance is given in units of either pixels (default), inches, centimeters or millimeters.
46 |
47 | :arg point_like,rect_like x: to which to compute the distance.
48 |
49 | :arg str unit: the unit to be measured in. One of "px", "in", "cm", "mm".
50 |
51 | :rtype: float
52 | :returns: the distance to *x*. If this is :data:`rect_like`, then the distance
53 |
54 | * is the length of the shortest line connecting to one of the rectangle sides
55 | * is calculated to the **finite version** of it
56 | * is zero if it **contains** the point
57 |
58 | .. method:: norm()
59 |
60 | *(New in version 1.16.0)*
61 |
62 | Return the Euclidean norm (the length) of the point as a vector. Equals result of function *abs()*.
63 |
64 | .. method:: transform(m)
65 |
66 | Apply a matrix to the point and replace it with the result.
67 |
68 | :arg matrix_like m: The matrix to be applied.
69 |
70 | :rtype: :ref:`Point`
71 |
72 | .. attribute:: unit
73 |
74 | Result of dividing each coordinate by *norm(point)*, the distance of the point to (0,0). This is a vector of length 1 pointing in the same direction as the point does. Its x, resp. y values are equal to the cosine, resp. sine of the angle this vector (and the point itself) has with the x axis.
75 |
76 | .. image:: images/img-point-unit.jpg
77 |
78 | :type: :ref:`Point`
79 |
80 | .. attribute:: abs_unit
81 |
82 | Same as :attr:`unit` above, replacing the coordinates with their absolute values.
83 |
84 | :type: :ref:`Point`
85 |
86 | .. attribute:: x
87 |
88 | The x coordinate
89 |
90 | :type: float
91 |
92 | .. attribute:: y
93 |
94 | The y coordinate
95 |
96 | :type: float
97 |
98 | .. note::
99 |
100 | * This class adheres to the Python sequence protocol, so components can be accessed via their index, too. Also refer to :ref:`SequenceTypes`.
101 | * Points can be used with arithmetic operators -- see chapter :ref:`Algebra`.
102 |
103 |
--------------------------------------------------------------------------------
/docs/pymupdf-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/docs/pymupdf-logo.jpg
--------------------------------------------------------------------------------
/docs/quad.rst:
--------------------------------------------------------------------------------
1 | .. _Quad:
2 |
3 | ==========
4 | Quad
5 | ==========
6 |
7 | Represents a four-sided mathematical shape (also called "quadrilateral" or "tetragon") in the plane, defined as a sequence of four :ref:`Point` objects ul, ur, ll, lr (conveniently called upper left, upper right, lower left, lower right).
8 |
9 | Quads can **be obtained** as results of text search methods (:meth:`Page.searchFor`), and they **are used** to define text marker annotations (see e.g. :meth:`Page.addSquigglyAnnot` and friends), and in several draw methods (like :meth:`Page.drawQuad` / :meth:`Shape.drawQuad`, :meth:`Page.drawOval` / :meth:`Shape.drawOval`).
10 |
11 | .. note::
12 |
13 | * If the corners of a rectangle are transformed with a **rotation**, **scale** or **translation** :ref:`Matrix`, then the resulting quad is **rectangular**, i.e. its corners again enclose angles of 90 degrees. Property :attr:`Quad.isRectangular` checks whether a quad can be thought of being the result of such an operation. This is not true for all matrices: e.g. shear matrices produce parallelograms, and non-invertible matrices deliver "degenerate" tetragons like triangles or lines.
14 |
15 | * Attribute :attr:`Quad.rect` obtains the enveloping rectangle. Vice versa, rectangles now have attributes :attr:`Rect.quad`, resp. :attr:`IRect.quad` to obtain their respective tetragon versions.
16 |
17 |
18 | ============================= =======================================================
19 | **Methods / Attributes** **Short Description**
20 | ============================= =======================================================
21 | :meth:`Quad.transform` transform with a matrix
22 | :meth:`Quad.morph` transform with a point and matrix
23 | :attr:`Quad.ul` upper left point
24 | :attr:`Quad.ur` upper right point
25 | :attr:`Quad.ll` lower left point
26 | :attr:`Quad.lr` lower right point
27 | :attr:`Quad.isConvex` true if quad is a convex set
28 | :attr:`Quad.isEmpty` true if quad is an empty set
29 | :attr:`Quad.isRectangular` true if quad is a (rotated) rectangle
30 | :attr:`Quad.rect` smallest containing :ref:`Rect`
31 | :attr:`Quad.width` the longest width value
32 | :attr:`Quad.height` the longest height value
33 | ============================= =======================================================
34 |
35 | **Class API**
36 |
37 | .. class:: Quad
38 |
39 | .. method:: __init__(self)
40 |
41 | .. method:: __init__(self, ul, ur, ll, lr)
42 |
43 | .. method:: __init__(self, quad)
44 |
45 | .. method:: __init__(self, sequence)
46 |
47 | Overloaded constructors: "ul", "ur", "ll", "lr" stand for :data:`point_like` objects (the four corners), "sequence" is a Python sequence with four :data:`point_like` objects.
48 |
49 | If "quad" is specified, the constructor creates a **new copy** of it.
50 |
51 | Without parameters, a quad consisting of 4 copies of *Point(0, 0)* is created.
52 |
53 |
54 | .. method:: transform(matrix)
55 |
56 | Modify the quadrilateral by transforming each of its corners with a matrix.
57 |
58 | :arg matrix_like matrix: the matrix.
59 |
60 | .. method:: morph(fixpoint, matrix)
61 |
62 | *(New in version 1.17.0)* "Morph" the quad with a matrix-like using a point-like as fixed point.
63 |
64 | :arg point_like fixpoint: the point.
65 | :arg matrix_like matrix: the matrix.
66 | :returns: a new quad. The effect is achieved by using the following code::
67 |
68 | >>> T = fitz.Matrix(1, 1).preTranslate(fixpoint.x, fixpoint.y)
69 | >>> result = self * ~T * matrix * T
70 |
71 | So the quad is translated such that fixpoint becomes the origin (0, 0), then the matrix is applied to it, and finally a reverse translation is done.
72 |
73 | Typical uses include rotating the quad around a desired point.
74 |
75 | .. attribute:: rect
76 |
77 | The smallest rectangle containing the quad, represented by the blue area in the following picture.
78 |
79 | .. image:: images/img-quads.jpg
80 |
81 | :type: :ref:`Rect`
82 |
83 | .. attribute:: ul
84 |
85 | Upper left point.
86 |
87 | :type: :ref:`Point`
88 |
89 | .. attribute:: ur
90 |
91 | Upper right point.
92 |
93 | :type: :ref:`Point`
94 |
95 | .. attribute:: ll
96 |
97 | Lower left point.
98 |
99 | :type: :ref:`Point`
100 |
101 | .. attribute:: lr
102 |
103 | Lower right point.
104 |
105 | :type: :ref:`Point`
106 |
107 | .. attribute:: isConvex
108 |
109 | *(New in version 1.16.1)*
110 |
111 | True if every line connecting two points of the quad is inside the quad. In addition, we make sure that the quad is not "degenerate", i.e. not all corners are on the same line (which would still qualify as convexity in the mathematical sense).
112 |
113 | :type: bool
114 |
115 | .. attribute:: isEmpty
116 |
117 | True if enclosed area is zero, which means that at least three of the four corners are on the same line. If this is false, the quad may still be degenerate or not look like a tetragon at all (triangles, parallelograms, trapezoids, ...).
118 |
119 | :type: bool
120 |
121 | .. attribute:: isRectangular
122 |
123 | True if all corner angles are 90 degrees. This implies that the quad is **convex and not empty**.
124 |
125 | :type: bool
126 |
127 | .. attribute:: width
128 |
129 | The maximum length of the top and the bottom side.
130 |
131 | :type: float
132 |
133 | .. attribute:: height
134 |
135 | The maximum length of the left and the right side.
136 |
137 | :type: float
138 |
139 | Remark
140 | ------
141 | This class adheres to the sequence protocol, so components can be dealt with via their indices, too. Also refer to :ref:`SequenceTypes`.
142 |
143 | We are still in the process of extending algebraic operations to quads. Multiplication and division with / by numbers and matrices are already defined. Addition, subtraction and any unary operations may follow when we see an actual need.
144 |
--------------------------------------------------------------------------------
/docs/replace-fonts.py:
--------------------------------------------------------------------------------
1 | """
2 | Demo / Experimental: Replace the fonts in a PDF.
3 |
4 | """
import os
import sys

import fitz
7 |
# Script setup: the input PDF name is the first command line argument.
fname = sys.argv[1]

doc = fitz.open(fname)  # input PDF
out = fitz.open()  # output PDF

# Load the font mapping table. Each row of fonts.csv is split at ";" into
# [old basefont name, Base14 name].
# The context manager closes the file as soon as it has been read. Blank rows
# are skipped, because an empty row splits into [""] and therefore has no
# replacement-font column.
with open("fonts.csv") as font_table:
    all_fonts = [  # will contain: (old basefont name, Base14 name)
        row.split(";") for row in font_table.read().splitlines() if row.strip()
    ]
16 |
17 |
def pdf_color(srgb):
    """Create a PDF color triple from a given sRGB color integer.

    Args:
        srgb: the color as an integer of the form 0xRRGGBB.

    Returns:
        A tuple (r, g, b) of floats, each between 0 and 1 inclusive.
    """
    # Isolate each 8-bit channel with integer shifts and masks. Repeated true
    # division by 256 would carry the lower channels along as a fractional
    # part, distorting green and red and letting them exceed 1.
    srgb = int(srgb)
    r = ((srgb >> 16) & 0xFF) / 255.0
    g = ((srgb >> 8) & 0xFF) / 255.0
    b = (srgb & 0xFF) / 255.0
    return (r, g, b)
27 |
28 |
def get_font(fontname):
    """Map a font name to its replacement font name.

    The entries of the module-level table ``all_fonts`` are scanned in order.
    The first entry whose old name (item 0) occurs anywhere inside *fontname*
    wins, and its replacement name (item 1) is returned. A substring match is
    used because fontname may look like "ABCDEF+fontname...".

    Returns "helv" (Helvetica) when no entry matches.
    """
    replacements = (entry[1] for entry in all_fonts if entry[0] in fontname)
    return next(replacements, "helv")
36 |
37 |
# Main loop: rebuild every input page on a new output page, copying images
# unchanged and re-inserting all text with the replacement fonts.
for page in doc:
    if page.number % 10 == 0:  # just entertainment messages every 10 pages
        print("Processed %i pages" % page.number)
    if not page._isWrapped:  # check if input page geometry is dubious
        page._wrapContents()
    # for each input page create an output with same dimensions
    outpage = out.newPage(width=page.rect.width, height=page.rect.height)

    # create a shape to write the output text to.
    shape = outpage.newShape()
    text_blocks = []
    image_blocks = []
    for block in page.getText("dict")["blocks"]:
        if block["type"] == 0:
            text_blocks.append(block)
        else:
            image_blocks.append(block)

    # insert the images first, so any text appears in foreground
    for block in image_blocks:
        outpage.insertImage(block["bbox"], stream=block["image"])
        print("Inserted an image on page", page.number)

    for block in text_blocks:  # read text blocks
        shape.drawRect(block["bbox"])  # draw all text on white background,
        # because images may cover same area

        for line in block["lines"]:  # for each line in the block ...
            for span in line["spans"]:  # for each span in the line ...
                fontname = get_font(span["font"])  # get replacing fontname
                fontsize = span["size"]
                text = span["text"]
                bbox = fitz.Rect(span["bbox"])  # text rectangle on input
                text_size = fitz.getTextlength(  # measure text length on output
                    text, fontname=fontname, fontsize=fontsize
                )

                # adjust fontsize if text is too long with the new font
                if text_size > bbox.width:
                    fontsize *= bbox.width / text_size
                try:
                    shape.insertText(  # copy text to output page
                        bbox.bl,  # insertion point on output page
                        text,  # the text to insert
                        fontsize=fontsize,  # fontsize
                        # decide on output font here: the place for sophistication!
                        fontname=fontname,
                        color=pdf_color(span["color"]),
                    )
                except ValueError:
                    # Report the span that could not be written and carry on
                    # with the remaining text.
                    print("Method 'insertText' failed:")
                    print(
                        "page:",
                        page.number,
                        "at",
                        span["bbox"][:2],
                        "text:",
                        span["text"],
                    )
    shape.finish(color=None, fill=(1, 1, 1))  # white for the text background
    shape.commit()  # write everything to the output page

# Several other features can be added, like:
# - copy over the input metadata dictionary
# - copy over the input table of contents

# Prefix only the file name with "new-", keeping any directory part of the
# input path intact. Prepending "new-" to the whole path would point into a
# different (probably non-existing) directory whenever fname has one.
out_dir, out_name = os.path.split(fname)
out.save(os.path.join(out_dir, "new-" + out_name), deflate=True, garbage=4)
106 |
--------------------------------------------------------------------------------
/docs/text-lister.py:
--------------------------------------------------------------------------------
1 | import fitz
2 |
3 |
def flags_decomposer(flags):
    """Translate a font flags integer into a readable, comma-separated string.

    Bits 0, 1 and 4 add "superscript", "italic" and "bold" when set.
    Bit 2 selects "serifed" over "sans", and bit 3 selects "monospaced"
    over "proportional", so these two pairs always contribute one word each.
    """
    words = []
    if flags & 1:
        words.append("superscript")
    if flags & 2:
        words.append("italic")
    words.append("serifed" if flags & 4 else "sans")
    words.append("monospaced" if flags & 8 else "proportional")
    if flags & 16:
        words.append("bold")
    return ", ".join(words)
22 |
23 |
# Print text, font name, font flags, size and color of every text span found
# on the first page of the test document.
doc = fitz.open("text-tester.pdf")
page = doc[0]

# read page text as a dictionary, suppressing extra spaces in CJK fonts
blocks = page.getText("dict", flags=11)["blocks"]

# walk blocks -> lines -> spans in document order as one flat sequence
all_spans = (
    span for block in blocks for line in block["lines"] for span in line["spans"]
)
for span in all_spans:
    print("")
    font_properties = "Font: '%s' (%s), size %g, color #%06x" % (
        span["font"],  # font name
        flags_decomposer(span["flags"]),  # readable font flags
        span["size"],  # font size
        span["color"],  # font color
    )
    print("Text: '%s'" % span["text"])  # simple print of text
    print(font_properties)
41 |
--------------------------------------------------------------------------------
/docs/textwriter.rst:
--------------------------------------------------------------------------------
1 | .. _TextWriter:
2 |
3 | ================
4 | TextWriter
5 | ================
6 |
7 | *(New in v1.16.18)* This class represents a MuPDF *text* object. It can be thought of as a collection of text *"spans"*. Each span has its own starting position, font and font size. It is an elegant alternative for writing text to PDF pages, when compared with methods :meth:`Page.insertText` and friends:
8 |
9 | * **Improved text positioning:** Choose any point where insertion of a text span should start. Storing a text span returns the coordinates of the *last character* of the span.
10 | * **Free font choice:** Each text span has its own font and fontsize. This lets you easily switch between font and font characteristics when composing a larger text.
11 | * **Automatic fallback fonts:** If a character is not represented by the chosen font, alternative fonts are automatically searched. This significantly reduces the risk of seeing unprintable symbols in the output ("TOFUs"). PyMuPDF now also comes with the **universal font "Droid Sans Fallback Regular"**, which supports **all Latin** characters (including Cyrillic and Greek), and **all CJK** characters (Chinese, Japanese, Korean).
12 | * **Cyrillic and Greek Support:** The :ref:`Base-14-fonts` have integrated support of Cyrillic and Greek characters **without specifying encoding.** If your text is a mixture of Latin, Greek and Cyrillic, it will be shown correctly if you just use e.g. font "Helvetica".
13 | * **Transparency support:** Parameter *opacity* is supported. This offers a handy way to create watermark-style text.
14 | * **Justified text:** Supported for any font -- not just simple fonts as in :meth:`Page.insertText`.
15 | * **Reusability:** A TextWriter object exists independent from any page. It can be written multiple times, either to the same or to other pages, in the same or in different PDFs, choosing different colors or transparency.
16 |
17 | Using this object entails three steps:
18 |
19 | 1. When **created**, a TextWriter requires a fixed **page rectangle** in relation to which it calculates text span positions. Text can be written to a page if and only if its size equals that of the TextWriter.
20 | 2. Store text in the TextWriter using methods :meth:`TextWriter.append` and :meth:`TextWriter.fillTextbox` as often as desired.
21 | 3. Output the TextWriter object on some PDF page with a compatible size.
22 |
23 | .. note:: Starting with version 1.17.0, TextWriters **do support** text rotation via the *morph* parameter of :meth:`TextWriter.writeText`.
24 |
25 | There also exists :meth:`Page.writeText` which lets you combine one or more TextWriters and jointly write them to a given rectangle and with a given rotation angle -- much like :meth:`Page.showPDFpage`.
26 |
27 | **Class API**
28 |
29 | .. class:: TextWriter
30 |
31 | .. method:: __init__(self, rect, opacity=1, color=None)
32 |
33 | :arg rect_like rect: rectangle internally used for text positioning computations.
34 | :arg float opacity: sets the transparency for the text to store here. Values outside the interval ``[0, 1)`` will be ignored. A value of e.g. 0.5 means 50% transparency.
35 | :arg float,sequ color: the color of the text. All colors are specified as floats *0 <= color <= 1*. A single float represents some gray level, a sequence implies the colorspace via its length.
36 |
37 |
38 | .. method:: append(pos, text, font=None, fontsize=11, language=None)
39 |
40 | Add new text, usually (but not necessarily) representing a text span.
41 |
42 | :arg point_like pos: start position of the text, the bottom left point of the first character.
43 | :arg str text: a string (Python 2: unicode is mandatory!) of arbitrary length. It will be written starting at position "pos".
44 | :arg font: a :ref:`Font`. If omitted, ``fitz.Font("helv")`` will be used.
45 | :arg float fontsize: the fontsize, a positive number, default 11.
46 | :arg str language: the language to use, e.g. "en" for English. Meaningful values should be compliant with the ISO 639 standards 1, 2, 3 or 5. Reserved for future use: currently has no effect as far as we know.
47 |
48 | :returns: :attr:`textRect` and :attr:`lastPoint`.
49 |
50 | .. method:: fillTextbox(rect, text, pos=None, font=None, fontsize=11, align=0, warn=True)
51 |
52 | Fill a given rectangle with text. This is a convenience method to use as an alternative to :meth:`append`.
53 |
54 | :arg rect_like rect: the area to fill. No part of the text will appear outside of this.
55 | :arg str,sequ text: the text. Can be specified as a (UTF-8) string or a list / tuple of strings. A string will first be converted to a list using *splitlines()*. Every list item will begin on a new line (forced line breaks).
56 | :arg point_like pos: *(new in v1.17.3)* start storing at this point. Default is a point near rectangle top-left.
57 | :arg font: the :ref:`Font`, default `fitz.Font("helv")`.
58 | :arg float fontsize: the fontsize.
59 | :arg int align: text alignment. Use one of TEXT_ALIGN_LEFT, TEXT_ALIGN_CENTER, TEXT_ALIGN_RIGHT or TEXT_ALIGN_JUSTIFY.
60 | :arg bool warn: warn on text overflow (default), or raise an exception. In any case, text not fitting will not be written.
61 |
62 | .. note:: Use these methods as often as is required -- there is no technical limit (except memory constraints of your system). You can also mix appends and text boxes and have multiple of both. Text positioning is controlled by the insertion point. There is no need to adhere to any order.
63 |
64 |
65 | .. method:: writeText(page, opacity=None, color=None, morph=None, overlay=True)
66 |
67 | Write the TextWriter text to a page.
68 |
69 | :arg page: write to this :ref:`Page`.
70 | :arg float opacity: override the value of the TextWriter for this output.
71 | :arg sequ color: override the value of the TextWriter for this output.
72 | :arg sequ morph: modify the text appearance by applying a matrix to it. If provided, this must be a sequence *(fixpoint, matrix)* with a point-like *fixpoint* and a matrix-like *matrix*. A typical example is rotating the text around *fixpoint*.
73 | :arg bool overlay: put in foreground (default) or background.
74 |
75 |
76 | .. attribute:: textRect
77 |
78 | The :ref:`Rect` currently occupied. This value changes when more text is added.
79 |
80 | .. attribute:: lastPoint
81 |
82 | The "cursor position" -- a :ref:`Point` -- after the last written character (its bottom-right).
83 |
84 | .. attribute:: opacity
85 |
86 | The text opacity (modifiable).
87 |
88 | .. attribute:: color
89 |
90 | The text color (modifiable).
91 |
92 | .. attribute:: rect
93 |
94 | The page rectangle for which this TextWriter was created. Must not be modified.
95 |
96 |
97 | To see some demo scripts dealing with TextWriter, have a look at `this <https://github.com/pymupdf/PyMuPDF-Utilities/tree/master/textwriter>`_ repository.
98 |
99 |
100 | .. note::
101 |
102 | 1. Opacity and color apply to **all the text** in this object.
103 | 2. If you need different colors / transparency, you must create a separate TextWriter. Whenever you determine the color should change, simply append the text to the respective TextWriter using the previously returned :attr:`lastPoint` as position for the new text span.
104 | 3. Appending items or text boxes can occur in arbitrary order: only the position parameter controls where text appears.
105 | 4. Font and fontsize can freely vary within the same TextWriter. This can be used to let text with different properties appear on the same displayed line: just specify *pos* accordingly, and e.g. set it to :attr:`lastPoint` of the previously added item.
106 | 5. You can use the *pos* argument of :meth:`TextWriter.fillTextbox` to indent the first line, so its text may continue any preceding one in a continuous manner.
107 |
--------------------------------------------------------------------------------
/docs/version.rst:
--------------------------------------------------------------------------------
1 | Covered Version
2 | --------------------
3 |
4 | This documentation covers PyMuPDF v1.17.4 features as of **2020-07-20 18:09:40**.
5 |
6 | .. note:: The major and minor versions of **PyMuPDF** and **MuPDF** will always be the same. Only the third qualifier (patch level) may deviate from that of MuPDF.
--------------------------------------------------------------------------------
/docs/wheelnames.txt:
--------------------------------------------------------------------------------
1 | PyMuPDF-x.xx.xx-cp27-cp27m-macosx_10_9_x86_64.whl
2 | PyMuPDF-x.xx.xx-cp27-cp27m-manylinux2010_x86_64.whl
3 | PyMuPDF-x.xx.xx-cp27-cp27m-win32.whl
4 | PyMuPDF-x.xx.xx-cp27-cp27m-win_amd64.whl
5 | PyMuPDF-x.xx.xx-cp27-cp27mu-manylinux2010_x86_64.whl
6 | PyMuPDF-x.xx.xx-cp35-cp35m-macosx_10_9_x86_64.whl
7 | PyMuPDF-x.xx.xx-cp35-cp35m-manylinux2010_x86_64.whl
8 | PyMuPDF-x.xx.xx-cp35-cp35m-win32.whl
9 | PyMuPDF-x.xx.xx-cp35-cp35m-win_amd64.whl
10 | PyMuPDF-x.xx.xx-cp36-cp36m-macosx_10_9_x86_64.whl
11 | PyMuPDF-x.xx.xx-cp36-cp36m-manylinux2010_x86_64.whl
12 | PyMuPDF-x.xx.xx-cp36-cp36m-win32.whl
13 | PyMuPDF-x.xx.xx-cp36-cp36m-win_amd64.whl
14 | PyMuPDF-x.xx.xx-cp37-cp37m-macosx_10_9_x86_64.whl
15 | PyMuPDF-x.xx.xx-cp37-cp37m-manylinux2010_x86_64.whl
16 | PyMuPDF-x.xx.xx-cp37-cp37m-win32.whl
17 | PyMuPDF-x.xx.xx-cp37-cp37m-win_amd64.whl
18 | PyMuPDF-x.xx.xx-cp38-cp38-macosx_10_9_x86_64.whl
19 | PyMuPDF-x.xx.xx-cp38-cp38-manylinux2010_x86_64.whl
20 | PyMuPDF-x.xx.xx-cp38-cp38-win32.whl
21 | PyMuPDF-x.xx.xx-cp38-cp38-win_amd64.whl
22 |
--------------------------------------------------------------------------------
/docs/widget.rst:
--------------------------------------------------------------------------------
1 | .. _Widget:
2 |
3 | ================
4 | Widget
5 | ================
6 |
7 | This class represents a PDF Form field, also called "widget". Fields are a special case of annotations, which allow users with limited permissions to enter information in a PDF. This is primarily used for filling out forms.
8 |
9 | Like annotations, widgets live on PDF pages. Similar to annotations, the first widget on a page is accessible via :attr:`Page.firstWidget` and subsequent widgets can be accessed via the :attr:`Widget.next` property.
10 |
11 | *(Changed in version 1.16.0)* MuPDF no longer treats widgets as a subset of general annotations. Consequently, :attr:`Page.firstAnnot` and :meth:`Annot.next` will deliver non-widget annotations exclusively, and be *None* if only form fields exist on a page. Vice versa, :attr:`Page.firstWidget` and :meth:`Widget.next` will only show widgets. This design decision is purely internal to MuPDF; technically, links, annotations and fields have a lot in common and also continue to share the better part of their code within (Py-) MuPDF.
12 |
13 |
14 | **Class API**
15 |
16 | .. class:: Widget
17 |
18 | .. method:: update
19 |
20 | After any changes to a widget, this method **must be used** to store them in the PDF [#f1]_.
21 |
22 | .. method:: reset
23 |
24 | Reset the field's value to its default -- if defined -- or remove it. Do not forget to issue :meth:`update` afterwards.
25 |
26 | .. attribute:: next
27 |
28 | Point to the next form field on the page.
29 |
30 | .. attribute:: border_color
31 |
32 | A list of up to 4 floats defining the field's border. Default value is *None* which causes border style and border width to be ignored.
33 |
34 | .. attribute:: border_style
35 |
36 | A string defining the line style of the field's border. See :attr:`Annot.border`. Default is "s" ("Solid") -- a continuous line. Only the first character (upper or lower case) will be regarded when creating a widget.
37 |
38 | .. attribute:: border_width
39 |
40 | A float defining the width of the border line. Default is 1.
41 |
42 | .. attribute:: border_dashes
43 |
44 | A list/tuple of integers defining the dash properties of the border line. This is only meaningful if *border_style == "D"* and :attr:`border_color` is provided.
45 |
46 | .. attribute:: choice_values
47 |
48 | Python sequence of strings defining the valid choices of list boxes and combo boxes. For these widgets, this property is mandatory and must contain at least two items. Ignored for other types.
49 |
50 | .. attribute:: field_name
51 |
52 | A mandatory string defining the field's name. No checking for duplicates takes place.
53 |
54 | .. attribute:: field_label
55 |
56 | An optional string containing an "alternate" field name. Typically used for any notes, help on field usage, etc. Default is the field name.
57 |
58 | .. attribute:: field_value
59 |
60 | The value of the field.
61 |
62 | .. attribute:: field_flags
63 |
64 | An integer defining a large number of properties of a field. Handle this attribute with care.
65 |
66 | .. attribute:: field_type
67 |
68 | A mandatory integer defining the field type. This is a value in the range of 0 to 6. It cannot be changed when updating the widget.
69 |
70 | .. attribute:: field_type_string
71 |
72 | A string describing (and derived from) the field type.
73 |
74 | .. attribute:: fill_color
75 |
76 | A list of up to 4 floats defining the field's background color.
77 |
78 | .. attribute:: button_caption
79 |
80 | The caption string of a button-type field.
81 |
82 | .. attribute:: is_signed
83 |
84 | For signature fields, a bool indicating whether the field has been signed. *None* for all other field types.
85 |
86 | .. attribute:: rect
87 |
88 | The rectangle containing the field.
89 |
90 | .. attribute:: text_color
91 |
92 | A list of **1, 3 or 4 floats** defining the text color. Default value is black (`[0, 0, 0]`).
93 |
94 | .. attribute:: text_font
95 |
96 | A string defining the font to be used. Default and replacement for invalid values is *"Helv"*. For valid font reference names see the table below.
97 |
98 | .. attribute:: text_fontsize
99 |
100 | A float defining the text fontsize. Default value is zero, which causes PDF viewer software to dynamically choose a size suitable for the annotation's rectangle and text amount.
101 |
102 | .. attribute:: text_maxlen
103 |
104 | An integer defining the maximum number of text characters. PDF viewers will (should) not accept a longer text.
105 |
106 | .. attribute:: text_type
107 |
108 | An integer defining acceptable text types (e.g. numeric, date, time, etc.). For reference only for the time being -- will be ignored when creating or updating widgets.
109 |
110 | .. attribute:: xref
111 |
112 | The PDF :data:`xref` of the widget.
113 |
114 | .. attribute:: script
115 |
116 | *(New in version 1.16.12)* JavaScript text (unicode) for an action associated with the widget, or *None*. This is the only script action supported for **button type** widgets.
117 |
118 | .. attribute:: script_stroke
119 |
120 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed when the user types a key-stroke into a text field or combo box or modifies the selection in a scrollable list box. This action can check the keystroke for validity and reject or modify it. *None* if not present.
121 |
122 | .. attribute:: script_format
123 |
124 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed before the field is formatted to display its current value. This action can modify the field’s value before formatting. *None* if not present.
125 |
126 | .. attribute:: script_change
127 |
128 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed when the field’s value is changed. This action can check the new value for validity. *None* if not present.
129 |
130 | .. attribute:: script_calc
131 |
132 | *(New in version 1.16.12)* JavaScript text (unicode) to be performed to recalculate the value of this field when that of another field changes. *None* if not present.
133 |
134 | .. note::
135 | 1. For **adding** or **changing** one of the above scripts, just put the appropriate JavaScript source code in the widget attribute. To **remove** a script, set the respective attribute to *None*.
136 | 2. Button fields only support :attr:`script`. Other script entries will automatically be set to *None*.
137 |
138 |
139 | Standard Fonts for Widgets
140 | ----------------------------------
141 | Widgets use their own resources object */DR*. A widget resources object must at least contain a */Font* object. Widget fonts are independent from page fonts. We currently support the 14 PDF base fonts using the following fixed reference names, or any name of an already existing field font. When specifying a text font for new or changed widgets, **either** choose one in the first table column (upper and lower case supported), **or** one of the already existing form fonts. In the latter case, spelling must exactly match.
142 |
143 | To find out already existing field fonts, inspect the list :attr:`Document.FormFonts`.
144 |
145 | ============= =======================
146 | **Reference** **Base14 Fontname**
147 | ============= =======================
148 | CoBI Courier-BoldOblique
149 | CoBo Courier-Bold
150 | CoIt Courier-Oblique
151 | Cour Courier
152 | HeBI Helvetica-BoldOblique
153 | HeBo Helvetica-Bold
154 | HeIt Helvetica-Oblique
155 | Helv Helvetica **(default)**
156 | Symb Symbol
157 | TiBI Times-BoldItalic
158 | TiBo Times-Bold
159 | TiIt Times-Italic
160 | TiRo Times-Roman
161 | ZaDb ZapfDingbats
162 | ============= =======================
163 |
164 | You are generally free to use any font for every widget. However, we recommend using *ZaDb* ("ZapfDingbats") and fontsize 0 for check boxes: typical viewers will put a correctly sized tickmark in the field's rectangle, when it is clicked.
165 |
166 | .. rubric:: Footnotes
167 |
168 | .. [#f1] If you intend to re-access a new or updated field (e.g. for making a pixmap), make sure to reload the page first. Either close and re-open the document, or load another page first, or simply do ``page = doc.reload_page(page)``.
169 |
--------------------------------------------------------------------------------
/fitz/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, print_function
2 | import sys
3 | from fitz.fitz import *
4 |
# define the supported colorspaces for convenience
# (each is published both as an attribute of ``fitz`` and as a name of this module)
fitz.csRGB = fitz.Colorspace(fitz.CS_RGB)
fitz.csGRAY = fitz.Colorspace(fitz.CS_GRAY)
fitz.csCMYK = fitz.Colorspace(fitz.CS_CMYK)
csRGB = fitz.csRGB
csGRAY = fitz.csGRAY
csCMYK = fitz.csCMYK

# create the TOOLS object
TOOLS = fitz.Tools()
fitz.TOOLS = TOOLS

# Compare the MuPDF version string stored in VersionFitz with the one returned
# by TOOLS.mupdf_version(). A difference in the last dot-separated component
# only is tolerated; any other difference aborts the import.
if fitz.VersionFitz != fitz.TOOLS.mupdf_version():
    v1 = fitz.VersionFitz.split(".")
    v2 = fitz.TOOLS.mupdf_version().split(".")
    if v1[:-1] != v2[:-1]:
        raise ValueError(
            "MuPDF library mismatch %s <> %s"
            % (fitz.VersionFitz, fitz.TOOLS.mupdf_version())
        )


# copy functions to their respective fitz classes
# NOTE(review): this import statement also (re)binds the name ``fitz`` in this
# namespace, so the assignments below depend on it having run first -- do not
# reorder without checking.
import fitz.utils

# ------------------------------------------------------------------------------
# Document
# ------------------------------------------------------------------------------
fitz.open = fitz.Document  # ``fitz.open`` is an alias of the Document class
fitz.Document.getToC = fitz.utils.getToC
fitz.Document._do_links = fitz.utils.do_links
fitz.Document.getPagePixmap = fitz.utils.getPagePixmap
fitz.Document.getPageText = fitz.utils.getPageText
fitz.Document.setMetadata = fitz.utils.setMetadata
fitz.Document.setToC = fitz.utils.setToC
fitz.Document.searchPageFor = fitz.utils.searchPageFor
fitz.Document.newPage = fitz.utils.newPage
fitz.Document.insertPage = fitz.utils.insertPage
fitz.Document.getCharWidths = fitz.utils.getCharWidths
fitz.Document.scrub = fitz.utils.scrub

# ------------------------------------------------------------------------------
# Page
# ------------------------------------------------------------------------------
fitz.Page.apply_redactions = fitz.utils.apply_redactions
fitz.Page.drawBezier = fitz.utils.drawBezier
fitz.Page.drawCircle = fitz.utils.drawCircle
fitz.Page.drawCurve = fitz.utils.drawCurve
fitz.Page.drawLine = fitz.utils.drawLine
fitz.Page.drawOval = fitz.utils.drawOval
fitz.Page.drawPolyline = fitz.utils.drawPolyline
fitz.Page.drawQuad = fitz.utils.drawQuad
fitz.Page.drawRect = fitz.utils.drawRect
fitz.Page.drawSector = fitz.utils.drawSector
fitz.Page.drawSquiggle = fitz.utils.drawSquiggle
fitz.Page.drawZigzag = fitz.utils.drawZigzag
fitz.Page.getLinks = fitz.utils.getLinks
fitz.Page.getPixmap = fitz.utils.getPixmap
fitz.Page.getText = fitz.utils.getText
fitz.Page.getTextBlocks = fitz.utils.getTextBlocks
fitz.Page.getTextWords = fitz.utils.getTextWords
fitz.Page.insertImage = fitz.utils.insertImage
fitz.Page.insertLink = fitz.utils.insertLink
fitz.Page.insertText = fitz.utils.insertText
fitz.Page.insertTextbox = fitz.utils.insertTextbox
# called as a method, ``x`` is the page instance handed to the Shape constructor
fitz.Page.newShape = lambda x: fitz.utils.Shape(x)
fitz.Page.searchFor = fitz.utils.searchFor
fitz.Page.showPDFpage = fitz.utils.showPDFpage
fitz.Page.updateLink = fitz.utils.updateLink
fitz.Page.writeText = fitz.utils.writeText
# ------------------------------------------------------------------------------
# Rect
# ------------------------------------------------------------------------------
# both method names refer to the same utility function
fitz.Rect.getRectArea = fitz.utils.getRectArea
fitz.Rect.getArea = fitz.utils.getRectArea

# ------------------------------------------------------------------------------
# IRect
# ------------------------------------------------------------------------------
# both method names refer to the same utility function
fitz.IRect.getRectArea = fitz.utils.getRectArea
fitz.IRect.getArea = fitz.utils.getRectArea

# ------------------------------------------------------------------------------
# TextWriter
# ------------------------------------------------------------------------------
fitz.TextWriter.fillTextbox = fitz.utils.fillTextbox


# Compose the docstring from the binding / library versions, the version date,
# the running interpreter's major.minor version, the platform name and the
# pointer width (64 if sys.maxsize exceeds 2**32, otherwise 32).
fitz.__doc__ = """
PyMuPDF %s: Python bindings for the MuPDF %s library.
Version date: %s.
Built for Python %i.%i on %s (%i-bit).
""" % (
    fitz.VersionBind,
    fitz.VersionFitz,
    fitz.VersionDate,
    sys.version_info[0],
    sys.version_info[1],
    sys.platform,
    64 if sys.maxsize > 2 ** 32 else 32,
)
106 |
--------------------------------------------------------------------------------
/fitz/helper-convert.i:
--------------------------------------------------------------------------------
1 | %{
2 | //-----------------------------------------------------------------------------
3 | // Convert any MuPDF document to a PDF
4 | // Returns bytes object containing the PDF, created via 'write' function.
5 | //-----------------------------------------------------------------------------
6 | PyObject *JM_convert_to_pdf(fz_context *ctx, fz_document *doc, int fp, int tp, int rotate)
7 | {
8 | pdf_document *pdfout = pdf_create_document(ctx); // new PDF document
9 | int i, incr = 1, s = fp, e = tp;
10 | if (fp > tp) {
11 | incr = -1; // count backwards
12 | s = tp; // adjust ...
13 | e = fp; // ... range
14 | }
15 | fz_rect mediabox;
16 | int rot = JM_norm_rotation(rotate);
17 | fz_device *dev = NULL;
18 | fz_buffer *contents = NULL;
19 | pdf_obj *resources = NULL;
20 | fz_page *page;
21 | fz_var(dev);
22 | fz_var(contents);
23 | fz_var(resources);
24 | fz_var(page);
25 | for (i = fp; INRANGE(i, s, e); i += incr) { // interpret & write document pages as PDF pages
26 | fz_try(ctx) {
27 | page = fz_load_page(ctx, doc, i);
28 | mediabox = fz_bound_page(ctx, page);
29 | dev = pdf_page_write(ctx, pdfout, mediabox, &resources, &contents);
30 | fz_run_page(ctx, page, dev, fz_identity, NULL);
31 | fz_close_device(ctx, dev);
32 | fz_drop_device(ctx, dev);
33 | dev = NULL;
34 | pdf_obj *page_obj = pdf_add_page(ctx, pdfout, mediabox, rot, resources, contents);
35 | pdf_insert_page(ctx, pdfout, -1, page_obj);
36 | pdf_drop_obj(ctx, page_obj);
37 | }
38 | fz_always(ctx) {
39 | pdf_drop_obj(ctx, resources);
40 | fz_drop_buffer(ctx, contents);
41 | fz_drop_device(ctx, dev);
42 | fz_drop_page(ctx, page);
43 | }
44 | fz_catch(ctx) {
45 | fz_rethrow(ctx);
46 | }
47 | }
48 | // PDF created - now write it to Python bytearray
49 | PyObject *r = NULL;
50 | fz_output *out = NULL;
51 | fz_buffer *res = NULL;
52 | // prepare write options structure
53 | pdf_write_options opts = { 0 };
54 | opts.do_garbage = 4;
55 | opts.do_compress = 1;
56 | opts.do_compress_images = 1;
57 | opts.do_compress_fonts = 1;
58 | opts.do_sanitize = 1;
59 | opts.do_incremental = 0;
60 | opts.do_ascii = 0;
61 | opts.do_decompress = 0;
62 | opts.do_linear = 0;
63 | opts.do_clean = 1;
64 | opts.do_pretty = 0;
65 |
66 | fz_try(ctx) {
67 | res = fz_new_buffer(ctx, 8192);
68 | out = fz_new_output_with_buffer(ctx, res);
69 | pdf_write_document(ctx, pdfout, out, &opts);
70 | unsigned char *c = NULL;
71 | size_t len = fz_buffer_storage(gctx, res, &c);
72 | r = PyBytes_FromStringAndSize((const char *) c, (Py_ssize_t) len);
73 | }
74 | fz_always(ctx) {
75 | pdf_drop_document(ctx, pdfout);
76 | fz_drop_output(ctx, out);
77 | fz_drop_buffer(ctx, res);
78 | }
79 | fz_catch(ctx) {
80 | fz_rethrow(ctx);
81 | }
82 | return r;
83 | }
84 | %}
85 |
--------------------------------------------------------------------------------
/fitz/helper-geo-c.i:
--------------------------------------------------------------------------------
1 | %{
2 |
3 | //-----------------------------------------------------------------------------
4 | // Functions converting between PySequences and fitz geometry objects
5 | //-----------------------------------------------------------------------------
6 | static int
7 | JM_INT_ITEM(PyObject *obj, Py_ssize_t idx, int *result)
8 | {
9 | PyObject *temp = PySequence_ITEM(obj, idx);
10 | if (!temp) return 1;
11 | *result = (int) PyLong_AsLong(temp);
12 | Py_DECREF(temp);
13 | if (PyErr_Occurred()) {
14 | PyErr_Clear();
15 | return 1;
16 | }
17 | return 0;
18 | }
19 |
20 | static int
21 | JM_FLOAT_ITEM(PyObject *obj, Py_ssize_t idx, float *result)
22 | {
23 | PyObject *temp = PySequence_ITEM(obj, idx);
24 | if (!temp) return 1;
25 | *result = (float) PyFloat_AsDouble(temp);
26 | Py_DECREF(temp);
27 | if (PyErr_Occurred()) {
28 | PyErr_Clear();
29 | return 1;
30 | }
31 | return 0;
32 | }
33 |
34 | //-----------------------------------------------------------------------------
35 | // PySequence to fz_rect. Default: infinite rect
36 | //-----------------------------------------------------------------------------
37 | static fz_rect
38 | JM_rect_from_py(PyObject *r)
39 | {
40 | if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4)
41 | return fz_infinite_rect;
42 | Py_ssize_t i;
43 | float f[4];
44 |
45 | for (i = 0; i < 4; i++)
46 | if (JM_FLOAT_ITEM(r, i, &f[i]) == 1) return fz_infinite_rect;
47 |
48 | return fz_make_rect(f[0], f[1], f[2], f[3]);
49 | }
50 |
51 | //-----------------------------------------------------------------------------
52 | // PySequence from fz_rect
53 | //-----------------------------------------------------------------------------
54 | static PyObject *
55 | JM_py_from_rect(fz_rect r)
56 | {
57 | return Py_BuildValue("ffff", r.x0, r.y0, r.x1, r.y1);
58 | }
59 |
60 | //-----------------------------------------------------------------------------
61 | // PySequence to fz_irect. Default: infinite irect
62 | //-----------------------------------------------------------------------------
63 | static fz_irect
64 | JM_irect_from_py(PyObject *r)
65 | {
66 | if (!PySequence_Check(r) || PySequence_Size(r) != 4)
67 | return fz_infinite_irect;
68 | int x[4];
69 | Py_ssize_t i;
70 |
71 | for (i = 0; i < 4; i++)
72 | if (JM_INT_ITEM(r, i, &x[i]) == 1) return fz_infinite_irect;
73 |
74 | return fz_make_irect(x[0], x[1], x[2], x[3]);
75 | }
76 |
77 | //-----------------------------------------------------------------------------
78 | // PySequence from fz_irect
79 | //-----------------------------------------------------------------------------
80 | static PyObject *
81 | JM_py_from_irect(fz_irect r)
82 | {
83 | return Py_BuildValue("iiii", r.x0, r.y0, r.x1, r.y1);
84 | }
85 |
86 |
87 | //-----------------------------------------------------------------------------
88 | // PySequence to fz_point. Default: (0, 0)
89 | //-----------------------------------------------------------------------------
90 | static fz_point
91 | JM_point_from_py(PyObject *p)
92 | {
93 | fz_point p0 = fz_make_point(0, 0);
94 | float x, y;
95 |
96 | if (!p || !PySequence_Check(p) || PySequence_Size(p) != 2)
97 | return p0;
98 |
99 | if (JM_FLOAT_ITEM(p, 0, &x) == 1) return p0;
100 | if (JM_FLOAT_ITEM(p, 1, &y) == 1) return p0;
101 |
102 | return fz_make_point(x, y);
103 | }
104 |
105 | //-----------------------------------------------------------------------------
106 | // PySequence from fz_point
107 | //-----------------------------------------------------------------------------
108 | static PyObject *
109 | JM_py_from_point(fz_point p)
110 | {
111 | return Py_BuildValue("ff", p.x, p.y);
112 | }
113 |
114 |
115 | //-----------------------------------------------------------------------------
116 | // PySequence to fz_matrix. Default: fz_identity
117 | //-----------------------------------------------------------------------------
118 | static fz_matrix
119 | JM_matrix_from_py(PyObject *m)
120 | {
121 | Py_ssize_t i;
122 | float a[6];
123 |
124 | if (!m || !PySequence_Check(m) || PySequence_Size(m) != 6)
125 | return fz_identity;
126 |
127 | for (i = 0; i < 6; i++)
128 | if (JM_FLOAT_ITEM(m, i, &a[i]) == 1) return fz_identity;
129 |
130 | return fz_make_matrix(a[0], a[1], a[2], a[3], a[4], a[5]);
131 | }
132 |
133 | //-----------------------------------------------------------------------------
134 | // PySequence from fz_matrix
135 | //-----------------------------------------------------------------------------
136 | static PyObject *
137 | JM_py_from_matrix(fz_matrix m)
138 | {
139 | return Py_BuildValue("ffffff", m.a, m.b, m.c, m.d, m.e, m.f);
140 | }
141 |
142 | //-----------------------------------------------------------------------------
143 | // fz_quad from PySequence. Four floats are treated as rect.
144 | // Else must be four pairs of floats.
145 | //-----------------------------------------------------------------------------
146 | static fz_quad
147 | JM_quad_from_py(PyObject *r)
148 | {
149 | fz_quad q = fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0);
150 | fz_point p[4];
151 | float test;
152 | Py_ssize_t i;
153 | PyObject *obj = NULL;
154 |
155 | if (!r || !PySequence_Check(r) || PySequence_Size(r) != 4)
156 | return q;
157 |
158 | if (JM_FLOAT_ITEM(r, 0, &test) == 0)
159 | return fz_quad_from_rect(JM_rect_from_py(r));
160 |
161 | for (i = 0; i < 4; i++) {
162 | obj = PySequence_ITEM(r, i); // next point item
163 | if (!obj || !PySequence_Check(obj) || PySequence_Size(obj) != 2)
164 | goto exit_result; // invalid: cancel the rest
165 |
166 | if (JM_FLOAT_ITEM(obj, 0, &p[i].x) == 1) goto exit_result;
167 | if (JM_FLOAT_ITEM(obj, 1, &p[i].y) == 1) goto exit_result;
168 |
169 | Py_CLEAR(obj);
170 | }
171 | q.ul = p[0];
172 | q.ur = p[1];
173 | q.ll = p[2];
174 | q.lr = p[3];
175 | return q;
176 |
177 | exit_result:;
178 | Py_CLEAR(obj);
179 | return q;
180 | }
181 |
182 | //-----------------------------------------------------------------------------
183 | // PySequence from fz_quad.
184 | //-----------------------------------------------------------------------------
185 | static PyObject *
186 | JM_py_from_quad(fz_quad quad)
187 | {
188 | PyObject *pquad = PyTuple_New(4);
189 | PyTuple_SET_ITEM(pquad, 0, JM_py_from_point(quad.ul));
190 | PyTuple_SET_ITEM(pquad, 1, JM_py_from_point(quad.ur));
191 | PyTuple_SET_ITEM(pquad, 2, JM_py_from_point(quad.ll));
192 | PyTuple_SET_ITEM(pquad, 3, JM_py_from_point(quad.lr));
193 | return pquad;
194 | }
195 |
196 | %}
197 |
--------------------------------------------------------------------------------
/fitz/helper-portfolio.i:
--------------------------------------------------------------------------------
1 | %{
2 | //-----------------------------------------------------------------------------
3 | // perform some cleaning if we have /EmbeddedFiles:
4 | // (1) remove any /Limits if /Names exists
5 | // (2) remove any empty /Collection
6 | // (3) set /PageMode/UseAttachments
7 | //-----------------------------------------------------------------------------
8 | void JM_embedded_clean(fz_context *ctx, pdf_document *pdf)
9 | {
10 | pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, pdf), PDF_NAME(Root));
11 |
12 | // remove any empty /Collection entry
13 | pdf_obj *coll = pdf_dict_get(ctx, root, PDF_NAME(Collection));
14 | if (coll && pdf_dict_len(ctx, coll) == 0)
15 | pdf_dict_del(ctx, root, PDF_NAME(Collection));
16 |
17 | pdf_obj *efiles = pdf_dict_getl(ctx, root,
18 | PDF_NAME(Names),
19 | PDF_NAME(EmbeddedFiles),
20 | PDF_NAME(Names),
21 | NULL);
22 | if (efiles) {
23 | pdf_dict_put_name(ctx, root, PDF_NAME(PageMode), "UseAttachments");
24 | }
25 | return;
26 | }
27 |
28 | //-----------------------------------------------------------------------------
29 | // embed a new file in a PDF (not only /EmbeddedFiles entries)
30 | //-----------------------------------------------------------------------------
31 | pdf_obj *JM_embed_file(fz_context *ctx,
32 | pdf_document *pdf,
33 | fz_buffer *buf,
34 | char *filename,
35 | char *ufilename,
36 | char *desc,
37 | int compress)
38 | {
39 | size_t len = 0;
40 | pdf_obj *ef, *f, *params, *val = NULL;
41 | fz_var(val);
42 | fz_try(ctx) {
43 | val = pdf_new_dict(ctx, pdf, 6);
44 | pdf_dict_put_dict(ctx, val, PDF_NAME(CI), 4);
45 | ef = pdf_dict_put_dict(ctx, val, PDF_NAME(EF), 4);
46 | pdf_dict_put_text_string(ctx, val, PDF_NAME(F), filename);
47 | pdf_dict_put_text_string(ctx, val, PDF_NAME(UF), ufilename);
48 | pdf_dict_put_text_string(ctx, val, PDF_NAME(Desc), desc);
49 | pdf_dict_put(ctx, val, PDF_NAME(Type), PDF_NAME(Filespec));
50 | f = pdf_add_stream(ctx, pdf,
51 | fz_new_buffer_from_copied_data(ctx, " ", 1),
52 | NULL, 0);
53 | pdf_dict_put_drop(ctx, ef, PDF_NAME(F), f);
54 | JM_update_stream(ctx, pdf, f, buf, compress);
55 | len = fz_buffer_storage(ctx, buf, NULL);
56 | pdf_dict_put_int(ctx, f, PDF_NAME(DL), len);
57 | pdf_dict_put_int(ctx, f, PDF_NAME(Length), len);
58 | params = pdf_dict_put_dict(ctx, f, PDF_NAME(Params), 4);
59 | pdf_dict_put_int(ctx, params, PDF_NAME(Size), len);
60 | }
61 | fz_catch(ctx) {
62 | fz_rethrow(ctx);
63 | }
64 | return val;
65 | }
66 | %}
67 |
--------------------------------------------------------------------------------
/fitz/helper-xobject.i:
--------------------------------------------------------------------------------
1 | %{
2 | //-----------------------------------------------------------------------------
3 | // Read and concatenate a PDF page's /Conents object(s) in a buffer
4 | //-----------------------------------------------------------------------------
5 | fz_buffer *JM_read_contents(fz_context * ctx, pdf_obj * pageref)
6 | {
7 | fz_buffer *res = NULL, *nres = NULL;
8 | int i;
9 | fz_try(ctx) {
10 | pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents));
11 | if (pdf_is_array(ctx, contents)) {
12 | res = fz_new_buffer(ctx, 1024);
13 | for (i = 0; i < pdf_array_len(ctx, contents); i++) {
14 | nres = pdf_load_stream(ctx, pdf_array_get(ctx, contents, i));
15 | fz_append_buffer(ctx, res, nres);
16 | fz_drop_buffer(ctx, nres);
17 | }
18 | }
19 | else if (contents) {
20 | res = pdf_load_stream(ctx, contents);
21 | }
22 | }
23 | fz_catch(ctx) {
24 | fz_rethrow(ctx);
25 | }
26 | return res;
27 | }
28 |
29 | //-----------------------------------------------------------------------------
30 | // Make an XObject from a PDF page
31 | // For a positive xref assume that that object can be used instead
32 | //-----------------------------------------------------------------------------
33 | pdf_obj *JM_xobject_from_page(fz_context * ctx, pdf_document * pdfout, fz_page * fsrcpage, int xref, pdf_graft_map *gmap)
34 | {
35 | fz_buffer *res = NULL;
36 | pdf_obj *xobj1, *resources = NULL, *o, *spageref;
37 | fz_rect mediabox;
38 |
39 | fz_try(ctx) {
40 | pdf_page *srcpage = pdf_page_from_fz_page(ctx, fsrcpage);
41 | spageref = srcpage->obj;
42 | mediabox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(MediaBox)));
43 |
44 | if (xref > 0) {
45 | xobj1 = pdf_new_indirect(ctx, pdfout, xref, 0);
46 | }
47 | else {
48 | // Deep-copy resources object of source page
49 | o = pdf_dict_get_inheritable(ctx, spageref, PDF_NAME(Resources));
50 | if (gmap) // use graftmap when possible
51 | resources = pdf_graft_mapped_object(ctx, gmap, o);
52 | else
53 | resources = pdf_graft_object(ctx, pdfout, o);
54 |
55 | // get spgage contents source
56 | res = JM_read_contents(ctx, spageref);
57 |
58 | //-------------------------------------------------------------
59 | // create XObject representing the source page
60 | //-------------------------------------------------------------
61 | xobj1 = pdf_new_xobject(ctx, pdfout, mediabox, fz_identity, NULL, res);
62 | // store spage contents
63 | JM_update_stream(ctx, pdfout, xobj1, res, 1);
64 | fz_drop_buffer(ctx, res);
65 |
66 | // store spage resources
67 | pdf_dict_put_drop(ctx, xobj1, PDF_NAME(Resources), resources);
68 | }
69 | }
70 | fz_catch(ctx) {
71 | fz_rethrow(ctx);
72 | }
73 | return xobj1;
74 | }
75 |
76 | //-----------------------------------------------------------------------------
77 | // Insert a buffer as a new separate /Contents object of a page.
78 | // 1. Create a new stream object from buffer 'newcont'
79 | // 2. If /Contents already is an array, then just prepend or append this object
80 | // 3. Else, create new array and put old content obj and this object into it.
81 | // If the page had no /Contents before, just create a 1-item array.
82 | //-----------------------------------------------------------------------------
83 | int JM_insert_contents(fz_context * ctx, pdf_document * pdf,
84 | pdf_obj * pageref, fz_buffer * newcont, int overlay)
85 | {
86 | int xref = 0;
87 | fz_try(ctx) {
88 | pdf_obj *contents = pdf_dict_get(ctx, pageref, PDF_NAME(Contents));
89 | pdf_obj *newconts = pdf_add_stream(ctx, pdf, newcont, NULL, 0);
90 | xref = pdf_to_num(ctx, newconts);
91 | if (pdf_is_array(ctx, contents)) {
92 | if (overlay) // append new object
93 | pdf_array_push(ctx, contents, newconts);
94 | else // prepend new object
95 | pdf_array_insert(ctx, contents, newconts, 0);
96 | }
97 | else {
98 | pdf_obj *carr = pdf_new_array(ctx, pdf, 5);
99 | if (overlay) {
100 | if (contents)
101 | pdf_array_push(ctx, carr, contents);
102 | pdf_array_push(ctx, carr, newconts);
103 | }
104 | else {
105 | pdf_array_push_drop(ctx, carr, newconts);
106 | if (contents)
107 | pdf_array_push(ctx, carr, contents);
108 | }
109 | pdf_dict_put(ctx, pageref, PDF_NAME(Contents), carr);
110 | }
111 | }
112 | fz_catch(ctx) {
113 | fz_rethrow(ctx);
114 | }
115 | return xref;
116 | }
117 |
118 | static PyObject *img_info = NULL;
119 |
120 | static fz_image *
121 | JM_image_filter(fz_context * ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image)
122 | {
123 | fz_quad q = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm);
124 | PyObject *q_py = JM_py_from_quad(q);
125 | PyList_Append(img_info, Py_BuildValue("sO", name, q_py));
126 | Py_DECREF(q_py);
127 | return NULL;
128 | }
129 |
130 | void
131 | JM_filter_content_stream(
132 | fz_context * ctx,
133 | pdf_document * doc,
134 | pdf_obj * in_stm,
135 | pdf_obj * in_res,
136 | fz_matrix transform,
137 | pdf_filter_options * filter,
138 | int struct_parents,
139 | fz_buffer **out_buf,
140 | pdf_obj **out_res)
141 | {
142 | pdf_processor *proc_buffer = NULL;
143 | pdf_processor *proc_filter = NULL;
144 |
145 | fz_var(proc_buffer);
146 | fz_var(proc_filter);
147 |
148 | *out_buf = NULL;
149 | *out_res = NULL;
150 |
151 | fz_try(ctx) {
152 | *out_buf = fz_new_buffer(ctx, 1024);
153 | proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, filter->ascii);
154 | if (filter->sanitize) {
155 | *out_res = pdf_new_dict(ctx, doc, 1);
156 | proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, in_res, *out_res, struct_parents, transform, filter);
157 | pdf_process_contents(ctx, proc_filter, doc, in_res, in_stm, NULL);
158 | pdf_close_processor(ctx, proc_filter);
159 | }
160 | else {
161 | *out_res = pdf_keep_obj(ctx, in_res);
162 | pdf_process_contents(ctx, proc_buffer, doc, in_res, in_stm, NULL);
163 | }
164 | pdf_close_processor(ctx, proc_buffer);
165 | }
166 | fz_always(ctx) {
167 | pdf_drop_processor(ctx, proc_filter);
168 | pdf_drop_processor(ctx, proc_buffer);
169 | }
170 | fz_catch(ctx) {
171 | fz_drop_buffer(ctx, *out_buf);
172 | *out_buf = NULL;
173 | pdf_drop_obj(ctx, *out_res);
174 | *out_res = NULL;
175 | fz_rethrow(ctx);
176 | }
177 | }
178 |
179 | PyObject *
180 | JM_image_reporter(fz_context *ctx, pdf_page *page)
181 | {
182 | pdf_document *doc = page->doc;
183 | pdf_filter_options filter;
184 | memset(&filter, 0, sizeof filter);
185 | filter.opaque = page;
186 | filter.text_filter = NULL;
187 | filter.image_filter = JM_image_filter;
188 | filter.end_page = NULL;
189 | filter.recurse = 0;
190 | filter.instance_forms = 1;
191 | filter.sanitize = 1;
192 | filter.ascii = 1;
193 |
194 | pdf_obj *contents, *old_res;
195 | pdf_obj *struct_parents_obj;
196 | pdf_obj *new_res;
197 | fz_buffer *buffer;
198 | int struct_parents;
199 |
200 | struct_parents_obj = pdf_dict_get(ctx, page->obj, PDF_NAME(StructParents));
201 | struct_parents = -1;
202 | if (pdf_is_number(ctx, struct_parents_obj))
203 | struct_parents = pdf_to_int(ctx, struct_parents_obj);
204 |
205 | contents = pdf_page_contents(ctx, page);
206 | old_res = pdf_page_resources(ctx, page);
207 | img_info = PyList_New(0);
208 | JM_filter_content_stream(ctx, doc, contents, old_res, fz_identity, &filter, struct_parents, &buffer, &new_res);
209 | fz_drop_buffer(ctx, buffer);
210 | pdf_drop_obj(ctx, new_res);
211 | PyObject *rc = PySequence_Tuple(img_info);
212 | Py_DECREF(img_info);
213 | img_info = NULL;
214 | return rc;
215 | }
216 |
217 | %}
218 |
--------------------------------------------------------------------------------
/fitz/version.i:
--------------------------------------------------------------------------------
%pythoncode %{
# version of the PyMuPDF bindings
VersionBind = "1.17.4"
# version of the MuPDF library these bindings belong to
VersionFitz = "1.17.0"
# timestamp of this version, and the combined version tuple
VersionDate = "2020-07-20 18:09:40"
version = (VersionBind, VersionFitz, "20200720180940")
%}
--------------------------------------------------------------------------------
/installation/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/norbusan/pymupdf-debian/3f2be7c62e623cee6566bc7213cb0fc577a3e1eb/installation/.DS_Store
--------------------------------------------------------------------------------
/installation/centos/centos_pymupdf.sh:
--------------------------------------------------------------------------------
# Build and install MuPDF 1.17.0 and PyMuPDF from source on CentOS.

# Stop at the first failing command, so that a broken download or build is
# never followed by the install steps.
set -e

# Download from the release archive (same location as used by the Ubuntu and
# FreeBSD install scripts).
wget https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz
tar -zxvf mupdf-1.17.0-source.tar.gz

cd mupdf-1.17.0-source
export CFLAGS="-fPIC -std=gnu99"

# build and install MuPDF without the viewer dependencies
make HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local
sudo make HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local install

cd ..

# always start from a fresh PyMuPDF checkout
rm -rf PyMuPDF
git clone https://github.com/pymupdf/PyMuPDF.git
cd PyMuPDF

sudo python setup.py build
sudo python setup.py install
18 |
--------------------------------------------------------------------------------
/installation/freebsd/freebsd_pymupdf.sh:
--------------------------------------------------------------------------------
1 | setenv CFLAGS -fPIC
2 |
3 | # install the pre-required tool
4 | pkg install swig30
5 |
6 | # Ensure we have a build of the current version
7 | wget https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz
8 | tar -zxvf mupdf-1.17.0-source.tar.gz
9 |
10 | rm -rf PyMuPDF
11 | git clone https://github.com/pymupdf/PyMuPDF.git
12 |
13 | cd mupdf-1.17.0-source
14 | # replace files in mupdf source
15 | cp ../PyMuPDF/fitz/_config.h include/mupdf/fitz/config.h
16 |
17 | gmake HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local
18 | gmake HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local install
19 |
20 | cd ../PyMuPDF
21 | python setup.py build
22 | python setup.py install
23 |
--------------------------------------------------------------------------------
/installation/ubuntu/ubuntu_pymupdf.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Build and install MuPDF 1.17.0 from source, then build and install PyMuPDF
# from its GitHub repository (Ubuntu).

# Abort on the first failing command, so that a failed download or "cd"
# cannot let the later "rm -rf" and install steps run in the wrong place.
set -e

wget https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz
tar -zxvf mupdf-1.17.0-source.tar.gz

cd mupdf-1.17.0-source

# Compiler flags picked up by the MuPDF build below.
export CFLAGS="-fPIC"
# Install the build prerequisites. The Python 3 headers are used because
# PyMuPDF's setup.py relies on Python-3-only syntax (zero-argument super()).
# NOTE(review): the FreeBSD script also installs SWIG; it is presumably
# required here as well -- confirm.
sudo apt install pkg-config python3-dev

# Build MuPDF without the GUI viewers, then install it below /usr/local.
make HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local
sudo make HAVE_X11=no HAVE_GLFW=no HAVE_GLUT=no prefix=/usr/local install

cd ..

# Always start from a fresh checkout of PyMuPDF.
rm -rf PyMuPDF
git clone https://github.com/pymupdf/PyMuPDF.git
cd PyMuPDF

# Build and install the bindings with the Python 3 interpreter.
sudo python3 setup.py build
sudo python3 setup.py install
21 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup, Extension
2 | from distutils.command.build_py import build_py as build_py_orig
3 | import sys, os
4 |
class build_ext_first(build_py_orig):
    """Custom ``build_py`` command that runs ``build_ext`` first.

    ``build_py`` needs ``fitz.py``, which is only generated by SWIG during
    the ``build_ext`` step, so that step has to be executed beforehand.
    """

    def run(self):
        """Run ``build_ext``, then the regular ``build_py`` step."""
        self.run_command("build_ext")
        outcome = build_py_orig.run(self)
        return outcome
12 |
13 |
14 | # check the platform
15 | if sys.platform.startswith("linux"):
16 | module = Extension(
17 | "fitz._fitz", # name of the module
18 | ["fitz/fitz.i"],
19 | include_dirs=[ # we need the path of the MuPDF headers
20 | "/usr/include/mupdf",
21 | "/usr/local/include/mupdf",
22 | ],
23 | # library_dirs=[''],
24 | libraries=[
25 | "mupdf",
26 | #'crypto', #openssl is required by mupdf on archlinux
27 | #'jbig2dec', 'openjp2', 'jpeg', 'freetype',
28 | "mupdf-third",
29 | ], # the libraries to link with
30 | )
31 | elif sys.platform.startswith(("darwin", "freebsd")):
32 | module = Extension(
33 | "fitz._fitz", # name of the module
34 | ["fitz/fitz.i"],
35 | # directories containing mupdf's header files
36 | include_dirs=["/usr/local/include/mupdf", "/usr/local/include"],
37 | # libraries should already be linked here by brew
38 | library_dirs=["/usr/local/lib"],
39 | # library_dirs=['/usr/local/Cellar/mupdf-tools/1.8/lib/',
40 | #'/usr/local/Cellar/openssl/1.0.2g/lib/',
41 | #'/usr/local/Cellar/jpeg/8d/lib/',
42 | #'/usr/local/Cellar/freetype/2.6.3/lib/',
43 | #'/usr/local/Cellar/jbig2dec/0.12/lib/'
44 | # ],
45 | libraries=["mupdf", "mupdf-third"],
46 | )
47 |
48 | else:
49 | # ===============================================================================
50 | # Build / set up PyMuPDF under Windows
51 | # ===============================================================================
52 | module = Extension(
53 | "fitz._fitz",
54 | ["fitz/fitz.i"],
55 | include_dirs=[ # we need the path of the MuPDF's headers
56 | "./mupdf/include",
57 | "./mupdf/include/mupdf",
58 | ],
59 | libraries=[ # these are needed in Windows
60 | "libmupdf",
61 | "libresources",
62 | "libthirdparty",
63 | ],
64 | extra_link_args=["/NODEFAULTLIB:MSVCRT"],
65 | # x86 dir of libmupdf.lib etc.
66 | library_dirs=["./mupdf/platform/win32/Release"],
67 | # x64 dir of libmupdf.lib etc.
68 | # library_dirs=['./mupdf/platform/win32/x64/Release'],
69 | )
70 |
# Derive the trove classifiers and the long description from PKG-INFO.
# The file is read inside a "with" block so the handle is closed right away,
# and decoded as UTF-8 so the result does not depend on the locale.
with open("PKG-INFO", encoding="utf-8") as pkg_info:
    pkg_tab = pkg_info.read().split("\n")

_classifier_prefix = "Classifier: "
long_dtab = []  # lines of the long description
classifier = []  # classifier strings without the "Classifier: " prefix
for line in pkg_tab:
    if line.startswith(_classifier_prefix):
        classifier.append(line[len(_classifier_prefix):])
    elif line.startswith(" "):
        # indented lines are collected as the long description
        long_dtab.append(line.strip())
long_desc = "\n".join(long_dtab)
81 |
# Package metadata and build configuration handed over to distutils.
_setup_options = {
    "name": "PyMuPDF",
    "version": "1.17.4",
    "description": "Python bindings for the PDF rendering library MuPDF",
    "long_description": long_desc,
    "classifiers": classifier,
    "url": "https://github.com/pymupdf/PyMuPDF",
    "author": "Jorj McKie, Ruikai Liu",
    "author_email": "jorj.x.mckie@outlook.de",
    # make "build_py" trigger "build_ext" first
    "cmdclass": {"build_py": build_ext_first},
    "ext_modules": [module],
    "py_modules": ["fitz.fitz", "fitz.utils", "fitz.__main__"],
}
setup(**_setup_options)
95 |
--------------------------------------------------------------------------------