├── LICENSE
├── OCR
    ├── PDF_XChange-OCRed.pdf
    ├── README.md
    ├── easyocr1.py
    ├── images-to-ocr-pdf.py
    ├── ocr-ed.pdf
    ├── ocr-ed.txt
    ├── ocrpages.py
    ├── scanned.pdf
    ├── tesseract1.py
    ├── tesseract2.py
    └── v110-changes.pdf
├── README.md
├── advanced-toc
    ├── README.md
    ├── colored-toc.pdf
    ├── colored-toc.png
    ├── colorize.py
    ├── example.pdf
    └── replaced-toc.pdf
├── alias-changer.py
├── animations
    ├── README.md
    ├── morph-demo1.jpg
    ├── morph-demo1.py
    ├── morph-demo2.py
    ├── morph-demo3.py
    ├── quad-show1.py
    ├── quad-show2.jpg
    └── quad-show2.py
├── annotations
    ├── freetext-annot-lang.pdf
    ├── freetext-annot-lang.py
    ├── new-annots-0.pdf
    ├── new-annots.py
    ├── opacity.pdf
    ├── opacity.py
    ├── show-no-annots.py
    ├── with-annots.png
    └── without-annots.png
├── cloud-interactions
    ├── README.md
    ├── from-aws-s3.py
    ├── from-google.py
    ├── from-ms-azure.py
    ├── to-aws-s3.py
    └── to-ms-azure.py
├── conversion
    ├── README.md
    ├── images-to-ocr-pdf.py
    ├── make-cbz.py
    ├── make-imagepdf.py
    └── make-page-images.py
├── examples
    ├── .gitignore
    ├── DeDRM-ebook.py
    ├── README.md
    ├── anonymize-document
    │   ├── anonymize.py
    │   ├── input.pdf
    │   └── output.pdf
    ├── attach-images
    │   ├── attach.py
    │   ├── input
    │   │   ├── erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg
    │   │   └── joe-caione-qO-PIF84Vxg-unsplash.jpg
    │   └── output.pdf
    ├── browse-document
    │   ├── browse.py
    │   └── input.pdf
    ├── combine-pages
    │   ├── combine.py
    │   ├── input.pdf
    │   └── output.pdf
    ├── convert-document
    │   ├── convert.py
    │   ├── input.epub
    │   └── output.pdf
    ├── convert-image
    │   ├── convert.py
    │   ├── input.jpg
    │   └── output.png
    ├── convert-pixmap
    │   ├── convert.py
    │   ├── input.png
    │   └── output.jpg
    ├── convert-text
    │   ├── convert.py
    │   ├── input.txt
    │   └── output.pdf
    ├── copy-embedded
    │   ├── copy.py
    │   ├── input.pdf
    │   └── output.pdf
    ├── decrypt-document
    │   ├── decrypt.py
    │   ├── input.pdf
    │   └── output.pdf
    ├── display-document
    │   ├── display.py
    │   └── input.pdf
    ├── draw-cardioid
    │   ├── draw.py
    │   └── output.pdf
    ├── draw-caustic
    │   ├── draw.py
    │   ├── output.pdf
    │   ├── output.png
    │   ├── output.svg
    │   └── output.svgz
    ├── draw-fractal
    │   ├── carpet.py
    │   ├── output_carpet.png
    │   ├── output_punch.png
    │   ├── output_triangle.pdf
    │   ├── punch.py
    │   └── triangle.py
    ├── draw-polygon
    │   ├── draw.py
    │   ├── output.pdf
    │   └── output.svg
    ├── draw-rgb-area
    │   ├── draw.py
    │   ├── output_PIL.png
    │   └── output_fitz.png
    ├── draw-sines
    │   ├── draw.py
    │   └── output.pdf
    ├── edit-images
    │   ├── README.md
    │   ├── edit.py
    │   ├── figure-01.jpg
    │   └── input.pdf
    ├── edit-links
    │   ├── edit.py
    │   └── input.pdf
    ├── edit-toc
    │   ├── edit.py
    │   └── input.pdf
    ├── embed-images
    │   ├── embed.py
    │   ├── input
    │   │   ├── erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg
    │   │   └── joe-caione-qO-PIF84Vxg-unsplash.jpg
    │   └── output.pdf
    ├── export-embedded
    │   ├── export.py
    │   ├── input.pdf
    │   └── output.pdf
    ├── export-metadata
    │   ├── export.py
    │   ├── input.pdf
    │   └── output.csv
    ├── export-toc
    │   ├── export.py
    │   ├── input.pdf
    │   └── output.csv
    ├── extract-images
    │   ├── extract-from-pages.py
    │   ├── extract-from-xref.py
    │   ├── input.pdf
    │   └── output
    │   │   ├── .gitkeep
    │   │   ├── img00005.png
    │   │   └── img00011.png
    ├── extract-table
    │   ├── ParseTab.py
    │   ├── README.md
    │   ├── extract.py
    │   ├── input.pdf
    │   └── wx-extract.py
    ├── extract-vector-graphics
    │   └── detect_graphics.py
    ├── extract-xobj
    │   ├── extract.py
    │   ├── input.pdf
    │   └── output.pdf
    ├── filmfestival-2tables
    │   ├── README.md
    │   ├── filmfestival.db
    │   ├── filmfestival.py
    │   └── output.pdf
    ├── icons
    │   ├── PyMuPDF.ico
    │   ├── __init__.py
    │   ├── ico_pdf.py
    │   ├── pdf.py
    │   └── pymupdf.png
    ├── import-embedded
    │   ├── import.py
    │   ├── input.pdf
    │   ├── joe-caione-qO-PIF84Vxg-unsplash.jpg
    │   └── output.pdf
    ├── import-metadata
    │   ├── import.py
    │   ├── input.csv
    │   └── input.pdf
    ├── import-toc
    │   ├── import.py
    │   ├── input.csv
    │   └── input.pdf
    ├── insert-images
    │   ├── input
    │   │   ├── erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg
    │   │   └── joe-caione-qO-PIF84Vxg-unsplash.jpg
    │   ├── insert.py
    │   └── output.pdf
    ├── insert-logo
    │   ├── file.py
    │   ├── input.pdf
    │   ├── logo.png
    │   ├── logo.svg
    │   ├── output_file.pdf
    │   ├── output_svg.pdf
    │   └── svg.py
    ├── join-documents
    │   ├── input
    │   │   ├── made-with-cc.pdf
    │   │   └── thinkpython2.pdf
    │   ├── join.py
    │   └── output.pdf
    ├── list-embedded
    │   ├── input.pdf
    │   └── list.py
    ├── make-calendar
    │   ├── make.py
    │   └── output.pdf
    ├── optimize-document
    │   ├── input.pdf
    │   └── optimize.py
    ├── posterize-document
    │   ├── input.pdf
    │   ├── output.pdf
    │   └── posterize.py
    ├── print-hsv
    │   ├── output.pdf
    │   └── print.py
    ├── print-page-format
    │   └── print.py
    ├── print-rgb
    │   ├── output.pdf
    │   └── print.py
    ├── replace-image
    │   ├── README.md
    │   ├── input.jpg
    │   ├── input.pdf
    │   ├── output_remove.pdf
    │   ├── output_replace.pdf
    │   ├── remove.py
    │   └── replace.py
    ├── split-document
    │   ├── input.pdf
    │   ├── output
    │   │   ├── .gitkeep
    │   │   ├── input-0.pdf
    │   │   ├── input-1.pdf
    │   │   ├── input-10.pdf
    │   │   ├── input-100.pdf
    │   │   ├── input-101.pdf
    │   │   ├── input-102.pdf
    │   │   ├── input-103.pdf
    │   │   ├── input-104.pdf
    │   │   ├── input-105.pdf
    │   │   ├── input-106.pdf
    │   │   ├── input-107.pdf
    │   │   ├── input-108.pdf
    │   │   ├── input-109.pdf
    │   │   ├── input-11.pdf
    │   │   ├── input-110.pdf
    │   │   ├── input-111.pdf
    │   │   ├── input-112.pdf
    │   │   ├── input-113.pdf
    │   │   ├── input-114.pdf
    │   │   ├── input-115.pdf
    │   │   ├── input-116.pdf
    │   │   ├── input-117.pdf
    │   │   ├── input-118.pdf
    │   │   ├── input-119.pdf
    │   │   ├── input-12.pdf
    │   │   ├── input-120.pdf
    │   │   ├── input-121.pdf
    │   │   ├── input-122.pdf
    │   │   ├── input-123.pdf
    │   │   ├── input-124.pdf
    │   │   ├── input-125.pdf
    │   │   ├── input-126.pdf
    │   │   ├── input-127.pdf
    │   │   ├── input-128.pdf
    │   │   ├── input-129.pdf
    │   │   ├── input-13.pdf
    │   │   ├── input-130.pdf
    │   │   ├── input-131.pdf
    │   │   ├── input-132.pdf
    │   │   ├── input-133.pdf
    │   │   ├── input-134.pdf
    │   │   ├── input-135.pdf
    │   │   ├── input-136.pdf
    │   │   ├── input-137.pdf
    │   │   ├── input-138.pdf
    │   │   ├── input-139.pdf
    │   │   ├── input-14.pdf
    │   │   ├── input-140.pdf
    │   │   ├── input-141.pdf
    │   │   ├── input-142.pdf
    │   │   ├── input-143.pdf
    │   │   ├── input-144.pdf
    │   │   ├── input-145.pdf
    │   │   ├── input-146.pdf
    │   │   ├── input-147.pdf
    │   │   ├── input-148.pdf
    │   │   ├── input-149.pdf
    │   │   ├── input-15.pdf
    │   │   ├── input-150.pdf
    │   │   ├── input-151.pdf
    │   │   ├── input-152.pdf
    │   │   ├── input-153.pdf
    │   │   ├── input-154.pdf
    │   │   ├── input-155.pdf
    │   │   ├── input-156.pdf
    │   │   ├── input-157.pdf
    │   │   ├── input-158.pdf
    │   │   ├── input-159.pdf
    │   │   ├── input-16.pdf
    │   │   ├── input-160.pdf
    │   │   ├── input-161.pdf
    │   │   ├── input-162.pdf
    │   │   ├── input-163.pdf
    │   │   ├── input-164.pdf
    │   │   ├── input-165.pdf
    │   │   ├── input-166.pdf
    │   │   ├── input-167.pdf
    │   │   ├── input-168.pdf
    │   │   ├── input-169.pdf
    │   │   ├── input-17.pdf
    │   │   ├── input-170.pdf
    │   │   ├── input-171.pdf
    │   │   ├── input-172.pdf
    │   │   ├── input-173.pdf
    │   │   ├── input-174.pdf
    │   │   ├── input-175.pdf
    │   │   ├── input-18.pdf
    │   │   ├── input-19.pdf
    │   │   ├── input-2.pdf
    │   │   ├── input-20.pdf
    │   │   ├── input-21.pdf
    │   │   ├── input-22.pdf
    │   │   ├── input-23.pdf
    │   │   ├── input-24.pdf
    │   │   ├── input-25.pdf
    │   │   ├── input-26.pdf
    │   │   ├── input-27.pdf
    │   │   ├── input-28.pdf
    │   │   ├── input-29.pdf
    │   │   ├── input-3.pdf
    │   │   ├── input-30.pdf
    │   │   ├── input-31.pdf
    │   │   ├── input-32.pdf
    │   │   ├── input-33.pdf
    │   │   ├── input-34.pdf
    │   │   ├── input-35.pdf
    │   │   ├── input-36.pdf
    │   │   ├── input-37.pdf
    │   │   ├── input-38.pdf
    │   │   ├── input-39.pdf
    │   │   ├── input-4.pdf
    │   │   ├── input-40.pdf
    │   │   ├── input-41.pdf
    │   │   ├── input-42.pdf
    │   │   ├── input-43.pdf
    │   │   ├── input-44.pdf
    │   │   ├── input-45.pdf
    │   │   ├── input-46.pdf
    │   │   ├── input-47.pdf
    │   │   ├── input-48.pdf
    │   │   ├── input-49.pdf
    │   │   ├── input-5.pdf
    │   │   ├── input-50.pdf
    │   │   ├── input-51.pdf
    │   │   ├── input-52.pdf
    │   │   ├── input-53.pdf
    │   │   ├── input-54.pdf
    │   │   ├── input-55.pdf
    │   │   ├── input-56.pdf
    │   │   ├── input-57.pdf
    │   │   ├── input-58.pdf
    │   │   ├── input-59.pdf
    │   │   ├── input-6.pdf
    │   │   ├── input-60.pdf
    │   │   ├── input-61.pdf
    │   │   ├── input-62.pdf
    │   │   ├── input-63.pdf
    │   │   ├── input-64.pdf
    │   │   ├── input-65.pdf
    │   │   ├── input-66.pdf
    │   │   ├── input-67.pdf
    │   │   ├── input-68.pdf
    │   │   ├── input-69.pdf
    │   │   ├── input-7.pdf
    │   │   ├── input-70.pdf
    │   │   ├── input-71.pdf
    │   │   ├── input-72.pdf
    │   │   ├── input-73.pdf
    │   │   ├── input-74.pdf
    │   │   ├── input-75.pdf
    │   │   ├── input-76.pdf
    │   │   ├── input-77.pdf
    │   │   ├── input-78.pdf
    │   │   ├── input-79.pdf
    │   │   ├── input-8.pdf
    │   │   ├── input-80.pdf
    │   │   ├── input-81.pdf
    │   │   ├── input-82.pdf
    │   │   ├── input-83.pdf
    │   │   ├── input-84.pdf
    │   │   ├── input-85.pdf
    │   │   ├── input-86.pdf
    │   │   ├── input-87.pdf
    │   │   ├── input-88.pdf
    │   │   ├── input-89.pdf
    │   │   ├── input-9.pdf
    │   │   ├── input-90.pdf
    │   │   ├── input-91.pdf
    │   │   ├── input-92.pdf
    │   │   ├── input-93.pdf
    │   │   ├── input-94.pdf
    │   │   ├── input-95.pdf
    │   │   ├── input-96.pdf
    │   │   ├── input-97.pdf
    │   │   ├── input-98.pdf
    │   │   └── input-99.pdf
    │   └── split.py
    ├── test-blendmode
    │   ├── output.pdf
    │   └── test.py
    ├── tile-image
    │   ├── input.jpg
    │   ├── output
    │   │   ├── .gitkeep
    │   │   ├── target-00.png
    │   │   ├── target-01.png
    │   │   ├── target-02.png
    │   │   ├── target-10.png
    │   │   ├── target-11.png
    │   │   ├── target-12.png
    │   │   ├── target-20.png
    │   │   ├── target-21.png
    │   │   ├── target-22.png
    │   │   ├── target-30.png
    │   │   ├── target-31.png
    │   │   └── target-32.png
    │   └── tile.py
    ├── view-document
    │   ├── input.pdf
    │   └── view.py
    └── zerofy-rotation
    │   ├── derotate.py
    │   ├── input.pdf
    │   └── zerofy-rotation.py
├── fields
    ├── date-field.py
    ├── form-fields.py
    ├── interfield-calculation.py
    ├── list-fields.py
    ├── switch-text-on-off.py
    ├── widgettest-alt.pdf
    └── widgettest.py
├── font-replacement
    ├── multi-language.jpg
    ├── page-17-after.png
    ├── page-17-before.png
    ├── readme.md
    ├── repl-font.py
    ├── repl-fontnames.py
    └── run-log.txt
├── jupyter-notebooks
    ├── 1page-snap.log
    ├── 1page-snap.pdf
    ├── 1page.pdf
    ├── README-OCR.md
    ├── README.md
    ├── blacked.pdf
    ├── dehyphenate-flag.ipynb
    ├── detect-hidden.ipynb
    ├── input.pdf
    ├── input.pdf-status.log
    ├── journalling1.ipynb
    ├── journalling2.ipynb
    ├── new_circle_annot.ipynb
    ├── object-algebra.ipynb
    ├── ocr-illegible.ipynb
    ├── optional-content.ipynb
    ├── page-rectangles.ipynb
    ├── partial-ocr.ipynb
    ├── partial-ocr.pdf
    ├── show_image.py
    └── testpage-performance.ipynb
├── optional-content
    ├── readme.md
    ├── source-ocmd.pdf
    ├── source-ocmd.py
    ├── source-radio.pdf
    ├── source-radio.py
    └── source.pdf
├── pdf-names-resolution
    ├── README.md
    ├── find_names.py
    └── list_names.py
├── reporting
    ├── README.md
    ├── documentation-draft.md
    ├── examples
    │   ├── filmfestival-2tables
    │   │   ├── README.md
    │   │   ├── Reports.py
    │   │   ├── filmfestival.db
    │   │   ├── filmfestival.py
    │   │   └── output.pdf
    │   ├── invoice
    │   │   ├── README.md
    │   │   ├── Reports.py
    │   │   ├── header.html
    │   │   ├── invoice-parms.db
    │   │   ├── invoicer.py
    │   │   ├── items.html
    │   │   ├── logo.png
    │   │   ├── output.pdf
    │   │   └── prolog.html
    │   ├── multi-format
    │   │   ├── README.md
    │   │   ├── Reports.py
    │   │   ├── national-capitals.csv
    │   │   ├── national-capitals.py
    │   │   └── output.pdf
    │   ├── row-with-images
    │   │   ├── README.md
    │   │   ├── Reports.py
    │   │   ├── flags.zip
    │   │   ├── items.csv
    │   │   ├── output.pdf
    │   │   └── rows-with-images.py
    │   ├── simple-article
    │   │   ├── README.md
    │   │   ├── Reports.py
    │   │   ├── output.pdf
    │   │   ├── simple-article.py
    │   │   ├── springer.html
    │   │   └── springer.jpg
    │   └── user-fonts
    │   │   ├── DejaVuSansCondensed-Bold.ttf
    │   │   ├── DejaVuSansCondensed.ttf
    │   │   ├── README.md
    │   │   ├── Reports.py
    │   │   ├── dejavu.py
    │   │   ├── filmfestival.db
    │   │   ├── kenpixel.py
    │   │   ├── kenpixel.ttf
    │   │   ├── output-dejavu.pdf
    │   │   └── output-kenpixel.pdf
    ├── pymupdf-reporting.pdf
    └── pymupdf-reporting.pptx
├── shapes
    ├── piechart1.pdf
    ├── piechart1.png
    ├── piechart1.py
    ├── piechart2.pdf
    ├── piechart2.py
    ├── shapes_and_symbols.py
    ├── symbol-list.pdf
    └── symbol-list.py
├── table-analysis
    ├── README.md
    ├── XPS-table.pdf
    ├── XPS-table.xlsx
    ├── XPS-table.xps
    ├── chinese-table.pdf
    ├── clean_graphics.py
    ├── compare-xps-pdf.ipynb
    ├── find_tables.ipynb
    ├── gridlines-to-pandas.py
    ├── input1-bbox.json
    ├── input1.pdf
    ├── input2.pdf
    ├── join_tables.ipynb
    ├── national-capitals.pdf
    ├── show_image.py
    └── span-analysis-to-pandas.py
├── text-documents
    ├── README.md
    ├── any-file.ipynb
    ├── basic.ipynb
    ├── multi-language.ipynb
    └── test.pdf
├── text-extraction
    ├── 1page-text.jpg
    ├── 1page.pdf
    ├── Dart-text.jpg
    ├── Dart.pdf
    ├── PDF2Text.py
    ├── PDF2TextBlocks.py
    ├── Petresume-text.jpg
    ├── Petresume.pdf
    ├── README.md
    ├── demo1-text.jpg
    ├── demo1.pdf
    ├── extend-dicts.pdf
    ├── extend-dicts.py
    ├── fitzcli.py
    ├── garbled-text.jpg
    ├── garbled.pdf
    ├── invoice-simple.pdf
    ├── layout-analyzer.py
    ├── layout-demo1.pdf
    ├── lookup-keywords.py
    ├── multi_column.py
    ├── shadows.pdf
    ├── textmaker.pdf
    ├── textmaker.py
    ├── textmaker2.pdf
    └── textmaker2.py
├── textbox-extraction
    ├── readme.md
    ├── search.pdf
    ├── search.png
    ├── textbox-extract-1.py
    └── textbox-extract-2.py
├── textwriter
    ├── cff-test.pdf
    ├── demo.pdf
    ├── demo.py
    ├── new-annots-tw-0.pdf
    ├── new-annots-tw.py
    ├── test-droid.pdf
    ├── test.pdf
    ├── textwriter-textbox.pdf
    └── textwriter-textbox.py
└── word&line-marking
    ├── mark-lines.png
    ├── mark-lines.py
    ├── mark-lines2.jpg
    ├── mark-lines2.py
    ├── mark-words.pdf
    ├── mark-words.py
    ├── readme.md
    └── search.pdf


/OCR/PDF_XChange-OCRed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/PDF_XChange-OCRed.pdf


--------------------------------------------------------------------------------
/OCR/images-to-ocr-pdf.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility to OCR a list of images and output them as one PDF
 3 | 
 4 | License: GNU AGPL 3.0
 5 | Author: (c) Harald Lieder, harald.lieder@outlook.com
 6 | Date: 2021-10-26
 7 | """
 8 | import os
 9 | import sys
10 | 
11 | import fitz
12 | 
13 | if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 0):
14 |     raise ValueError("Need at least PyMuPDF v1.19.0")
15 | 
16 | doc = fitz.open()  # output PDF
17 | img_folder = sys.argv[1]  # example: image folder name provided
18 | dirname = os.path.dirname(img_folder)
19 | img_list = os.listdir(img_folder)  # some list of image filenames
20 | for img in img_list:
21 |     imgfile = os.path.join(dirname, img)
22 |     pix = fitz.Pixmap(imgfile)  # make a pixmap form the image file
23 |     pdfbytes = pix.pdfocr_tobytes(language="eng")  # 1-page PDF with the OCRed image
24 |     imgpdf = fitz.open("pdf", pdfbytes)  # open it as a PDF
25 |     doc.insert_pdf(imgpdf)  # append the image page to output
26 | 
27 | doc.ez_save("ocr-pdf.pdf")  # save output
28 | 


--------------------------------------------------------------------------------
/OCR/ocr-ed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/ocr-ed.pdf


--------------------------------------------------------------------------------
/OCR/ocr-ed.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | 
 5 |        PyMuPDF— the Python
 6 |           bindings for MuPDF
 7 | 
 8 | PyMuPDF Documentation
 9 |                             Release 1.18.19
10 | 
11 |                                Jorj X. McKie
12 | 
13 |                                      Sep 17, 2021
14 | 


--------------------------------------------------------------------------------
/OCR/ocrpages.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This is a basic script demonstrating the use of OCRmyPDF together with PyMuPDF.
 3 | 
 4 | It reads a PDF's pages and passes them to ocrmypdf one by one. At this point,
 5 | one could insert some checks as to whether the page is actually an image,
 6 | contains no text, or contains text with many unrecognized characters or the like.
 7 | 
 8 | Each page is then converted to a 1-page temporary PDF which is
 9 | - passed to ocrmypdf for OCR-ing it
10 | - the 1-page output PDF of the previous step is then text-extracted
11 | - return the extracted text
12 | 
13 | Instead of extracting simple naive text format, one could also use all other
14 | text extraction formats like "dict" to get text position information.
15 | 
16 | Requires
17 | ---------
18 | ocrmypdf
19 | """
20 | import fitz
21 | import ocrmypdf
22 | import sys
23 | import io
24 | 
25 | 
26 | def ocr_the_page(page):
27 |     """Extract the text from passed-in PDF page."""
28 |     src = page.parent  # the page's document
29 |     doc = fitz.open()  # make temporary 1-pager
30 |     doc.insert_pdf(src, from_page=page.number, to_page=page.number)
31 |     pdfbytes = doc.tobytes()
32 |     inbytes = io.BytesIO(pdfbytes)  # transform to BytesIO object
33 |     outbytes = io.BytesIO()  # let ocrmypdf store its result pdf here
34 |     ocrmypdf.ocr(
35 |         inbytes,  # input 1-pager
36 |         outbytes,  # ouput 1-pager
37 |         language="eng",  # modify as required e.g. ("eng", "ger")
38 |         output_type="pdf",  # only need simple PDF format
39 |         # add more paramneters, e.g. to enforce OCR-ing, etc., e.g.
40 |         # force_ocr=True, redo_ocr=True
41 |     )
42 |     ocr_pdf = fitz.open("pdf", outbytes.getvalue())  # read output as fitz PDF
43 |     text = ocr_pdf[0].get_text()  # ...and extract text from the page
44 |     return text  # return it
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     doc = fitz.open(sys.argv[1])
49 |     for page in doc:
50 |         text = ocr_the_page(page)
51 |         print("Text from page %i:" % page.number)
52 |         print(text)
53 | 


--------------------------------------------------------------------------------
/OCR/scanned.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/scanned.pdf


--------------------------------------------------------------------------------
/OCR/v110-changes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/v110-changes.pdf


--------------------------------------------------------------------------------
/advanced-toc/colored-toc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/colored-toc.pdf


--------------------------------------------------------------------------------
/advanced-toc/colored-toc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/colored-toc.png


--------------------------------------------------------------------------------
/advanced-toc/colorize.py:
--------------------------------------------------------------------------------
 1 | import fitz
 2 | 
 3 | doc = fitz.open("example.pdf")
 4 | toc = doc.get_toc(False)
 5 | for i, item in enumerate(toc):
 6 |     lvl, title, pno, ddict = item
 7 |     ddict["collapse"] = False
 8 |     if lvl == 1:
 9 |         ddict["color"] = (1, 0, 0)
10 |         ddict["bold"] = True
11 |         ddict["italic"] = False
12 |     elif lvl == 2:
13 |         ddict["color"] = (0, 0, 1)
14 |         ddict["bold"] = False
15 |         ddict["italic"] = True
16 |     else:
17 |         ddict["color"] = (0, 1, 0)
18 |         ddict["bold"] = ddict["italic"] = False
19 |     doc.set_toc_item(i, dest_dict=ddict)
20 | doc.save("new-toc.pdf")
21 | 


--------------------------------------------------------------------------------
/advanced-toc/example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/example.pdf


--------------------------------------------------------------------------------
/advanced-toc/replaced-toc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/replaced-toc.pdf


--------------------------------------------------------------------------------
/animations/README.md:
--------------------------------------------------------------------------------
 1 | This folder contains a few scripts which may best be characterized as "fun" or "entertainment" ... using PyMuPDF of course.
 2 | 
 3 | They all work following the same basic approach:
 4 | 
 5 | 1. Draw or write something on an empty page of a new PDF
 6 | 2. Convert the page to an image
 7 | 3. Show this image in a GUI (using PySimpleGUI)
 8 | 4. Destroy image, page and PDF document
 9 | 5. Modify some parameters
10 | 6. Start over with step 1 above in an endless loop.
11 | 
12 | Because of the excellent performance of PyMuPDF (😉), this process is fast enough to be shown like a little video clip - mostly achieving more than 100 frames per second.
13 | 
14 | Scripts `morph-demo1.py`, `morph-demo2.py` and `morph-demo3.py` show the effect of morphing a text box given some fixpoint.
15 | 
16 | Scripts `quad-show1.py` and `quad-show2.py` simply draw quadrilaterals to demonstrate what happens when their corners are modified following certain patterns.
17 | 


--------------------------------------------------------------------------------
/animations/morph-demo1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/animations/morph-demo1.jpg


--------------------------------------------------------------------------------
/animations/quad-show2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/animations/quad-show2.jpg


--------------------------------------------------------------------------------
/annotations/freetext-annot-lang.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/freetext-annot-lang.pdf


--------------------------------------------------------------------------------
/annotations/freetext-annot-lang.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import division, print_function
 3 | 
 4 | import os
 5 | import sys
 6 | 
 7 | import fitz
 8 | 
 9 | print(fitz.__doc__)
10 | if fitz.VersionBind.split(".") < ["1", "17", "0"]:
11 |     sys.exit("Need PyMuPDF v1.17.0 or later.")
12 | 
13 | outfile = os.path.abspath(__file__).replace(".py", ".pdf")
14 | 
15 | 
16 | doc = fitz.open()  # new PDF
17 | page = doc.new_page()  # new page
18 | 
19 | text = r"""This is a text of mixed languages to generate FreeText annotations with automatic font selection - a feature new in MuPDF v1.17.
20 | Euro: €, general Latin and other signs: | ~ ° ² ³ ñ ä ö ü ß â ¿ ¡ µ ¶ œ ¼ ½ ¾ ‰
21 | Japan: 熊野三山本願所は、 15世紀末以降における熊野三山 （熊野本宮、 熊野新宮
22 | Greece: Στα ερείπια της πόλης, που ήταν ένα σημαντικό
23 | Korea: 에듀롬은 하나의 계정으로 전 세계 고등교육 기관의 인터넷에 접속할
24 | Russia: Ко времени восшествия на престол Якова I в значительной
25 | China: 北京作为城市的历史 可以追溯到 3,000 年前。西周初年， 周武王封召公奭于燕國。
26 | Devanagari (not supported): नि:शुल्क ज्ञानको लागी लाई धन्यबाद""".splitlines()
27 | 
28 | blue = (0, 0, 1)
29 | red = (1, 0, 0)
30 | gold = (1, 1, 0)
31 | green = (0, 1, 0)
32 | 
33 | # make the rectangles for filling in above text lines
34 | tl = page.rect.tl + (72, 144)  # some distance from the page's corners
35 | br = page.rect.br - (72, 144)
36 | rect = fitz.Rect(tl, br)  # put all annots inside this rectangle
37 | cells = fitz.make_table(rect, cols=1, rows=len(text))
38 | shrink = (0, 5, 0, 0)  # makes distance between annots
39 | for i in range(len(text)):
40 |     annot = page.add_freetext_annot(
41 |         cells[i][0] + shrink,
42 |         text[i],
43 |         fontsize=16,
44 |         fontname="tiro",  # used for non-CJK characters only!
45 |         align=fitz.TEXT_ALIGN_CENTER,
46 |         text_color=blue,
47 |     )
48 |     annot.set_border(width=1.0)
49 |     annot.update(fill_color=gold, border_color=green)
50 | 
51 | doc.save(outfile, garbage=3, deflate=True)
52 | 


--------------------------------------------------------------------------------
/annotations/new-annots-0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/new-annots-0.pdf


--------------------------------------------------------------------------------
/annotations/opacity.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | 
 4 | import fitz
 5 | 
 6 | print(fitz.__doc__)
 7 | doc = fitz.open()
 8 | page = doc.new_page()
 9 | 
10 | annot1 = page.add_circle_annot((50, 50, 100, 100))
11 | annot1.set_colors(fill=(1, 0, 0), stroke=(1, 0, 0))
12 | annot1.set_opacity(2 / 3)
13 | annot1.update(blend_mode="Multiply")
14 | 
15 | annot2 = page.add_circle_annot((75, 75, 125, 125))
16 | annot2.set_colors(fill=(0, 0, 1), stroke=(0, 0, 1))
17 | annot2.set_opacity(1 / 3)
18 | annot2.update(blend_mode="Multiply")
19 | outfile = os.path.abspath(__file__).replace(".py", ".pdf")
20 | doc.save(outfile, expand=True, pretty=True)
21 | print("saved", outfile)
22 | 


--------------------------------------------------------------------------------
/annotations/show-no-annots.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import fitz
 3 | 
 4 | """
 5 | Render a page with and without anootations.
 6 | 
 7 | Please note that starting with v1.16.0, pixmaps without annotations
 8 | can be created directly.
 9 | """
10 | print(fitz.__doc__)
11 | thisdir = os.path.dirname(__file__)
12 | infile = os.path.join(thisdir, "new-annots-0.pdf")
13 | src = fitz.open(infile)  # a document with annotations
14 | p1 = src[0]
15 | pix1 = p1.get_pixmap(annots=True)
16 | pix1.save(os.path.join(thisdir, "with-annots.png"))  # save page pixmap
17 | pix2 = p1.get_pixmap(annots=False)
18 | pix2.save(os.path.join(thisdir, "without-annots.png"))
19 | 


--------------------------------------------------------------------------------
/annotations/with-annots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/with-annots.png


--------------------------------------------------------------------------------
/annotations/without-annots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/without-annots.png


--------------------------------------------------------------------------------
/cloud-interactions/README.md:
--------------------------------------------------------------------------------
1 | This is a set of code snippets showing how to download or upload to cloud services offered by major providers.
2 | 
3 | The focus of the scripts is to demonstrate how intermediate disk storage can be avoided by using PyMuPDF Document features.
4 | 
5 | We are currently considering extending `Document` creation such that cloud access is covered too. Because the way this works differs from one cloud service provider to the next, this is somewhat tedious. So please bear with us until we are clear about what we need to do.
6 | 


--------------------------------------------------------------------------------
/cloud-interactions/from-aws-s3.py:
--------------------------------------------------------------------------------
 1 | import fitz
 2 | import boto3
 3 | 
 4 | s3 = boto3.client("s3")
 5 | 
 6 | # fill in your credentials to access the cloud
 7 | response = s3.get_object(Bucket="string", Key="string")
 8 | mime = response["ContentType"]
 9 | body = response["Body"]
10 | 
11 | # define Document with these data
12 | doc = fitz.open(mime, body.read())
13 | 


--------------------------------------------------------------------------------
/cloud-interactions/from-google.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import fitz  # pymupdf
 3 | import gcsfs  # google cloud storage file system
 4 | 
 5 | # Access the google filesystem.
 6 | # You will need to supply credentials - which is omitted here
 7 | fs = gcsfs.GCSFileSystem(project="my-google-project")
 8 | 
 9 | filename = fs.ls("my-bucket")[0]  # first filename in bucket
10 | ext = os.path.splitext(filename)[1]  # determine file extension
11 | f = fs.open(filename, "rb")  # open with that filesystem
12 | 
13 | # now open with PyMuPDF using the bytes object of "f"
14 | doc = fitz.open(ext[1:], f.read())
15 | 


--------------------------------------------------------------------------------
/cloud-interactions/from-ms-azure.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import fitz  # pymupdf
 3 | from azure.storage.blob import BlobClient
 4 | 
 5 | blob = BlobClient.from_connection_string(
 6 |     conn_str="my_connection_string",
 7 |     container_name="my_container",
 8 |     blob_name="my_blob",
 9 | )
10 | 
11 | with open("some-file.pdf", "wb") as my_blob:
12 |     blob_data = blob.download_blob()
13 |     blob_data.readinto(my_blob)
14 | 
15 | # now open with PyMuPDF using the bytes object of "f"
16 | doc = fitz.open("pdf", my_blob.read())
17 | 


--------------------------------------------------------------------------------
/cloud-interactions/to-aws-s3.py:
--------------------------------------------------------------------------------
 1 | import fitz
 2 | import boto3
 3 | 
 4 | # process some PDF document
 5 | doc = fitz.open("...")
 6 | # then write / upload it directly to AWS S3
 7 | # Instead of save, we use the tobytes(), which generates a bytes object
 8 | pdfbytes = doc.tobytes(  # optional 'save' parameters:
 9 |     garbage=3,
10 |     deflate=True,
11 |     owner_pw="owner-password",
12 |     user_pw="user-pasword",
13 | )
14 | 
15 | s3 = boto3.client("s3")
16 | request_route = "string"
17 | request_token = "string"
18 | s3.write_get_object_response(
19 |     Body=pdfbytes,
20 |     RequestRoute=request_route,
21 |     RequestToken=request_token,
22 | )
23 | 


--------------------------------------------------------------------------------
/cloud-interactions/to-ms-azure.py:
--------------------------------------------------------------------------------
 1 | import fitz  # pymupdf
 2 | from azure.storage.blob import BlobClient
 3 | 
 4 | # some PDF document
 5 | doc = fitz.open("...")
 6 | 
 7 | # access Azure blob client
 8 | blob = BlobClient.from_connection_string(
 9 |     conn_str="my_connection_string",
10 |     container_name="my_container",
11 |     blob_name="my_blob",
12 | )
13 | 
14 | # upload document
15 | blob.upload_blob(
16 |     doc.tobytes(
17 |         garbage=3,
18 |         deflate=True,
19 |         # more parameters
20 |     )
21 | )
22 | 


--------------------------------------------------------------------------------
/conversion/README.md:
--------------------------------------------------------------------------------
 1 | This folder contains scripts for document conversions.
 2 | 
 3 | Over time, more examples will be added. Currently there are:
 4 | 
 5 | * `make-cbz.py` - convert any document to a Comic Book
 6 | * `make-imagepdf.py` - convert any document to a PDF with original pages rendered to images.
 7 | * `make-page-images.py` - convert the pages of any document to PNG images.
 8 | * `images-to-ocr-pdf.py` - make PDF from a list of images (one image per page), where each page contains an OCR text layer.
 9 | 
10 | 
11 | Your contribution is welcome. This may include more conversion types, or improvements like better handling / supporting parameters of existing scripts.
12 | 


--------------------------------------------------------------------------------
/conversion/images-to-ocr-pdf.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility to OCR a list of images and output them as one PDF
 3 | 
 4 | License: GNU AGPL 3.0
 5 | Author: (c) Harald Lieder, harald.lieder@outlook.com
 6 | Date: 2021-10-26
 7 | """
 8 | import os
 9 | import sys
10 | 
11 | import fitz
12 | 
if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 0):
    raise ValueError("Need at least PyMuPDF v1.19.0")

doc = fitz.open()  # output PDF
img_folder = sys.argv[1]  # folder containing the images to OCR

# os.listdir() returns names in arbitrary order; sort them so that the page
# sequence of the output PDF is reproducible.
img_list = sorted(os.listdir(img_folder))
for img in img_list:
    # The names returned by listdir are relative to img_folder itself, so the
    # folder (not its parent directory) must be used to build the path.
    imgfile = os.path.join(img_folder, img)
    pix = fitz.Pixmap(imgfile)  # make a pixmap from the image file
    pdfbytes = pix.pdfocr_tobytes(language="eng")  # 1-page PDF with the OCRed image
    imgpdf = fitz.open("pdf", pdfbytes)  # open it as a PDF
    doc.insert_pdf(imgpdf)  # append the image page to output

doc.ez_save("ocr-pdf.pdf")  # save output
28 | 


--------------------------------------------------------------------------------
/conversion/make-cbz.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility to convert a supported document to a Comic Book archive.
 3 | 
 4 | License: GNU AGPL 3.0
 5 | Author: (c) Harald Lieder, harald.lieder@outlook.com
 6 | Date: 2021-08-30
 7 | """
 8 | 
 9 | import os
10 | import sys
11 | import zipfile
12 | 
13 | import fitz
14 | 
15 | 
def main(doc, outfile=None, pages=None, dpi=96):
    """
    Write pages of "doc" as PNG images into a Comic Book (zip) archive.

    Parameters:
        doc: the opened document whose pages are rendered.
        outfile: name of the archive. If None, it is derived from the
            document name, else from this script's name, else "out.cbz".
        pages: iterable of 0-based page numbers. If None, all pages are used.
        dpi: resolution of the page images (zoom factor is dpi / 72).
    """
    if outfile is None:
        if doc.name:
            outfile = os.path.splitext(doc.name)[0] + ".cbz"
        elif __file__.endswith(".py"):
            # exchange the trailing extension only - str.replace() would
            # also change any other ".py" occurring inside the path
            outfile = __file__[: -len(".py")] + ".cbz"
        else:
            outfile = "out.cbz"
    if pages is None:
        pages = range(doc.page_count)
    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)
    # The context manager closes the archive even if rendering a page fails.
    # PNG data is already compressed, hence the images are stored as is.
    with zipfile.ZipFile(outfile, "w", compression=zipfile.ZIP_STORED) as zipout:
        for pno in pages:
            page = doc[pno]
            pix = page.get_pixmap(matrix=mat)
            pix.set_dpi(dpi, dpi)  # store dpi info in the image
            pagename = "p%05i.png" % (pno + 1)  # 1-based page names
            zipout.writestr(pagename, pix.tobytes("png"))
41 | 
42 | 
if __name__ == "__main__":
    # The document to convert is given as first command line argument.
    main(fitz.open(sys.argv[1]))
47 | 


--------------------------------------------------------------------------------
/conversion/make-imagepdf.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility to convert a supported document to an image-only PDF.
 3 | 
 4 | License: GNU AGPL 3.0
 5 | Author: (c) Harald Lieder, harald.lieder@outlook.com
 6 | Date: 2021-08-30
 7 | """
 8 | import os
 9 | import sys
10 | 
11 | import fitz
12 | 
13 | 
def main(doc, outfile=None, pages=None, dpi=96):
    """
    Create a PDF whose pages are images of the pages of "doc".

    Parameters:
        doc: the opened input document.
        outfile: output PDF name. If None, it is derived from the document
            name, else from this script's name, else "out.pdf".
        pages: iterable of 0-based page numbers. If None, all pages are used.
        dpi: resolution of the page images (zoom factor is dpi / 72).
    """
    if outfile is None:
        if doc.name:
            outfile = os.path.splitext(doc.name)[0] + ".pdf"
        elif __file__.endswith(".py"):
            outfile = __file__.replace(".py", ".pdf")
        else:
            outfile = "out.pdf"
    # never use the name of the input file for the output
    if outfile == doc.name:
        outfile += ".pdf"

    page_numbers = range(doc.page_count) if pages is None else pages
    scale = dpi / 72
    mat = fitz.Matrix(scale, scale)

    pdfout = fitz.open()
    for pno in page_numbers:
        src_page = doc[pno]
        pix = src_page.get_pixmap(matrix=mat)
        pix.set_dpi(dpi, dpi)
        # the output page gets the dimensions of the input page
        size = src_page.rect
        opage = pdfout.new_page(width=size.width, height=size.height)
        opage.insert_image(opage.rect, pixmap=pix)
    pdfout.ez_save(outfile)
    pdfout.close()
38 | 
39 | 
if __name__ == "__main__":
    # The document to convert is given as first command line argument.
    main(fitz.open(sys.argv[1]))
44 | 


--------------------------------------------------------------------------------
/conversion/make-page-images.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Basic script to convert pages of an arbitrary document to PNG images.
 3 | 
 4 | All MuPDF document types are supported: PDF, XPS, EPUB, etc.
 5 | Page images are stored in the current working directory and named
 6 | "page-0001.png", "page-0002.png", etc.
 7 | 
 8 | Desired resolution can be chosen by setting the "DPI" variable.
 9 | """
10 | import sys
11 | import fitz
12 | 
DPI = 300  # the desired image resolution
ZOOM = DPI / 72  # zoom factor, standard dpi is 72
magnify = fitz.Matrix(ZOOM, ZOOM)  # takes care of zooming

doc = fitz.open(sys.argv[1])  # document name is the first argument
for page in doc:
    # images are named page-0001.png, page-0002.png, ...
    image_name = "page-%04i.png" % (page.number + 1)
    pix = page.get_pixmap(matrix=magnify)  # make page image
    pix.set_dpi(DPI, DPI)  # store dpi info in image
    pix.save(image_name)
24 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | /__pycache__
2 | /extract-table/__pycache__/
3 | /replace-image/__pycache__/
4 | /icons/__pycache__
5 | 


--------------------------------------------------------------------------------
/examples/anonymize-document/anonymize.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Remove all text from a document.
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python anonymize.py input.pdf
10 | 
11 | Description
12 | -----------
13 | Scan through all pages of a PDF and remove all text. The metadata dictionary
14 | will also be cleared with "none" values. Any XML-based metadata will also be
15 | deleted.
16 | """
17 | 
18 | import sys
19 | import fitz
20 | 
21 | 
def remove_txt(cont):
    """
    Remove everything enclosed in a pair of "BT" / "ET" words, including both.

    Assuming "cont" is the bytes object of a PDF "/Contents" stream, this
    will make all text of the owning page disappear (permanent delete).

    Words are recognized when delimited by spaces, line feeds, carriage
    returns, tabs or form feeds, so streams using "\\r\\n" or "\\r" line
    ends are handled as well. Each of these separators is turned into one
    space in the result.

    Parameters:
        cont: bytes of the content stream.
    Returns:
        bytes of the content stream with all text objects removed.
    """
    # map every separator to a space (one-to-one, so nothing is shifted)
    to_space = bytes.maketrans(b"\n\r\t\x0c", b"    ")
    kept = []
    intext = False
    for word in cont.translate(to_space).split(b" "):
        if word == b"BT":  # a text object starts: skip until "ET"
            intext = True
        elif word == b"ET":  # the text object ends
            intext = False
        elif not intext:
            kept.append(word)
    return b" ".join(kept)
45 | 
46 | 
# Validate the command line with explicit checks: "assert" statements are
# removed when Python runs with -O and must not be used for input validation.
if len(sys.argv) != 2:
    sys.exit("need input PDF file name")
fn = sys.argv[1]
if not fn.lower().endswith(".pdf"):  # also accept e.g. ".PDF"
    sys.exit("expect a PDF file")

doc = fitz.open(fn)
doc.set_metadata({})  # set metadata values to "none"
doc.del_xml_metadata()  # delete any XML metadata
for page in doc:
    # a page may have several content streams - clean each of them
    for xref in page.get_contents():
        doc.update_stream(xref, remove_txt(doc.xref_stream(xref)))

doc.save("output.pdf", clean=True, garbage=4)
61 | 


--------------------------------------------------------------------------------
/examples/anonymize-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/anonymize-document/input.pdf


--------------------------------------------------------------------------------
/examples/anonymize-document/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/anonymize-document/output.pdf


--------------------------------------------------------------------------------
/examples/attach-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/attach-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg


--------------------------------------------------------------------------------
/examples/attach-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/attach-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg


--------------------------------------------------------------------------------
/examples/attach-images/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/attach-images/output.pdf


--------------------------------------------------------------------------------
/examples/browse-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/browse-document/input.pdf


--------------------------------------------------------------------------------
/examples/combine-pages/combine.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copy a PDF document combining every 4 pages
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2018 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python combine.py input.pdf
10 | 
11 | Notes
12 | -----
13 | (1) Output file is chosen to have A4 portrait pages. Input pages are scaled
14 |     maintaining side proportions. Both can be changed, e.g. based on input
15 |     page size. However, note that not all pages need to have the same size, etc.
16 | 
17 | (2) Easily adapt the example to combine just 2 pages (like for a booklet) or
18 |     make the output page dimension dependent on input, or whatever.
19 | 
20 | (3) This should run very fast: needed less than 25 sec on a Python 3.6 64bit,
21 |     Windows 10, AMD 4.0 GHz for the 1'310 pages of the Adobe manual.
22 |     Without save-options "garbage" and "deflate" this goes below 4 seconds, but
23 |     results in a bigger file.
24 | 
25 | Dependencies
26 | ------------
27 | PyMuPDF 1.12.1 or later
28 | """
29 | 
30 | from __future__ import print_function
31 | import fitz, sys
32 | 
src = fitz.open(sys.argv[1])  # input document
doc = fitz.open()  # empty output PDF

# output pages are A4 portrait
width, height = fitz.paper_size("a4")
full = fitz.Rect(0, 0, width, height)

# split the output page into 4 equally sized rectangles
top_left = full * 0.5
top_right = top_left + (top_left.width, 0, top_left.width, 0)
bottom_left = top_left + (0, top_left.height, 0, top_left.height)
bottom_right = fitz.Rect(top_left.br, full.br)
r_tab = [top_left, top_right, bottom_left, bottom_right]

# copy input pages to output, 4 input pages per output page
for spage in src:
    slot = spage.number % 4  # target rectangle on the output page
    if slot == 0:  # every 4th input page starts a new output page
        page = doc.new_page(-1, width=width, height=height)
    page.show_pdf_page(r_tab[slot], src, spage.number)

# by all means, save new file using garbage collection and compression
doc.save("output.pdf", garbage=4, deflate=True)
62 | 


--------------------------------------------------------------------------------
/examples/combine-pages/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/combine-pages/input.pdf


--------------------------------------------------------------------------------
/examples/combine-pages/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/combine-pages/output.pdf


--------------------------------------------------------------------------------
/examples/convert-document/input.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-document/input.epub


--------------------------------------------------------------------------------
/examples/convert-document/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-document/output.pdf


--------------------------------------------------------------------------------
/examples/convert-image/convert.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Convert an arbitrary image to a PNG pixmap using Pillow
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python convert.py input.jpg
10 | 
11 | Dependencies
12 | ------------
13 | Pillow
14 | """
15 | 
16 | import sys
17 | import fitz
18 | from PIL import Image
19 | 
print(fitz.__doc__)

# the image file name is expected as the only command line argument
pic_fn = sys.argv[1] if len(sys.argv) == 2 else None

if pic_fn:
    print("Reading %s" % pic_fn)
    # The "with" block closes the file even if Pillow cannot read the image.
    # convert() delivers a fully loaded image, so the file is not needed
    # afterwards.
    with open(pic_fn, "rb") as pic_f:
        img = Image.open(pic_f).convert("RGB")
    samples = img.tobytes()  # the raw RGB pixel data
    # build a pixmap without alpha channel from the pixel data
    pix = fitz.Pixmap(fitz.csRGB, img.size[0], img.size[1], samples, 0)
    pix.save("output.png")
35 | 


--------------------------------------------------------------------------------
/examples/convert-image/input.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-image/input.jpg


--------------------------------------------------------------------------------
/examples/convert-image/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-image/output.png


--------------------------------------------------------------------------------
/examples/convert-pixmap/convert.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Convert an arbitrary pixmap to JPEG format using Pillow
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python convert.py input.png
10 | 
11 | Dependencies
12 | ------------
13 | Pillow
14 | """
15 | 
16 | import sys
17 | import fitz
18 | from PIL import Image
19 | 
print(fitz.__doc__)
# explicit check instead of "assert", which is removed under "python -O"
if len(sys.argv) != 2:
    sys.exit("Usage: %s <input file>" % sys.argv[0])

pix = fitz.Pixmap(sys.argv[1])
rgb = "RGB"
# The Pillow call below interprets the samples as RGB triples, so pixmaps
# with another colorspace (e.g. gray or CMYK) must be converted first.
if pix.colorspace is not None and pix.colorspace.n != 3:
    pix = fitz.Pixmap(fitz.csRGB, pix)
if pix.alpha:  # JPEG cannot have alpha!
    pix = fitz.Pixmap(pix, 0)  # drop alpha channel

img = Image.frombuffer(rgb, [pix.width, pix.height], pix.samples, "raw", rgb, 0, 1)
img.save("output.jpg")
31 | 


--------------------------------------------------------------------------------
/examples/convert-pixmap/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-pixmap/input.png


--------------------------------------------------------------------------------
/examples/convert-pixmap/output.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-pixmap/output.jpg


--------------------------------------------------------------------------------
/examples/convert-text/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-text/output.pdf


--------------------------------------------------------------------------------
/examples/copy-embedded/copy.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copy the embedded files in the input document to the output document
 3 | -------------------------------------------------------------------------------
 4 | License: GNU AGPL V3
 5 | (c) 2021 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python copy.py input.pdf output.pdf
10 | 
11 | Notes
12 | -----
13 | The output.pdf file generated in examples/embed-images is renamed as input.pdf
14 | to be used as the input file in this example.
15 | 
16 | Dependencies
17 | ------------
18 | PyMuPDF
19 | """
20 | 
21 | from __future__ import print_function
22 | import sys
23 | import fitz
24 | 
ifn = sys.argv[1]  # input PDF
ofn = sys.argv[2]  # output PDF
docin = fitz.open(ifn)
docout = fitz.open(ofn)
print("Copying embedded files from '%s' to '%s'" % (ifn, ofn))

# Names already present in the output: checking them up front replaces a
# bare "except: pass", which also hid every other error of the copy step.
existing = set(docout.embfile_names())
for i in range(docin.embfile_count()):
    d = docin.embfile_info(i)  # file metadata
    name = d["name"]
    if name in existing:  # safeguarding against duplicate entries
        print("skipping duplicate entry:", name)
        continue
    print("copying entry:", name)
    # embfile_add expects the entry name first, then the file content;
    # the remaining metadata is passed by keyword to avoid mix-ups.
    docout.embfile_add(
        name,
        docin.embfile_get(i),  # file content
        filename=d["filename"],
        ufilename=d["ufilename"],
        desc=d["desc"],
    )
    existing.add(name)

# save output (incrementally or to new PDF)
docout.saveIncr()
41 | 


--------------------------------------------------------------------------------
/examples/copy-embedded/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/copy-embedded/input.pdf


--------------------------------------------------------------------------------
/examples/copy-embedded/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/copy-embedded/output.pdf


--------------------------------------------------------------------------------
/examples/decrypt-document/decrypt.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Decrypt a PDF document with the password provided and save it as a new document
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3+
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python decrypt.py input.pdf password output.pdf
10 | """
11 | 
12 | import sys
13 | import fitz
14 | 
print(fitz.__doc__)
# explicit checks instead of "assert", which is removed under "python -O"
if len(sys.argv) != 4:
    sys.exit("Usage: %s <input file> <password> <output file>" % sys.argv[0])
infile, password, outfile = sys.argv[1:4]

doc = fitz.Document(infile)
if not doc.needs_pass:
    # report the input file here - sys.argv[0] is the name of this script
    sys.exit(infile + " not password protected")

if not doc.authenticate(password):
    sys.exit('cannot decrypt %s with password "%s"' % (infile, password))

doc.save(outfile)
29 | 


--------------------------------------------------------------------------------
/examples/decrypt-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/decrypt-document/input.pdf


--------------------------------------------------------------------------------
/examples/decrypt-document/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/decrypt-document/output.pdf


--------------------------------------------------------------------------------
/examples/display-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/display-document/input.pdf


--------------------------------------------------------------------------------
/examples/draw-cardioid/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-cardioid/output.pdf


--------------------------------------------------------------------------------
/examples/draw-caustic/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-caustic/output.pdf


--------------------------------------------------------------------------------
/examples/draw-caustic/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-caustic/output.png


--------------------------------------------------------------------------------
/examples/draw-caustic/output.svgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-caustic/output.svgz


--------------------------------------------------------------------------------
/examples/draw-fractal/output_carpet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-fractal/output_carpet.png


--------------------------------------------------------------------------------
/examples/draw-fractal/output_punch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-fractal/output_punch.png


--------------------------------------------------------------------------------
/examples/draw-fractal/output_triangle.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-fractal/output_triangle.pdf


--------------------------------------------------------------------------------
/examples/draw-polygon/draw.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Draw a regular polygon with a curly border
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2017 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python draw.py
10 | 
11 | Description
12 | -----------
13 | Draw an arbitrary regular polygon using wavy lines instead of straight lines.
14 | Two output files are generated: a PDF and a SVG image file. The page size is
15 | adjusted to the drawing. This script also demonstrates how the draw commands can
16 | be used to calculate points without actually drawing them.
17 | 
18 | Dependencies
19 | ------------
20 | PyMuPDF
21 | """
22 | 
23 | import fitz
24 | 
print(fitz.__doc__)

outpdf = "output.pdf"
outsvg = "output.svg"
doc = fitz.open()
page = doc.new_page()
img = page.new_shape()
nedge = 5  # number of polygon edges
breadth = 2  # wave amplitude
beta = -1.0 * 360 / nedge  # our angle, drawn clockwise
center = fitz.Point(300, 300)  # center of circle
p0 = fitz.Point(300, 200)  # start here (1st edge = north)
p1 = +p0  # save as last edge to add
points = [p0]  # to store the polygon edges

# draw_sector is only used to calculate the polygon edges: each call returns
# the end point of its arc. The draw commands themselves are deleted below.
for _ in range(nedge - 1):
    p0 = img.draw_sector(center, p0, beta)
    points.append(p0)

# erase previous draw commands in contents buffer
img.draw_cont = ""

points.append(p1)  # add starting point to edges list
# now draw wavy lines between each pair of neighboring edges
for start, stop in zip(points, points[1:]):
    img.draw_squiggle(start, stop, breadth=breadth)

img.finish(color=(0, 0, 1), fill=(1, 1, 0), closePath=False)

# adjust visible page to dimensions of the drawing
page.set_cropbox(img.rect)
img.commit()
doc.save(outpdf)
# the "with" block closes the SVG file even if writing fails
with open(outsvg, "w") as fout:
    fout.write(page.get_svg_image())
doc.close()
64 | 


--------------------------------------------------------------------------------
/examples/draw-polygon/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-polygon/output.pdf


--------------------------------------------------------------------------------
/examples/draw-rgb-area/draw.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Draw an RGB pixel area with numpy and save it with fitz
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python draw.py
10 | 
11 | Description
12 | -----------
13 | This is 10+ times faster than saving with pure python solutions like pypng and
14 | almost 2 times faster than saving with PIL. However, PIL images are smaller than
15 | those of MuPDF.
16 | 
17 | Dependencies
18 | ------------
19 | Pillow, numpy
20 | """
21 | 
22 | from __future__ import print_function
23 | import sys
24 | import time
25 | import fitz
26 | import numpy as np
27 | import PIL
28 | from PIL import Image
29 | 
print("Python:", sys.version)
print("NumPy version", np.__version__)
print(fitz.__doc__)
print("PIL version", PIL.__version__)

height = 2048
width = 2028

# Fill the pixel area with vectorized numpy operations instead of a Python
# loop over all height * width pixels. Pixel (i, j) gets the color
# (i % 256, j % 256, (i + j) % 256), exactly as the per-pixel loop produced.
rows = np.arange(height)[:, None]  # column vector of row indices i
cols = np.arange(width)[None, :]  # row vector of column indices j
image = np.empty((height, width, 3), dtype=np.uint8)
image[..., 0] = rows % 256
image[..., 1] = cols % 256
image[..., 2] = (rows + cols) % 256

samples = image.tobytes()

# list of (timestamp, label) pairs; the first entry marks the start time
ttab = [(time.perf_counter(), "")]

pix = fitz.Pixmap(fitz.csRGB, width, height, samples, 0)
pix.save("output_fitz.png")
ttab.append((time.perf_counter(), "fitz"))

pix = Image.frombuffer("RGB", [width, height], samples, "raw", "RGB", 0, 1)
pix.save("output_PIL.png")
ttab.append((time.perf_counter(), "PIL"))

# each duration is the difference to the preceding timestamp
for previous, current in zip(ttab, ttab[1:]):
    print("storing with %s: %g sec." % (current[1], current[0] - previous[0]))
59 | 


--------------------------------------------------------------------------------
/examples/draw-rgb-area/output_PIL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-rgb-area/output_PIL.png


--------------------------------------------------------------------------------
/examples/draw-rgb-area/output_fitz.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-rgb-area/output_fitz.png


--------------------------------------------------------------------------------
/examples/draw-sines/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-sines/output.pdf


--------------------------------------------------------------------------------
/examples/edit-images/figure-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-images/figure-01.jpg


--------------------------------------------------------------------------------
/examples/edit-images/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-images/input.pdf


--------------------------------------------------------------------------------
/examples/edit-links/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-links/input.pdf


--------------------------------------------------------------------------------
/examples/edit-toc/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-toc/input.pdf


--------------------------------------------------------------------------------
/examples/embed-images/embed.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Embed the images found in the input directory
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3+
 5 | (c) 2018 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python embed.py
10 | 
11 | Dependencies
12 | ------------
13 | PyMuPDF
14 | PySimpleGUI, tkinter, optional: requires Python 3 if used
15 | """
16 | 
17 | from __future__ import print_function
18 | import os, time, sys, fitz
19 | 
20 | print(fitz.__doc__)
21 | # do some adjustments whether Python v2 or v3
22 | if str is not bytes:
23 |     import PySimpleGUI as psg
24 | 
25 |     mytime = time.perf_counter
26 | else:
27 |     mytime = time.clock
28 | 
29 | rc = False
30 | if str is bytes:
31 |     imgdir = sys.argv[1]  # where my files are
32 | else:
33 |     imgdir = psg.PopupGetFolder(
34 |         "Make a PDF from Embedded Files", "Enter file directory:"
35 |     )
36 | 
37 | if not imgdir:
38 |     raise SystemExit()
39 | 
40 | t0 = mytime()  # set start timer
41 | 
42 | doc = fitz.open()
43 | 
44 | width, height = fitz.paper_size("a4")
45 | rect = fitz.Rect(0, 0, width, height) + (36, 36, -36, -36)
46 | imglist = os.listdir(imgdir)
47 | imgcount = len(imglist)
48 | 
49 | for i, f in enumerate(imglist):
50 |     path = os.path.join(imgdir, f)
51 |     if not os.path.isfile(path):
52 |         print("skipping non-file '%s'!" % f)
53 |         continue
54 | 
55 |     if str is not bytes:
56 |         psg.OneLineProgressMeter(
57 |             "Embedding Files", i + 1, imgcount, "dir: " + imgdir, "file: " + f
58 |         )
59 |     else:
60 |         print("embedding file '%s', (%i / %i)" % (f, i + 1, imgcount))
61 | 
62 |     img = open(path, "rb").read()
63 |     doc.embfile_add(f, img, filename=f, ufilename=f, desc=f)
64 | 
65 | page = doc.new_page()  # every doc needs at least one page
66 | 
67 | doc.save("output.pdf")
68 | t1 = mytime()
69 | print("%g" % round(t1 - t0, 3), "sec processing time")
70 | 


--------------------------------------------------------------------------------
/examples/embed-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/embed-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg


--------------------------------------------------------------------------------
/examples/embed-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/embed-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg


--------------------------------------------------------------------------------
/examples/embed-images/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/embed-images/output.pdf


--------------------------------------------------------------------------------
/examples/export-embedded/export.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Export an embedded file from the input document to the output document
 3 | -------------------------------------------------------------------------------
 4 | License: GNU AGPL V3
 5 | (c) 2021 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python export.py input.pdf joe-caione-qO-PIF84Vxg-unsplash.jpg output.pdf
10 | 
11 | Notes
12 | -----
13 | The output.pdf file generated in examples/embed-images is renamed as input.pdf
14 | to be used as the input file in this example.
15 | 
16 | Dependencies
17 | ------------
18 | PyMuPDF
19 | """
20 | 
21 | from __future__ import print_function
22 | import sys
23 | import fitz
24 | 
25 | pdffn = sys.argv[1]  # PDF file name
26 | name = sys.argv[2]  # embedded file identifier
27 | expfn = sys.argv[3]  # filename of exported file
28 | 
29 | doc = fitz.open(pdffn)  # open PDF
30 | outfile = open(expfn, "wb")  # to be on the safe side always open binary
31 | 
32 | # extract file content. Will get exception on any error.
33 | content = doc.embfile_get(name)
34 | 
35 | outfile.write(content)
36 | outfile.close()
37 | 


--------------------------------------------------------------------------------
/examples/export-embedded/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-embedded/input.pdf


--------------------------------------------------------------------------------
/examples/export-embedded/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-embedded/output.pdf


--------------------------------------------------------------------------------
/examples/export-metadata/export.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Export a document metadata dictionary to a CSV file
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2018 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python export.py -d ";" input.pdf
10 | """
11 | 
12 | from __future__ import print_function
13 | import fitz
14 | import argparse
15 | 
16 | parser = argparse.ArgumentParser(
17 |     description="Enter CSV delimiter [;] and documment filename"
18 | )
19 | parser.add_argument("-d", help="CSV delimiter [;]", default=";")
20 | parser.add_argument("doc", help="document filename")
21 | args = parser.parse_args()
22 | delim = args.d  # requested CSV delimiter character
23 | fname = args.doc  # input document filename
24 | 
25 | doc = fitz.open(fname)
26 | meta = doc.metadata
27 | outf = open("output.csv", "w")
28 | for k in meta.keys():
29 |     v = meta.get(k)
30 |     if not v:
31 |         v = ""
32 |     rec = delim.join([k, v])
33 |     outf.writelines([rec, "\n"])
34 | outf.close()
35 | 


--------------------------------------------------------------------------------
/examples/export-metadata/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-metadata/input.pdf


--------------------------------------------------------------------------------
/examples/export-metadata/output.csv:
--------------------------------------------------------------------------------
 1 | format;PDF 1.5
 2 | title;
 3 | author;
 4 | subject;
 5 | keywords;
 6 | creator;LaTeX with hyperref package
 7 | producer;pdfTeX-1.40.16
 8 | creationDate;D:20210318172314-04'00'
 9 | modDate;D:20210318172314-04'00'
10 | trapped;
11 | encryption;
12 | 


--------------------------------------------------------------------------------
/examples/export-toc/export.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Export the table of contents (ToC) of a document to a CSV file
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2018 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python export.py -d ";" input.pdf
10 | """
11 | 
12 | from __future__ import print_function
13 | import fitz
14 | import argparse
15 | 
16 | parser = argparse.ArgumentParser(
17 |     description="Enter CSV delimiter [;] and documment filename"
18 | )
19 | parser.add_argument("-d", help="CSV delimiter [;]", default=";")
20 | parser.add_argument("doc", help="document filename")
21 | args = parser.parse_args()
22 | delim = args.d  # requested CSV delimiter character
23 | fname = args.doc  # input document filename
24 | 
25 | doc = fitz.open(fname)
26 | toc = doc.get_toc(simple=False)
27 | ext = fname[-3:].lower()
28 | outf = open("output.csv", "w")
29 | for t in toc:
30 |     t4 = t[3]
31 |     if ext == "pdf":
32 |         if t4["kind"] == 1:
33 |             p4 = str(t4["to"].y)
34 |         else:
35 |             p4 = "0"
36 |     else:
37 |         p4 = "0"
38 |     rec = delim.join([str(t[0]), t[1].strip(), str(t[2]), p4])
39 |     outf.writelines([rec, "\n"])
40 | outf.close()
41 | 


--------------------------------------------------------------------------------
/examples/export-toc/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-toc/input.pdf


--------------------------------------------------------------------------------
/examples/extract-images/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/input.pdf


--------------------------------------------------------------------------------
/examples/extract-images/output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/output/.gitkeep


--------------------------------------------------------------------------------
/examples/extract-images/output/img00005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/output/img00005.png


--------------------------------------------------------------------------------
/examples/extract-images/output/img00011.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/output/img00011.png


--------------------------------------------------------------------------------
/examples/extract-table/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-table/input.pdf


--------------------------------------------------------------------------------
/examples/extract-xobj/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-xobj/input.pdf


--------------------------------------------------------------------------------
/examples/extract-xobj/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-xobj/output.pdf


--------------------------------------------------------------------------------
/examples/filmfestival-2tables/README.md:
--------------------------------------------------------------------------------
 1 | # Example for PyMuPDF Reporting
 2 | 
 3 | This script creates a report about a fictitious film festival.
 4 | 
 5 | It extracts data from an SQL database (sqlite3). The database contains two tables:
 6 | * films
 7 | * actors
 8 | 
 9 | The **_films_** table has columns **title**, **director**, **year** and the **_actors_** table has columns **name** and **film** title.
10 | 
11 | Two tabular reports are created in one common PDF.
12 | 1. Report 1 lists all films and names all actors being cast.
13 | 2. Report 2 lists all actors together with all the films where they have been cast.
14 | 
15 | The following are noteworthy details:
16 | * Demonstrate how to use fonts from the [pymupdf-fonts](https://pypi.org/project/pymupdf-fonts/) package.
17 | * Demonstrate how to combine multiple report sections (here: two table sections) in one report.
18 | * Due to MuPDF's automatic layouting algorithm, major layout changes can be achieved without coding effort, like
19 |     - choice of page size or paper format
20 |     - choice of number of columns per page
21 | * The layout can only be influenced via the HTML and styling (CSS) definitions.


--------------------------------------------------------------------------------
/examples/filmfestival-2tables/filmfestival.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/filmfestival-2tables/filmfestival.db


--------------------------------------------------------------------------------
/examples/filmfestival-2tables/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/filmfestival-2tables/output.pdf


--------------------------------------------------------------------------------
/examples/icons/PyMuPDF.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/icons/PyMuPDF.ico


--------------------------------------------------------------------------------
/examples/icons/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 


--------------------------------------------------------------------------------
/examples/icons/ico_pdf.py:
--------------------------------------------------------------------------------
 1 | # ----------------------------------------------------------------------
 2 | # This file was generated by img2py.py
 3 | #
 4 | from wx.lib.embeddedimage import PyEmbeddedImage
 5 | 
 6 | img = PyEmbeddedImage(
 7 |     "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABHNCSVQICAgIfAhkiAAAA4BJ"
 8 |     "REFUWIXFll1oHFUUx393dyabrbvsbrZJuqlCxNWskppWMo3Yl9hiIlqFWjCFvhrBF0GK+lIf"
 9 |     "fLR5EPzCJ1Fr1AcFsR8iRGuMIIZSqaZGXC3YxK8YTGN0d7vJneNDOmv2c2aziR4YuGfuOed/"
10 |     "5pz/PXPhfxa1VhkYGEhlMplpEdkUsGAwyNjYWAL4tWICk5OTYlkWIoKThLMWEZRSRXrpfi0d"
11 |     "IBwO09/fz8TERBDIARhrE6gFDmDb9rrBRYSFhQWi0ShAEpgC8JWWqd7gXsGdx7btIryyBNYL"
12 |     "Fpr+kivN4ZrgIoLWugirqAXrBn9oEC7PszUQZPH18argQFkFqibgFdwOhpBvzoHPQC3nyoha"
13 |     "al9agYY5EP7u/KrjygriN6p+udP/DefA5a6eQqRP7nsYrXVNQnqqQF1tyGX+9d13oKb92veO"
14 |     "NMwBXz6PyudAw65dPYVZUc3e0ymopxJgI4Eg2OLJf0M5ABC98BUIoOD8H4uu9hvGAWc9f/hO"
15 |     "Zn/Kgs9H1+3Xu9q7VqAe8JVIjHjsGr544zRTsxkSMQpzoFos1wpUSqRasMUOxcT3f7O3r4/O"
16 |     "104izc2cnZmp+SGeK+BWhVwowg3btjDz5im01mQtCwyDbXfdVtN/Q1ogIoS7DH64lOHuO/oK"
17 |     "+0q309MRZE6VT0RH95SAG3h8/CThcBMX3/4QrTXaNLmyJQxPHAUgsbuVxKMP0vHKMf6MtBb5"
18 |     "lyZQdCMSEVlaWqo9zZQi3r8dlV09/+rgEHz+KT9OfYtPQ8IPRrwJcssQMGHHbj4+8hzJtjha"
19 |     "awYHB0mn0zu4eiHx9Dte9jfRNvoscuEcanvn6o8nFuX5o8exWiNcN/wUzaaJbdvMidCxtxNE"
20 |     "4JadyNRZsr/9jLS2VORAzVGcD0VpO3Ar7LsfGX0JdW0nnDnBZy9/xE2pFEMl7HbWs2MX+f3E"
21 |     "W+x89RnefXoUq7u7ENvzKI48fhgVifHCA48xsGcPN7a0w8gRTh17DyuVcj0tW/cfYubeISyX"
22 |     "UVyWAMCKMmDyDGIGGB5+EmN/F8r0c3rkfazeXs9HtZLuaRT7dJ5LL36Ayv5F4GAvxx8ZYfqd"
23 |     "r+ltEBzq4EDo5m7mx38B4J6rTo2Ce2pBI8G9gG/atdyLDuUtKBpEyWRS0ul0EVEcx2ritr9W"
24 |     "TNPE7/dj23Y7MFfJpkUplQWWN+NRSi0Chzxn/F/IPzlRiedgNxiOAAAAAElFTkSuQmCC"
25 | )
26 | 


--------------------------------------------------------------------------------
/examples/icons/pdf.py:
--------------------------------------------------------------------------------
 1 | # ----------------------------------------------------------------------
 2 | # This file was generated by img2py.py
 3 | #
 4 | from wx.lib.embeddedimage import PyEmbeddedImage
 5 | 
 6 | img = PyEmbeddedImage(
 7 |     "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABHNCSVQICAgIfAhkiAAABA5J"
 8 |     "REFUWIXtll1MU2cYx3/nox+0BVqB0lEhQNkUhzqyiWZxUTxL5sjmEm5MNsKyZC5L5uUSNubY"
 9 |     "XBSmxkSNmduFM3Zhbl4w3RY/yOpGsmRLNBHdIARFHCKfhfJdPtpzdtEVqIyWBNAb/zfnfZ8n"
10 |     "7/P/5X3Oe94j8KBKz3+HzhJg1GvEnDw+J7+UOqmUSBGBwg92kJybC4DeFFhWcwCbs0mOCIi6"
11 |     "Ilp/S0RvCYE5n+tfVoArVY3ynGDW1kHG+g0A08+lVMf1/QDUVdYDzAUAMK2YWHLjsP4zDktc"
12 |     "NqMF6jHAY4D/PwWz9Jqs3DtXwU4ENNpJF77ynA1WKPmintARFYBenhCOeH4Ir/F+qDydFEci"
13 |     "Atp7n1H7xZQnfr76MXfgXAU7Py0nbawRJ07aubnHRxARDcF9lBXVR3GRQqe2T9kUcv/TlxRH"
14 |     "YnkZGZUVZBc/yTvR6kcHaKv2AbQO0GRaTTfAq+Y/isLp0ioa36jk3epjuFARX/cqT13c+NGb"
15 |     "AC19NG0vgBcbPWeiWURvgRjic3e+XL/5yMXcqSAbfnKc+gbeKgAQ3vbcBV7QTii/0ItL9ZOG"
16 |     "gADw/bcYhPc91VHrs9CXMNg3uWsH+T/+wwnMGdMX2HGbskbbr2yiF9fYbZJ9OurQ0ADch3As"
17 |     "pHT0HdAmpg7uJQHiZeG05yw2EgEOfY5dAA0LfcerUGWJ7PQUhMsOj1biV9SDe0kouyCf4SVM"
18 |     "iwNImvRtKOUy/VevAc+HwwWlXAK4L9FRGGDLylO11QxKAsDKIn52BhG22QMtVzpZGwtAiJgp"
19 |     "H39J5lZLrEWL0kmlZPY0+g5MdPrx1XYTX2DDnJs4He86fXd6PNo4huvAmjk502oLCRuTY/FE"
20 |     "B/A3DxXb3XJN17B3NkCx3S0D1N9n4JksrKhKu9WI/utJjz2cq7mV07F4ANEotfTiQzQbGfy9"
21 |     "h8TNdoCWXkYA7qR78u6oE+r63qJmAAy/ai0Todx66239jVjuMQGEOKmhhxEcCRa57ZM2oC2Q"
22 |     "V/tsQ0/IhFRANIg3hvMCsv/v8Thx32iDGjqGBhmR1NgAkT+l2VtewZqpn9kBvaiONg8T6JtS"
23 |     "pSRZ1Tl0DF/zqZJVUnUOHUNXfXjPd4MoqDqHLpDrXqcGRsZVjMKUmKFjqM6LbVtKhMd1d83s"
24 |     "afQPkc5mwJRlxJCqx5RlxJRlJHPPqumx3irPjJNkNEkgbbcLU5YRs9OAIVXPvcO3olnEvA1J"
25 |     "2+1i+KYP0SBiXhV6EVN3Zc5UkEU0SZizZuSvAeQ4EWNOwuIAAOLX2SLmkmGmddo8ayxrrQsp"
26 |     "/ch/SB45QKgFheWH0cgnGAToeLgA2w9cAEANTiKK83V02TTTAlFS8Q/pHj7ApbIi0ASC4xo5"
27 |     "SteyurV6jj0Y+hezjEXWj4VyvAAAAABJRU5ErkJggg=="
28 | )
29 | 


--------------------------------------------------------------------------------
/examples/icons/pymupdf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/icons/pymupdf.png


--------------------------------------------------------------------------------
/examples/import-embedded/import.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Import a file to a document
 3 | -------------------------------------------------------------------------------
 4 | License: GNU AGPL V3
 5 | (c) 2023 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python import.py input.pdf joe-caione-qO-PIF84Vxg-unsplash.jpg -o output.pdf
10 | 
11 | Dependencies
12 | ------------
13 | PyMuPDF
14 | """
15 | 
16 | from __future__ import print_function
17 | import fitz
18 | import argparse
19 | 
20 | parser = argparse.ArgumentParser(
21 |     description="Enter PDF, file to embed, and optional name, description and output pdf."
22 | )
23 | parser.add_argument("pdf", help="PDF filename")
24 | parser.add_argument("file", help="name of embedded file")
25 | parser.add_argument("-n", "--name", help="name for embedded file entry (default: file)")
26 | parser.add_argument("-d", "--desc", help="description (default:  file)")
27 | parser.add_argument("-o", "--output", help="output PDF (default: modify pdf)")
28 | 
29 | args = parser.parse_args()
30 | 
31 | if not args.name:
32 |     name = args.file
33 | desc = args.desc
34 | if not args.desc:
35 |     desc = args.file
36 | 
37 | content = open(args.file, "rb").read()
38 | doc = fitz.open(args.pdf)
39 | doc.embfile_add(name, content, args.file, desc)
40 | 
41 | if not args.output:
42 |     doc.saveIncr()
43 | else:
44 |     doc.save(args.output, garbage=4, deflate=True)
45 | 


--------------------------------------------------------------------------------
/examples/import-embedded/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-embedded/input.pdf


--------------------------------------------------------------------------------
/examples/import-embedded/joe-caione-qO-PIF84Vxg-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-embedded/joe-caione-qO-PIF84Vxg-unsplash.jpg


--------------------------------------------------------------------------------
/examples/import-embedded/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-embedded/output.pdf


--------------------------------------------------------------------------------
/examples/import-metadata/import.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Import a metadata dictionary from a CSV file into a PDF document
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2023 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python import.py -d ";" -x "n" -csv input.csv -pdf input.pdf
10 | 
11 | Description
12 | -----------
13 | The output.csv file generated in examples/export-metadata is renamed as input.csv
14 | to be used as an input file in this example. The input.pdf file behaves as both
15 | an input and an output file.
16 | 
17 | Dependencies
18 | ------------
19 | PyMuPDF
20 | """
21 | 
22 | import csv
23 | import fitz
24 | import argparse
25 | 
26 | parser = argparse.ArgumentParser(
27 |     description="Enter CSV delimiter [;], CSV filename and documment filename"
28 | )
29 | parser.add_argument("-d", help="CSV delimiter [;]", default=";")
30 | parser.add_argument("-x", help="delete XML info [n]", default="n")
31 | parser.add_argument("-csv", help="CSV filename")
32 | parser.add_argument("-pdf", help="PDF filename")
33 | 
34 | args = parser.parse_args()
35 | 
36 | assert args.csv, "missing CSV filename"
37 | assert args.pdf, "missing PDF filename"
38 | 
39 | print("delimiter", args.d)
40 | print("xml delete", args.x)
41 | print("csv file", args.csv)
42 | print("pdf file", args.pdf)
43 | print("----------------------------------------")
44 | 
45 | doc = fitz.open(args.pdf)
46 | oldmeta = doc.metadata
47 | print("old metadata:")
48 | for k, v in oldmeta.items():
49 |     print(k, ":", v)
50 | 
51 | with open(args.csv) as tocfile:
52 |     tocreader = csv.reader(tocfile, delimiter=args.d)
53 |     for row in tocreader:
54 |         oldmeta[row[0]] = row[1]
55 | 
56 | print("----------------------------------------")
57 | print("\nnew metadata:")
58 | for k, v in oldmeta.items():
59 |     print(k, ":", v)
60 | 
61 | doc.set_metadata(oldmeta)
62 | doc.saveIncr()
63 | 


--------------------------------------------------------------------------------
/examples/import-metadata/input.csv:
--------------------------------------------------------------------------------
 1 | format;PDF 1.5
 2 | title;
 3 | author;
 4 | subject;
 5 | keywords;
 6 | creator;LaTeX with hyperref package
 7 | producer;pdfTeX-1.40.16
 8 | creationDate;D:20210318172314-04'00'
 9 | modDate;D:20210318172314-04'00'
10 | trapped;
11 | encryption;
12 | 


--------------------------------------------------------------------------------
/examples/import-metadata/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-metadata/input.pdf


--------------------------------------------------------------------------------
/examples/import-toc/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-toc/input.pdf


--------------------------------------------------------------------------------
/examples/insert-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg


--------------------------------------------------------------------------------
/examples/insert-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg


--------------------------------------------------------------------------------
/examples/insert-images/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-images/output.pdf


--------------------------------------------------------------------------------
/examples/insert-logo/file.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Insert the MuPDF logo in PNG format in all pages of a PDF document
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2018-2019 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python file.py input.pdf logo.png
10 | 
11 | Description
12 | -----------
13 | Any PyMuPDF-supported document can be used as the logo/watermark including PDF,
14 | XPS, EPUB, CBZ, FB2 as well as any image type. SVG-based logos are not always
15 | shown correctly. Use a different PDF converter like svglib if that occurs.
16 | 
17 | Logos/watermarks are transparent for all document types except for images. If a
18 | transparency is required then the file must be manually converted to PDF first
19 | as described next:
20 | 
21 |     pix = fitz.Pixmap(logo_filename)
22 |     src = fitz.open()
23 |     src_page = src.new_page(-1, width = pix.width, height = pix.height)
24 |     src_page.insert_image(src_page.rect, pixmap = pix)
25 | 
26 | Dependencies
27 | ------------
28 | PyMuPDF
29 | """
30 | 
31 | import sys
32 | import fitz
33 | 
34 | src = fitz.open(sys.argv[2])
35 | 
36 | if not src.is_pdf:
37 |     pdfbytes = src.convert_to_pdf()
38 |     src.close()
39 |     src = fitz.open("pdf", pdfbytes)
40 | 
41 | rect = src[0].rect
42 | factor = 25 / rect.height
43 | rect *= factor
44 | 
45 | doc = fitz.open(sys.argv[1])
46 | xref = 0
47 | for page in doc:
48 |     xref = page.show_pdf_page(rect, src, 0, reuse_xref=xref, overlay=False)
49 | doc.save("output_file.pdf", garbage=4)
50 | 


--------------------------------------------------------------------------------
/examples/insert-logo/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/input.pdf


--------------------------------------------------------------------------------
/examples/insert-logo/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/logo.png


--------------------------------------------------------------------------------
/examples/insert-logo/output_file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/output_file.pdf


--------------------------------------------------------------------------------
/examples/insert-logo/output_svg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/output_svg.pdf


--------------------------------------------------------------------------------
/examples/insert-logo/svg.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Insert the MuPDF logo in SVG format in all pages of a PDF document
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2018-2019 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python svg.py input.pdf logo.svg
10 | 
11 | Dependencies
12 | ------------
13 | PyMuPDF, svglib
14 | """
15 | 
16 | import sys
17 | import fitz
18 | from svglib.svglib import svg2rlg
19 | 
# Two positional arguments are required: the PDF to stamp and the SVG logo.
if len(sys.argv) < 3:
    sys.exit("Usage: python svg.py input.pdf logo.svg")

# Convert the SVG into an in-memory PDF so PyMuPDF can treat it as a page.
drawing = svg2rlg(sys.argv[2])
if drawing is None:  # svg2rlg yields None when the SVG cannot be loaded
    sys.exit("cannot read SVG file '%s'" % sys.argv[2])
pdfbytes = drawing.asString("pdf")

src = fitz.open("pdf", pdfbytes)

# Scale the logo's page rectangle so that its height becomes 25, keeping
# the aspect ratio; the scaled rectangle is the target area on each page.
rect = src[0].rect
factor = 25 / rect.height
rect *= factor

# Show the logo page on top of the existing content of every page.
doc = fitz.open(sys.argv[1])
for page in doc:
    page.show_pdf_page(rect, src, 0, overlay=True)
doc.save("output_svg.pdf", garbage=4)
33 | 


--------------------------------------------------------------------------------
/examples/join-documents/input/made-with-cc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/join-documents/input/made-with-cc.pdf


--------------------------------------------------------------------------------
/examples/join-documents/input/thinkpython2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/join-documents/input/thinkpython2.pdf


--------------------------------------------------------------------------------
/examples/join-documents/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/join-documents/output.pdf


--------------------------------------------------------------------------------
/examples/list-embedded/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/list-embedded/input.pdf


--------------------------------------------------------------------------------
/examples/list-embedded/list.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Print a list of embedded files in a document
 3 | -------------------------------------------------------------------------------
 4 | License: GNU AGPL V3
 5 | (c) 2021 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python list.py input.pdf
10 | 
11 | Notes
12 | -----
13 | The output.pdf file generated in examples/embed-images is renamed as input.pdf
14 | to be used as the input file in this example.
15 | 
16 | Dependencies
17 | ------------
18 | PyMuPDF
19 | """
20 | 
21 | from __future__ import print_function
22 | import sys
23 | import fitz
24 | 
# The document to inspect is the first command line argument.
if len(sys.argv) < 2:
    sys.exit("Usage: python list.py input.pdf")
fn = sys.argv[1]
doc = fitz.open(fn)

# Column widths (longest name / filename) and running totals.
name_len = filename_len = 0
total_len = total_size = 0

ef_list = []

# Collect (name, filename, length, size) for every embedded file.
for i in range(doc.embfile_count()):
    info = doc.embfile_info(i)
    ef = (
        info["name"],
        info["filename"],
        info["length"],
        info["size"],
    )
    ef_list.append(ef)
    name_len = max(len(ef[0]), name_len)
    filename_len = max(len(ef[1]), filename_len)
    total_len += ef[2]
    total_size += ef[3]

if not ef_list:
    print("no embedded files in", fn)
    # sys.exit instead of the site-provided exit(), which is not always defined
    sys.exit(1)

# Ratio of stored size to original length. If every embedded file is empty
# there is nothing to relate the size to, so report "no savings" rather
# than failing with a division by zero.
ratio = float(total_size) / total_len if total_len else 1.0
saves = 1 - ratio

header = (
    "Name".ljust(name_len + 4)
    + "Filename".ljust(filename_len + 4)
    + "Length".rjust(10)
    + "Size".rjust(11)
)
line = "-".ljust(len(header), "-")
print(line)
print(header)
print(line)
# print() inserts one blank between items, hence the widths are one less
# than those used for the header columns.
for info in ef_list:
    print(
        info[0].ljust(name_len + 3),
        info[1].ljust(filename_len + 3),
        str(info[2]).rjust(10),
        str(info[3]).rjust(10),
    )
print(line)
print(len(ef_list), "embedded files in '%s'. Totals:" % (fn,))
print(
    "File lengths: %s, compressed: %s, ratio: %s%% (savings: %s%%)."
    % (total_len, total_size, str(round(ratio * 100, 2)), str(round(saves * 100, 2)))
)
print(line)
78 | 


--------------------------------------------------------------------------------
/examples/make-calendar/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/make-calendar/output.pdf


--------------------------------------------------------------------------------
/examples/optimize-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/optimize-document/input.pdf


--------------------------------------------------------------------------------
/examples/optimize-document/optimize.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Optimize a PDF document with FileOptimizer.
 3 | -------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python optimize.py input.pdf
10 | 
11 | Notes
12 | -----
13 | Since "/Producer" and "/Creator" get affected by this, the document metadata is
14 | first saved to be restored after the optimization is completed. This means
15 | non-compressed object definitions are also accepted as created by FileOptimizer.
16 | 
17 | Dependencies
18 | ------------
19 | PyMuPDF, FileOptimizer
20 | """
21 | 
22 | from __future__ import print_function
23 | import fitz
24 | import sys, os, subprocess, tempfile, time
25 | 
# Validate the command line explicitly; assert statements would be
# stripped when Python runs with -O.
if len(sys.argv) != 2:
    sys.exit("need filename parameter")
fn = sys.argv[1]
if not fn.lower().endswith(".pdf"):
    sys.exit("must be a PDF file")

fullname = os.path.abspath(fn)  # get the full path & name
t0 = time.perf_counter()  # save current time
doc = fitz.open(fullname)  # open PDF to save metadata
meta = doc.metadata
doc.close()

t1 = time.perf_counter()  # save current time again
subprocess.call(["fileoptimizer64", fullname])  # now invoke super optimizer
t2 = time.perf_counter()  # save current time again

cdir = os.path.split(fullname)[0]  # split dir from filename
# Create the temp file next to the original so the final replace stays on
# the same file system. Only its name is needed, so close the handle now.
tmp_fd, tmp_name = tempfile.mkstemp(suffix=".pdf", dir=cdir)
os.close(tmp_fd)
doc = fitz.open(fullname)  # open now optimized PDF
doc.set_metadata(meta)  # restore old metadata
doc.save(tmp_name, garbage=4)  # save temp PDF with it, a little sub opt
doc.close()  # close it

# Swap the temp file in for the optimized file in a single step, so there
# is no moment at which the original name does not exist.
os.replace(tmp_name, fullname)
t3 = time.perf_counter()  # save current time again

# put out runtime statistics
print("Timings:")
print(str(round(t1 - t0, 4)).rjust(10), "save old metadata")
print(str(round(t2 - t1, 4)).rjust(10), "execute FileOptimizer")
print(str(round(t3 - t2, 4)).rjust(10), "restore old metadata")
57 | 


--------------------------------------------------------------------------------
/examples/posterize-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/posterize-document/input.pdf


--------------------------------------------------------------------------------
/examples/posterize-document/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/posterize-document/output.pdf


--------------------------------------------------------------------------------
/examples/posterize-document/posterize.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Create a PDF copy with split-up pages (posterize)
 3 | --------------------------------------------------------------------------------
 4 | License: GNU AGPL V3.0+
 5 | (c) 2018 Jorj X. McKie
 6 | 
 7 | Usage
 8 | ------
 9 | python posterize.py input.pdf
10 | 
11 | Description
12 | -----------
13 | The output.pdf file contains 4 pages for every input page. The top-left,
14 | top-right, bottom-left, bottom-right parts of the page are now separate pages.
15 | The page dimensions are 1/4 page of the input file.
16 | 
17 | Dependencies
18 | ------------
19 | PyMuPDF
20 | """
21 | 
22 | from __future__ import print_function
23 | import fitz, sys
24 | 
src = fitz.open(sys.argv[1])  # the document to split up
doc = fitz.open()  # new, empty output PDF

for spage in src:
    r = spage.rect
    # Offset rectangle built from the CropBox position; it is added to each
    # quarter below to shift the clip area accordingly.
    d = fitz.Rect(spage.cropbox_position, spage.cropbox_position)

    r1 = r * 0.5  # top left
    r2 = r1 + (r1.width, 0, r1.width, 0)  # top right
    r3 = r1 + (0, r1.height, 0, r1.height)  # bottom left
    r4 = fitz.Rect(r1.br, r.br)  # bottom right
    rect_list = [r1, r2, r3, r4]

    # One output page per quarter, sized like the quarter itself.
    for rx in rect_list:
        rx += d
        page = doc.new_page(-1, width=rx.width, height=rx.height)
        # The deprecated 'reuse_xref' keyword is no longer passed here.
        page.show_pdf_page(
            page.rect,
            src,
            spage.number,
            clip=rx,
        )

doc.save("output.pdf", garbage=4, deflate=True)
51 | 


--------------------------------------------------------------------------------
/examples/print-hsv/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/print-hsv/output.pdf


--------------------------------------------------------------------------------
/examples/print-rgb/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/print-rgb/output.pdf


--------------------------------------------------------------------------------
/examples/replace-image/input.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/input.jpg


--------------------------------------------------------------------------------
/examples/replace-image/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/input.pdf


--------------------------------------------------------------------------------
/examples/replace-image/output_remove.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/output_remove.pdf


--------------------------------------------------------------------------------
/examples/replace-image/output_replace.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/output_replace.pdf


--------------------------------------------------------------------------------
/examples/replace-image/remove.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Remove an image identified by xref
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python remove.py
10 | 
11 | Description
12 | -----------
13 | This script does a pseudo-removal actually by replacing the image with a small
14 | fully transparent pixmap.
15 | """
16 | 
17 | import fitz
18 | from replace import img_replace
19 | 
# The replacement technique needs features of PyMuPDF v1.19.5 or later.
if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 5):
    raise ValueError("Need v1.19.5+")

doc = fitz.open("input.pdf")

page = doc[0]

images = page.get_images()  # we only are interested in first image here
if not images:
    # Give a clear message instead of a bare IndexError on images[0].
    raise SystemExit("no image found on first page of input.pdf")
item = images[0]
old_xref = item[0]

# "Removal" is done by replacement: a 1 x 1 gray pixmap with an alpha
# channel, cleared so that it is fully transparent.
pix = fitz.Pixmap(fitz.csGRAY, (0, 0, 1, 1), 1)
pix.clear_with()
img_replace(page, old_xref, pixmap=pix)

doc.ez_save("output_remove.pdf", garbage=4)
36 | 


--------------------------------------------------------------------------------
/examples/replace-image/replace.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Replace an image identified by xref
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python replace.py
10 | """
11 | 
12 | import fitz
13 | 
# Refuse to run on PyMuPDF versions older than 1.19.5.
if tuple(int(part) for part in fitz.VersionBind.split(".")) < (1, 19, 5):
    raise ValueError("Need v1.19.5+")
16 | 
17 | 
def img_replace(page, xref, filename=None, stream=None, pixmap=None):
    """Make the image object at ``xref`` show a different image.

    The new image is first inserted on the page in the normal way, then its
    object definition is copied over the old image's xref. The extra
    /Contents stream created by that insertion is blanked out afterwards.

    Args:
        page: a fitz.Page object
        xref: cross reference number of the image to replace
        filename, stream, pixmap: the new image, given as for
            page.insert_image(). Exactly one of them must be provided.

    Raises:
        ValueError: if none or more than one image source is given.
    """
    provided = [source for source in (filename, stream, pixmap) if source]
    if len(provided) != 1:
        raise ValueError("Exactly one of filename/stream/pixmap must be given")

    owner = page.parent  # the document the page belongs to

    # Where the image lands on the page is irrelevant; only the xref of
    # the newly created image object is needed.
    inserted_xref = page.insert_image(
        page.rect,
        filename=filename,
        stream=stream,
        pixmap=pixmap,
    )

    # Overwrite the old image definition with the new one.
    owner.xref_copy(inserted_xref, xref)

    # The insertion appended a /Contents stream that would display the new
    # image a second time - replace that stream with a single space.
    contents = page.get_contents()
    owner.update_stream(contents[-1], b" ")
40 | 
41 | 
if __name__ == "__main__":
    # Demo: swap the first image found on page 0 of input.pdf for input.jpg.
    doc = fitz.open("input.pdf")
    page = doc[0]
    first_image = page.get_images()[0]  # only the first image is of interest
    img_replace(page, first_image[0], filename="input.jpg")
    doc.ez_save("output_replace.pdf", garbage=4, pretty=True)
51 | 


--------------------------------------------------------------------------------
/examples/split-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/input.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/.gitkeep


--------------------------------------------------------------------------------
/examples/split-document/output/input-0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-0.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-1.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-10.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-10.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-100.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-100.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-101.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-101.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-102.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-102.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-103.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-103.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-104.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-104.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-105.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-105.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-106.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-106.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-107.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-107.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-108.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-108.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-109.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-109.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-11.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-11.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-110.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-110.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-111.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-111.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-112.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-112.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-113.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-113.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-114.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-114.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-115.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-115.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-116.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-116.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-117.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-117.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-118.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-118.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-119.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-119.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-12.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-12.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-120.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-120.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-121.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-121.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-122.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-122.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-123.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-123.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-124.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-124.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-125.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-125.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-126.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-126.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-127.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-127.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-128.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-128.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-129.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-129.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-13.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-13.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-130.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-130.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-131.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-131.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-132.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-132.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-133.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-133.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-134.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-134.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-135.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-135.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-136.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-136.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-137.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-137.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-138.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-138.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-139.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-139.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-14.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-14.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-140.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-140.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-141.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-141.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-142.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-142.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-143.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-143.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-144.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-144.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-145.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-145.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-146.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-146.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-147.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-147.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-148.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-148.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-149.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-149.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-15.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-15.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-150.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-150.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-151.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-151.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-152.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-152.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-153.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-153.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-154.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-154.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-155.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-155.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-156.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-156.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-157.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-157.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-158.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-158.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-159.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-159.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-16.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-16.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-160.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-160.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-161.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-161.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-162.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-162.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-163.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-163.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-164.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-164.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-165.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-165.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-166.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-166.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-167.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-167.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-168.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-168.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-169.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-169.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-17.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-17.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-170.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-170.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-171.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-171.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-172.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-172.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-173.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-173.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-174.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-174.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-175.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-175.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-18.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-18.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-19.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-19.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-2.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-20.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-20.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-21.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-21.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-22.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-22.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-23.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-23.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-24.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-24.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-25.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-25.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-26.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-26.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-27.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-27.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-28.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-28.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-29.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-29.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-3.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-30.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-30.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-31.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-31.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-32.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-32.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-33.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-33.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-34.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-34.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-35.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-35.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-36.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-36.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-37.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-37.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-38.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-38.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-39.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-39.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-4.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-40.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-40.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-41.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-41.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-42.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-42.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-43.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-43.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-44.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-44.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-45.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-45.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-46.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-46.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-47.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-47.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-48.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-48.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-49.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-49.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-5.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-50.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-50.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-51.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-51.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-52.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-52.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-53.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-53.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-54.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-54.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-55.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-55.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-56.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-56.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-57.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-57.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-58.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-58.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-59.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-59.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-6.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-60.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-60.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-61.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-61.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-62.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-62.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-63.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-63.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-64.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-64.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-65.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-65.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-66.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-66.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-67.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-67.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-68.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-68.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-69.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-69.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-7.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-70.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-70.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-71.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-71.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-72.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-72.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-73.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-73.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-74.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-74.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-75.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-75.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-76.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-76.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-77.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-77.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-78.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-78.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-79.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-79.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-8.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-8.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-80.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-80.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-81.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-81.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-82.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-82.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-83.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-83.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-84.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-84.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-85.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-85.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-86.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-86.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-87.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-87.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-88.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-88.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-89.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-89.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-9.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-9.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-90.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-90.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-91.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-91.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-92.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-92.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-93.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-93.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-94.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-94.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-95.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-95.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-96.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-96.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-97.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-97.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-98.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-98.pdf


--------------------------------------------------------------------------------
/examples/split-document/output/input-99.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-99.pdf


--------------------------------------------------------------------------------
/examples/split-document/split.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Split a PDF document into multiple pages (1 per page)
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python split.py input.pdf
10 | """
11 | 
12 | import sys
13 | import fitz
14 | 
15 | fn = sys.argv[1]
16 | fn1 = fn[:-4]
17 | src = fitz.open(fn)
18 | for i in range(len(src)):
19 |     doc = fitz.open()
20 |     doc.insert_pdf(src, from_page=i, to_page=i)
21 |     doc.save("./output/%s-%i.pdf" % (fn1, i))
22 |     doc.close()
23 | 


--------------------------------------------------------------------------------
/examples/test-blendmode/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/test-blendmode/output.pdf


--------------------------------------------------------------------------------
/examples/tile-image/input.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/input.jpg


--------------------------------------------------------------------------------
/examples/tile-image/output/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/.gitkeep


--------------------------------------------------------------------------------
/examples/tile-image/output/target-00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-00.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-01.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-02.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-10.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-11.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-12.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-20.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-21.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-22.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-30.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-31.png


--------------------------------------------------------------------------------
/examples/tile-image/output/target-32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-32.png


--------------------------------------------------------------------------------
/examples/tile-image/tile.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tile an image into 3 x 4 tiles
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python tile.py input.jpg
10 | 
11 | Description
12 | -----------
13 | This script demonstrates some of MuPDF's non-PDF graphic capabilities.
14 | """
15 | 
16 | import sys
17 | import fitz
18 | 
19 | print(fitz.__doc__)
20 | assert len(sys.argv) == 2, "Usage: %s <input file>" % sys.argv[0]
21 | 
22 | pix0 = fitz.Pixmap(sys.argv[1])
23 | tar_cs = pix0.colorspace
24 | tar_width = pix0.width * 3
25 | tar_height = pix0.height * 4
26 | tar_irect = fitz.IRect(0, 0, tar_width, tar_height)
27 | tar_pix = fitz.Pixmap(tar_cs, tar_irect, pix0.alpha)
28 | tar_pix.clear_with(90)
29 | 
30 | for i in list(range(4)):
31 |     y = i * pix0.height
32 |     for j in list(range(3)):
33 |         x = j * pix0.width
34 |         pix0.set_origin(x, y)
35 |         tar_pix.copy(pix0, pix0.irect)
36 |         fn = "./output/target-" + str(i) + str(j) + ".png"
37 |         tar_pix.save(fn)
38 | 


--------------------------------------------------------------------------------
/examples/view-document/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/view-document/input.pdf


--------------------------------------------------------------------------------
/examples/view-document/view.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Object-oriented (OOP) PDF viewer using wxPython
 3 | --------------------------------------------------------------------------------
 4 | License: GNU GPL V3
 5 | (c) 2022 Jorj X. McKie
 6 | 
 7 | Usage
 8 | -----
 9 | python view.py input.pdf
10 | 
11 | Dependencies
12 | ------------
13 | wxPython
14 | """
15 | 
16 | import sys
17 | import wx
18 | import wx.lib.sized_controls as sc
19 | from wx.lib.pdfviewer import pdfButtonPanel, pdfViewer
20 | 
21 | 
class PDFViewer(sc.SizedFrame):
    """Sized frame holding a wxPython PDF button panel and a PDF viewer."""

    def __init__(self, parent, **kwds):
        """Build the frame and its two child controls.

        ``parent`` and all keyword arguments are passed on unchanged to the
        ``sc.SizedFrame`` constructor.
        """
        super().__init__(parent, **kwds)

        pane = self.GetContentsPane()

        # navigation / zoom buttons
        panel = pdfButtonPanel(
            pane, wx.NewIdRef(), wx.DefaultPosition, wx.DefaultSize, 0
        )
        self.buttonpanel = panel
        panel.SetSizerProps(expand=True)

        # the page display itself
        viewer_style = wx.HSCROLL | wx.VSCROLL | wx.SUNKEN_BORDER
        viewer = pdfViewer(
            pane, wx.NewIdRef(), wx.DefaultPosition, wx.DefaultSize, viewer_style
        )
        self.viewer = viewer
        viewer.SetSizerProps(expand=True, proportion=1)

        # both controls need a reference to each other
        panel.viewer = viewer
        viewer.buttonpanel = panel
44 | 
45 | 
if __name__ == "__main__":
    import wx.lib.mixins.inspection as WIT

    # Check the command line first: a missing file name should produce a
    # usage hint, not an IndexError after the wx application was created.
    if len(sys.argv) < 2:
        sys.exit("Usage: python view.py input.pdf")
    fname = sys.argv[1]

    app = WIT.InspectableApp(redirect=False)
    pdfV = PDFViewer(None, size=(800, 600))
    pdfV.viewer.LoadFile(fname)
    pdfV.Show()

    app.MainLoop()
56 | 


--------------------------------------------------------------------------------
/examples/zerofy-rotation/derotate.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import fitz
 3 | 
 4 | 
 5 | def page_rotation_set0(page):
 6 |     """Nullify page rotation."""
 7 | 
 8 |     rot = page.rotation  # contains normalized rotation value
 9 |     if rot == 0:
10 |         return page  # nothing to do
11 |     # need to derotate the page's content
12 |     mb = page.mediabox  # current mediabox
13 | 
14 |     if rot == 90:
15 |         # before derotation, shift content horizontally
16 |         mat0 = fitz.Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
17 |     elif rot == 270:
18 |         # before derotation, shift content vertically
19 |         mat0 = fitz.Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
20 |     else:
21 |         mat0 = fitz.Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)
22 | 
23 |     # prefix with derotation matrix
24 |     mat = mat0 * page.derotation_matrix
25 |     cmd = b"%g %g %g %g %g %g cm " % tuple(mat)
26 |     xref = fitz.TOOLS._insert_contents(page, cmd, 0)
27 | 
28 |     # swap x- and y-coordinates
29 |     if rot in (90, 270):
30 |         x0, y0, x1, y1 = mb
31 |         mb.x0 = y0
32 |         mb.y0 = x0
33 |         mb.x1 = y1
34 |         mb.y1 = x1
35 |         page.set_mediabox(mb)
36 | 
37 |     page.set_rotation(0)
38 | 
39 |     # refresh the page to apply these changes
40 |     doc = page.parent
41 |     pno = page.number
42 |     page = doc[pno]
43 |     page.clean_contents()
44 |     return page
45 | 
46 | 
if __name__ == "__main__":
    import os

    # Explicit argument check instead of a bare "except:", which would also
    # hide unrelated errors.
    if len(sys.argv) < 2:
        sys.exit("Usage: python derotate.py input.pdf")
    filename = sys.argv[1]

    doc = fitz.open(filename)
    for pno in range(len(doc)):
        page_rotation_set0(doc[pno])

    # Derive the output name from the name without its extension. Replacing
    # the substring ".pdf" leaves names like "x.PDF" unchanged (so the output
    # would equal the input) and also alters ".pdf" inside directory names.
    root = os.path.splitext(filename)[0]
    doc.ez_save(root + "-rot0.pdf", clean=True)
56 | 


--------------------------------------------------------------------------------
/examples/zerofy-rotation/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/zerofy-rotation/input.pdf


--------------------------------------------------------------------------------
/examples/zerofy-rotation/zerofy-rotation.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This is a PyMuPDF utility script performing the following function:
 3 | 
 4 | It copies the input pages to the output file giving all pages a rotation
 5 | of zero - without changing page appearance.
 6 | 
 7 | Usage: "python zerofy-rotation.py input.pdf"
 8 | 
 9 | The resulting output file will be named "input-rot0.pdf".
10 | """
11 | 
12 | import sys
13 | import fitz
14 | 
15 | try:
16 |     src = fitz.open(sys.argv[1])  # source file
17 | except:
18 |     print("Usage: 'python zerofy-rotation.py input.pdf'\n")
19 |     raise
20 | doc = fitz.open()  # new output file
21 | 
22 | for src_page in src:  # iterate over input pages
23 |     src_rect = src_page.rect  # source page rect
24 |     w, h = src_rect.br  # save its width, height
25 |     src_rot = src_page.rotation  # save source rotation
26 |     src_page.set_rotation(0)  # set rotation to 0 temporarily
27 |     page = doc.new_page(width=w, height=h)  # make output page
28 |     page.show_pdf_page(  # insert source page
29 |         page.rect,
30 |         src,
31 |         src_page.number,
32 |         rotate=-src_rot,  # reversed original rotation
33 |     )
34 | 
35 | src.close()
36 | doc.ez_save(src.name.replace(".pdf", "-rot0.pdf"), clean=True)
37 | 


--------------------------------------------------------------------------------
/fields/date-field.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Demo Script: How to insert a text field in DATE format.
 3 | 
 4 | This script insert a DATE field on some PDF page using JavaScript for
 5 | formatting and field validation.
 6 | 
 7 | Note:
 8 | -----
 9 | This is an example for how to employ JavaScript for field formatting and
10 | validation. Consult this reference for other field types and situations,
11 | like inter-field validation and more:
12 | http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/Acro6JSGuide.pdf
13 | """
14 | import fitz
15 | 
16 | # JavaScripts for defining a "date" field format and handling user keystrokes.
17 | JSF = 'AFDate_FormatEx("mm/dd/yyyy");'  # JS to define the format
18 | JSK = 'AFDate_KeystrokeEx("mm/dd/yyyy");'  # JS to handle keystrokes
19 | 
20 | doc = fitz.open()
21 | page = doc.new_page()
22 | w = fitz.Widget()  # create a skeleton Widget object
23 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT  # DATE fields are subtypes of TEXT
24 | w.rect = fitz.Rect(20, 20, 160, 80)  # where the date field appears on page
25 | w.field_name = "Date"  # give it a unique name
26 | w.field_value = "12/12/2022"  # field value
27 | 
28 | # insert JavaScripts in the widget
29 | w.script_format = JSF  # defines the format
30 | w.script_stroke = JSK  # handles keystrokes
31 | 
32 | annot = page.add_widget(w)  # insert the field in the page
33 | 
34 | doc.save(__file__.replace(".py", ".pdf"))
35 | 


--------------------------------------------------------------------------------
/fields/interfield-calculation.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Demo script: How to define inter-field interactions.
 3 | 
 4 | Make a PDF with three pages. On each page, two fields are added and the
 5 | result is stored in a third field on that page.
 6 | 
 7 | Choosing three pages doing essentially the same thing shall demonstrate,
 8 | that field names across the whole PDF must be uniquely named.
 9 | 
10 | Note:
11 | -----
12 | This is an example for how to employ JavaScript for field formatting and
13 | validation. Consult this reference for other field types and situations,
14 | like inter-field validation and more:
15 | http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/Acro6JSGuide.pdf
16 | 
17 | Dependencies
18 | ------------
19 | PyMuPDF version 1.22.0 or later
20 | """
21 | import fitz
22 | 
23 | if not tuple(map(int, fitz.VersionBind.split("."))) > (1, 21, 1):
24 |     raise AssertionError("need PyMuPDF version > 1.21.1")
25 | 
26 | r1 = fitz.Rect(100, 100, 300, 120)
27 | r2 = fitz.Rect(100, 130, 300, 150)
28 | r3 = fitz.Rect(100, 180, 300, 200)
29 | 
30 | doc = fitz.open()  # make a new, empty PDF
31 | for i in range(3):  # make three pages in it
32 |     # in essence we are causing the computation NUM1 + NUM2 = RESULT
33 |     page = doc.new_page()  # make the page
34 | 
35 |     w = fitz.Widget()
36 |     w.field_name = f"NUM1{page.number}"  # unique name in document
37 |     w.rect = r1
38 |     w.field_type = fitz.PDF_WIDGET_TYPE_TEXT
39 |     w.field_value = f"{i*100+1}"
40 |     w.field_flags = 2
41 |     page.add_widget(w)
42 | 
43 |     w = fitz.Widget()
44 |     w.field_name = f"NUM2{page.number}"  # unique name in document
45 |     w.rect = r2
46 |     w.field_type = fitz.PDF_WIDGET_TYPE_TEXT
47 |     w.field_value = "200"
48 |     w.field_flags = 2
49 |     page.add_widget(w)
50 | 
51 |     w = fitz.Widget()  # the result field
52 |     w.field_name = f"RESULT{page.number}"  # unique name in document
53 |     w.rect = r3
54 |     w.field_type = fitz.PDF_WIDGET_TYPE_TEXT
55 |     w.field_value = "Resultat?"
56 |     w.script_calc = f'AFSimple_Calculate("SUM", new Array("NUM1{page.number}", "NUM2{page.number}"));'
57 |     page.add_widget(w)
58 | 
59 | doc.save(__file__.replace(".py", ".pdf"))
60 | 


--------------------------------------------------------------------------------
/fields/switch-text-on-off.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Demo script: How to show or hide fields based on checkbox content.
 3 | 
 4 | Depending on whether some checkbox is being checked, show or hide
 5 | a text widget.
 6 | 
 7 | Note:
 8 | -----
 9 | This is an example for how to employ JavaScript for field formatting and
10 | validation. Consult this reference for other field types and situations,
11 | like inter-field validation and more:
12 | http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/Acro6JSGuide.pdf
13 | 
14 | Dependencies
15 | ------------
16 | PyMuPDF version 1.22.0 or later
17 | """
18 | import fitz
19 | 
20 | if not tuple(map(int, fitz.VersionBind.split("."))) > (1, 21, 1):
21 |     raise AssertionError("need PyMuPDF version > 1.21.1")
22 | 
23 | # This JavaScript will be executed if the checkbox value changes
24 | JSCRIPT = """if (this.getField("my-checkbox").value == "Yes")
25 |     this.getField("my-text").display = display.visible;
26 | else
27 |     this.getField("my-text").display = display.hidden;"""
28 | 
29 | doc = fitz.open()
30 | page = doc.new_page()
31 | 
32 | w = fitz.Widget()  # define a field skeleton object for the text
33 | w.rect = fitz.Rect(100, 150, 300, 170)
34 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT
35 | w.field_name = "my-text"  # use this to identify the field document-wide
36 | w.field_value = "Will be shown if checkbox is checked."
37 | w.script_calc = JSCRIPT  # use this property for inter-field actions
38 | page.add_widget(w)
39 | 
40 | w = fitz.Widget()  # define field skeleton for the checkbox
41 | w.rect = fitz.Rect(100, 100, 120, 120)
42 | w.field_type = fitz.PDF_WIDGET_TYPE_CHECKBOX
43 | w.field_name = "my-checkbox"  # use this to identify the field document-wide
44 | w.border_color = fitz.pdfcolor["red"]
45 | w.field_label = "click to show or hide text"  # show this on mouse hovering
46 | w.field_value = True
47 | page.add_widget(w)
48 | 
49 | doc.save(__file__.replace(".py", ".pdf"))
50 | 


--------------------------------------------------------------------------------
/font-replacement/multi-language.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/font-replacement/multi-language.jpg


--------------------------------------------------------------------------------
/font-replacement/page-17-after.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/font-replacement/page-17-after.png


--------------------------------------------------------------------------------
/font-replacement/page-17-before.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/font-replacement/page-17-before.png


--------------------------------------------------------------------------------
/font-replacement/run-log.txt:
--------------------------------------------------------------------------------
 1 | D:\Jorj\Documents\GitHub\PyMuPDF-Utilities\font-replacement>py repl-fontnames.py pymupdf.pdf
 2 | 
 3 | D:\Jorj\Documents\GitHub\PyMuPDF-Utilities\font-replacement>py repl-font.py pymupdf.pdf
 4 | Processing PDF 'pymupdf.pdf' with 318 pages.
 5 | 
 6 | Phase 1: Create unicode subsets.
 7 | End of phase 1, 0.49 seconds.
 8 | 
 9 | Font replacement overview:
10 |         NimbusMonL-Bold replaced by: Space Mono Bold.
11 |         NimbusMonL-Regu replaced by: Space Mono Regular.
12 |     NimbusMonL-ReguObli replaced by: Space Mono Italic.
13 |      NimbusRomNo9L-Medi replaced by: FiraGO Regular.
14 |  NimbusRomNo9L-MediItal replaced by: FiraGO Italic.
15 |      NimbusRomNo9L-Regu replaced by: FiraGO Regular.
16 |  NimbusRomNo9L-ReguItal replaced by: FiraGO Italic.
17 |         NimbusSanL-Bold replaced by: FiraGO Bold.
18 |     NimbusSanL-BoldItal replaced by: FiraGO Bold Italic.
19 |         NimbusSanL-Regu replaced by: FiraGO Regular.
20 |     NimbusSanL-ReguItal replaced by: FiraGO Italic.
21 | 
22 | Building font subsets:
23 | Used 67 glyphs of font 'Space Mono Bold'. 70 KB saved.
24 | Used 114 glyphs of font 'Space Mono Regular'. 64 KB saved.
25 | Used 88 glyphs of font 'Space Mono Italic'. 74 KB saved.
26 | Used 100 glyphs of font 'FiraGO Regular'. 753 KB saved.
27 | Used 97 glyphs of font 'FiraGO Italic'. 760 KB saved.
28 | Used 71 glyphs of font 'FiraGO Bold'. 764 KB saved.
29 | Used 31 glyphs of font 'FiraGO Bold Italic'. 780 KB saved.
30 | Font subsets built, 2.86 seconds.
31 | 
32 | Phase 2: rebuild document.
33 | End of phase 2, 12.82 seconds
34 | Total duration 16.17 seconds
35 | 
36 | D:\Jorj\Documents\GitHub\PyMuPDF-Utilities\font-replacement>
37 | 


--------------------------------------------------------------------------------
/jupyter-notebooks/1page-snap.log:
--------------------------------------------------------------------------------
 1 | %!MuPDF-Journal-100
 2 | 
 3 | journal
 4 | <<
 5 | /NumSections 1
 6 | /FileSize 210721
 7 | /Fingerprint <57c84501e4baddef56fd26959a808cfc>
 8 | /HistoryPos 6
 9 | >>
10 | entry
11 | (new page)
12 | 44 0 newobj
13 | 45 0 newobj
14 | 2 0 newobj
15 | entry
16 | (insert-0)
17 | 46 0 newobj
18 | 44 0 obj
19 | <<>>
20 | endobj
21 | 47 0 newobj
22 | 45 0 obj
23 | <</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 44 0 R/Parent 2 0 R>>
24 | endobj
25 | entry
26 | (insert-1)
27 | 48 0 newobj
28 | 45 0 obj
29 | <</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 44 0 R/Parent 2 0 R/Contents[47 0 R]>>
30 | endobj
31 | entry
32 | (insert-2)
33 | 49 0 newobj
34 | 45 0 obj
35 | <</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 44 0 R/Parent 2 0 R/Contents[47 0 R 48 0 R]>>
36 | endobj
37 | entry
38 | (insert-3)
39 | 50 0 newobj
40 | 45 0 obj
41 | <</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 44 0 R/Parent 2 0 R/Contents[47 0 R 48 0 R 49 0 R]>>
42 | endobj
43 | entry
44 | (insert-4)
45 | 51 0 newobj
46 | 45 0 obj
47 | <</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 44 0 R/Parent 2 0 R/Contents[47 0 R 48 0 R 49 0 R 50 0 R]>>
48 | endobj
49 | endjournal
50 | 


--------------------------------------------------------------------------------
/jupyter-notebooks/1page-snap.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/1page-snap.pdf


--------------------------------------------------------------------------------
/jupyter-notebooks/1page.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/1page.pdf


--------------------------------------------------------------------------------
/jupyter-notebooks/README-OCR.md:
--------------------------------------------------------------------------------
 1 | # How To Install and Enable Tesseract Dynamically
 2 | 
 3 | In some interactive environments, like Google Colab, JupyterLite and other Pyodide-based environments, you have access to a Python environment that comes with a set of pre-installed packages. This configuration may not be sufficient for your requirements.
 4 | 
 5 | While there are ways to dynamically pip-install packages via invoking pip as a shell command, or even installing software packages in the virtual machine hosting the interactive Python, additional considerations are required for PyMuPDF's OCR support of Tesseract-OCR:
 6 | 
 7 | * On importing PyMuPDF, a check is made, whether `os.environ["TESSDATA_PREFIX"]` exists. If yes, its value is stored in `fitz.TESSDATA_PREFIX`, else that value is set to `None`.
 8 | 
 9 | * If your notebook requires OCR support, do follow these steps:
10 | 
11 |     1. `!apt install tesseract-ocr`. When done, confirm the value of Tesseract's language support folder.
12 | 
13 |     2. `os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/4.00/tessdata"`
14 | 
15 |     3. `import fitz`
16 | 
17 | * You should now be able to use PyMuPDF's OCR functions.


--------------------------------------------------------------------------------
/jupyter-notebooks/README.md:
--------------------------------------------------------------------------------
 1 | # PyMuPDF JUPYTER Notebooks
 2 | 
 3 | These are scripts that explain basic usage of PyMuPDF using jupyter notebook features. Just click on one of the `.ipynb` files to see its fully rendered session!
 4 | 
 5 | Over time this script collection will be extended. Your contribution is very welcome!
 6 | 
 7 | ## Example Files
 8 | * `1page.pdf` - 1-pager PDF used as a test file by several notebooks
 9 | * `blacked.pdf` - 1-pager PDF with three words covered by black rectangles. Used by `detect-hidden.ipynb` which demonstrates how badly done "redactions" can be detected - **detects hidden text.**
10 | * `partial-ocr.pdf` - 1-pager PDF containing normal text and two images that overlap each other.
11 | 
12 | ## Notebooks
13 | * `dehyphenate-flag.ipynb` - shows the effect of flag `TEXT_DEHYPHENATE` on text search and extraction.
14 | * `detect-hidden.ipynb` - shows how to **_detect text which is hidden_** by objects "drawn above" it.
15 | * `journalling1.ipynb` - introduction to PDF Journalling
16 | * `journalling2.ipynb` - chapter 2 of PDF Journalling
17 | * `journalling3.ipynb` - chapter 3 of PDF Journalling
18 | * `new-circle-annot.ipynb` - simple example for adding an annotation with desired properties
19 | * `ocr-illegible.ipynb` - OCR: how to dynamically make unrecognized characters readable
20 | * `partial-ocr.ipynb` - OCRs a page in full and in partial mode and explains the difference. Requires PyMuPDF v1.19.1.
21 | * `testpage-performance.ipynb` - compare performance of text extraction and search methods, with and without a separately prepared `TextPage` object.
22 | * `object-algebra.ipynb` - explains details on how points, rectangles and quads can be added and multiplied as if they were ordinary numbers. This is an extension to the respective [chapter](https://pymupdf.readthedocs.io/en/latest/algebra.html) of the documentation.


--------------------------------------------------------------------------------
/jupyter-notebooks/blacked.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/blacked.pdf


--------------------------------------------------------------------------------
/jupyter-notebooks/input.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/input.pdf


--------------------------------------------------------------------------------
/jupyter-notebooks/input.pdf-status.log:
--------------------------------------------------------------------------------
 1 | %!MuPDF-Journal-100
 2 | 
 3 | journal
 4 | <<
 5 | /NumSections 0
 6 | /FileSize 210721
 7 | /Fingerprint <57c84501e4baddef56fd26959a808cfc>
 8 | /HistoryPos 0
 9 | >>
10 | endjournal
11 | 


--------------------------------------------------------------------------------
/jupyter-notebooks/partial-ocr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/partial-ocr.pdf


--------------------------------------------------------------------------------
/jupyter-notebooks/show_image.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility function for showing images.
 3 | 
 4 | Intended to be imported in Jupyter notebooks to display pixmap images.
 5 | 
 6 | Invocation: "show_image(item, title)", where item is a PyMuPDF object
 7 | which has a "get_pixmap" method, and title is an optional string.
 8 | 
 9 | The function executes "item.get_pixmap(dpi=150)" and show the resulting
10 | image.
11 | 
12 | 
13 | Dependencies
14 | ------------
15 | numpy, matplotlib, pymupdf
16 | """
17 | 
18 | 
19 | def show_image(item, title=""):
20 |     """Display a pixmap.
21 | 
22 |     Just to display Pixmap image of "item" - ignore the man behind the curtain.
23 | 
24 |     Args:
25 |         item: any PyMuPDF object having a "get_pixmap" method.
26 |         title: a string to be used as image title
27 | 
28 |     Generates an RGB Pixmap from item using a constant DPI and using matplotlib
29 |     to show it inline of the notebook.
30 |     """
31 |     DPI = 150  # use this resolution
32 |     import numpy as np
33 |     import matplotlib.pyplot as plt
34 | 
35 |     # %matplotlib inline
36 |     pix = item.get_pixmap(dpi=DPI)
37 |     img = np.ndarray([pix.h, pix.w, 3], dtype=np.uint8, buffer=pix.samples_mv)
38 |     plt.figure(dpi=DPI)  # set the figure's DPI
39 |     plt.title(title)  # set title of image
40 |     _ = plt.imshow(img, extent=(0, pix.w * 72 / DPI, pix.h * 72 / DPI, 0))
41 | 


--------------------------------------------------------------------------------
/optional-content/source-ocmd.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/optional-content/source-ocmd.pdf


--------------------------------------------------------------------------------
/optional-content/source-ocmd.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PyMuPDF Demo Program
 3 | 
 4 | Show how to create a PDF page that display content depending on the state
 5 | of a set of optional content groups.
 6 | 
 7 | Here we create a PDF page with two objects of which exactly one is shown
 8 | at any time.
 9 | """
10 | import fitz
11 | 
12 | # source file with at least 2 pages
13 | src = fitz.open("source.pdf")
14 | 
15 | # new PDF with one page
16 | doc = fitz.open()
17 | page = doc.new_page()
18 | 
19 | # define 2 rectangles: upper and lower half page
20 | r0 = page.rect
21 | r0.y1 = r0.height / 2
22 | r1 = r0 + (0, r0.height, 0, r0.height)
23 | 
24 | # make 1 OCG and 1 OCMD
25 | ocg0 = doc.addOCG("ocg0", on=True)  # to be used for upper rect
26 | 
27 | # the following is interpreted as "not ocg0"
28 | ocmd0 = doc.set_ocmd(  # to be used for lower rect
29 |     ocgs=[ocg0],
30 |     policy="alloff",
31 | )
32 | 
33 | # alternatively, you can use visibility expressions:
34 | # ocmd0 = doc.set_ocmd(ve=["not", ocg0])
35 | 
36 | # insert the 2 source page images, each connected to one OCG
37 | page.show_pdf_page(r0, src, 0, oc=ocg0, rotate=90)
38 | page.show_pdf_page(r1, src, 1, oc=ocmd0, rotate=-90)
39 | 
40 | doc.save(  # save the file
41 |     __file__.replace(".py", ".pdf"),
42 |     garbage=3,
43 |     pretty=True,
44 |     deflate=True,
45 |     clean=True,
46 | )
47 | 
48 | """
49 | The new PDF can now be viewed by e.g. Adobe Acrobat reader. Setting
50 | "ocg0" ON of OFF will flip between showing page 0 and page 1.
51 | """
52 | 


--------------------------------------------------------------------------------
/optional-content/source-radio.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/optional-content/source-radio.pdf


--------------------------------------------------------------------------------
/optional-content/source-radio.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PyMuPDF Demo Program
 3 | 
 4 | Show how to create a PDF page that display content depending on the state
 5 | of a set of optional content groups.
 6 | 
 7 | We display the first 4 pages of a source file on 4 quadrant of a new
 8 | PDF page of size ISO A4.
 9 | The 4 source images are displayed such that only is shown at a time. This is
10 | achieved via so-called "Radio-Button-Groups" of optional content groups.
11 | """
12 | import fitz
13 | 
14 | # source file with at least 4 pages
15 | src = fitz.open("source.pdf")
16 | 
17 | # new PDF with one page
18 | doc = fitz.open()
19 | page = doc.new_page()
20 | 
21 | # define the 4 rectangle quadrants to receive the source pages
22 | r0 = page.rect / 2
23 | r1 = r0 + (r0.width, 0, r0.width, 0)
24 | r2 = r0 + (0, r0.height, 0, r0.height)
25 | r3 = r2 + (r2.width, 0, r2.width, 0)
26 | 
27 | # make 4 OCGs - one for each source page image.
28 | xref0 = doc.add_ocg("ocg0", on=True)
29 | xref1 = doc.add_ocg("ocg1", on=False)
30 | xref2 = doc.add_ocg("ocg2", on=False)
31 | xref3 = doc.add_ocg("ocg3", on=False)
32 | doc.set_ocStates(
33 |     -1,  # the default OC configuration
34 |     rbgroups=[[xref0, xref1, xref2, xref3]],  # one radio-button group
35 | )
36 | 
37 | # insert the 4 source page images, each connected to one OCG
38 | page.show_pdf_page(r0, src, 0, oc=xref0)
39 | page.show_pdf_page(r1, src, 1, oc=xref1)
40 | page.show_pdf_page(r2, src, 2, oc=xref2)
41 | page.show_pdf_page(r3, src, 3, oc=xref3)
42 | 
43 | doc.save(  # save the file
44 |     __file__.replace(".py", ".pdf"),
45 |     garbage=3,
46 |     pretty=True,
47 |     deflate=True,
48 |     clean=True,
49 | )
50 | 
51 | # the new file can now be viewed by e.g. Adobe Acrobat reader and
52 | # viewing each page will switch off all other three.
53 | 


--------------------------------------------------------------------------------
/optional-content/source.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/optional-content/source.pdf


--------------------------------------------------------------------------------
/pdf-names-resolution/README.md:
--------------------------------------------------------------------------------
 1 | # Under Construction
 2 | 
 3 | This currently contains two demo scripts, which examine the PDF catalog and
 4 | resolve named destinations to pages.
 5 | 
 6 | We intend to make this available as a fitz.Document method.
 7 | 
 8 | Currently, invoke like this:
 9 | 
10 | ## Alternative 1: `find_names.py`
11 | This version works for both, the classic and the rebased architecture of PyMuPDF.
12 | However, the solution is not complete yet: there are cases, where names are not detected completely.
13 | ```python
14 | import fitz
15 | from find_names import resolve_names
16 | 
17 | doc=fitz.open("pymupdf.pdf")
18 | resolved_name = resolve_names(doc)
19 | 
20 | resolved_name["chapter.1"]
21 | {'page': 6, 'to': (72.0, 720.0), 'zoom': 0}
22 | ```
23 | 
24 | ## Alternative 2: `list_names.py`
25 | 
26 | This version should cover all cases for encoding named destinations - in contrast to Alternative 1.
27 | It can only be used with the rebased version of PyMuPDF. Example:
28 | ```python
29 | In [1]: import fitz_new as fitz
30 | In [2]: from list_names import resolve_names
31 | In [3]: doc=fitz.open("pymupdf.pdf")
32 | In [4]: resolved_name=resolve_names(doc)
33 | In [7]: resolved_name["chapter.1"]
34 | Out[7]: {'page': 6, 'to': (72.0, 720.0), 'zoom': 0.0}
35 | ```


--------------------------------------------------------------------------------
/reporting/README.md:
--------------------------------------------------------------------------------
1 | # PyMuPDF Reporting
2 | 
3 | This folder provides examples for using PyMuPDF's reporting feature.
4 | 
5 | Each example is contained in a sub-folder of `examples`, usually the script together with all necessary data to use it.
6 | 
7 | The names of the sub-folders should give an idea what the respective report is trying to achieve. Each sub-folder will also contain its own README file that explains any specifics of the example.
8 | 
9 | Please also do have a look at the [documentation](https://github.com/pymupdf/PyMuPDF-Utilities/blob/master/reporting/documentation-draft.md) draft to understand how to use this exciting new feature.


--------------------------------------------------------------------------------
/reporting/examples/filmfestival-2tables/README.md:
--------------------------------------------------------------------------------
 1 | # Example for PyMuPDF Reporting
 2 | 
 3 | This script creates a report about a fictitious film festival.
 4 | 
 5 | It extracts data from an SQL database (sqlite3). The database contains two tables:
 6 | * **films** - columns: **title**, **director**, **year**
 7 | * **actors** - columns: **name**, **film**
 8 | 
 9 | Two tabular reports are created in one common PDF.
10 | 1. Table 1 lists all films and names all actors being cast.
11 | 2. Table 2 lists all actors together with all the films where they have been cast.
12 | 
13 | Noteworthy details:
14 | * Demonstrate how to use fonts from the [pymupdf-fonts](https://pypi.org/project/pymupdf-fonts/) package.
15 | * Demonstrate how to **combine multiple report sections** (here: two table sections) in one report.
16 | * **Automatic layout:** major layout changes **without coding effort**, like
17 |     - page size (Letter, ISO A4) or paper format (portrait, landscape)
18 |     - number of columns per page
19 |     - page breaks between report sections
20 | * Appearance changes, like text colors or fonts just by modifying HTML and styling (CSS) definitions.


--------------------------------------------------------------------------------
/reporting/examples/filmfestival-2tables/filmfestival.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/filmfestival-2tables/filmfestival.db


--------------------------------------------------------------------------------
/reporting/examples/filmfestival-2tables/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/filmfestival-2tables/output.pdf


--------------------------------------------------------------------------------
/reporting/examples/invoice/README.md:
--------------------------------------------------------------------------------
 1 | # Example for PyMuPDF Reporting
 2 | 
 3 | This script creates an invoice with a layout involving fairly complex HTML definitions.
 4 | 
 5 | The single invoice items are contained in an SQL database (sqlite).
 6 | 
 7 | Points of interest:
 8 | 
 9 | * Company logo top-left on every page - defined as being part of the report header
10 | * The report header also includes a small constant table top-right
11 | * On page 1 only, there is a "prolog" section containing some introductory explanations.
12 |     - The HTML skeleton contains 4 variables to be filled with external data
13 | * Mark last report row with an extra background color
14 | * The item access function also computes an overall invoice total and appends it as the last report row.
15 | 


--------------------------------------------------------------------------------
/reporting/examples/invoice/invoice-parms.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/invoice/invoice-parms.db


--------------------------------------------------------------------------------
/reporting/examples/invoice/items.html:
--------------------------------------------------------------------------------
 1 | <style>
 2 |     body {
 3 |         font-family: sans-serif;
 4 |         font-size: 11px;
 5 |     }
 6 | 
 7 |     table {
 8 |         border-spacing: 0;
 9 |     }
10 | 
11 |     td,
12 |     th {
13 |         padding-left: 3px;
14 |         padding-right: 3px;
15 |         border: .2px solid #bbb;
16 |     }
17 | 
18 |     /*
19 |     td[id="line"],
20 |     td[id="uprice"],
21 |     td[id="qty"] {
22 |         padding-left: 3px;
23 |         padding-right: 10px;
24 |     }
25 | */
26 |     td[id="uprice"],
27 |     td[id="qty*uprice"] {
28 |         text-align: right;
29 |     }
30 | 
31 |     td[id="line"],
32 |     td[id="uom"],
33 |     td[id="qty"],
34 |     td[id="part"],
35 |     td[id="date"] {
36 |         text-align: center;
37 |     }
38 | </style>
39 | 
40 | <body style="font-family: sans-serif;">
41 |     <table>
42 |         <tr id="header" style="background-color: #aaceeb;">
43 |             <th>Line</th>
44 |             <th>H&P ID</th>
45 |             <th>Description</th>
46 |             <th>Part No.</th>
47 |             <th>Qty</th>
48 |             <th>UOM</th>
49 |             <th>Date</th>
50 |             <th>Unit Price</th>
51 |             <th>Total Price</th>
52 |         <tr id="template">
53 |             <td id="line"></td>
54 |             <td id="hp-id"></td>
55 |             <td id="desc"></td>
56 |             <td id="part"></td>
57 |             <td id="qty"></td>
58 |             <td id="uom"></td>
59 |             <td id="date"></td>
60 |             <td id="uprice"></td>
61 |             <td id="tprice"></td>
62 |         </tr>
63 |     </table>
64 | </body>


--------------------------------------------------------------------------------
/reporting/examples/invoice/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/invoice/logo.png


--------------------------------------------------------------------------------
/reporting/examples/invoice/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/invoice/output.pdf


--------------------------------------------------------------------------------
/reporting/examples/multi-format/README.md:
--------------------------------------------------------------------------------
 1 | # Example for PyMuPDF Reporting
 2 | 
 3 | This script creates a list of capital cities of the world.
 4 | 
 5 | It extracts data from a CSV file.
 6 | 
 7 | Notes of interest:
 8 | 
 9 | * Table printed in 2 columns per page
10 | * Alternating row background colors
11 | * Using user fonts from the pymupdf-fonts package


--------------------------------------------------------------------------------
/reporting/examples/multi-format/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/multi-format/output.pdf


--------------------------------------------------------------------------------
/reporting/examples/row-with-images/README.md:
--------------------------------------------------------------------------------
1 | # Example for PyMuPDF Reporting
2 | 
3 | This script creates a table from items in a CSV file.
4 | 
5 | Notes of interest:
6 | 
7 | * Table rows contain images that are stored in a ZIP file. The report generator "understands" field text that is prefixed with the string "|img|" and interprets it as a file name.
8 | * Three alternating row background colors.


--------------------------------------------------------------------------------
/reporting/examples/row-with-images/flags.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/row-with-images/flags.zip


--------------------------------------------------------------------------------
/reporting/examples/row-with-images/items.csv:
--------------------------------------------------------------------------------
 1 | country;member;flag;since
 2 | Belgium;Founder;|img|Belgium.jpg;1949
 3 | Denmark;Founder;|img|Denmark.jpg;1949
 4 | France;Founder;|img|France.jpg;1949
 5 | Iceland;Founder;|img|Iceland.jpg;1949
 6 | Italy;Founder;|img|Italy.jpg;1949
 7 | Canada;Founder;|img|Canada.jpg;1949
 8 | Luxembourg;Founder;|img|Luxembourg.jpg;1949
 9 | Netherlands;Founder;|img|Netherlands.jpg;1949
10 | Norway;Founder;|img|Norway.jpg;1949
11 | Portugal;Founder;|img|Portugal.jpg;1949
12 | United Kingdom;Founder;|img|United_Kingdom.jpg;1949
13 | United States;Founder;|img|United_States.jpg;1949
14 | Greece;Joiner;|img|Greece.jpg;1952
15 | Turkey;Joiner;|img|Turkey.jpg;1952
16 | Germany;Joiner;|img|Germany.jpg;1955
17 | Spain;Joiner;|img|Spain.jpg;1982
18 | Poland;Joiner;|img|Poland.jpg;1999
19 | Czech Republic;Joiner;|img|Czech_Republic.jpg;1999
20 | Hungary;Joiner;|img|Hungary.jpg;1999
21 | Bulgaria;Joiner;|img|Bulgaria.jpg;2004
22 | Estonia;Joiner;|img|Estonia.jpg;2004
23 | Latvia;Joiner;|img|Latvia.jpg;2004
24 | Lithuania;Joiner;|img|Lithuania.jpg;2004
25 | Romania;Joiner;|img|Romania.jpg;2004
26 | Slovakia;Joiner;|img|Slovakia.jpg;2004
27 | Slovenia;Joiner;|img|Slovenia.jpg;2004
28 | Albania;Joiner;|img|Albania.jpg;2009
29 | Croatia;Joiner;|img|Croatia.jpg;2009
30 | Montenegro;Joiner;|img|Montenegro.jpg;2017
31 | North Macedonia;Joiner;|img|North_Macedonia.jpg;2020
32 | Finland;Joiner;|img|Finland.jpg;2023
33 | Sweden;Joiner;|img|Sweden.jpg;2023


--------------------------------------------------------------------------------
/reporting/examples/row-with-images/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/row-with-images/output.pdf


--------------------------------------------------------------------------------
/reporting/examples/row-with-images/rows-with-images.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import fitz
 3 | import zipfile
 4 | from Reports import *
 5 | 
 6 | # The following defines the overall report object
 7 | mediabox = fitz.paper_rect("a4")  # the only required parameter
 8 | report = Report(mediabox, font_families={"sans-serif": "ubuntu", "serif": "ubuntu"})
 9 | 
10 | # Predefined HTML to define the header for all pages
11 | 
12 | HEADER = (
13 |     """<h1 style="text-align:center;font-family: sans-serif;">Report Example</h1>"""
14 | )
15 | header = Block(html=HEADER, report=report)
16 | 
17 | FOOTER = """<h5 style="text-align:center;font-family: sans-serif;">Report Footer</h5>"""
18 | footer = Block(html=FOOTER, report=report)
19 | footer.make_story()
20 | 
21 | HTML = """
22 | <style>
23 | body {font-family: sans-serif;font-size: 11px;}
24 | td, th {
25 |     padding-left: 10px;
26 |     padding-right: 10px;
27 | }
28 | table {margin-left: 20%;}
29 | </style>
30 | 
31 | <body>
32 | <table>
33 | <tr id="header" style="background-color: #aaceeb;">
34 |     <th>Country</th>
35 |     <th>Type</th>
36 |     <th>Flag</th>
37 |     <th>Since</th>
38 | </tr>
39 | 
40 | <tr id="template">
41 |     <td id="country"></td>
42 |     <td id="member"></td>
43 |     <td id="flag"></td>
44 |     <td id="since"></td>
45 | </tr>
46 | </table>
47 | </body>
48 | """
49 | 
50 | national_flags = zipfile.ZipFile("flags.zip")
51 | 
52 | 
53 | def fetch_rows():
54 |     table_data = pathlib.Path("items.csv").read_bytes().decode()
55 |     data = [l.split(";") for l in table_data.splitlines()]
56 |     return data
57 | 
58 | 
59 | items = Table(
60 |     report=report,
61 |     html=HTML,
62 |     top_row="header",
63 |     fetch_rows=fetch_rows,
64 |     archive=national_flags,
65 |     alternating_bg=("#ccc", "#ddd", "#eee"),
66 | )
67 | 
68 | report.sections = [
69 |     [items, Options(cols=1, format="letter", newpage=True)],
70 | ]  # set sections list
71 | report.header = [header]
72 | report.footer = [footer]
73 | 
74 | # This generates the report and saves it to the given path name.
75 | report.run("output.pdf")
76 | 


--------------------------------------------------------------------------------
/reporting/examples/simple-article/README.md:
--------------------------------------------------------------------------------
 1 | # Example for PyMuPDF Reporting
 2 | 
 3 | This script creates a simple text article in 10 lines of code.
 4 | 
 5 | The article text is provided as HTML.
 6 | 
 7 | Notes of interest:
 8 | 
 9 | * Text is printed in 2 columns per page.
10 | * An image is automatically embedded in the layout.
11 | * Soft hyphen characters `&#173;` are recognized and used for generating line breaks where appropriate.


--------------------------------------------------------------------------------
/reporting/examples/simple-article/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/simple-article/output.pdf


--------------------------------------------------------------------------------
/reporting/examples/simple-article/simple-article.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import fitz
 3 | from Reports import *
 4 | 
 5 | report = Report(mediabox=fitz.paper_rect("a4-l"))
 6 | 
 7 | HTML = pathlib.Path("springer.html").read_bytes().decode()
 8 | textblock = Block(html=HTML, report=report)
 9 | 
10 | report.sections = [[textblock, Options(cols=2, format=report.mediabox, newpage=True)]]
11 | report.run("output.pdf")
12 | 


--------------------------------------------------------------------------------
/reporting/examples/simple-article/springer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/simple-article/springer.jpg


--------------------------------------------------------------------------------
/reporting/examples/user-fonts/DejaVuSansCondensed-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/DejaVuSansCondensed-Bold.ttf


--------------------------------------------------------------------------------
/reporting/examples/user-fonts/DejaVuSansCondensed.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/DejaVuSansCondensed.ttf


--------------------------------------------------------------------------------
/reporting/examples/user-fonts/filmfestival.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/filmfestival.db


--------------------------------------------------------------------------------
/reporting/examples/user-fonts/kenpixel.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/kenpixel.ttf


--------------------------------------------------------------------------------
/reporting/examples/user-fonts/output-dejavu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/output-dejavu.pdf


--------------------------------------------------------------------------------
/reporting/examples/user-fonts/output-kenpixel.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/output-kenpixel.pdf


--------------------------------------------------------------------------------
/reporting/pymupdf-reporting.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/pymupdf-reporting.pdf


--------------------------------------------------------------------------------
/reporting/pymupdf-reporting.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/pymupdf-reporting.pptx


--------------------------------------------------------------------------------
/shapes/piechart1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/piechart1.pdf


--------------------------------------------------------------------------------
/shapes/piechart1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/piechart1.png


--------------------------------------------------------------------------------
/shapes/piechart2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/piechart2.pdf


--------------------------------------------------------------------------------
/shapes/symbol-list.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/symbol-list.pdf


--------------------------------------------------------------------------------
/shapes/symbol-list.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | Created on Sun Dec  9 08:34:06 2018
 5 | 
 6 | @author: Jorj
 7 | @license: GNU AFFERO GPL V3+
 8 | 
 9 | Create a list of available symbols defined in shapes_and_symbols.py
10 | 
11 | This also demonstrates an example usage: how these symbols could be used
12 | as bullet-point symbols in some text.
13 | 
14 | """
15 | 
16 | import fitz
17 | import shapes_and_symbols as sas
18 | 
19 | print(fitz.__doc__)
20 | # list of available symbol functions and their descriptions
21 | tlist = [
22 |     (sas.arrow, "arrow (easy)"),
23 |     (sas.caro, "caro (easy)"),
24 |     (sas.clover, "clover (easy)"),
25 |     (sas.diamond, "diamond (easy)"),
26 |     (sas.dontenter, "do not enter (medium)"),
27 |     (sas.frowney, "frowney (medium)"),
28 |     (sas.hand, "hand (complex)"),
29 |     (sas.heart, "heart (easy)"),
30 |     (sas.pencil, "pencil (very complex)"),
31 |     (sas.smiley, "smiley (easy)"),
32 | ]
33 | 
34 | r = fitz.Rect(50, 50, 100, 100)  # first rect to contain a symbol
35 | d = fitz.Rect(0, r.height + 10, 0, r.height + 10)  # displacement to next ret
36 | p = (15, -r.height * 0.2)  # starting point of explanation text
37 | rlist = [r]  # rectangle list
38 | 
39 | for i in range(1, len(tlist)):  # fill in all the rectangles
40 |     rlist.append(rlist[i - 1] + d)
41 | 
42 | doc = fitz.open()  # create empty PDF
43 | page = doc.new_page()  # create an empty page
44 | img = page.new_shape()  # start a Shape (canvas)
45 | 
46 | for i, r in enumerate(rlist):
47 |     tlist[i][0](img, rlist[i])  # execute symbol creation
48 |     img.insert_text(
49 |         rlist[i].br + p, tlist[i][1], fontsize=r.height / 1.2  # insert description text
50 |     )
51 | 
52 | # store everything to the page's /Contents object
53 | img.commit()
54 | 
55 | 
56 | doc.save(__file__.replace(".py", ".pdf"))  # save the PDF
57 | 


--------------------------------------------------------------------------------
/table-analysis/README.md:
--------------------------------------------------------------------------------
 1 | ## Breaking News: PyMuPDF's Table Support Starting with Version 1.23.0!
 2 | Starting with its version 1.23.0, PyMuPDF offers complete integrated support for identifying tables on document pages and extracting their content.
 3 | 
 4 | Just use the new [Page](https://pymupdf.readthedocs.io/en/latest/page.html) method [`find_tables()`](https://pymupdf.readthedocs.io/en/latest/page.html#Page.find_tables) to obtain an object that contains all detected tables on the page in a list.
 5 | 
 6 | You can iterate over these table objects to find details about their headers, table cells and their content. A growing number of example scripts shows how to do this and how to pass the extracted information downstream to pandas Dataframes and Excel, CSV or JSON files.
 7 | 
 8 | The following examples have been collected since 2023-08-20:
 9 | 
10 | * `find_tables.ipynb` (Jupyter notebook) reads a 1-page PDF with Chinese text and two tables.
11 | * `join-tables.ipynb` (Jupyter notebook) reads a multi-page PDF and joins the parts of a table that has been fragmented across these pages.
12 | * `compare-xps-pdf.ipynb` (Jupyter notebook) confirms that PyMuPDF's table feature supports general document types (comparison XPS vs. PDF).
13 | 


--------------------------------------------------------------------------------
/table-analysis/XPS-table.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/XPS-table.pdf


--------------------------------------------------------------------------------
/table-analysis/XPS-table.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/XPS-table.xlsx


--------------------------------------------------------------------------------
/table-analysis/XPS-table.xps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/XPS-table.xps


--------------------------------------------------------------------------------
/table-analysis/chinese-table.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/chinese-table.pdf


--------------------------------------------------------------------------------
/table-analysis/input1-bbox.json:
--------------------------------------------------------------------------------
1 | [
2 |     0.0,
3 |     83.22763061523438,
4 |     612.0,
5 |     390.90350341796875
6 | ]


--------------------------------------------------------------------------------
/table-analysis/input1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/input1.pdf


--------------------------------------------------------------------------------
/table-analysis/input2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/input2.pdf


--------------------------------------------------------------------------------
/table-analysis/national-capitals.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/national-capitals.pdf


--------------------------------------------------------------------------------
/table-analysis/show_image.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility function for showing images.
 3 | 
 4 | Intended to be imported in Jupyter notebooks to display pixmap images.
 5 | 
 6 | Invocation: "show_image(item, title)", where item is a PyMuPDF object
 7 | which has a "get_pixmap" method, and title is an optional string.
 8 | 
 9 | The function executes "item.get_pixmap(dpi=150)" and shows the resulting
10 | image.
11 | 
12 | 
13 | Dependencies
14 | ------------
15 | numpy, matplotlib, pymupdf
16 | """
17 | 
18 | 
19 | def show_image(item, title=""):
20 |     """Display a pixmap.
21 | 
22 |     Just to display Pixmap image of "item" - ignore the man behind the curtain.
23 | 
24 |     Args:
25 |         item: any PyMuPDF object having a "get_pixmap" method.
26 |         title: a string to be used as image title
27 | 
28 |     Generates an RGB Pixmap from item using a constant DPI and using matplotlib
29 |     to show it inline of the notebook.
30 |     """
31 |     DPI = 150  # use this resolution
32 |     import numpy as np
33 |     import matplotlib.pyplot as plt
34 | 
35 |     # %matplotlib inline
36 |     pix = item.get_pixmap(dpi=DPI)
37 |     img = np.ndarray([pix.h, pix.w, 3], dtype=np.uint8, buffer=pix.samples_mv)
38 |     plt.figure(dpi=DPI)  # set the figure's DPI
39 |     plt.title(title)  # set title of image
40 |     _ = plt.imshow(img, extent=(0, pix.w * 72 / DPI, pix.h * 72 / DPI, 0))
41 | 


--------------------------------------------------------------------------------
/text-documents/test.pdf:
--------------------------------------------------------------------------------
 1 | %PDF-1.7
 2 | %µ¶
 3 | 
 4 | 1 0 obj
 5 | <</Type/Catalog/Pages 2 0 R>>
 6 | endobj
 7 | 
 8 | 2 0 obj
 9 | <</Type/Pages/Count 1/Kids[3 0 R]>>
10 | endobj
11 | 
12 | 3 0 obj
13 | <</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources<</Font<</helv 4 0 R>>>>/Parent 2 0 R/Contents 5 0 R>>
14 | endobj
15 | 
16 | 4 0 obj
17 | <</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
18 | endobj
19 | 
20 | 5 0 obj
21 | <</Length 81>>
22 | stream
23 | q
24 | BT
25 | /helv 11 Tf
26 | 1 0 0 1 100 742 Tm
27 | [(Just some arbitrary content.)] TJ
28 | ET
29 | Q
30 | q
31 | Q
32 | 
33 | endstream
34 | endobj
35 | 
36 | xref
37 | 0 6
38 | 0000000000 65536 f 
39 | 0000000016 00000 n 
40 | 0000000062 00000 n 
41 | 0000000114 00000 n 
42 | 0000000238 00000 n 
43 | 0000000327 00000 n 
44 | 
45 | trailer
46 | <</Size 6/Root 1 0 R/ID[<CC2A084CB9885ADABC51CE10CFC725E8><8F2C15D6C784DF5A97728DB5403FCAB8>]>>
47 | startxref
48 | 457
49 | %%EOF
50 | 


--------------------------------------------------------------------------------
/text-extraction/1page-text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/1page-text.jpg


--------------------------------------------------------------------------------
/text-extraction/1page.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/1page.pdf


--------------------------------------------------------------------------------
/text-extraction/Dart-text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/Dart-text.jpg


--------------------------------------------------------------------------------
/text-extraction/Dart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/Dart.pdf


--------------------------------------------------------------------------------
/text-extraction/PDF2Text.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Created on Sun Jul 12 07:00:00 2015
 4 | 
 5 | @author: Jorj McKie
 6 | Copyright (c) 2015-2021 Jorj X. McKie
 7 | 
 8 | The license of this program is governed by GNU AGPL 3.0.
 9 | See the "COPYING" file of this repository.
10 | 
11 | This is an example for using the Python binding PyMuPDF of MuPDF.
12 | 
13 | This program extracts the text of any supported input document and writes it
14 | to a text file named input-filename + ".txt".
15 | 
16 | Changes
17 | -------
18 | 2021-06-21: add formfeed after each page of text.
19 | """
20 | 
21 | import fitz
22 | import sys
23 | 
24 | 
25 | def main(*args):
26 |     if not args:
27 |         filename = sys.argv[1]
28 |     else:
29 |         filename = args[0]
30 |     ofile = filename + ".txt"
31 |     doc = fitz.open(filename)
32 |     fout = open(ofile, "wb")
33 | 
34 |     for page in doc:
35 |         fout.write(page.get_text().encode("utf-8") + bytes((12,)))
36 | 
37 |     fout.close()
38 | 
39 | 
40 | if __name__ == "__main__":
41 |     main()


--------------------------------------------------------------------------------
/text-extraction/PDF2TextBlocks.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Created on Thu Dec 14 17:00:00 2017
 3 | 
 4 | @author: Jorj McKie
 5 | Copyright (c) 2017-2021 Jorj X. McKie
 6 | 
 7 | The license of this program is governed by GNU AGPL 3.0.
 8 | See the "COPYING" file of this repository.
 9 | 
10 | This is an example for using the Python binding PyMuPDF for MuPDF.
11 | 
12 | The program extracts the text of any supported input document and writes it
13 | to a text file.
14 | The input file name is provided as a parameter to this script (sys.argv[1])
15 | The output file name is input-filename + ".txt".
16 | 
17 | In an effort to ensure correct reading sequence, text blocks are sorted in
18 | ascending vertical, then horizontal direction. Sorting happens based on the
19 | coordinates of the blocks' top-left rectangle corner.
20 | This should work for text in horizontal, top-left to bottom-right writing mode.
21 | Please make adjustments to your case as appropriate.
22 | 
23 | Changes
24 | -------
25 | 2021-06-29: simplify block sorting and make script importable.
26 | """
27 | 
28 | import fitz
29 | import sys
30 | 
31 | 
32 | def main(*args):
33 |     if not args:
34 |         filename = sys.argv[1]
35 |     else:
36 |         filename = args[0]
37 |     ofile = filename + ".txt"
38 |     doc = fitz.open(filename)
39 |     fout = open(ofile, "wb")
40 | 
41 |     for page in doc:
42 |         blocks = page.get_text("blocks")
43 |         blocks.sort(key=lambda b: (b[1], b[0]))
44 |         for b in blocks:
45 |             fout.write(b[4].encode("utf-8"))
46 | 
47 |     fout.close()
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     main()
52 | 


--------------------------------------------------------------------------------
/text-extraction/Petresume-text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/Petresume-text.jpg


--------------------------------------------------------------------------------
/text-extraction/Petresume.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/Petresume.pdf


--------------------------------------------------------------------------------
/text-extraction/demo1-text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/demo1-text.jpg


--------------------------------------------------------------------------------
/text-extraction/demo1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/demo1.pdf


--------------------------------------------------------------------------------
/text-extraction/extend-dicts.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/extend-dicts.pdf


--------------------------------------------------------------------------------
/text-extraction/extend-dicts.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PyMuPDF demo script
 3 | 
 4 | Show how to extend the standard "dict" and "rawdict" text extraction outputs
 5 | with information from the Page method "get_texttrace()":
 6 | * Sequence number ("seqno")
 7 | * Type (stroke, fill, hidden)
 8 | * Opacity
 9 | """
10 | 
11 | import fitz
12 | import time
13 | 
14 | doc = fitz.open("extend-dicts.pdf")
15 | page = doc[0]
16 | char_dict = {}
17 | t0 = time.perf_counter()
18 | for span in page.get_texttrace():
19 |     seqno = span["seqno"]
20 |     stype = span["type"]
21 |     opacity = span["opacity"]
22 |     for char in span["chars"]:
23 |         origin = char[2]
24 |         char_dict[origin] = (seqno, stype, opacity)
25 | 
26 | t1 = time.perf_counter()
27 | print(f"Number of characters detected {len(char_dict.keys())}.")
28 | 
29 | text_blocks = page.get_text("dict", flags=fitz.TEXTFLAGS_TEXT)["blocks"]
30 | t2 = time.perf_counter()
31 | for b in text_blocks:
32 |     for l in b["lines"]:
33 |         for s in l["spans"]:
34 |             origin = s["origin"]
35 |             val = char_dict.get(s["origin"])
36 |             if val is None:  # a previous span has all this info
37 |                 s["seqno"] = seqno
38 |                 s["opacity"] = opacity
39 |                 s["type"] = stype
40 |                 continue
41 |             seqno, stype, opacity = val
42 |             s["seqno"] = seqno
43 |             s["opacity"] = opacity
44 |             s["type"] = stype
45 | 
46 | t3 = time.perf_counter()
47 | print("Timings:")
48 | print(f"Make texttrace dictionary: {t1-t0}")
49 | print(f"Extend standard dictionary: {t3-t2}")
50 | 


--------------------------------------------------------------------------------
/text-extraction/garbled-text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/garbled-text.jpg


--------------------------------------------------------------------------------
/text-extraction/garbled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/garbled.pdf


--------------------------------------------------------------------------------
/text-extraction/invoice-simple.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/invoice-simple.pdf


--------------------------------------------------------------------------------
/text-extraction/layout-demo1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/layout-demo1.pdf


--------------------------------------------------------------------------------
/text-extraction/lookup-keywords.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility
 3 | --------
 4 | This demo script shows how to extract key-value pairs from a page with a
 5 | "predictable" layout, as it can be found in invoices and other formalized
 6 | documents.
 7 | 
 8 | In such cases, a text extraction based on "words" leads to a solution that
 9 | is both simple and fast, and avoids using regular expressions.
10 | 
11 | The example analyzes an invoice and extracts the date, invoice number, and
12 | various amounts.
13 | 
14 | Because of the sort, correct values for each keyword will be found if the
15 | value's boundary box bottom is not higher than that of the keyword.
16 | So it could just as well be on the next line. The only condition is, that
17 | no other text exists in between.
18 | 
19 | Please note that the code works unchanged also for other supported document
20 | types, such as XPS or EPUB, etc.
21 | """
22 | 
23 | import fitz
24 | 
25 | doc = fitz.open("invoice-simple.pdf")  # example document
26 | page = doc[0]  # first page
27 | words = page.get_text("words", sort=True)  # extract sorted words
28 | 
29 | for i, word in enumerate(words):
30 |     # information items will be found prefixed with their "key"
31 |     text = word[4]
32 |     if text == "DATE:":  # the following word will be the date!
33 |         date = words[i + 1][4]
34 |         print("Invoice date:", date)
35 |     elif text == "Subtotal":
36 |         subtotal = words[i + 1][4]
37 |         print("Subtotal:", subtotal)
38 |     elif text == "Tax":
39 |         tax = words[i + 1][4]
40 |         print("Tax:", tax)
41 |     elif text == "INVOICE":
42 |         inv_number = words[i + 2][4]  # skip the "#" sign
43 |         print("Invoice number:", inv_number)
44 |     elif text == "BALANCE":
45 |         balance = words[i + 2][4]  # skip the word "DUE"
46 |         print("Balance due:", balance)
47 | 


--------------------------------------------------------------------------------
/text-extraction/shadows.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/shadows.pdf


--------------------------------------------------------------------------------
/text-extraction/textmaker.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/textmaker.pdf


--------------------------------------------------------------------------------
/text-extraction/textmaker2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/textmaker2.pdf


--------------------------------------------------------------------------------
/text-extraction/textmaker2.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Generates a PDF page to demonstrate capabilities and limitations of various
 3 | text extraction methods.
 4 | Reads "textmaker.pdf" created previously, then extracts every single
 5 | character and arbitrarily re-arranges the character list.
 6 | Then make a new PDF page and write each character of the shuffled list
 7 | to the same position it had on the original page.
 8 | The resulting PDF looks exactly like the original, but refuses to deliver
 9 | meaningful results for all conventional text extraction methods.
10 | Also if you try to copy-paste with PDF viewers like Adobe Acrobat,
11 | Foxit Reader, PDF XChange, ... the result will be complete garbage.
12 | Evince on Linux is not as bad. I don't know how OSX tools would compare.
13 | 
14 | The only possible solution to recover the text is layout preservation.
15 | """
16 | import fitz
17 | import random
18 | 
19 | font = fitz.Font("cjk")  # use same font for output
20 | doc = fitz.open("textmaker.pdf")
21 | page = doc[0]
22 | w = page.rect.width
23 | h = page.rect.height
24 | chars = []  # save extracted characters here
25 | for b in page.get_text("rawdict")["blocks"]:
26 |     for l in b["lines"]:
27 |         for s in l["spans"]:
28 |             for c in s["chars"]:
29 |                 chars.append(c)
30 | doc.close()
31 | doc = fitz.open()  # make new PDF
32 | page = doc.new_page(width=w, height=h)  # new page with the old dimensions
33 | random.shuffle(chars)  # arbitrarily re-order characters
34 | tw = fitz.TextWriter(page.rect)
35 | # write the re-ordered characters to the page
36 | for c in chars:
37 |     tw.append(c["origin"], c["c"], font=font)
38 | tw.write_text(page)
39 | doc.subset_fonts()
40 | doc.ez_save(__file__.replace(".py", ".pdf"))
41 | 


--------------------------------------------------------------------------------
/textbox-extraction/search.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textbox-extraction/search.pdf


--------------------------------------------------------------------------------
/textbox-extraction/search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textbox-extraction/search.png


--------------------------------------------------------------------------------
/textbox-extraction/textbox-extract-2.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Script showing how to select only text that is contained in a given rectangle
 3 | on a page.
 4 | 
 5 | We use "page.get_textbox", which is available since PyMuPDF v1.18.0.
 6 | The decision on what will be included is made by character, so while much
 7 | simpler to use than the other script in this folder, it will ignore word
 8 | integrity and cut through any overlaps.
 9 | 
10 | There also is no logic that maintains natural reading order, so text will
11 | appear as stored in the document.
12 | 
13 | """
14 | import fitz
15 | 
16 | doc = fitz.open("search.pdf")  # any supported document type
17 | page = doc[0]  # we want text from this page
18 | 
19 | """
20 | -------------------------------------------------------------------------------
21 | Identify the rectangle.
22 | -------------------------------------------------------------------------------
23 | """
24 | rect = page.first_annot.rect  # this annot has been prepared for us!
25 | # Now we have the rectangle ---------------------------------------------------
26 | 
27 | print(page.get_textbox(rect))
28 | 


--------------------------------------------------------------------------------
/textwriter/cff-test.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/cff-test.pdf


--------------------------------------------------------------------------------
/textwriter/demo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/demo.pdf


--------------------------------------------------------------------------------
/textwriter/demo.py:
--------------------------------------------------------------------------------
 1 | import fitz, os
 2 | 
 3 | thisdir = lambda f: os.path.join(os.path.dirname(__file__), f)
 4 | thisfile = os.path.abspath(__file__)
 5 | outfile = thisfile.replace(".py", ".pdf")
 6 | 
 7 | font1 = fitz.Font("helv")
 8 | font2 = fitz.Font("tiro")
 9 | doc = fitz.open()
10 | page = doc.new_page()
11 | point = fitz.Point(50, 72)
12 | matrix = fitz.Matrix(-20)
13 | 
14 | wrt1 = fitz.TextWriter(page.rect, color=(0, 0, 1))
15 | wrt2 = fitz.TextWriter(page.rect, color=(1, 0, 0))
16 | 
17 | _, last = wrt1.append(point, "This text changes color,", font1, 11)
18 | _, last = wrt2.append(last, " font and fontsize", font2, 18)
19 | _, last = wrt1.append(last, " several", font1, 11)
20 | _, last = wrt2.append(last, " times!", font2, 24)
21 | 
22 | # output both text writers on current page in arbitrary sequence
23 | wrt1.write_text(page, morph=(point, matrix))  # using the same morph parameter
24 | wrt2.write_text(page, morph=(point, matrix))  # also preserves the joint text.
25 | 
26 | # make a new page
27 | page = doc.new_page()
28 | rect = wrt1.text_rect | wrt2.text_rect  # join rect of blue and red text
29 | # make new rectangle from it, rotated by 90 degrees
30 | nrect = fitz.Rect(
31 |     rect.tl,  # same top-left, but width and height exchanged
32 |     rect.x0 + rect.height,
33 |     rect.y0 + rect.width,
34 | )
35 | 
36 | # use the page method for joint rotated output
37 | page.write_text(rect=nrect, writers=(wrt1, wrt2), rotate=90)
38 | 
39 | # one more time with rotation by 270 degrees
40 | nrect += (
41 |     2 * nrect.width,  # identical copy somewhat shifted to the right
42 |     0,
43 |     2 * nrect.width,
44 |     0,
45 | )
46 | page.write_text(rect=nrect, writers=(wrt1, wrt2), rotate=-90)
47 | 
48 | # more outputs with 45 degrees
49 | page = doc.new_page()
50 | page.write_text(
51 |     rect=page.rect,
52 |     writers=(wrt1, wrt2),
53 |     color=(0.2, 0.6, 1),
54 |     rotate=-45,  # or recoloring
55 | )
56 | page.write_text(
57 |     rect=page.rect,
58 |     writers=(wrt1, wrt2),
59 |     opacity=0.5,  # can be used for watermarking
60 |     rotate=45,
61 | )
62 | doc.save(
63 |     outfile,
64 |     garbage=4,  # makes sense here to combine identical binary data
65 |     deflate=True,
66 | )
67 | 


--------------------------------------------------------------------------------
/textwriter/new-annots-tw-0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/new-annots-tw-0.pdf


--------------------------------------------------------------------------------
/textwriter/test-droid.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/test-droid.pdf


--------------------------------------------------------------------------------
/textwriter/test.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/test.pdf


--------------------------------------------------------------------------------
/textwriter/textwriter-textbox.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/textwriter-textbox.pdf


--------------------------------------------------------------------------------
/word&line-marking/mark-lines.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/mark-lines.png


--------------------------------------------------------------------------------
/word&line-marking/mark-lines.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A PyMuPDF demo script for highlighting lines of text.
 3 | 
 4 | This requires 3 parameters:
 5 | - start: point where marking should start - upper bound
 6 | - stop: point where marking should stop - lower bound
 7 | - clip: rectangle for further limiting width of lines. This can be used when
 8 |         page text is organized in columns: then we must prevent inclusion of
 9 |         text portions from the wrong columns.
10 | 
11 | The parameters are optional in the following sense:
12 | If 'start' is None, the top-left point of 'clip' is used.
13 | If 'stop' is None, the bottom-right point of 'clip' is used.
14 | If 'clip' is None, the page rectangle is used
15 | 
16 | Our example page has 3 text columns, and we luckily know that our text is
17 | located in the left column. We also know unique text strings which help us
18 | find the start and stop points.
19 | """
20 | import fitz
21 | 
22 | doc = fitz.open("search.pdf")  # the document
23 | page = doc[0]  # the page
24 | 
25 | # determine start point
26 | rl = page.search_for("im vorfeld solch ")  # use a unique string on the page
27 | # we might want to check that len(rl) == 1 here
28 | start = rl[0].tl  # top-left point
29 | 
30 | # determine stop point
31 | rl = page.search_for("stark aus.")  # use a unique string
32 | # again, possibly check len(rl) == 1
33 | stop = rl[0].br  # bottom-right point
34 | 
35 | # we need a clip rectangle, because the page has 3 text columns!
36 | clip = page.rect  # start with page rectangle
37 | width = clip.width  # take the width and limit it
38 | clip.x1 = width * 0.35  # to about one third to get the left column
39 | 
40 | page.add_highlight_annot(start=start, stop=stop, clip=clip)
41 | # ------------------------------------------------------------
42 | # underlining and strike-through work in the same way:
43 | # ------------------------------------------------------------
44 | # page.add_underline_annot(start=start, stop=stop, clip=clip)
45 | # page.add_strikeout_annot(start=start, stop=stop, clip=clip)
46 | # page.add_squiggly_annot(start=start, stop=stop, clip=clip)
47 | 
48 | doc.save(__file__.replace(".py", ".pdf"))
49 | 


--------------------------------------------------------------------------------
/word&line-marking/mark-lines2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/mark-lines2.jpg


--------------------------------------------------------------------------------
/word&line-marking/mark-lines2.py:
--------------------------------------------------------------------------------
 1 | import fitz
 2 | 
 3 | """
 4 | This marks a longer, unique sentence on the page.
 5 | The parameters 'start', 'stop' and 'clip' are fully computed from the
 6 | returned hit rectangles.
 7 | """
 8 | doc = fitz.open("search.pdf")
 9 | page = doc[0]
10 | 
11 | # Search for this text. It is show with hyphens on the page, which we can
12 | # simply delete for our search. Line breaks can be handled like spaces.
13 | text1 = (
14 |     "Erklären ließe sich die Veränderung, wenn Beteigeuze einen",
15 |     "Materieauswurf ins All geschleudert hat, der einen Teil",
16 |     "der Strahlung abfängt, meinen die Forscher der",
17 |     "Europäischen Südsternwarte ESO.",
18 | )
19 | 
20 | rl = page.search_for(
21 |     " ".join(text1),  # reconstruct full sentence for searching
22 | )
23 | 
24 | # You should check success here!
25 | start = rl[0].tl  # top-left of first rectangle
26 | stop = rl[-1].br  # bottom-right of last rectangle
27 | clip = fitz.Rect()  # build clip as union of the hit rectangles
28 | for r in rl:
29 |     clip |= r
30 | 
31 | page.add_highlight_annot(
32 |     start=start,
33 |     stop=stop,
34 |     clip=clip,
35 | )
36 | 
37 | doc.save(__file__.replace(".py", ".pdf"), garbage=3, deflate=True)
38 | 


--------------------------------------------------------------------------------
/word&line-marking/mark-words.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/mark-words.pdf


--------------------------------------------------------------------------------
/word&line-marking/search.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/search.pdf


--------------------------------------------------------------------------------