├── LICENSE ├── OCR ├── PDF_XChange-OCRed.pdf ├── README.md ├── easyocr1.py ├── images-to-ocr-pdf.py ├── ocr-ed.pdf ├── ocr-ed.txt ├── ocrpages.py ├── scanned.pdf ├── tesseract1.py ├── tesseract2.py └── v110-changes.pdf ├── README.md ├── advanced-toc ├── README.md ├── colored-toc.pdf ├── colored-toc.png ├── colorize.py ├── example.pdf └── replaced-toc.pdf ├── alias-changer.py ├── animations ├── README.md ├── morph-demo1.jpg ├── morph-demo1.py ├── morph-demo2.py ├── morph-demo3.py ├── quad-show1.py ├── quad-show2.jpg └── quad-show2.py ├── annotations ├── freetext-annot-lang.pdf ├── freetext-annot-lang.py ├── new-annots-0.pdf ├── new-annots.py ├── opacity.pdf ├── opacity.py ├── show-no-annots.py ├── with-annots.png └── without-annots.png ├── cloud-interactions ├── README.md ├── from-aws-s3.py ├── from-google.py ├── from-ms-azure.py ├── to-aws-s3.py └── to-ms-azure.py ├── conversion ├── README.md ├── images-to-ocr-pdf.py ├── make-cbz.py ├── make-imagepdf.py └── make-page-images.py ├── examples ├── .gitignore ├── DeDRM-ebook.py ├── README.md ├── anonymize-document │ ├── anonymize.py │ ├── input.pdf │ └── output.pdf ├── attach-images │ ├── attach.py │ ├── input │ │ ├── erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg │ │ └── joe-caione-qO-PIF84Vxg-unsplash.jpg │ └── output.pdf ├── browse-document │ ├── browse.py │ └── input.pdf ├── combine-pages │ ├── combine.py │ ├── input.pdf │ └── output.pdf ├── convert-document │ ├── convert.py │ ├── input.epub │ └── output.pdf ├── convert-image │ ├── convert.py │ ├── input.jpg │ └── output.png ├── convert-pixmap │ ├── convert.py │ ├── input.png │ └── output.jpg ├── convert-text │ ├── convert.py │ ├── input.txt │ └── output.pdf ├── copy-embedded │ ├── copy.py │ ├── input.pdf │ └── output.pdf ├── decrypt-document │ ├── decrypt.py │ ├── input.pdf │ └── output.pdf ├── display-document │ ├── display.py │ └── input.pdf ├── draw-cardioid │ ├── draw.py │ └── output.pdf ├── draw-caustic │ ├── draw.py │ ├── output.pdf │ ├── output.png │ ├── 
output.svg │ └── output.svgz ├── draw-fractal │ ├── carpet.py │ ├── output_carpet.png │ ├── output_punch.png │ ├── output_triangle.pdf │ ├── punch.py │ └── triangle.py ├── draw-polygon │ ├── draw.py │ ├── output.pdf │ └── output.svg ├── draw-rgb-area │ ├── draw.py │ ├── output_PIL.png │ └── output_fitz.png ├── draw-sines │ ├── draw.py │ └── output.pdf ├── edit-images │ ├── README.md │ ├── edit.py │ ├── figure-01.jpg │ └── input.pdf ├── edit-links │ ├── edit.py │ └── input.pdf ├── edit-toc │ ├── edit.py │ └── input.pdf ├── embed-images │ ├── embed.py │ ├── input │ │ ├── erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg │ │ └── joe-caione-qO-PIF84Vxg-unsplash.jpg │ └── output.pdf ├── export-embedded │ ├── export.py │ ├── input.pdf │ └── output.pdf ├── export-metadata │ ├── export.py │ ├── input.pdf │ └── output.csv ├── export-toc │ ├── export.py │ ├── input.pdf │ └── output.csv ├── extract-images │ ├── extract-from-pages.py │ ├── extract-from-xref.py │ ├── input.pdf │ └── output │ │ ├── .gitkeep │ │ ├── img00005.png │ │ └── img00011.png ├── extract-table │ ├── ParseTab.py │ ├── README.md │ ├── extract.py │ ├── input.pdf │ └── wx-extract.py ├── extract-vector-graphics │ └── detect_graphics.py ├── extract-xobj │ ├── extract.py │ ├── input.pdf │ └── output.pdf ├── filmfestival-2tables │ ├── README.md │ ├── filmfestival.db │ ├── filmfestival.py │ └── output.pdf ├── icons │ ├── PyMuPDF.ico │ ├── __init__.py │ ├── ico_pdf.py │ ├── pdf.py │ └── pymupdf.png ├── import-embedded │ ├── import.py │ ├── input.pdf │ ├── joe-caione-qO-PIF84Vxg-unsplash.jpg │ └── output.pdf ├── import-metadata │ ├── import.py │ ├── input.csv │ └── input.pdf ├── import-toc │ ├── import.py │ ├── input.csv │ └── input.pdf ├── insert-images │ ├── input │ │ ├── erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg │ │ └── joe-caione-qO-PIF84Vxg-unsplash.jpg │ ├── insert.py │ └── output.pdf ├── insert-logo │ ├── file.py │ ├── input.pdf │ ├── logo.png │ ├── logo.svg │ ├── output_file.pdf │ ├── output_svg.pdf │ └── svg.py ├── 
join-documents │ ├── input │ │ ├── made-with-cc.pdf │ │ └── thinkpython2.pdf │ ├── join.py │ └── output.pdf ├── list-embedded │ ├── input.pdf │ └── list.py ├── make-calendar │ ├── make.py │ └── output.pdf ├── optimize-document │ ├── input.pdf │ └── optimize.py ├── posterize-document │ ├── input.pdf │ ├── output.pdf │ └── posterize.py ├── print-hsv │ ├── output.pdf │ └── print.py ├── print-page-format │ └── print.py ├── print-rgb │ ├── output.pdf │ └── print.py ├── replace-image │ ├── README.md │ ├── input.jpg │ ├── input.pdf │ ├── output_remove.pdf │ ├── output_replace.pdf │ ├── remove.py │ └── replace.py ├── split-document │ ├── input.pdf │ ├── output │ │ ├── .gitkeep │ │ ├── input-0.pdf │ │ ├── input-1.pdf │ │ ├── input-10.pdf │ │ ├── input-100.pdf │ │ ├── input-101.pdf │ │ ├── input-102.pdf │ │ ├── input-103.pdf │ │ ├── input-104.pdf │ │ ├── input-105.pdf │ │ ├── input-106.pdf │ │ ├── input-107.pdf │ │ ├── input-108.pdf │ │ ├── input-109.pdf │ │ ├── input-11.pdf │ │ ├── input-110.pdf │ │ ├── input-111.pdf │ │ ├── input-112.pdf │ │ ├── input-113.pdf │ │ ├── input-114.pdf │ │ ├── input-115.pdf │ │ ├── input-116.pdf │ │ ├── input-117.pdf │ │ ├── input-118.pdf │ │ ├── input-119.pdf │ │ ├── input-12.pdf │ │ ├── input-120.pdf │ │ ├── input-121.pdf │ │ ├── input-122.pdf │ │ ├── input-123.pdf │ │ ├── input-124.pdf │ │ ├── input-125.pdf │ │ ├── input-126.pdf │ │ ├── input-127.pdf │ │ ├── input-128.pdf │ │ ├── input-129.pdf │ │ ├── input-13.pdf │ │ ├── input-130.pdf │ │ ├── input-131.pdf │ │ ├── input-132.pdf │ │ ├── input-133.pdf │ │ ├── input-134.pdf │ │ ├── input-135.pdf │ │ ├── input-136.pdf │ │ ├── input-137.pdf │ │ ├── input-138.pdf │ │ ├── input-139.pdf │ │ ├── input-14.pdf │ │ ├── input-140.pdf │ │ ├── input-141.pdf │ │ ├── input-142.pdf │ │ ├── input-143.pdf │ │ ├── input-144.pdf │ │ ├── input-145.pdf │ │ ├── input-146.pdf │ │ ├── input-147.pdf │ │ ├── input-148.pdf │ │ ├── input-149.pdf │ │ ├── input-15.pdf │ │ ├── input-150.pdf │ │ ├── input-151.pdf │ │ ├── 
input-152.pdf │ │ ├── input-153.pdf │ │ ├── input-154.pdf │ │ ├── input-155.pdf │ │ ├── input-156.pdf │ │ ├── input-157.pdf │ │ ├── input-158.pdf │ │ ├── input-159.pdf │ │ ├── input-16.pdf │ │ ├── input-160.pdf │ │ ├── input-161.pdf │ │ ├── input-162.pdf │ │ ├── input-163.pdf │ │ ├── input-164.pdf │ │ ├── input-165.pdf │ │ ├── input-166.pdf │ │ ├── input-167.pdf │ │ ├── input-168.pdf │ │ ├── input-169.pdf │ │ ├── input-17.pdf │ │ ├── input-170.pdf │ │ ├── input-171.pdf │ │ ├── input-172.pdf │ │ ├── input-173.pdf │ │ ├── input-174.pdf │ │ ├── input-175.pdf │ │ ├── input-18.pdf │ │ ├── input-19.pdf │ │ ├── input-2.pdf │ │ ├── input-20.pdf │ │ ├── input-21.pdf │ │ ├── input-22.pdf │ │ ├── input-23.pdf │ │ ├── input-24.pdf │ │ ├── input-25.pdf │ │ ├── input-26.pdf │ │ ├── input-27.pdf │ │ ├── input-28.pdf │ │ ├── input-29.pdf │ │ ├── input-3.pdf │ │ ├── input-30.pdf │ │ ├── input-31.pdf │ │ ├── input-32.pdf │ │ ├── input-33.pdf │ │ ├── input-34.pdf │ │ ├── input-35.pdf │ │ ├── input-36.pdf │ │ ├── input-37.pdf │ │ ├── input-38.pdf │ │ ├── input-39.pdf │ │ ├── input-4.pdf │ │ ├── input-40.pdf │ │ ├── input-41.pdf │ │ ├── input-42.pdf │ │ ├── input-43.pdf │ │ ├── input-44.pdf │ │ ├── input-45.pdf │ │ ├── input-46.pdf │ │ ├── input-47.pdf │ │ ├── input-48.pdf │ │ ├── input-49.pdf │ │ ├── input-5.pdf │ │ ├── input-50.pdf │ │ ├── input-51.pdf │ │ ├── input-52.pdf │ │ ├── input-53.pdf │ │ ├── input-54.pdf │ │ ├── input-55.pdf │ │ ├── input-56.pdf │ │ ├── input-57.pdf │ │ ├── input-58.pdf │ │ ├── input-59.pdf │ │ ├── input-6.pdf │ │ ├── input-60.pdf │ │ ├── input-61.pdf │ │ ├── input-62.pdf │ │ ├── input-63.pdf │ │ ├── input-64.pdf │ │ ├── input-65.pdf │ │ ├── input-66.pdf │ │ ├── input-67.pdf │ │ ├── input-68.pdf │ │ ├── input-69.pdf │ │ ├── input-7.pdf │ │ ├── input-70.pdf │ │ ├── input-71.pdf │ │ ├── input-72.pdf │ │ ├── input-73.pdf │ │ ├── input-74.pdf │ │ ├── input-75.pdf │ │ ├── input-76.pdf │ │ ├── input-77.pdf │ │ ├── input-78.pdf │ │ ├── input-79.pdf │ │ ├── 
input-8.pdf │ │ ├── input-80.pdf │ │ ├── input-81.pdf │ │ ├── input-82.pdf │ │ ├── input-83.pdf │ │ ├── input-84.pdf │ │ ├── input-85.pdf │ │ ├── input-86.pdf │ │ ├── input-87.pdf │ │ ├── input-88.pdf │ │ ├── input-89.pdf │ │ ├── input-9.pdf │ │ ├── input-90.pdf │ │ ├── input-91.pdf │ │ ├── input-92.pdf │ │ ├── input-93.pdf │ │ ├── input-94.pdf │ │ ├── input-95.pdf │ │ ├── input-96.pdf │ │ ├── input-97.pdf │ │ ├── input-98.pdf │ │ └── input-99.pdf │ └── split.py ├── test-blendmode │ ├── output.pdf │ └── test.py ├── tile-image │ ├── input.jpg │ ├── output │ │ ├── .gitkeep │ │ ├── target-00.png │ │ ├── target-01.png │ │ ├── target-02.png │ │ ├── target-10.png │ │ ├── target-11.png │ │ ├── target-12.png │ │ ├── target-20.png │ │ ├── target-21.png │ │ ├── target-22.png │ │ ├── target-30.png │ │ ├── target-31.png │ │ └── target-32.png │ └── tile.py ├── view-document │ ├── input.pdf │ └── view.py └── zerofy-rotation │ ├── derotate.py │ ├── input.pdf │ └── zerofy-rotation.py ├── fields ├── date-field.py ├── form-fields.py ├── interfield-calculation.py ├── list-fields.py ├── switch-text-on-off.py ├── widgettest-alt.pdf └── widgettest.py ├── font-replacement ├── multi-language.jpg ├── page-17-after.png ├── page-17-before.png ├── readme.md ├── repl-font.py ├── repl-fontnames.py └── run-log.txt ├── jupyter-notebooks ├── 1page-snap.log ├── 1page-snap.pdf ├── 1page.pdf ├── README-OCR.md ├── README.md ├── blacked.pdf ├── dehyphenate-flag.ipynb ├── detect-hidden.ipynb ├── input.pdf ├── input.pdf-status.log ├── journalling1.ipynb ├── journalling2.ipynb ├── new_circle_annot.ipynb ├── object-algebra.ipynb ├── ocr-illegible.ipynb ├── optional-content.ipynb ├── page-rectangles.ipynb ├── partial-ocr.ipynb ├── partial-ocr.pdf ├── show_image.py └── testpage-performance.ipynb ├── optional-content ├── readme.md ├── source-ocmd.pdf ├── source-ocmd.py ├── source-radio.pdf ├── source-radio.py └── source.pdf ├── pdf-names-resolution ├── README.md ├── find_names.py └── list_names.py ├── 
reporting ├── README.md ├── documentation-draft.md ├── examples │ ├── filmfestival-2tables │ │ ├── README.md │ │ ├── Reports.py │ │ ├── filmfestival.db │ │ ├── filmfestival.py │ │ └── output.pdf │ ├── invoice │ │ ├── README.md │ │ ├── Reports.py │ │ ├── header.html │ │ ├── invoice-parms.db │ │ ├── invoicer.py │ │ ├── items.html │ │ ├── logo.png │ │ ├── output.pdf │ │ └── prolog.html │ ├── multi-format │ │ ├── README.md │ │ ├── Reports.py │ │ ├── national-capitals.csv │ │ ├── national-capitals.py │ │ └── output.pdf │ ├── row-with-images │ │ ├── README.md │ │ ├── Reports.py │ │ ├── flags.zip │ │ ├── items.csv │ │ ├── output.pdf │ │ └── rows-with-images.py │ ├── simple-article │ │ ├── README.md │ │ ├── Reports.py │ │ ├── output.pdf │ │ ├── simple-article.py │ │ ├── springer.html │ │ └── springer.jpg │ └── user-fonts │ │ ├── DejaVuSansCondensed-Bold.ttf │ │ ├── DejaVuSansCondensed.ttf │ │ ├── README.md │ │ ├── Reports.py │ │ ├── dejavu.py │ │ ├── filmfestival.db │ │ ├── kenpixel.py │ │ ├── kenpixel.ttf │ │ ├── output-dejavu.pdf │ │ └── output-kenpixel.pdf ├── pymupdf-reporting.pdf └── pymupdf-reporting.pptx ├── shapes ├── piechart1.pdf ├── piechart1.png ├── piechart1.py ├── piechart2.pdf ├── piechart2.py ├── shapes_and_symbols.py ├── symbol-list.pdf └── symbol-list.py ├── table-analysis ├── README.md ├── XPS-table.pdf ├── XPS-table.xlsx ├── XPS-table.xps ├── chinese-table.pdf ├── clean_graphics.py ├── compare-xps-pdf.ipynb ├── find_tables.ipynb ├── gridlines-to-pandas.py ├── input1-bbox.json ├── input1.pdf ├── input2.pdf ├── join_tables.ipynb ├── national-capitals.pdf ├── show_image.py └── span-analysis-to-pandas.py ├── text-documents ├── README.md ├── any-file.ipynb ├── basic.ipynb ├── multi-language.ipynb └── test.pdf ├── text-extraction ├── 1page-text.jpg ├── 1page.pdf ├── Dart-text.jpg ├── Dart.pdf ├── PDF2Text.py ├── PDF2TextBlocks.py ├── Petresume-text.jpg ├── Petresume.pdf ├── README.md ├── demo1-text.jpg ├── demo1.pdf ├── extend-dicts.pdf ├── extend-dicts.py 
├── fitzcli.py ├── garbled-text.jpg ├── garbled.pdf ├── invoice-simple.pdf ├── layout-analyzer.py ├── layout-demo1.pdf ├── lookup-keywords.py ├── multi_column.py ├── shadows.pdf ├── textmaker.pdf ├── textmaker.py ├── textmaker2.pdf └── textmaker2.py ├── textbox-extraction ├── readme.md ├── search.pdf ├── search.png ├── textbox-extract-1.py └── textbox-extract-2.py ├── textwriter ├── cff-test.pdf ├── demo.pdf ├── demo.py ├── new-annots-tw-0.pdf ├── new-annots-tw.py ├── test-droid.pdf ├── test.pdf ├── textwriter-textbox.pdf └── textwriter-textbox.py └── word&line-marking ├── mark-lines.png ├── mark-lines.py ├── mark-lines2.jpg ├── mark-lines2.py ├── mark-words.pdf ├── mark-words.py ├── readme.md └── search.pdf /OCR/PDF_XChange-OCRed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/PDF_XChange-OCRed.pdf -------------------------------------------------------------------------------- /OCR/images-to-ocr-pdf.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility to OCR a list of images and output them as one PDF 3 | 4 | License: GNU AGPL 3.0 5 | Author: (c) Harald Lieder, harald.lieder@outlook.com 6 | Date: 2021-10-26 7 | """ 8 | import os 9 | import sys 10 | 11 | import fitz 12 | 13 | if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 0): 14 | raise ValueError("Need at least PyMuPDF v1.19.0") 15 | 16 | doc = fitz.open() # output PDF 17 | img_folder = sys.argv[1] # example: image folder name provided 18 | dirname = os.path.dirname(img_folder) 19 | img_list = os.listdir(img_folder) # some list of image filenames 20 | for img in img_list: 21 | imgfile = os.path.join(dirname, img) 22 | pix = fitz.Pixmap(imgfile) # make a pixmap form the image file 23 | pdfbytes = pix.pdfocr_tobytes(language="eng") # 1-page PDF with the OCRed image 24 | imgpdf = fitz.open("pdf", pdfbytes) # open 
it as a PDF 25 | doc.insert_pdf(imgpdf) # append the image page to output 26 | 27 | doc.ez_save("ocr-pdf.pdf") # save output 28 | -------------------------------------------------------------------------------- /OCR/ocr-ed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/ocr-ed.pdf -------------------------------------------------------------------------------- /OCR/ocr-ed.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | PyMuPDF— the Python 6 | bindings for MuPDF 7 | 8 | PyMuPDF Documentation 9 | Release 1.18.19 10 | 11 | Jorj X. McKie 12 | 13 | Sep 17, 2021 14 | -------------------------------------------------------------------------------- /OCR/ocrpages.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a basic script demonstrating the use of OCRmyPDF together with PyMuPDF. 3 | 4 | It reads a PDF's pages and passes them to ocrmypdf one by one. One could at this 5 | point insert some checks as to whether the page is actually an image, contains no text, 6 | or text with many unrecognized characters or the like. 7 | 8 | Each page is then converted to a 1-page temporary PDF which is 9 | - passed to ocrmypdf for OCR-ing it 10 | - the 1-page output PDF of the previous step is then text-extracted 11 | - return the extracted text 12 | 13 | Instead of extracting simple naive text format, one could also use all other 14 | text extraction formats like "dict" to get text position information.
15 | 16 | Requires 17 | --------- 18 | ocrmypdf 19 | """ 20 | import fitz 21 | import ocrmypdf 22 | import sys 23 | import io 24 | 25 | 26 | def ocr_the_page(page): 27 | """Extract the text from passed-in PDF page.""" 28 | src = page.parent # the page's document 29 | doc = fitz.open() # make temporary 1-pager 30 | doc.insert_pdf(src, from_page=page.number, to_page=page.number) 31 | pdfbytes = doc.tobytes() 32 | inbytes = io.BytesIO(pdfbytes) # transform to BytesIO object 33 | outbytes = io.BytesIO() # let ocrmypdf store its result pdf here 34 | ocrmypdf.ocr( 35 | inbytes, # input 1-pager 36 | outbytes, # ouput 1-pager 37 | language="eng", # modify as required e.g. ("eng", "ger") 38 | output_type="pdf", # only need simple PDF format 39 | # add more paramneters, e.g. to enforce OCR-ing, etc., e.g. 40 | # force_ocr=True, redo_ocr=True 41 | ) 42 | ocr_pdf = fitz.open("pdf", outbytes.getvalue()) # read output as fitz PDF 43 | text = ocr_pdf[0].get_text() # ...and extract text from the page 44 | return text # return it 45 | 46 | 47 | if __name__ == "__main__": 48 | doc = fitz.open(sys.argv[1]) 49 | for page in doc: 50 | text = ocr_the_page(page) 51 | print("Text from page %i:" % page.number) 52 | print(text) 53 | -------------------------------------------------------------------------------- /OCR/scanned.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/scanned.pdf -------------------------------------------------------------------------------- /OCR/v110-changes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/OCR/v110-changes.pdf -------------------------------------------------------------------------------- /advanced-toc/colored-toc.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/colored-toc.pdf -------------------------------------------------------------------------------- /advanced-toc/colored-toc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/colored-toc.png -------------------------------------------------------------------------------- /advanced-toc/colorize.py: -------------------------------------------------------------------------------- 1 | import fitz 2 | 3 | doc = fitz.open("example.pdf") 4 | toc = doc.get_toc(False) 5 | for i, item in enumerate(toc): 6 | lvl, title, pno, ddict = item 7 | ddict["collapse"] = False 8 | if lvl == 1: 9 | ddict["color"] = (1, 0, 0) 10 | ddict["bold"] = True 11 | ddict["italic"] = False 12 | elif lvl == 2: 13 | ddict["color"] = (0, 0, 1) 14 | ddict["bold"] = False 15 | ddict["italic"] = True 16 | else: 17 | ddict["color"] = (0, 1, 0) 18 | ddict["bold"] = ddict["italic"] = False 19 | doc.set_toc_item(i, dest_dict=ddict) 20 | doc.save("new-toc.pdf") 21 | -------------------------------------------------------------------------------- /advanced-toc/example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/example.pdf -------------------------------------------------------------------------------- /advanced-toc/replaced-toc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/advanced-toc/replaced-toc.pdf 
-------------------------------------------------------------------------------- /animations/README.md: -------------------------------------------------------------------------------- 1 | This folder contains a few scripts which may best be characterized as "fun" or "entertainment" ... using PyMuPDF of course. 2 | 3 | They all work following the same basic approach: 4 | 5 | 1. Draw or write something on an empty page of a new PDF 6 | 2. Convert the page to an image 7 | 3. Show this image in a GUI (using PySimpleGUI) 8 | 4. Destroy image, page and PDF document 9 | 5. Modify some parameters 10 | 6. Start over with step 1 above in an endless loop. 11 | 12 | Because of the excellent performance of PyMuPDF (😉), this process is fast enough to be shown like a little video clip - mostly achieving more than 100 frames per second. 13 | 14 | Scripts `morph-demo1.py`, `morph-demo2.py` and `morph-demo3.py` show the effect of morphing a text box given some fixpoint. 15 | 16 | Scripts `quad-show1.py` and `quad-show2.py` simply draw quadrilaterals to demonstrate what happens when their corners are modified following certain patterns. 
17 | -------------------------------------------------------------------------------- /animations/morph-demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/animations/morph-demo1.jpg -------------------------------------------------------------------------------- /animations/quad-show2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/animations/quad-show2.jpg -------------------------------------------------------------------------------- /annotations/freetext-annot-lang.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/freetext-annot-lang.pdf -------------------------------------------------------------------------------- /annotations/freetext-annot-lang.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division, print_function 3 | 4 | import os 5 | import sys 6 | 7 | import fitz 8 | 9 | print(fitz.__doc__) 10 | if fitz.VersionBind.split(".") < ["1", "17", "0"]: 11 | sys.exit("Need PyMuPDF v1.17.0 or later.") 12 | 13 | outfile = os.path.abspath(__file__).replace(".py", ".pdf") 14 | 15 | 16 | doc = fitz.open() # new PDF 17 | page = doc.new_page() # new page 18 | 19 | text = r"""This is a text of mixed languages to generate FreeText annotations with automatic font selection - a feature new in MuPDF v1.17. 
20 | Euro: €, general Latin and other signs: | ~ ° ² ³ ñ ä ö ü ß â ¿ ¡ µ ¶ œ ¼ ½ ¾ ‰ 21 | Japan: 熊野三山本願所は、 15世紀末以降における熊野三山 (熊野本宮、 熊野新宮 22 | Greece: Στα ερείπια της πόλης, που ήταν ένα σημαντικό 23 | Korea: 에듀롬은 하나의 계정으로 전 세계 고등교육 기관의 인터넷에 접속할 24 | Russia: Ко времени восшествия на престол Якова I в значительной 25 | China: 北京作为城市的历史 可以追溯到 3,000 年前。西周初年, 周武王封召公奭于燕國。 26 | Devanagari (not supported): नि:शुल्क ज्ञानको लागी लाई धन्यबाद""".splitlines() 27 | 28 | blue = (0, 0, 1) 29 | red = (1, 0, 0) 30 | gold = (1, 1, 0) 31 | green = (0, 1, 0) 32 | 33 | # make the rectangles for filling in above text lines 34 | tl = page.rect.tl + (72, 144) # some distance from the page's corners 35 | br = page.rect.br - (72, 144) 36 | rect = fitz.Rect(tl, br) # put all annots inside this rectangle 37 | cells = fitz.make_table(rect, cols=1, rows=len(text)) 38 | shrink = (0, 5, 0, 0) # makes distance between annots 39 | for i in range(len(text)): 40 | annot = page.add_freetext_annot( 41 | cells[i][0] + shrink, 42 | text[i], 43 | fontsize=16, 44 | fontname="tiro", # used for non-CJK characters only! 
45 | align=fitz.TEXT_ALIGN_CENTER, 46 | text_color=blue, 47 | ) 48 | annot.set_border(width=1.0) 49 | annot.update(fill_color=gold, border_color=green) 50 | 51 | doc.save(outfile, garbage=3, deflate=True) 52 | -------------------------------------------------------------------------------- /annotations/new-annots-0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/new-annots-0.pdf -------------------------------------------------------------------------------- /annotations/opacity.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import fitz 5 | 6 | print(fitz.__doc__) 7 | doc = fitz.open() 8 | page = doc.new_page() 9 | 10 | annot1 = page.add_circle_annot((50, 50, 100, 100)) 11 | annot1.set_colors(fill=(1, 0, 0), stroke=(1, 0, 0)) 12 | annot1.set_opacity(2 / 3) 13 | annot1.update(blend_mode="Multiply") 14 | 15 | annot2 = page.add_circle_annot((75, 75, 125, 125)) 16 | annot2.set_colors(fill=(0, 0, 1), stroke=(0, 0, 1)) 17 | annot2.set_opacity(1 / 3) 18 | annot2.update(blend_mode="Multiply") 19 | outfile = os.path.abspath(__file__).replace(".py", ".pdf") 20 | doc.save(outfile, expand=True, pretty=True) 21 | print("saved", outfile) 22 | -------------------------------------------------------------------------------- /annotations/show-no-annots.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fitz 3 | 4 | """ 5 | Render a page with and without annotations. 6 | 7 | Please note that starting with v1.16.0, pixmaps without annotations 8 | can be created directly.
9 | """ 10 | print(fitz.__doc__) 11 | thisdir = os.path.dirname(__file__) 12 | infile = os.path.join(thisdir, "new-annots-0.pdf") 13 | src = fitz.open(infile) # a document with annotations 14 | p1 = src[0] 15 | pix1 = p1.get_pixmap(annots=True) 16 | pix1.save(os.path.join(thisdir, "with-annots.png")) # save page pixmap 17 | pix2 = p1.get_pixmap(annots=False) 18 | pix2.save(os.path.join(thisdir, "without-annots.png")) 19 | -------------------------------------------------------------------------------- /annotations/with-annots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/with-annots.png -------------------------------------------------------------------------------- /annotations/without-annots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/annotations/without-annots.png -------------------------------------------------------------------------------- /cloud-interactions/README.md: -------------------------------------------------------------------------------- 1 | This is a set of code snippets showing how to download from or upload to cloud services offered by major providers. 2 | 3 | The focus of the scripts is to demonstrate how intermediate disk storage can be avoided by using PyMuPDF Document features. 4 | 5 | We are currently considering extending `Document` creation so that cloud access is covered too. Because each cloud service provider handles this differently, this is somewhat tedious. So please bear with us until we are clear about what we need to do.
6 | -------------------------------------------------------------------------------- /cloud-interactions/from-aws-s3.py: -------------------------------------------------------------------------------- 1 | import fitz 2 | import boto3 3 | 4 | s3 = boto3.client("s3") 5 | 6 | # fill in your credentials to access the cloud 7 | response = s3.get_object(Bucket="string", Key="string") 8 | mime = response["ContentType"] 9 | body = response["Body"] 10 | 11 | # define Document with these data 12 | doc = fitz.open(mime, body.read()) 13 | -------------------------------------------------------------------------------- /cloud-interactions/from-google.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fitz # pymupdf 3 | import gcsfs # google cloud storage file system 4 | 5 | # Access the google filesystem. 6 | # You will need to supply credentials - which is omitted here 7 | fs = gcsfs.GCSFileSystem(project="my-google-project") 8 | 9 | filename = fs.ls("my-bucket")[0] # first filename in bucket 10 | ext = os.path.splitext(filename)[1] # determine file extension 11 | f = fs.open(filename, "rb") # open with that filesystem 12 | 13 | # now open with PyMuPDF using the bytes object of "f" 14 | doc = fitz.open(ext[1:], f.read()) 15 | -------------------------------------------------------------------------------- /cloud-interactions/from-ms-azure.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fitz # pymupdf 3 | from azure.storage.blob import BlobClient 4 | 5 | blob = BlobClient.from_connection_string( 6 | conn_str="my_connection_string", 7 | container_name="my_container", 8 | blob_name="my_blob", 9 | ) 10 | 11 | with open("some-file.pdf", "wb") as my_blob: 12 | blob_data = blob.download_blob() 13 | blob_data.readinto(my_blob) 14 | 15 | # now open with PyMuPDF using the bytes object of "f" 16 | doc = fitz.open("pdf", my_blob.read()) 17 | 
-------------------------------------------------------------------------------- /cloud-interactions/to-aws-s3.py: -------------------------------------------------------------------------------- 1 | import fitz 2 | import boto3 3 | 4 | # process some PDF document 5 | doc = fitz.open("...") 6 | # then write / upload it directly to AWS S3 7 | # Instead of save, we use the tobytes(), which generates a bytes object 8 | pdfbytes = doc.tobytes( # optional 'save' parameters: 9 | garbage=3, 10 | deflate=True, 11 | owner_pw="owner-password", 12 | user_pw="user-pasword", 13 | ) 14 | 15 | s3 = boto3.client("s3") 16 | request_route = "string" 17 | request_token = "string" 18 | s3.write_get_object_response( 19 | Body=pdfbytes, 20 | RequestRoute=request_route, 21 | RequestToken=request_token, 22 | ) 23 | -------------------------------------------------------------------------------- /cloud-interactions/to-ms-azure.py: -------------------------------------------------------------------------------- 1 | import fitz # pymupdf 2 | from azure.storage.blob import BlobClient 3 | 4 | # some PDF document 5 | doc = fitz.open("...") 6 | 7 | # access Azure blob client 8 | blob = BlobClient.from_connection_string( 9 | conn_str="my_connection_string", 10 | container_name="my_container", 11 | blob_name="my_blob", 12 | ) 13 | 14 | # upload document 15 | blob.upload_blob( 16 | doc.tobytes( 17 | garbage=3, 18 | deflate=True, 19 | # more parameters 20 | ) 21 | ) 22 | -------------------------------------------------------------------------------- /conversion/README.md: -------------------------------------------------------------------------------- 1 | This folder contains scripts for document conversions. 2 | 3 | Over time, more examples will be added. Currently there are: 4 | 5 | * `make-cbz.py` - convert any document to a Comic Book 6 | * `make-imagepdf.py` - convert any document to a PDF with original pages rendered to images. 
7 | * `make-page-images.py` - convert the pages of any document to PNG images. 8 | * `images-to-ocr-pdf.py` - make PDF from a list of images (one image per page), where each page contains an OCR text layer. 9 | 10 | 11 | Your contribution is welcome. This may include more conversion types, or improvements like better handling / supporting parameters of existing scripts. 12 | -------------------------------------------------------------------------------- /conversion/images-to-ocr-pdf.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility to OCR a list of images and output them as one PDF 3 | 4 | License: GNU AGPL 3.0 5 | Author: (c) Harald Lieder, harald.lieder@outlook.com 6 | Date: 2021-10-26 7 | """ 8 | import os 9 | import sys 10 | 11 | import fitz 12 | 13 | if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 0): 14 | raise ValueError("Need at least PyMuPDF v1.19.0") 15 | 16 | doc = fitz.open() # output PDF 17 | img_folder = sys.argv[1] # example: image folder name provided 18 | dirname = os.path.dirname(img_folder) 19 | img_list = os.listdir(img_folder) # some list of image filenames 20 | for img in img_list: 21 | imgfile = os.path.join(dirname, img) 22 | pix = fitz.Pixmap(imgfile) # make a pixmap form the image file 23 | pdfbytes = pix.pdfocr_tobytes(language="eng") # 1-page PDF with the OCRed image 24 | imgpdf = fitz.open("pdf", pdfbytes) # open it as a PDF 25 | doc.insert_pdf(imgpdf) # append the image page to output 26 | 27 | doc.ez_save("ocr-pdf.pdf") # save output 28 | -------------------------------------------------------------------------------- /conversion/make-cbz.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility to convert a supported document to a Comic Book archive. 
def main(doc, outfile=None, pages=None, dpi=96):
    """Render pages of a document to PNG images and store them in a CBZ file.

    Args:
        doc: an opened fitz document.
        outfile: name of the archive to write. If None, it is derived from
            the document's file name, else from this script's file name,
            else "out.cbz" is used.
        pages: iterable of 0-based page numbers to convert. None means all
            pages of the document.
        dpi: resolution of the page images.
    """
    if outfile is None:
        if doc.name:
            outfile = os.path.splitext(doc.name)[0] + ".cbz"
        elif __file__.endswith(".py"):
            # Swap the trailing extension only. str.replace() would also
            # rewrite any ".py" occurring earlier in the path.
            outfile = os.path.splitext(__file__)[0] + ".cbz"
        else:
            outfile = "out.cbz"

    if pages is None:
        pages = range(doc.page_count)

    zoom = dpi / 72  # zoom factor, standard dpi is 72
    mat = fitz.Matrix(zoom, zoom)

    # The context manager closes the archive even if rendering a page fails.
    with zipfile.ZipFile(outfile, "w", compression=zipfile.ZIP_STORED) as zipout:
        for pno in pages:
            pix = doc[pno].get_pixmap(matrix=mat)
            pix.set_dpi(dpi, dpi)  # store dpi info in image
            pagename = "p%05i.png" % (pno + 1)  # archive members are 1-based
            zipout.writestr(pagename, pix.tobytes("png"))
def main(doc, outfile=None, pages=None, dpi=96):
    """Convert pages of a document to a PDF whose pages are images only.

    Args:
        doc: an opened fitz document.
        outfile: name of the PDF to write. If None, it is derived from the
            document's file name, else from this script's file name, else
            "out.pdf" is used.
        pages: iterable of 0-based page numbers to convert. None means all
            pages of the document.
        dpi: resolution used for rendering the page images.
    """
    if outfile is None:
        if doc.name:
            outfile = os.path.splitext(doc.name)[0] + ".pdf"
        elif __file__.endswith(".py"):
            # Swap the trailing extension only. str.replace() would also
            # rewrite any ".py" occurring earlier in the path.
            outfile = os.path.splitext(__file__)[0] + ".pdf"
        else:
            outfile = "out.pdf"

    # never write over the input file (e.g. when the input is a PDF itself)
    if outfile == doc.name:
        outfile += ".pdf"

    if pages is None:
        pages = range(doc.page_count)

    zoom = dpi / 72  # zoom factor, standard dpi is 72
    mat = fitz.Matrix(zoom, zoom)
    pdfout = fitz.open()  # new empty output PDF
    try:
        for pno in pages:
            page = doc[pno]
            pix = page.get_pixmap(matrix=mat)
            pix.set_dpi(dpi, dpi)  # store dpi info in image
            # output page has the size of the input page and shows its image
            opage = pdfout.new_page(width=page.rect.width, height=page.rect.height)
            opage.insert_image(opage.rect, pixmap=pix)
        pdfout.ez_save(outfile)
    finally:
        pdfout.close()  # release the output document on errors, too
def remove_txt(cont):
    """
    Remove everything enclosed in a pair of "BT" / "ET" strings, including both.
    Assuming "cont" is the bytes object of a PDF "/Contents" stream, this will
    make all text of the owning page disappear (permanent delete).

    Tokens are separated by blanks after TAB, LF, FF and CR have been turned
    into blanks, so streams using CR or CRLF line ends are handled as well.
    Empty tokens are kept, i.e. runs of white space are not collapsed.

    Returns the remaining tokens joined by single blanks (bytes).
    """
    # Turn the other white-space bytes into blanks. With only LF replaced,
    # a token like b"BT\r" would never compare equal to b"BT".
    cont1 = cont.translate(bytes.maketrans(b"\t\n\f\r", b"    "))
    nct = []
    intext = False  # True while we are between "BT" and "ET"
    for word in cont1.split(b" "):
        if word == b"BT":
            intext = True
        elif word == b"ET":
            intext = False
        elif not intext:
            nct.append(word)

    return b" ".join(nct)
-------------------------------------------------------------------------------- /examples/attach-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/attach-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg -------------------------------------------------------------------------------- /examples/attach-images/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/attach-images/output.pdf -------------------------------------------------------------------------------- /examples/browse-document/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/browse-document/input.pdf -------------------------------------------------------------------------------- /examples/combine-pages/combine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copy a PDF document combining every 4 pages 3 | ------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2018 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python combine.py input.pdf 10 | 11 | Notes 12 | ----- 13 | (1) Output file is chosen to have A4 portrait pages. Input pages are scaled 14 | maintaining side proportions. Both can be changed, e.g. based on input 15 | page size. However, note that not all pages need to have the same size, etc. 16 | 17 | (2) Easily adapt the example to combine just 2 pages (like for a booklet) or 18 | make the output page dimension dependent on input, or whatever. 
19 | 20 | (3) This should run very fast: needed less than 25 sec on a Python 3.6 64bit, 21 | Windows 10, AMD 4.0 GHz for the 1'310 pages of the Adobe manual. 22 | Without save-options "garbage" and "deflate" this goes below 4 seconds, but 23 | results in a bigger file. 24 | 25 | Dependencies 26 | ------------ 27 | PyMuPDF 1.12.1 or later 28 | """ 29 | 30 | from __future__ import print_function 31 | import fitz, sys 32 | 33 | infile = sys.argv[1] 34 | src = fitz.open(infile) 35 | doc = fitz.open() # empty output PDF 36 | 37 | width, height = fitz.paper_size("a4") # A4 portrait output page format 38 | r = fitz.Rect(0, 0, width, height) 39 | 40 | # define the 4 rectangles per page 41 | r1 = r * 0.5 # top left rect 42 | r2 = r1 + (r1.width, 0, r1.width, 0) # top right 43 | r3 = r1 + (0, r1.height, 0, r1.height) # bottom left 44 | r4 = fitz.Rect(r1.br, r.br) # bottom right 45 | 46 | # put them in a list 47 | r_tab = [r1, r2, r3, r4] 48 | 49 | # now copy input pages to output 50 | for spage in src: 51 | if spage.number % 4 == 0: # create new output page 52 | page = doc.new_page(-1, width=width, height=height) 53 | # insert input page into the correct rectangle 54 | page.show_pdf_page( 55 | r_tab[spage.number % 4], # select output rect 56 | src, # input document 57 | spage.number, 58 | ) # input page number 59 | 60 | # by all means, save new file using garbage collection and compression 61 | doc.save("output.pdf", garbage=4, deflate=True) 62 | -------------------------------------------------------------------------------- /examples/combine-pages/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/combine-pages/input.pdf -------------------------------------------------------------------------------- /examples/combine-pages/output.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/combine-pages/output.pdf -------------------------------------------------------------------------------- /examples/convert-document/input.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-document/input.epub -------------------------------------------------------------------------------- /examples/convert-document/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/convert-document/output.pdf -------------------------------------------------------------------------------- /examples/convert-image/convert.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert an arbitrary image to a PNG pixmap using Pillow 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2022 Jorj X. 
import sys
import fitz
from PIL import Image

print(fitz.__doc__)

# the image file name is expected as the only command line argument
pic_fn = sys.argv[1] if len(sys.argv) == 2 else None

if pic_fn:
    print("Reading %s" % pic_fn)
    # convert() loads the pixel data, so the file can be closed right after;
    # the context manager also closes it if Pillow cannot read the image
    with open(pic_fn, "rb") as pic_f:
        img = Image.open(pic_f).convert("RGB")
    samples = img.tobytes()
    # build an RGB pixmap without alpha (last argument) from the raw samples
    pix = fitz.Pixmap(fitz.csRGB, img.size[0], img.size[1], samples, 0)
    pix.save("output.png")
import sys
import fitz
from PIL import Image

print(fitz.__doc__)
# explicit check: an assert would be skipped when running with "python -O"
if len(sys.argv) != 2:
    raise SystemExit("Usage: %s input.png" % sys.argv[0])

pix = fitz.Pixmap(sys.argv[1])
rgb = "RGB"
if pix.alpha:  # JPEG cannot have alpha!
    pix = fitz.Pixmap(pix, 0)  # drop alpha channel

# The samples are handed to Pillow as 3 bytes per pixel below, so any other
# colorspace (e.g. gray or CMYK) must be converted to RGB first.
if pix.n != 3:
    pix = fitz.Pixmap(fitz.csRGB, pix)

img = Image.frombuffer(rgb, [pix.width, pix.height], pix.samples, "raw", rgb, 0, 1)
img.save("output.jpg")
McKie 6 | 7 | Usage 8 | ----- 9 | python copy.py input.pdf output.pdf 10 | 11 | Notes 12 | ----- 13 | The output.pdf file generated in examples/embed-images is renamed as input.pdf 14 | to be used as the input file in this example. 15 | 16 | Dependencies 17 | ------------ 18 | PyMuPDF 19 | """ 20 | 21 | from __future__ import print_function 22 | import sys 23 | import fitz 24 | 25 | ifn = sys.argv[1] # input PDF 26 | ofn = sys.argv[2] # output PDF 27 | docin = fitz.open(ifn) 28 | docout = fitz.open(ofn) 29 | print("Copying embedded files from '%s' to '%s'" % (ifn, ofn)) 30 | for i in range(docin.embfile_count()): 31 | d = docin.embfile_info(i) # file metadata 32 | b = docin.embfile_get(i) # file content 33 | try: # safeguarding against duplicate entries 34 | print("copying entry:", d["name"]) 35 | docout.embfile_add(b, d["name"], d["file"], d["desc"]) 36 | except: 37 | pass 38 | 39 | # save output (incrementally or to new PDF) 40 | docout.saveIncr() 41 | -------------------------------------------------------------------------------- /examples/copy-embedded/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/copy-embedded/input.pdf -------------------------------------------------------------------------------- /examples/copy-embedded/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/copy-embedded/output.pdf -------------------------------------------------------------------------------- /examples/decrypt-document/decrypt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Decrypt a PDF document with the password provided and save it as a new document 3 | 
import sys
import fitz

print(fitz.__doc__)

# Explicit checks instead of asserts: asserts are removed by "python -O",
# which would also have skipped the authenticate() call below.
if len(sys.argv) != 4:
    raise SystemExit("Usage: %s input.pdf password output.pdf" % sys.argv[0])

doc = fitz.Document(sys.argv[1])
if not doc.needs_pass:
    # name the input file here, not the script
    raise SystemExit(sys.argv[1] + " not password protected")

if not doc.authenticate(sys.argv[2]):
    raise SystemExit(
        'cannot decrypt %s with password "%s"' % (sys.argv[1], sys.argv[2])
    )

# the document is authenticated now - write it to the output file
doc.save(sys.argv[3])
-------------------------------------------------------------------------------- /examples/draw-caustic/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-caustic/output.pdf -------------------------------------------------------------------------------- /examples/draw-caustic/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-caustic/output.png -------------------------------------------------------------------------------- /examples/draw-caustic/output.svgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-caustic/output.svgz -------------------------------------------------------------------------------- /examples/draw-fractal/output_carpet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-fractal/output_carpet.png -------------------------------------------------------------------------------- /examples/draw-fractal/output_punch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-fractal/output_punch.png -------------------------------------------------------------------------------- /examples/draw-fractal/output_triangle.pdf: -------------------------------------------------------------------------------- 
"""
Draw a regular polygon with a curly border
--------------------------------------------------------------------------------
License: GNU GPL V3
(c) 2017 Jorj X. McKie

Usage
-----
python draw.py

Description
-----------
Draw an arbitrary regular polygon using wavy lines instead of straight lines.
Two output files are generated: a PDF and a SVG image file. The page size is
adjusted to the drawing. This script also demonstrates how the draw commands can
be used to calculate points without actually drawing them.

Dependencies
------------
PyMuPDF
"""

import fitz

print(fitz.__doc__)

outpdf = "output.pdf"
outsvg = "output.svg"
doc = fitz.open()
page = doc.new_page()
img = page.new_shape()
nedge = 5  # number of polygon edges
breadth = 2  # wave amplitude
beta = -1.0 * 360 / nedge  # our angle, drawn clockwise
center = fitz.Point(300, 300)  # center of circle
p0 = fitz.Point(300, 200)  # start here (1st edge = north)
p1 = +p0  # save as last edge to add
points = [p0]  # to store the polygon edges

# we only use this to calculate the polygon edges
# we will delete the resp. draw commands
for _ in range(nedge - 1):
    p0 = img.draw_sector(center, p0, beta)
    points.append(p0)

# erase previous draw commands in contents buffer
img.draw_cont = ""

points.append(p1)  # add starting point to edges list
# now draw the lines along stored edges: each point is connected to its
# successor, which gives exactly "nedge" lines
for start, stop in zip(points, points[1:]):
    img.draw_squiggle(start, stop, breadth=breadth)

img.finish(color=(0, 0, 1), fill=(1, 1, 0), closePath=False)

# adjust visible page to dimensions of the drawing
page.set_cropbox(img.rect)
img.commit()
doc.save(outpdf)
# the context manager closes the SVG file even if creating the image fails
with open(outsvg, "w") as fout:
    fout.write(page.get_svg_image())
doc.close()
16 | 17 | Dependencies 18 | ------------ 19 | Pillow, numpy 20 | """ 21 | 22 | from __future__ import print_function 23 | import sys 24 | import time 25 | import fitz 26 | import numpy as np 27 | import PIL 28 | from PIL import Image 29 | 30 | print("Python:", sys.version) 31 | print("NumPy version", np.__version__) 32 | print(fitz.__doc__) 33 | print("PIL version", PIL.__version__) 34 | 35 | height = 2048 36 | width = 2028 37 | 38 | image = np.ndarray((height, width, 3), dtype=np.uint8) 39 | 40 | for i in range(height): 41 | for j in range(width): 42 | image[i, j] = np.array([i % 256, j % 256, (i + j) % 256], dtype=np.uint8) 43 | 44 | samples = image.tobytes() 45 | 46 | ttab = [(time.perf_counter(), "")] 47 | 48 | pix = fitz.Pixmap(fitz.csRGB, width, height, samples, 0) 49 | pix.save("output_fitz.png") 50 | ttab.append((time.perf_counter(), "fitz")) 51 | 52 | pix = Image.frombuffer("RGB", [width, height], samples, "raw", "RGB", 0, 1) 53 | pix.save("output_PIL.png") 54 | ttab.append((time.perf_counter(), "PIL")) 55 | 56 | for i, t in enumerate(ttab): 57 | if i > 0: 58 | print("storing with %s: %g sec." 
% (t[1], t[0] - ttab[i - 1][0])) 59 | -------------------------------------------------------------------------------- /examples/draw-rgb-area/output_PIL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-rgb-area/output_PIL.png -------------------------------------------------------------------------------- /examples/draw-rgb-area/output_fitz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-rgb-area/output_fitz.png -------------------------------------------------------------------------------- /examples/draw-sines/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/draw-sines/output.pdf -------------------------------------------------------------------------------- /examples/edit-images/figure-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-images/figure-01.jpg -------------------------------------------------------------------------------- /examples/edit-images/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-images/input.pdf -------------------------------------------------------------------------------- /examples/edit-links/input.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-links/input.pdf -------------------------------------------------------------------------------- /examples/edit-toc/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/edit-toc/input.pdf -------------------------------------------------------------------------------- /examples/embed-images/embed.py: -------------------------------------------------------------------------------- 1 | """ 2 | Embed the images found in the input directory 3 | ------------------------------------------------------------------------------- 4 | License: GNU GPL V3+ 5 | (c) 2018 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python embed.py 10 | 11 | Dependencies 12 | ------------ 13 | PyMuPDF 14 | PySimpleGUI, tkinter, optional: requires Python 3 if used 15 | """ 16 | 17 | from __future__ import print_function 18 | import os, time, sys, fitz 19 | 20 | print(fitz.__doc__) 21 | # do some adjustments whether Python v2 or v3 22 | if str is not bytes: 23 | import PySimpleGUI as psg 24 | 25 | mytime = time.perf_counter 26 | else: 27 | mytime = time.clock 28 | 29 | rc = False 30 | if str is bytes: 31 | imgdir = sys.argv[1] # where my files are 32 | else: 33 | imgdir = psg.PopupGetFolder( 34 | "Make a PDF from Embedded Files", "Enter file directory:" 35 | ) 36 | 37 | if not imgdir: 38 | raise SystemExit() 39 | 40 | t0 = mytime() # set start timer 41 | 42 | doc = fitz.open() 43 | 44 | width, height = fitz.paper_size("a4") 45 | rect = fitz.Rect(0, 0, width, height) + (36, 36, -36, -36) 46 | imglist = os.listdir(imgdir) 47 | imgcount = len(imglist) 48 | 49 | for i, f in enumerate(imglist): 50 | path = os.path.join(imgdir, f) 51 | if not os.path.isfile(path): 52 | print("skipping non-file '%s'!" 
% f) 53 | continue 54 | 55 | if str is not bytes: 56 | psg.OneLineProgressMeter( 57 | "Embedding Files", i + 1, imgcount, "dir: " + imgdir, "file: " + f 58 | ) 59 | else: 60 | print("embedding file '%s', (%i / %i)" % (f, i + 1, imgcount)) 61 | 62 | img = open(path, "rb").read() 63 | doc.embfile_add(f, img, filename=f, ufilename=f, desc=f) 64 | 65 | page = doc.new_page() # every doc needs at least one page 66 | 67 | doc.save("output.pdf") 68 | t1 = mytime() 69 | print("%g" % round(t1 - t0, 3), "sec processing time") 70 | -------------------------------------------------------------------------------- /examples/embed-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/embed-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg -------------------------------------------------------------------------------- /examples/embed-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/embed-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg -------------------------------------------------------------------------------- /examples/embed-images/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/embed-images/output.pdf -------------------------------------------------------------------------------- /examples/export-embedded/export.py: -------------------------------------------------------------------------------- 1 | """ 2 | Export an embedded file from the input document to the output document 3 | 
------------------------------------------------------------------------------- 4 | License: GNU AGPL V3 5 | (c) 2021 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python export.py input.pdf joe-caione-qO-PIF84Vxg-unsplash.jpg output.pdf 10 | 11 | Notes 12 | ----- 13 | The output.pdf file generated in examples/embed-images is renamed as input.pdf 14 | to be used as the input file in this example. 15 | 16 | Dependencies 17 | ------------ 18 | PyMuPDF 19 | """ 20 | 21 | from __future__ import print_function 22 | import sys 23 | import fitz 24 | 25 | pdffn = sys.argv[1] # PDF file name 26 | name = sys.argv[2] # embedded file identifier 27 | expfn = sys.argv[3] # filename of exported file 28 | 29 | doc = fitz.open(pdffn) # open PDF 30 | outfile = open(expfn, "wb") # to be on the safe side always open binary 31 | 32 | # extract file content. Will get exception on any error. 33 | content = doc.embfile_get(name) 34 | 35 | outfile.write(content) 36 | outfile.close() 37 | -------------------------------------------------------------------------------- /examples/export-embedded/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-embedded/input.pdf -------------------------------------------------------------------------------- /examples/export-embedded/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-embedded/output.pdf -------------------------------------------------------------------------------- /examples/export-metadata/export.py: -------------------------------------------------------------------------------- 1 | """ 2 | Export a document metadata dictionary to a CSV file 3 | ------------------------------------------------------------------------------- 4 | 
License: GNU GPL V3 5 | (c) 2018 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python export.py -d ";" input.pdf 10 | """ 11 | 12 | from __future__ import print_function 13 | import fitz 14 | import argparse 15 | 16 | parser = argparse.ArgumentParser( 17 | description="Enter CSV delimiter [;] and documment filename" 18 | ) 19 | parser.add_argument("-d", help="CSV delimiter [;]", default=";") 20 | parser.add_argument("doc", help="document filename") 21 | args = parser.parse_args() 22 | delim = args.d # requested CSV delimiter character 23 | fname = args.doc # input document filename 24 | 25 | doc = fitz.open(fname) 26 | meta = doc.metadata 27 | outf = open("output.csv", "w") 28 | for k in meta.keys(): 29 | v = meta.get(k) 30 | if not v: 31 | v = "" 32 | rec = delim.join([k, v]) 33 | outf.writelines([rec, "\n"]) 34 | outf.close() 35 | -------------------------------------------------------------------------------- /examples/export-metadata/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-metadata/input.pdf -------------------------------------------------------------------------------- /examples/export-metadata/output.csv: -------------------------------------------------------------------------------- 1 | format;PDF 1.5 2 | title; 3 | author; 4 | subject; 5 | keywords; 6 | creator;LaTeX with hyperref package 7 | producer;pdfTeX-1.40.16 8 | creationDate;D:20210318172314-04'00' 9 | modDate;D:20210318172314-04'00' 10 | trapped; 11 | encryption; 12 | -------------------------------------------------------------------------------- /examples/export-toc/export.py: -------------------------------------------------------------------------------- 1 | """ 2 | Export the table of contents (ToC) of a document to a CSV file 3 | ------------------------------------------------------------------------------- 4 | License: GNU GPL 
V3 5 | (c) 2018 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python export.py -d ";" input.pdf 10 | """ 11 | 12 | from __future__ import print_function 13 | import fitz 14 | import argparse 15 | 16 | parser = argparse.ArgumentParser( 17 | description="Enter CSV delimiter [;] and documment filename" 18 | ) 19 | parser.add_argument("-d", help="CSV delimiter [;]", default=";") 20 | parser.add_argument("doc", help="document filename") 21 | args = parser.parse_args() 22 | delim = args.d # requested CSV delimiter character 23 | fname = args.doc # input document filename 24 | 25 | doc = fitz.open(fname) 26 | toc = doc.get_toc(simple=False) 27 | ext = fname[-3:].lower() 28 | outf = open("output.csv", "w") 29 | for t in toc: 30 | t4 = t[3] 31 | if ext == "pdf": 32 | if t4["kind"] == 1: 33 | p4 = str(t4["to"].y) 34 | else: 35 | p4 = "0" 36 | else: 37 | p4 = "0" 38 | rec = delim.join([str(t[0]), t[1].strip(), str(t[2]), p4]) 39 | outf.writelines([rec, "\n"]) 40 | outf.close() 41 | -------------------------------------------------------------------------------- /examples/export-toc/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/export-toc/input.pdf -------------------------------------------------------------------------------- /examples/extract-images/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/input.pdf -------------------------------------------------------------------------------- /examples/extract-images/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/output/.gitkeep 
-------------------------------------------------------------------------------- /examples/extract-images/output/img00005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/output/img00005.png -------------------------------------------------------------------------------- /examples/extract-images/output/img00011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-images/output/img00011.png -------------------------------------------------------------------------------- /examples/extract-table/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-table/input.pdf -------------------------------------------------------------------------------- /examples/extract-xobj/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-xobj/input.pdf -------------------------------------------------------------------------------- /examples/extract-xobj/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/extract-xobj/output.pdf -------------------------------------------------------------------------------- /examples/filmfestival-2tables/README.md: -------------------------------------------------------------------------------- 1 | # Example for PyMuPDF Reporting 2 | 3 | This script creates a report about a fictitious film 
festival. 4 | 5 | It extracts data from an SQL database (sqlite3). The database contains two tables: 6 | * films 7 | * actors 8 | 9 | The **_films_** table has columns **title**, **director**, **year** and the **_actors_** table has columns **name** and **film** (the film title). 10 | 11 | Two tabular reports are created in one common PDF. 12 | 1. Report 1 lists all films and names all actors being cast. 13 | 2. Report 2 lists all actors together with all the films where they have been cast. 14 | 15 | The following are noteworthy details: 16 | * Demonstrate how to use fonts from the [pymupdf-fonts](https://pypi.org/project/pymupdf-fonts/) package. 17 | * Demonstrate how to combine multiple report sections (here: two table sections) in one report. 18 | * Due to MuPDF's automatic layout algorithm, major layout changes can be achieved without coding effort, like 19 | - choice of page size or paper format 20 | - choice of number of columns per page 21 | * The layout can only be influenced via the HTML and styling (CSS) definitions.
-------------------------------------------------------------------------------- /examples/filmfestival-2tables/filmfestival.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/filmfestival-2tables/filmfestival.db -------------------------------------------------------------------------------- /examples/filmfestival-2tables/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/filmfestival-2tables/output.pdf -------------------------------------------------------------------------------- /examples/icons/PyMuPDF.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/icons/PyMuPDF.ico -------------------------------------------------------------------------------- /examples/icons/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /examples/icons/ico_pdf.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # This file was generated by img2py.py 3 | # 4 | from wx.lib.embeddedimage import PyEmbeddedImage 5 | 6 | img = PyEmbeddedImage( 7 | "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABHNCSVQICAgIfAhkiAAAA4BJ" 8 | "REFUWIXFll1oHFUUx393dyabrbvsbrZJuqlCxNWskppWMo3Yl9hiIlqFWjCFvhrBF0GK+lIf" 9 | "fLR5EPzCJ1Fr1AcFsR8iRGuMIIZSqaZGXC3YxK8YTGN0d7vJneNDOmv2c2aziR4YuGfuOed/" 10 | "5pz/PXPhfxa1VhkYGEhlMplpEdkUsGAwyNjYWAL4tWICk5OTYlkWIoKThLMWEZRSRXrpfi0d" 11 | 
"IBwO09/fz8TERBDIARhrE6gFDmDb9rrBRYSFhQWi0ShAEpgC8JWWqd7gXsGdx7btIryyBNYL" 12 | "Fpr+kivN4ZrgIoLWugirqAXrBn9oEC7PszUQZPH18argQFkFqibgFdwOhpBvzoHPQC3nyoha" 13 | "al9agYY5EP7u/KrjygriN6p+udP/DefA5a6eQqRP7nsYrXVNQnqqQF1tyGX+9d13oKb92veO" 14 | "NMwBXz6PyudAw65dPYVZUc3e0ymopxJgI4Eg2OLJf0M5ABC98BUIoOD8H4uu9hvGAWc9f/hO" 15 | "Zn/Kgs9H1+3Xu9q7VqAe8JVIjHjsGr544zRTsxkSMQpzoFos1wpUSqRasMUOxcT3f7O3r4/O" 16 | "104izc2cnZmp+SGeK+BWhVwowg3btjDz5im01mQtCwyDbXfdVtN/Q1ogIoS7DH64lOHuO/oK" 17 | "+0q309MRZE6VT0RH95SAG3h8/CThcBMX3/4QrTXaNLmyJQxPHAUgsbuVxKMP0vHKMf6MtBb5" 18 | "lyZQdCMSEVlaWqo9zZQi3r8dlV09/+rgEHz+KT9OfYtPQ8IPRrwJcssQMGHHbj4+8hzJtjha" 19 | "awYHB0mn0zu4eiHx9Dte9jfRNvoscuEcanvn6o8nFuX5o8exWiNcN/wUzaaJbdvMidCxtxNE" 20 | "4JadyNRZsr/9jLS2VORAzVGcD0VpO3Ar7LsfGX0JdW0nnDnBZy9/xE2pFEMl7HbWs2MX+f3E" 21 | "W+x89RnefXoUq7u7ENvzKI48fhgVifHCA48xsGcPN7a0w8gRTh17DyuVcj0tW/cfYubeISyX" 22 | "UVyWAMCKMmDyDGIGGB5+EmN/F8r0c3rkfazeXs9HtZLuaRT7dJ5LL36Ayv5F4GAvxx8ZYfqd" 23 | "r+ltEBzq4EDo5m7mx38B4J6rTo2Ce2pBI8G9gG/atdyLDuUtKBpEyWRS0ul0EVEcx2ritr9W" 24 | "TNPE7/dj23Y7MFfJpkUplQWWN+NRSi0Chzxn/F/IPzlRiedgNxiOAAAAAElFTkSuQmCC" 25 | ) 26 | -------------------------------------------------------------------------------- /examples/icons/pdf.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # This file was generated by img2py.py 3 | # 4 | from wx.lib.embeddedimage import PyEmbeddedImage 5 | 6 | img = PyEmbeddedImage( 7 | "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABHNCSVQICAgIfAhkiAAABA5J" 8 | "REFUWIXtll1MU2cYx3/nox+0BVqB0lEhQNkUhzqyiWZxUTxL5sjmEm5MNsKyZC5L5uUSNubY" 9 | "XBSmxkSNmduFM3Zhbl4w3RY/yOpGsmRLNBHdIARFHCKfhfJdPtpzdtEVqIyWBNAb/zfnfZ8n" 10 | "7/P/5X3Oe94j8KBKz3+HzhJg1GvEnDw+J7+UOqmUSBGBwg92kJybC4DeFFhWcwCbs0mOCIi6" 11 | "Ilp/S0RvCYE5n+tfVoArVY3ynGDW1kHG+g0A08+lVMf1/QDUVdYDzAUAMK2YWHLjsP4zDktc" 12 | "NqMF6jHAY4D/PwWz9Jqs3DtXwU4ENNpJF77ynA1WKPmintARFYBenhCOeH4Ir/F+qDydFEci" 13 
| "Atp7n1H7xZQnfr76MXfgXAU7Py0nbawRJ07aubnHRxARDcF9lBXVR3GRQqe2T9kUcv/TlxRH" 14 | "YnkZGZUVZBc/yTvR6kcHaKv2AbQO0GRaTTfAq+Y/isLp0ioa36jk3epjuFARX/cqT13c+NGb" 15 | "AC19NG0vgBcbPWeiWURvgRjic3e+XL/5yMXcqSAbfnKc+gbeKgAQ3vbcBV7QTii/0ItL9ZOG" 16 | "gADw/bcYhPc91VHrs9CXMNg3uWsH+T/+wwnMGdMX2HGbskbbr2yiF9fYbZJ9OurQ0ADch3As" 17 | "pHT0HdAmpg7uJQHiZeG05yw2EgEOfY5dAA0LfcerUGWJ7PQUhMsOj1biV9SDe0kouyCf4SVM" 18 | "iwNImvRtKOUy/VevAc+HwwWlXAK4L9FRGGDLylO11QxKAsDKIn52BhG22QMtVzpZGwtAiJgp" 19 | "H39J5lZLrEWL0kmlZPY0+g5MdPrx1XYTX2DDnJs4He86fXd6PNo4huvAmjk502oLCRuTY/FE" 20 | "B/A3DxXb3XJN17B3NkCx3S0D1N9n4JksrKhKu9WI/utJjz2cq7mV07F4ANEotfTiQzQbGfy9" 21 | "h8TNdoCWXkYA7qR78u6oE+r63qJmAAy/ai0Todx66239jVjuMQGEOKmhhxEcCRa57ZM2oC2Q" 22 | "V/tsQ0/IhFRANIg3hvMCsv/v8Thx32iDGjqGBhmR1NgAkT+l2VtewZqpn9kBvaiONg8T6JtS" 23 | "pSRZ1Tl0DF/zqZJVUnUOHUNXfXjPd4MoqDqHLpDrXqcGRsZVjMKUmKFjqM6LbVtKhMd1d83s" 24 | "afQPkc5mwJRlxJCqx5RlxJRlJHPPqumx3irPjJNkNEkgbbcLU5YRs9OAIVXPvcO3olnEvA1J" 25 | "2+1i+KYP0SBiXhV6EVN3Zc5UkEU0SZizZuSvAeQ4EWNOwuIAAOLX2SLmkmGmddo8ayxrrQsp" 26 | "/ch/SB45QKgFheWH0cgnGAToeLgA2w9cAEANTiKK83V02TTTAlFS8Q/pHj7ApbIi0ASC4xo5" 27 | "SteyurV6jj0Y+hezjEXWj4VyvAAAAABJRU5ErkJggg==" 28 | ) 29 | -------------------------------------------------------------------------------- /examples/icons/pymupdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/icons/pymupdf.png -------------------------------------------------------------------------------- /examples/import-embedded/import.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import a file to a document 3 | ------------------------------------------------------------------------------- 4 | License: GNU AGPL V3 5 | (c) 2023 Jorj X. 
McKie 6 | 7 | Usage 8 | ----- 9 | python import.py input.pdf joe-caione-qO-PIF84Vxg-unsplash.jpg -o output.pdf 10 | 11 | Dependencies 12 | ------------ 13 | PyMuPDF 14 | """ 15 | 16 | from __future__ import print_function 17 | import fitz 18 | import argparse 19 | 20 | parser = argparse.ArgumentParser( 21 | description="Enter PDF, file to embed, and optional name, description and output pdf." 22 | ) 23 | parser.add_argument("pdf", help="PDF filename") 24 | parser.add_argument("file", help="name of embedded file") 25 | parser.add_argument("-n", "--name", help="name for embedded file entry (default: file)") 26 | parser.add_argument("-d", "--desc", help="description (default: file)") 27 | parser.add_argument("-o", "--output", help="output PDF (default: modify pdf)") 28 | 29 | args = parser.parse_args() 30 | 31 | if not args.name: 32 | name = args.file 33 | desc = args.desc 34 | if not args.desc: 35 | desc = args.file 36 | 37 | content = open(args.file, "rb").read() 38 | doc = fitz.open(args.pdf) 39 | doc.embfile_add(name, content, args.file, desc) 40 | 41 | if not args.output: 42 | doc.saveIncr() 43 | else: 44 | doc.save(args.output, garbage=4, deflate=True) 45 | -------------------------------------------------------------------------------- /examples/import-embedded/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-embedded/input.pdf -------------------------------------------------------------------------------- /examples/import-embedded/joe-caione-qO-PIF84Vxg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-embedded/joe-caione-qO-PIF84Vxg-unsplash.jpg -------------------------------------------------------------------------------- 
/examples/import-embedded/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-embedded/output.pdf -------------------------------------------------------------------------------- /examples/import-metadata/import.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import a metadata dictionary from a CSV file into a PDF document 3 | ------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2023 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python import.py -d ";" -x "n" -csv input.csv -pdf input.pdf 10 | 11 | Description 12 | ----------- 13 | The output.csv file generated in examples/export-metadata is renamed as input.csv 14 | to be used as an input file in this example. The input.pdf file behaves as both 15 | an input and an output file. 16 | 17 | Dependencies 18 | ------------ 19 | PyMuPDF 20 | """ 21 | 22 | import csv 23 | import fitz 24 | import argparse 25 | 26 | parser = argparse.ArgumentParser( 27 | description="Enter CSV delimiter [;], CSV filename and documment filename" 28 | ) 29 | parser.add_argument("-d", help="CSV delimiter [;]", default=";") 30 | parser.add_argument("-x", help="delete XML info [n]", default="n") 31 | parser.add_argument("-csv", help="CSV filename") 32 | parser.add_argument("-pdf", help="PDF filename") 33 | 34 | args = parser.parse_args() 35 | 36 | assert args.csv, "missing CSV filename" 37 | assert args.pdf, "missing PDF filename" 38 | 39 | print("delimiter", args.d) 40 | print("xml delete", args.x) 41 | print("csv file", args.csv) 42 | print("pdf file", args.pdf) 43 | print("----------------------------------------") 44 | 45 | doc = fitz.open(args.pdf) 46 | oldmeta = doc.metadata 47 | print("old metadata:") 48 | for k, v in oldmeta.items(): 49 | print(k, ":", v) 50 | 51 | with 
open(args.csv) as tocfile: 52 | tocreader = csv.reader(tocfile, delimiter=args.d) 53 | for row in tocreader: 54 | oldmeta[row[0]] = row[1] 55 | 56 | print("----------------------------------------") 57 | print("\nnew metadata:") 58 | for k, v in oldmeta.items(): 59 | print(k, ":", v) 60 | 61 | doc.set_metadata(oldmeta) 62 | doc.saveIncr() 63 | -------------------------------------------------------------------------------- /examples/import-metadata/input.csv: -------------------------------------------------------------------------------- 1 | format;PDF 1.5 2 | title; 3 | author; 4 | subject; 5 | keywords; 6 | creator;LaTeX with hyperref package 7 | producer;pdfTeX-1.40.16 8 | creationDate;D:20210318172314-04'00' 9 | modDate;D:20210318172314-04'00' 10 | trapped; 11 | encryption; 12 | -------------------------------------------------------------------------------- /examples/import-metadata/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-metadata/input.pdf -------------------------------------------------------------------------------- /examples/import-toc/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/import-toc/input.pdf -------------------------------------------------------------------------------- /examples/insert-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-images/input/erik-jan-leusink-s2mkB4WOl9k-unsplash.jpg -------------------------------------------------------------------------------- 
/examples/insert-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-images/input/joe-caione-qO-PIF84Vxg-unsplash.jpg -------------------------------------------------------------------------------- /examples/insert-images/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-images/output.pdf -------------------------------------------------------------------------------- /examples/insert-logo/file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Insert the MuPDF logo in PNG format in all pages of a PDF document 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2018-2019 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python file.py input.pdf logo.png 10 | 11 | Description 12 | ----------- 13 | Any PyMuPDF-supported document can be used as the logo/watermark including PDF, 14 | XPS, EPUB, CBZ, FB2 as well as any image type. SVG-based logos are not always 15 | shown correctly. Use a different PDF converter like svglib if that occurs. 16 | 17 | Logos/watermarks are transparent for all document types except for images. 
If a 18 | transparency is required then the file must be manually converted to PDF first 19 | as described next: 20 | 21 | pix = fitz.Pixmap(logo_filename) 22 | src = fitz.open() 23 | src_page = src.new_page(-1, width = pix.width, height = pix.height) 24 | src_page.insert_image(src_page.rect, pixmap = pix) 25 | 26 | Dependencies 27 | ------------ 28 | PyMuPDF 29 | """ 30 | 31 | import sys 32 | import fitz 33 | 34 | src = fitz.open(sys.argv[2]) # logo file (second command line argument) 35 | 36 | if not src.is_pdf: # non-PDF logos are converted to an in-memory PDF first 37 | pdfbytes = src.convert_to_pdf() 38 | src.close() 39 | src = fitz.open("pdf", pdfbytes) 40 | 41 | rect = src[0].rect # rectangle of the logo's first page 42 | factor = 25 / rect.height # scale factor that brings the logo height to 25 43 | rect *= factor 44 | 45 | doc = fitz.open(sys.argv[1]) # document receiving the logo 46 | xref = 0 # result of the previous show_pdf_page call, handed to the next one 47 | for page in doc: 48 | xref = page.show_pdf_page(rect, src, 0, reuse_xref=xref, overlay=False) 49 | doc.save("output_file.pdf", garbage=4) 50 | -------------------------------------------------------------------------------- /examples/insert-logo/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/input.pdf -------------------------------------------------------------------------------- /examples/insert-logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/logo.png -------------------------------------------------------------------------------- /examples/insert-logo/output_file.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/output_file.pdf -------------------------------------------------------------------------------- /examples/insert-logo/output_svg.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/insert-logo/output_svg.pdf -------------------------------------------------------------------------------- /examples/insert-logo/svg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Insert the MuPDF logo in SVG format in all pages of a PDF document 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2018-2019 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python svg.py input.pdf logo.svg 10 | 11 | Dependencies 12 | ------------ 13 | PyMuPDF, svglib 14 | """ 15 | 16 | import sys 17 | import fitz 18 | from svglib.svglib import svg2rlg 19 | 20 | drawing = svg2rlg(sys.argv[2]) 21 | pdfbytes = drawing.asString("pdf") 22 | 23 | src = fitz.open("pdf", pdfbytes) 24 | 25 | rect = src[0].rect 26 | factor = 25 / rect.height 27 | rect *= factor 28 | 29 | doc = fitz.open(sys.argv[1]) 30 | for page in doc: 31 | xref = page.show_pdf_page(rect, src, 0, overlay=True) 32 | doc.save("output_svg.pdf", garbage=4) 33 | -------------------------------------------------------------------------------- /examples/join-documents/input/made-with-cc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/join-documents/input/made-with-cc.pdf -------------------------------------------------------------------------------- /examples/join-documents/input/thinkpython2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/join-documents/input/thinkpython2.pdf 
-------------------------------------------------------------------------------- /examples/join-documents/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/join-documents/output.pdf -------------------------------------------------------------------------------- /examples/list-embedded/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/list-embedded/input.pdf -------------------------------------------------------------------------------- /examples/list-embedded/list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Print a list of embedded files in a document 3 | ------------------------------------------------------------------------------- 4 | License: GNU AGPL V3 5 | (c) 2021 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python list.py input.pdf 10 | 11 | Notes 12 | ----- 13 | The output.pdf file generated in examples/embed-images is renamed as input.pdf 14 | to be used as the input file in this example. 
15 | 16 | Dependencies 17 | ------------ 18 | PyMuPDF 19 | """ 20 | 21 | from __future__ import print_function 22 | import sys 23 | import fitz 24 | 25 | fn = sys.argv[1] 26 | doc = fitz.open(fn) 27 | 28 | name_len = filename_len = 0 29 | total_len = total_size = 0 30 | 31 | ef_list = [] 32 | 33 | for i in range(doc.embfile_count()): 34 | info = doc.embfile_info(i) 35 | ef = ( 36 | info["name"], 37 | info["filename"], 38 | info["length"], 39 | info["size"], 40 | ) 41 | ef_list.append(ef) 42 | name_len = max(len(ef[0]), name_len) 43 | filename_len = max(len(ef[1]), filename_len) 44 | total_len += ef[2] 45 | total_size += ef[3] 46 | 47 | if len(ef_list) < 1: 48 | print("no embedded files in", fn) 49 | exit(1) 50 | 51 | ratio = float(total_size) / total_len 52 | saves = 1 - ratio 53 | 54 | header = ( 55 | "Name".ljust(name_len + 4) 56 | + "Filename".ljust(filename_len + 4) 57 | + "Length".rjust(10) 58 | + "Size".rjust(11) 59 | ) 60 | line = "-".ljust(len(header), "-") 61 | print(line) 62 | print(header) 63 | print(line) 64 | for info in ef_list: 65 | print( 66 | info[0].ljust(name_len + 3), 67 | info[1].ljust(filename_len + 3), 68 | str(info[2]).rjust(10), 69 | str(info[3]).rjust(10), 70 | ) 71 | print(line) 72 | print(len(ef_list), "embedded files in '%s'. Totals:" % (fn,)) 73 | print( 74 | "File lengths: %s, compressed: %s, ratio: %s%% (savings: %s%%)." 
75 | % (total_len, total_size, str(round(ratio * 100, 2)), str(round(saves * 100, 2))) 76 | ) 77 | print(line) 78 | -------------------------------------------------------------------------------- /examples/make-calendar/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/make-calendar/output.pdf -------------------------------------------------------------------------------- /examples/optimize-document/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/optimize-document/input.pdf -------------------------------------------------------------------------------- /examples/optimize-document/optimize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Optimize a PDF document with FileOptimizer. 3 | ------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2022 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python optimize.py input.pdf 10 | 11 | Notes 12 | ----- 13 | Since "/Producer" and "/Creator" get affected by this, the document metadata is 14 | first saved to be restored after the optimization is completed. This means 15 | non-compressed object definitions are also accepted as created by FileOptimizer. 
16 | 17 | Dependencies 18 | ------------ 19 | FileOptimizer 20 | """ 21 | 22 | from __future__ import print_function 23 | import fitz 24 | import sys, os, subprocess, tempfile, time 25 | 26 | assert len(sys.argv) == 2, "need filename parameter" 27 | fn = sys.argv[1] 28 | assert fn.lower().endswith(".pdf"), "must be a PDF file" 29 | 30 | fullname = os.path.abspath(fn) # get the full path & name 31 | t0 = time.perf_counter() # save current time 32 | doc = fitz.open(fullname) # open PDF to save metadata 33 | meta = doc.metadata 34 | doc.close() 35 | 36 | t1 = time.perf_counter() # save current time again 37 | subprocess.call(["fileoptimizer64", fullname]) # now invoke super optimizer 38 | t2 = time.perf_counter() # save current time again 39 | 40 | cdir = os.path.split(fullname)[0] # split dir from filename 41 | fnout = tempfile.mkstemp(suffix=".pdf", dir=cdir) # create temp pdf name 42 | doc = fitz.open(fullname) # open now optimized PDF 43 | doc.set_metadata(meta) # restore old metadata 44 | doc.save(fnout[1], garbage=4) # save temp PDF with it, a little sub opt 45 | doc.close() # close it 46 | 47 | os.remove(fn) # remove super optimized file 48 | os.close(fnout[0]) # close temp file 49 | os.rename(fnout[1], fn) # and rename it to original filename 50 | t3 = time.perf_counter() # save current time again 51 | 52 | # put out runtime statistics 53 | print("Timings:") 54 | print(str(round(t1 - t0, 4)).rjust(10), "save old metata") 55 | print(str(round(t2 - t1, 4)).rjust(10), "execute FileOptimizer") 56 | print(str(round(t3 - t2, 4)).rjust(10), "restore old metadata") 57 | -------------------------------------------------------------------------------- /examples/posterize-document/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/posterize-document/input.pdf 
-------------------------------------------------------------------------------- /examples/posterize-document/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/posterize-document/output.pdf -------------------------------------------------------------------------------- /examples/posterize-document/posterize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create a PDF copy with split-up pages (posterize) 3 | -------------------------------------------------------------------------------- 4 | License: GNU AGPL V3.0+ 5 | (c) 2018 Jorj X. McKie 6 | 7 | Usage 8 | ------ 9 | python posterize.py input.pdf 10 | 11 | Description 12 | ----------- 13 | The output.pdf file contains 4 pages for every input page. The top-left, 14 | top-right, bottom-left, bottom-right parts of the page are now separate pages. 15 | The page dimensions are 1/4 page of the input file. 
16 | 17 | Dependencies 18 | ------------ 19 | PyMuPDF 20 | """ 21 | 22 | from __future__ import print_function 23 | import fitz, sys 24 | 25 | src = fitz.open(sys.argv[1]) # source document to split up 26 | doc = fitz.open() # new, empty output PDF 27 | 28 | for spage in src: 29 | xref = 0 # reset per source page; updated by show_pdf_page below 30 | r = spage.rect # full rectangle of the source page 31 | d = fitz.Rect(spage.cropbox_position, spage.cropbox_position) # offset built from the cropbox position 32 | 33 | r1 = r * 0.5 # top left 34 | r2 = r1 + (r1.width, 0, r1.width, 0) # top right 35 | r3 = r1 + (0, r1.height, 0, r1.height) # bottom left 36 | r4 = fitz.Rect(r1.br, r.br) # bottom right 37 | rect_list = [r1, r2, r3, r4] 38 | 39 | for rx in rect_list: 40 | rx += d # shift the quadrant by the cropbox offset 41 | page = doc.new_page(-1, width=rx.width, height=rx.height) # one output page per quadrant 42 | xref = page.show_pdf_page( 43 | page.rect, 44 | src, 45 | spage.number, 46 | clip=rx, 47 | reuse_xref=xref, 48 | ) 49 | 50 | doc.save("output.pdf", garbage=4, deflate=True) 51 | -------------------------------------------------------------------------------- /examples/print-hsv/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/print-hsv/output.pdf -------------------------------------------------------------------------------- /examples/print-rgb/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/print-rgb/output.pdf -------------------------------------------------------------------------------- /examples/replace-image/input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/input.jpg -------------------------------------------------------------------------------- /examples/replace-image/input.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/input.pdf -------------------------------------------------------------------------------- /examples/replace-image/output_remove.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/output_remove.pdf -------------------------------------------------------------------------------- /examples/replace-image/output_replace.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/replace-image/output_replace.pdf -------------------------------------------------------------------------------- /examples/replace-image/remove.py: -------------------------------------------------------------------------------- 1 | """ 2 | Remove an image identified by xref 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2022 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python remove.py 10 | 11 | Description 12 | ----------- 13 | This script actually performs a pseudo-removal: it replaces the image with a small, 14 | fully transparent pixmap.
15 | """ 16 | 17 | import fitz 18 | from replace import img_replace 19 | 20 | if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 5): 21 | raise ValueError("Need v1.19.5+") 22 | 23 | doc = fitz.open("input.pdf") 24 | 25 | page = doc[0] 26 | 27 | images = page.get_images() # we only are interested in first image here 28 | item = images[0] 29 | old_xref = item[0] 30 | 31 | pix = fitz.Pixmap(fitz.csGRAY, (0, 0, 1, 1), 1) 32 | pix.clear_with() 33 | img_replace(page, old_xref, pixmap=pix) 34 | 35 | doc.ez_save("output_remove.pdf", garbage=4) 36 | -------------------------------------------------------------------------------- /examples/replace-image/replace.py: -------------------------------------------------------------------------------- 1 | """ 2 | Replace an image identified by xref 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2022 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python replace.py 10 | """ 11 | 12 | import fitz 13 | 14 | if tuple(map(int, fitz.VersionBind.split("."))) < (1, 19, 5): 15 | raise ValueError("Need v1.19.5+") 16 | 17 | 18 | def img_replace(page, xref, filename=None, stream=None, pixmap=None): 19 | """Replace image identified by xref. 20 | 21 | Args: 22 | page: a fitz.Page object 23 | xref: cross reference number of image to replace 24 | filename, stream, pixmap: must be given as for 25 | page.insert_image(). 
26 | 27 | """ 28 | if bool(filename) + bool(stream) + bool(pixmap) != 1: 29 | raise ValueError("Exactly one of filename/stream/pixmap must be given") 30 | doc = page.parent # the owning document 31 | # insert new image anywhere in page 32 | new_xref = page.insert_image( 33 | page.rect, filename=filename, stream=stream, pixmap=pixmap 34 | ) 35 | doc.xref_copy(new_xref, xref) # copy over new to old 36 | last_contents_xref = page.get_contents()[-1] 37 | # new image insertion has created a new /Contents source, 38 | # which we will set to spaces now 39 | doc.update_stream(last_contents_xref, b" ") 40 | 41 | 42 | if __name__ == "__main__": 43 | doc = fitz.open("input.pdf") 44 | img_file = "input.jpg" 45 | page = doc[0] 46 | images = page.get_images() # we only are interested in first image here 47 | item = images[0] 48 | old_xref = item[0] 49 | img_replace(page, old_xref, filename=img_file) 50 | doc.ez_save("output_replace.pdf", garbage=4, pretty=True) 51 | -------------------------------------------------------------------------------- /examples/split-document/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/input.pdf -------------------------------------------------------------------------------- /examples/split-document/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/.gitkeep -------------------------------------------------------------------------------- /examples/split-document/output/input-0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-0.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-1.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-10.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-100.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-100.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-101.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-101.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-102.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-102.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-103.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-103.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-104.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-104.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-105.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-105.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-106.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-106.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-107.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-107.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-108.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-108.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-109.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-109.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-11.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-110.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-110.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-111.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-111.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-112.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-112.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-113.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-113.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-114.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-114.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-115.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-115.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-116.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-116.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-117.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-117.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-118.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-118.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-119.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-119.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-12.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-120.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-120.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-121.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-121.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-122.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-122.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-123.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-123.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-124.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-124.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-125.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-125.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-126.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-126.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-127.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-127.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-128.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-128.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-129.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-129.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-13.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-13.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-130.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-130.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-131.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-131.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-132.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-132.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-133.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-133.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-134.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-134.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-135.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-135.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-136.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-136.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-137.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-137.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-138.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-138.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-139.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-139.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-14.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-140.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-140.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-141.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-141.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-142.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-142.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-143.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-143.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-144.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-144.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-145.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-145.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-146.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-146.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-147.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-147.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-148.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-148.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-149.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-149.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-15.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-150.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-150.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-151.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-151.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-152.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-152.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-153.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-153.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-154.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-154.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-155.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-155.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-156.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-156.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-157.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-157.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-158.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-158.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-159.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-159.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-16.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-16.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-160.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-160.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-161.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-161.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-162.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-162.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-163.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-163.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-164.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-164.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-165.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-165.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-166.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-166.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-167.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-167.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-168.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-168.pdf 
-------------------------------------------------------------------------------- /examples/split-document/output/input-169.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-169.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-17.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-17.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-170.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-170.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-171.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-171.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-172.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-172.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-173.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-173.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-174.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-174.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-175.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-175.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-18.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-18.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-19.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-19.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-2.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-20.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-20.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-21.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-22.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-22.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-23.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-23.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-24.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-25.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-25.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-26.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-26.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-27.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-27.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-28.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-28.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-29.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-29.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-3.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-30.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-30.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-31.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-31.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-32.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-32.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-33.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-33.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-34.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-34.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-35.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-35.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-36.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-36.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-37.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-37.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-38.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-38.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-39.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-39.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-4.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-40.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-40.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-41.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-41.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-42.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-42.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-43.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-43.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-44.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-44.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-45.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-45.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-46.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-46.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-47.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-47.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-48.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-48.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-49.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-49.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-5.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-50.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-50.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-51.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-51.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-52.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-52.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-53.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-53.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-54.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-54.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-55.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-55.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-56.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-56.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-57.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-57.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-58.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-58.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-59.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-59.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-6.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-60.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-60.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-61.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-61.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-62.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-62.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-63.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-63.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-64.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-64.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-65.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-65.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-66.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-66.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-67.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-67.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-68.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-68.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-69.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-69.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-7.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-70.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-70.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-71.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-71.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-72.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-72.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-73.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-73.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-74.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-74.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-75.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-75.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-76.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-76.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-77.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-77.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-78.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-78.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-79.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-79.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-8.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-80.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-80.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-81.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-81.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-82.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-82.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-83.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-83.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-84.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-84.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-85.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-85.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-86.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-86.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-87.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-87.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-88.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-88.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-89.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-89.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-9.pdf -------------------------------------------------------------------------------- 
/examples/split-document/output/input-90.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-90.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-91.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-91.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-92.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-92.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-93.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-93.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-94.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-94.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-95.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-95.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-96.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-96.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-97.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-97.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-98.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-98.pdf -------------------------------------------------------------------------------- /examples/split-document/output/input-99.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/split-document/output/input-99.pdf -------------------------------------------------------------------------------- /examples/split-document/split.py: -------------------------------------------------------------------------------- 1 | """ 2 | Split a PDF document into multiple pages (1 per page) 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2022 Jorj X. 
McKie 6 | 7 | Usage 8 | ----- 9 | python split.py input.pdf 10 | """ 11 | 12 | import sys 13 | import fitz 14 | 15 | fn = sys.argv[1] 16 | fn1 = fn[:-4] 17 | src = fitz.open(fn) 18 | for i in range(len(src)): 19 | doc = fitz.open() 20 | doc.insert_pdf(src, from_page=i, to_page=i) 21 | doc.save("./output/%s-%i.pdf" % (fn1, i)) 22 | doc.close() 23 | -------------------------------------------------------------------------------- /examples/test-blendmode/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/test-blendmode/output.pdf -------------------------------------------------------------------------------- /examples/tile-image/input.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/input.jpg -------------------------------------------------------------------------------- /examples/tile-image/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/.gitkeep -------------------------------------------------------------------------------- /examples/tile-image/output/target-00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-00.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-01.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-02.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-10.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-11.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-12.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-20.png -------------------------------------------------------------------------------- 
/examples/tile-image/output/target-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-21.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-22.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-30.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-31.png -------------------------------------------------------------------------------- /examples/tile-image/output/target-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/tile-image/output/target-32.png -------------------------------------------------------------------------------- /examples/tile-image/tile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tile an image into 3 x 4 tiles 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 
| (c) 2022 Jorj X. McKie 6 | 7 | Usage 8 | ----- 9 | python tile.py input.jpg 10 | 11 | Description 12 | ----------- 13 | This script demonstrates some of MuPDF's non-PDF graphic capabilities. 14 | """ 15 | 16 | import sys 17 | import fitz 18 | 19 | print(fitz.__doc__) 20 | assert len(sys.argv) == 2, "Usage: %s " % sys.argv[0] 21 | 22 | pix0 = fitz.Pixmap(sys.argv[1]) 23 | tar_cs = pix0.colorspace 24 | tar_width = pix0.width * 3 25 | tar_height = pix0.height * 4 26 | tar_irect = fitz.IRect(0, 0, tar_width, tar_height) 27 | tar_pix = fitz.Pixmap(tar_cs, tar_irect, pix0.alpha) 28 | tar_pix.clear_with(90) 29 | 30 | for i in list(range(4)): 31 | y = i * pix0.height 32 | for j in list(range(3)): 33 | x = j * pix0.width 34 | pix0.set_origin(x, y) 35 | tar_pix.copy(pix0, pix0.irect) 36 | fn = "./output/target-" + str(i) + str(j) + ".png" 37 | tar_pix.save(fn) 38 | -------------------------------------------------------------------------------- /examples/view-document/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/view-document/input.pdf -------------------------------------------------------------------------------- /examples/view-document/view.py: -------------------------------------------------------------------------------- 1 | """ 2 | Object-oriented (OOP) PDF viewer using wxPython 3 | -------------------------------------------------------------------------------- 4 | License: GNU GPL V3 5 | (c) 2022 Jorj X. 
McKie 6 | 7 | Usage 8 | ----- 9 | python view.py input.pdf 10 | 11 | Dependencies 12 | ------------ 13 | wxPython 14 | """ 15 | 16 | import sys 17 | import wx 18 | import wx.lib.sized_controls as sc 19 | from wx.lib.pdfviewer import pdfButtonPanel, pdfViewer 20 | 21 | 22 | class PDFViewer(sc.SizedFrame): 23 | def __init__(self, parent, **kwds): 24 | super(PDFViewer, self).__init__(parent, **kwds) 25 | 26 | paneCont = self.GetContentsPane() 27 | self.buttonpanel = pdfButtonPanel( 28 | paneCont, wx.NewIdRef(), wx.DefaultPosition, wx.DefaultSize, 0 29 | ) 30 | self.buttonpanel.SetSizerProps(expand=True) 31 | self.viewer = pdfViewer( 32 | paneCont, 33 | wx.NewIdRef(), 34 | wx.DefaultPosition, 35 | wx.DefaultSize, 36 | wx.HSCROLL | wx.VSCROLL | wx.SUNKEN_BORDER, 37 | ) 38 | 39 | self.viewer.SetSizerProps(expand=True, proportion=1) 40 | 41 | # introduce buttonpanel and viewer to each other 42 | self.buttonpanel.viewer = self.viewer 43 | self.viewer.buttonpanel = self.buttonpanel 44 | 45 | 46 | if __name__ == "__main__": 47 | import wx.lib.mixins.inspection as WIT 48 | 49 | app = WIT.InspectableApp(redirect=False) 50 | fname = sys.argv[1] 51 | pdfV = PDFViewer(None, size=(800, 600)) 52 | pdfV.viewer.LoadFile(fname) 53 | pdfV.Show() 54 | 55 | app.MainLoop() 56 | -------------------------------------------------------------------------------- /examples/zerofy-rotation/derotate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import fitz 3 | 4 | 5 | def page_rotation_set0(page): 6 | """Nullify page rotation.""" 7 | 8 | rot = page.rotation # contains normalized rotation value 9 | if rot == 0: 10 | return page # nothing to do 11 | # need to derotate the page's content 12 | mb = page.mediabox # current mediabox 13 | 14 | if rot == 90: 15 | # before derotation, shift content horizontally 16 | mat0 = fitz.Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0) 17 | elif rot == 270: 18 | # before derotation, shift content 
vertically 19 | mat0 = fitz.Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0) 20 | else: 21 | mat0 = fitz.Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0) 22 | 23 | # prefix with derotation matrix 24 | mat = mat0 * page.derotation_matrix 25 | cmd = b"%g %g %g %g %g %g cm " % tuple(mat) 26 | xref = fitz.TOOLS._insert_contents(page, cmd, 0) 27 | 28 | # swap x- and y-coordinates 29 | if rot in (90, 270): 30 | x0, y0, x1, y1 = mb 31 | mb.x0 = y0 32 | mb.y0 = x0 33 | mb.x1 = y1 34 | mb.y1 = x1 35 | page.set_mediabox(mb) 36 | 37 | page.set_rotation(0) 38 | 39 | # refresh the page to apply these changes 40 | doc = page.parent 41 | pno = page.number 42 | page = doc[pno] 43 | page.clean_contents() 44 | return page 45 | 46 | 47 | if __name__ == "__main__": 48 | try: 49 | filename = sys.argv[1] 50 | except: 51 | sys.exit("Usage: python derotate.py input.pdf") 52 | doc = fitz.open(filename) 53 | for pno in range(len(doc)): 54 | page_rotation_set0(doc[pno]) 55 | doc.ez_save(filename.replace(".pdf", "-rot0.pdf"), clean=True) 56 | -------------------------------------------------------------------------------- /examples/zerofy-rotation/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/examples/zerofy-rotation/input.pdf -------------------------------------------------------------------------------- /examples/zerofy-rotation/zerofy-rotation.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a PyMuPDF utility script performing the following function: 3 | 4 | It copies the input pages to the output file giving all pages a rotation 5 | of zero - without changing page appearance. 6 | 7 | Usage: "python zerofy-rotation.py input.pdf" 8 | 9 | The resulting output file will be named "input-rot0.pdf". 
10 | """ 11 | 12 | import sys 13 | import fitz 14 | 15 | try: 16 | src = fitz.open(sys.argv[1]) # source file 17 | except: 18 | print("Usage: 'python zerofy-rotation.py input.pdf'\n") 19 | raise 20 | doc = fitz.open() # new output file 21 | 22 | for src_page in src: # iterate over input pages 23 | src_rect = src_page.rect # source page rect 24 | w, h = src_rect.br # save its width, height 25 | src_rot = src_page.rotation # save source rotation 26 | src_page.set_rotation(0) # set rotation to 0 temporarily 27 | page = doc.new_page(width=w, height=h) # make output page 28 | page.show_pdf_page( # insert source page 29 | page.rect, 30 | src, 31 | src_page.number, 32 | rotate=-src_rot, # reversed original rotation 33 | ) 34 | 35 | src.close() 36 | doc.ez_save(src.name.replace(".pdf", "-rot0.pdf"), clean=True) 37 | -------------------------------------------------------------------------------- /fields/date-field.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demo Script: How to insert a text field in DATE format. 3 | 4 | This script insert a DATE field on some PDF page using JavaScript for 5 | formatting and field validation. 6 | 7 | Note: 8 | ----- 9 | This is an example for how to employ JavaScript for field formatting and 10 | validation. Consult this reference for other field types and situations, 11 | like inter-field validation and more: 12 | http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/Acro6JSGuide.pdf 13 | """ 14 | import fitz 15 | 16 | # JavaScripts for defining a "date" field format and handling user keystrokes. 
17 | JSF = 'AFDate_FormatEx("mm/dd/yyyy");' # JS to define the format 18 | JSK = 'AFDate_KeystrokeEx("mm/dd/yyyy");' # JS to handle keystrokes 19 | 20 | doc = fitz.open() 21 | page = doc.new_page() 22 | w = fitz.Widget() # create a skeleton Widget object 23 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT # DATE fields are subtypes of TEXT 24 | w.rect = fitz.Rect(20, 20, 160, 80) # where the date field appears on page 25 | w.field_name = "Date" # give it a unique name 26 | w.field_value = "12/12/2022" # field value 27 | 28 | # insert JavaScripts in the widget 29 | w.script_format = JSF # defines the format 30 | w.script_stroke = JSK # handles keystrokes 31 | 32 | annot = page.add_widget(w) # insert the field in the page 33 | 34 | doc.save(__file__.replace(".py", ".pdf")) 35 | -------------------------------------------------------------------------------- /fields/interfield-calculation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demo script: How to define inter-field interactions. 3 | 4 | Make a PDF with three pages. On each page, two fields are added and the 5 | result is stored in a third field on that page. 6 | 7 | Choosing three pages doing essentially the same thing shall demonstrate, 8 | that field names across the whole PDF must be uniquely named. 9 | 10 | Note: 11 | ----- 12 | This is an example for how to employ JavaScript for field formatting and 13 | validation. 
Consult this reference for other field types and situations, 14 | like inter-field validation and more: 15 | http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/Acro6JSGuide.pdf 16 | 17 | Dependencies 18 | ------------ 19 | PyMuPDF version 1.22.0 or later 20 | """ 21 | import fitz 22 | 23 | if not tuple(map(int, fitz.VersionBind.split("."))) > (1, 21, 1): 24 | raise AssertionError("need PyMuPDF version > 1.21.1") 25 | 26 | r1 = fitz.Rect(100, 100, 300, 120) 27 | r2 = fitz.Rect(100, 130, 300, 150) 28 | r3 = fitz.Rect(100, 180, 300, 200) 29 | 30 | doc = fitz.open() # make a new, empty PDF 31 | for i in range(3): # make three pages in it 32 | # in essence we are causing the computation NUM1 + NUM2 = RESULT 33 | page = doc.new_page() # make the page 34 | 35 | w = fitz.Widget() 36 | w.field_name = f"NUM1{page.number}" # unique name in document 37 | w.rect = r1 38 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT 39 | w.field_value = f"{i*100+1}" 40 | w.field_flags = 2 41 | page.add_widget(w) 42 | 43 | w = fitz.Widget() 44 | w.field_name = f"NUM2{page.number}" # unique name in document 45 | w.rect = r2 46 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT 47 | w.field_value = "200" 48 | w.field_flags = 2 49 | page.add_widget(w) 50 | 51 | w = fitz.Widget() # the result field 52 | w.field_name = f"RESULT{page.number}" # unique name in document 53 | w.rect = r3 54 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT 55 | w.field_value = "Resultat?" 56 | w.script_calc = f'AFSimple_Calculate("SUM", new Array("NUM1{page.number}", "NUM2{page.number}"));' 57 | page.add_widget(w) 58 | 59 | doc.save(__file__.replace(".py", ".pdf")) 60 | -------------------------------------------------------------------------------- /fields/switch-text-on-off.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demo script: How to show or hide fields based on checkbox content. 3 | 4 | Depending on whether some checkbox is being checked, show or hide 5 | a text widget. 
6 | 7 | Note: 8 | ----- 9 | This is an example for how to employ JavaScript for field formatting and 10 | validation. Consult this reference for other field types and situations, 11 | like inter-field validation and more: 12 | http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/Acro6JSGuide.pdf 13 | 14 | Dependencies 15 | ------------ 16 | PyMuPDF version 1.22.0 or later 17 | """ 18 | import fitz 19 | 20 | if not tuple(map(int, fitz.VersionBind.split("."))) > (1, 21, 1): 21 | raise AssertionError("need PyMuPDF version > 1.21.1") 22 | 23 | # This JavaScript will be executed if the checkbox value changes 24 | JSCRIPT = """if (this.getField("my-checkbox").value == "Yes") 25 | this.getField("my-text").display = display.visible; 26 | else 27 | this.getField("my-text").display = display.hidden;""" 28 | 29 | doc = fitz.open() 30 | page = doc.new_page() 31 | 32 | w = fitz.Widget() # define a field skeleton object for the text 33 | w.rect = fitz.Rect(100, 150, 300, 170) 34 | w.field_type = fitz.PDF_WIDGET_TYPE_TEXT 35 | w.field_name = "my-text" # use this to identify the field document-wide 36 | w.field_value = "Will be shown if checkbox is checked." 
37 | w.script_calc = JSCRIPT # use this property for inter-field actions 38 | page.add_widget(w) 39 | 40 | w = fitz.Widget() # define field skeleton for the checkbox 41 | w.rect = fitz.Rect(100, 100, 120, 120) 42 | w.field_type = fitz.PDF_WIDGET_TYPE_CHECKBOX 43 | w.field_name = "my-checkbox" # use this to identify the field document-wide 44 | w.border_color = fitz.pdfcolor["red"] 45 | w.field_label = "click to show or hide text" # show this on mouse hovering 46 | w.field_value = True 47 | page.add_widget(w) 48 | 49 | doc.save(__file__.replace(".py", ".pdf")) 50 | -------------------------------------------------------------------------------- /font-replacement/multi-language.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/font-replacement/multi-language.jpg -------------------------------------------------------------------------------- /font-replacement/page-17-after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/font-replacement/page-17-after.png -------------------------------------------------------------------------------- /font-replacement/page-17-before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/font-replacement/page-17-before.png -------------------------------------------------------------------------------- /font-replacement/run-log.txt: -------------------------------------------------------------------------------- 1 | D:\Jorj\Documents\GitHub\PyMuPDF-Utilities\font-replacement>py repl-fontnames.py pymupdf.pdf 2 | 3 | D:\Jorj\Documents\GitHub\PyMuPDF-Utilities\font-replacement>py repl-font.py pymupdf.pdf 4 | Processing PDF 'pymupdf.pdf' 
with 318 pages. 5 | 6 | Phase 1: Create unicode subsets. 7 | End of phase 1, 0.49 seconds. 8 | 9 | Font replacement overview: 10 | NimbusMonL-Bold replaced by: Space Mono Bold. 11 | NimbusMonL-Regu replaced by: Space Mono Regular. 12 | NimbusMonL-ReguObli replaced by: Space Mono Italic. 13 | NimbusRomNo9L-Medi replaced by: FiraGO Regular. 14 | NimbusRomNo9L-MediItal replaced by: FiraGO Italic. 15 | NimbusRomNo9L-Regu replaced by: FiraGO Regular. 16 | NimbusRomNo9L-ReguItal replaced by: FiraGO Italic. 17 | NimbusSanL-Bold replaced by: FiraGO Bold. 18 | NimbusSanL-BoldItal replaced by: FiraGO Bold Italic. 19 | NimbusSanL-Regu replaced by: FiraGO Regular. 20 | NimbusSanL-ReguItal replaced by: FiraGO Italic. 21 | 22 | Building font subsets: 23 | Used 67 glyphs of font 'Space Mono Bold'. 70 KB saved. 24 | Used 114 glyphs of font 'Space Mono Regular'. 64 KB saved. 25 | Used 88 glyphs of font 'Space Mono Italic'. 74 KB saved. 26 | Used 100 glyphs of font 'FiraGO Regular'. 753 KB saved. 27 | Used 97 glyphs of font 'FiraGO Italic'. 760 KB saved. 28 | Used 71 glyphs of font 'FiraGO Bold'. 764 KB saved. 29 | Used 31 glyphs of font 'FiraGO Bold Italic'. 780 KB saved. 30 | Font subsets built, 2.86 seconds. 31 | 32 | Phase 2: rebuild document. 
33 | End of phase 2, 12.82 seconds 34 | Total duration 16.17 seconds 35 | 36 | D:\Jorj\Documents\GitHub\PyMuPDF-Utilities\font-replacement> 37 | -------------------------------------------------------------------------------- /jupyter-notebooks/1page-snap.log: -------------------------------------------------------------------------------- 1 | %!MuPDF-Journal-100 2 | 3 | journal 4 | << 5 | /NumSections 1 6 | /FileSize 210721 7 | /Fingerprint <57c84501e4baddef56fd26959a808cfc> 8 | /HistoryPos 6 9 | >> 10 | entry 11 | (new page) 12 | 44 0 newobj 13 | 45 0 newobj 14 | 2 0 newobj 15 | entry 16 | (insert-0) 17 | 46 0 newobj 18 | 44 0 obj 19 | <<>> 20 | endobj 21 | 47 0 newobj 22 | 45 0 obj 23 | <> 24 | endobj 25 | entry 26 | (insert-1) 27 | 48 0 newobj 28 | 45 0 obj 29 | <> 30 | endobj 31 | entry 32 | (insert-2) 33 | 49 0 newobj 34 | 45 0 obj 35 | <> 36 | endobj 37 | entry 38 | (insert-3) 39 | 50 0 newobj 40 | 45 0 obj 41 | <> 42 | endobj 43 | entry 44 | (insert-4) 45 | 51 0 newobj 46 | 45 0 obj 47 | <> 48 | endobj 49 | endjournal 50 | -------------------------------------------------------------------------------- /jupyter-notebooks/1page-snap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/1page-snap.pdf -------------------------------------------------------------------------------- /jupyter-notebooks/1page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/1page.pdf -------------------------------------------------------------------------------- /jupyter-notebooks/README-OCR.md: -------------------------------------------------------------------------------- 1 | # How To Install and Enable Tesseract Dynamically 2 | 3 | In some interactive environments, like 
Google Colab, JupyterLite and other Pyodide-based environments, you have access to a Python environment that has a set of pre-installed packages. These configurations may not suffice for your requirements. 4 | 5 | While there are ways to dynamically pip-install packages by invoking pip as a shell command, or even installing software packages in the virtual machine hosting the interactive Python, additional considerations are required for PyMuPDF's OCR support of Tesseract-OCR: 6 | 7 | * On importing PyMuPDF, a check is made whether `os.environ["TESSDATA_PREFIX"]` exists. If yes, its value is stored in `fitz.TESSDATA_PREFIX`, else that value is set to `None`. 8 | 9 | * If your notebook requires OCR support, follow these steps: 10 | 11 | 1. `!apt install tesseract-ocr`. When done, confirm the value of Tesseract's language support folder. 12 | 13 | 2. `os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/4.00/tessdata"` 14 | 15 | 3. `import fitz` 16 | 17 | * You should now be able to use PyMuPDF's OCR functions. -------------------------------------------------------------------------------- /jupyter-notebooks/README.md: -------------------------------------------------------------------------------- 1 | # PyMuPDF JUPYTER Notebooks 2 | 3 | These are scripts that explain basic usage of PyMuPDF using jupyter notebook features. Just click on one of the `.ipynb` files to see its fully rendered session! 4 | 5 | Over time this script collection will be extended. Your contribution is very welcome! 6 | 7 | ## Example Files 8 | * `1page.pdf` - 1-pager PDF used as a test file by several notebooks 9 | * `blacked.pdf` - 1-pager PDF with three words covered by black rectangles. Used by `detect-hidden.ipynb` which demonstrates how badly done "redactions" can be detected - **detects hidden text.** 10 | * `partial_ocr.pdf` - 1-pager PDF containing normal text and two images that overlap each other. 
11 | 12 | ## Notebooks 13 | * `dehyphenate-flag.ipynb` - shows the effect of flag `TEXT_DEHYPHENATE` on text search and extraction. 14 | * `detect-hidden.ipynb` - shows how to **_detect text which is hidden_** by objects "drawn above" it. 15 | * `journalling1.ipynb` - introduction to PDF Journalling 16 | * `journalling2.ipynb` - chapter 2 of PDF Journalling 17 | * `journalling3.ipynb` - chapter 3 of PDF Journalling 18 | * `new-circle-annot.ipynb` - simple example for adding an annotation with desired properties 19 | * `ocr-illegible.ipynb` - OCR: how to dynamically make unrecognized characters readable 20 | * `partial-ocr.ipynb` - OCRs a page in full and in partial mode and explains the difference. Requires PyMuPDF v1.19.1. 21 | * `testpage-performance.ipynb` - compares performance of text extraction and search methods, with and without a separately prepared `TextPage` object. 22 | * `object-algebra.ipynb` - explains details on how points, rectangles and quads can be added and multiplied as if they were ordinary numbers. This is an extension to the respective [chapter](https://pymupdf.readthedocs.io/en/latest/algebra.html) of the documentation. 
-------------------------------------------------------------------------------- /jupyter-notebooks/blacked.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/blacked.pdf -------------------------------------------------------------------------------- /jupyter-notebooks/input.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/input.pdf -------------------------------------------------------------------------------- /jupyter-notebooks/input.pdf-status.log: -------------------------------------------------------------------------------- 1 | %!MuPDF-Journal-100 2 | 3 | journal 4 | << 5 | /NumSections 0 6 | /FileSize 210721 7 | /Fingerprint <57c84501e4baddef56fd26959a808cfc> 8 | /HistoryPos 0 9 | >> 10 | endjournal 11 | -------------------------------------------------------------------------------- /jupyter-notebooks/partial-ocr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/jupyter-notebooks/partial-ocr.pdf -------------------------------------------------------------------------------- /jupyter-notebooks/show_image.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility function for showing images. 3 | 4 | Intended to be imported in Jupyter notebooks to display pixmap images. 5 | 6 | Invocation: "show_image(item, title)", where item is a PyMuPDF object 7 | which has a "get_pixmap" method, and title is an optional string. 8 | 9 | The function executes "item.get_pixmap(dpi=150)" and show the resulting 10 | image. 
11 | 12 | 13 | Dependencies 14 | ------------ 15 | numpy, matplotlib, pymupdf 16 | """ 17 | 18 | 19 | def show_image(item, title=""): 20 | """Display a pixmap. 21 | 22 | Just to display Pixmap image of "item" - ignore the man behind the curtain. 23 | 24 | Args: 25 | item: any PyMuPDF object having a "get_pixmap" method. 26 | title: a string to be used as image title 27 | 28 | Generates an RGB Pixmap from item using a constant DPI and using matplotlib 29 | to show it inline of the notebook. 30 | """ 31 | DPI = 150 # use this resolution 32 | import numpy as np 33 | import matplotlib.pyplot as plt 34 | 35 | # %matplotlib inline 36 | pix = item.get_pixmap(dpi=DPI) 37 | img = np.ndarray([pix.h, pix.w, 3], dtype=np.uint8, buffer=pix.samples_mv) 38 | plt.figure(dpi=DPI) # set the figure's DPI 39 | plt.title(title) # set title of image 40 | _ = plt.imshow(img, extent=(0, pix.w * 72 / DPI, pix.h * 72 / DPI, 0)) 41 | -------------------------------------------------------------------------------- /optional-content/source-ocmd.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/optional-content/source-ocmd.pdf -------------------------------------------------------------------------------- /optional-content/source-ocmd.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyMuPDF Demo Program 3 | 4 | Show how to create a PDF page that display content depending on the state 5 | of a set of optional content groups. 6 | 7 | Here we create a PDF page with two objects of which exactly one is shown 8 | at any time. 
9 | """ 10 | import fitz 11 | 12 | # source file with at least 2 pages 13 | src = fitz.open("source.pdf") 14 | 15 | # new PDF with one page 16 | doc = fitz.open() 17 | page = doc.new_page() 18 | 19 | # define 2 rectangles: upper and lower half page 20 | r0 = page.rect 21 | r0.y1 = r0.height / 2 22 | r1 = r0 + (0, r0.height, 0, r0.height) 23 | 24 | # make 1 OCG and 1 OCMD 25 | ocg0 = doc.addOCG("ocg0", on=True) # to be used for upper rect 26 | 27 | # the following is interpreted as "not ocg0" 28 | ocmd0 = doc.set_ocmd( # to be used for lower rect 29 | ocgs=[ocg0], 30 | policy="alloff", 31 | ) 32 | 33 | # alternatively, you can use visibility expressions: 34 | # ocmd0 = doc.set_ocmd(ve=["not", ocg0]) 35 | 36 | # insert the 2 source page images, each connected to one OCG 37 | page.show_pdf_page(r0, src, 0, oc=ocg0, rotate=90) 38 | page.show_pdf_page(r1, src, 1, oc=ocmd0, rotate=-90) 39 | 40 | doc.save( # save the file 41 | __file__.replace(".py", ".pdf"), 42 | garbage=3, 43 | pretty=True, 44 | deflate=True, 45 | clean=True, 46 | ) 47 | 48 | """ 49 | The new PDF can now be viewed by e.g. Adobe Acrobat reader. Setting 50 | "ocg0" ON of OFF will flip between showing page 0 and page 1. 51 | """ 52 | -------------------------------------------------------------------------------- /optional-content/source-radio.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/optional-content/source-radio.pdf -------------------------------------------------------------------------------- /optional-content/source-radio.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyMuPDF Demo Program 3 | 4 | Show how to create a PDF page that display content depending on the state 5 | of a set of optional content groups. 
# Demo: display the first 4 pages of a source file in the 4 quadrants of a
# new PDF page such that only one of them is shown at a time. This is
# achieved via a so-called "Radio-Button-Group" of optional content groups.
import fitz

# source file with at least 4 pages
src = fitz.open("source.pdf")

# new PDF with one page
doc = fitz.open()
page = doc.new_page()

# define the 4 rectangle quadrants to receive the source pages
r0 = page.rect / 2
r1 = r0 + (r0.width, 0, r0.width, 0)
r2 = r0 + (0, r0.height, 0, r0.height)
r3 = r2 + (r2.width, 0, r2.width, 0)

# make 4 OCGs - one for each source page image.
xref0 = doc.add_ocg("ocg0", on=True)
xref1 = doc.add_ocg("ocg1", on=False)
xref2 = doc.add_ocg("ocg2", on=False)
xref3 = doc.add_ocg("ocg3", on=False)

# "set_layer" is the Document method that accepts radio-button groups
doc.set_layer(
    -1,  # the default OC configuration
    rbgroups=[[xref0, xref1, xref2, xref3]],  # one radio-button group
)

# insert the 4 source page images, each connected to one OCG
page.show_pdf_page(r0, src, 0, oc=xref0)
page.show_pdf_page(r1, src, 1, oc=xref1)
page.show_pdf_page(r2, src, 2, oc=xref2)
page.show_pdf_page(r3, src, 3, oc=xref3)

doc.save(  # save the file
    __file__.replace(".py", ".pdf"),
    garbage=3,
    pretty=True,
    deflate=True,
    clean=True,
)

# The new file can now be viewed by e.g. Adobe Acrobat reader:
# switching on any one of the four pages switches off the other three.
53 | -------------------------------------------------------------------------------- /optional-content/source.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/optional-content/source.pdf -------------------------------------------------------------------------------- /pdf-names-resolution/README.md: -------------------------------------------------------------------------------- 1 | # Under Construction 2 | 3 | This currently contains two demo scripts, which examine the PDF catalog and 4 | resolve named destinations to pages. 5 | 6 | We intend to make this available as a fitz.Document method. 7 | 8 | Currently, invoke like this: 9 | 10 | ## Alternative 1: `find_names.py` 11 | This version works for both the classic and the rebased architecture of PyMuPDF. 12 | However, the solution is not complete yet: there are cases where names are not detected completely. 13 | ```python 14 | import fitz 15 | from find_names import resolve_names 16 | 17 | doc=fitz.open("pymupdf.pdf") 18 | resolved_name = resolve_names(doc) 19 | 20 | resolved_name["chapter.1"] 21 | {'page': 6, 'to': (72.0, 720.0), 'zoom': 0} 22 | ``` 23 | 24 | ## Alternative 2: `list_names.py` 25 | 26 | This version should cover all cases for encoding named destinations - in contrast to Alternative 1. 27 | It can only be used with the rebased version of PyMuPDF.
Example: 28 | ```python 29 | In [1]: import fitz_new as fitz 30 | In [2]: from list_names import resolve_names 31 | In [3]: doc=fitz.open("pymupdf.pdf") 32 | In [4]: resolved_name=resolve_names(doc) 33 | In [7]: resolved_name["chapter.1"] 34 | Out[7]: {'page': 6, 'to': (72.0, 720.0), 'zoom': 0.0} 35 | ``` -------------------------------------------------------------------------------- /reporting/README.md: -------------------------------------------------------------------------------- 1 | # PyMuPDF Reporting 2 | 3 | This folder provides examples for using PyMuPDF's reporting feature. 4 | 5 | Each example is contained in a sub-folder of `examples`, usually the script together with all necessary data to use it. 6 | 7 | The names of the sub-folders should give an idea what the respective report is trying to achieve. Each sub-folder will also contain its own README file that explains any specifics of the example. 8 | 9 | Please also do have a look at the [documentation](https://github.com/pymupdf/PyMuPDF-Utilities/blob/master/reporting/documentation-draft.md) draft to understand how to use this exciting new feature. -------------------------------------------------------------------------------- /reporting/examples/filmfestival-2tables/README.md: -------------------------------------------------------------------------------- 1 | # Example for PyMuPDF Reporting 2 | 3 | This script creates a report about a fictitious film festival. 4 | 5 | It extracts data from an SQL database (sqlite3). The database contains two tables: 6 | * **films** - columns: **title**, **director**, **year** 7 | * **actors** - columns: **name**, **film** 8 | 9 | Two tabular reports are created in one common PDF. 10 | 1. Table 1 lists all films and names all actors being cast. 11 | 2. Table 2 lists all actors together with all the films where they have been cast. 
12 | 13 | Noteworthy details: 14 | * Demonstrate how to use fonts from the [pymupdf-fonts](https://pypi.org/project/pymupdf-fonts/) package. 15 | * Demonstrate how to **combine multiple report sections** (here: two table sections) in one report. 16 | * **Automatic layout:** major layout changes **without coding effort**, like 17 | - page size (Letter, ISO A4) or paper format (portrait, landscape) 18 | - number of columns per page 19 | - page breaks between report sections 20 | * Appearance changes, like text colors or fonts just by modifying HTML and styling (CSS) definitions. -------------------------------------------------------------------------------- /reporting/examples/filmfestival-2tables/filmfestival.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/filmfestival-2tables/filmfestival.db -------------------------------------------------------------------------------- /reporting/examples/filmfestival-2tables/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/filmfestival-2tables/output.pdf -------------------------------------------------------------------------------- /reporting/examples/invoice/README.md: -------------------------------------------------------------------------------- 1 | # Example for PyMuPDF Reporting 2 | 3 | This script creates an invoice with a layout involving fairly complex HTML definitions. 4 | 5 | The single invoice items are contained in an SQL database (sqlite). 
6 | 7 | Points of interest: 8 | 9 | * Company logo top-left on every page - defined as being part of the report header 10 | * The report header also includes a small constant table top-right 11 | * On page 1 only, there is a "prolog" section containing some introductory explanations. 12 | - The HTML skeleton contains 4 variables to be filled with external data 13 | * Mark last report row with an extra background color 14 | * The item access function also computes an overall invoice total and appends it as the last report row. 15 | -------------------------------------------------------------------------------- /reporting/examples/invoice/invoice-parms.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/invoice/invoice-parms.db -------------------------------------------------------------------------------- /reporting/examples/invoice/items.html: -------------------------------------------------------------------------------- 1 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
64 | -------------------------------------------------------------------------------- /reporting/examples/invoice/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/invoice/logo.png -------------------------------------------------------------------------------- /reporting/examples/invoice/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/invoice/output.pdf -------------------------------------------------------------------------------- /reporting/examples/multi-format/README.md: -------------------------------------------------------------------------------- 1 | # Example for PyMuPDF Reporting 2 | 3 | This script creates a list of capital cities of the world. 4 | 5 | It extracts data from a CSV file. 6 | 7 | Notes of interest: 8 | 9 | * Table printed in 2 columns per page 10 | * Alternating row background colors 11 | * Using user fonts from the pymupdf-fonts package -------------------------------------------------------------------------------- /reporting/examples/multi-format/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/multi-format/output.pdf -------------------------------------------------------------------------------- /reporting/examples/row-with-images/README.md: -------------------------------------------------------------------------------- 1 | # Example for PyMuPDF Reporting 2 | 3 | This script creates a table from items in a CSV file. 4 | 5 | Notes of interest: 6 | 7 | * Table rows contain images that are stored in a ZIP file. 
The report generator "understands" field text that is prefixed with the string "|img|" and interprets it as a file name. 8 | * Three alternating row background colors. -------------------------------------------------------------------------------- /reporting/examples/row-with-images/flags.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/row-with-images/flags.zip -------------------------------------------------------------------------------- /reporting/examples/row-with-images/items.csv: -------------------------------------------------------------------------------- 1 | country;member;flag;since 2 | Belgium;Founder;|img|Belgium.jpg;1949 3 | Denmark;Founder;|img|Denmark.jpg;1949 4 | France;Founder;|img|France.jpg;1949 5 | Iceland;Founder;|img|Iceland.jpg;1949 6 | Italy;Founder;|img|Italy.jpg;1949 7 | Canada;Founder;|img|Canada.jpg;1949 8 | Luxembourg;Founder;|img|Luxembourg.jpg;1949 9 | Netherlands;Founder;|img|Netherlands.jpg;1949 10 | Norway;Founder;|img|Norway.jpg;1949 11 | Portugal;Founder;|img|Portugal.jpg;1949 12 | United Kingdom;Founder;|img|United_Kingdom.jpg;1949 13 | United States;Founder;|img|United_States.jpg;1949 14 | Greece;Joiner;|img|Greece.jpg;1952 15 | Turkey;Joiner;|img|Turkey.jpg;1952 16 | Germany;Joiner;|img|Germany.jpg;1955 17 | Spain;Joiner;|img|Spain.jpg;1982 18 | Poland;Joiner;|img|Poland.jpg;1999 19 | Czech Republic;Joiner;|img|Czech_Republic.jpg;1999 20 | Hungary;Joiner;|img|Hungary.jpg;1999 21 | Bulgaria;Joiner;|img|Bulgaria.jpg;2004 22 | Estonia;Joiner;|img|Estonia.jpg;2004 23 | Latvia;Joiner;|img|Latvia.jpg;2004 24 | Lithuania;Joiner;|img|Lithuania.jpg;2004 25 | Romania;Joiner;|img|Romania.jpg;2004 26 | Slovakia;Joiner;|img|Slovakia.jpg;2004 27 | Slovenia;Joiner;|img|Slovenia.jpg;2004 28 | Albania;Joiner;|img|Albania.jpg;2009 29 | Croatia;Joiner;|img|Croatia.jpg;2009 30 | 
Montenegro;Joiner;|img|Montenegro.jpg;2017 31 | North Macedonia;Joiner;|img|North_Macedonia.jpg;2020 32 | Finland;Joiner;|img|Finland.jpg;2023 33 | Sweden;Joiner;|img|Sweden.jpg;2023 -------------------------------------------------------------------------------- /reporting/examples/row-with-images/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/row-with-images/output.pdf -------------------------------------------------------------------------------- /reporting/examples/row-with-images/rows-with-images.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import fitz 3 | import zipfile 4 | from Reports import * 5 | 6 | # The following defines the overall report object 7 | mediabox = fitz.paper_rect("a4") # the only required parameter 8 | report = Report(mediabox, font_families={"sans-serif": "ubuntu", "serif": "ubuntu"}) 9 | 10 | # Predefined HTML to define the header for all pages 11 | 12 | HEADER = ( 13 | """

Report Example

""" 14 | ) 15 | header = Block(html=HEADER, report=report) 16 | 17 | FOOTER = """
Report Footer
""" 18 | footer = Block(html=FOOTER, report=report) 19 | footer.make_story() 20 | 21 | HTML = """ 22 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
def fetch_rows():
    """Return the content of "items.csv" as a list of rows.

    The file is looked up in the current directory and its bytes are decoded
    with the default codec of bytes.decode(). Each text line becomes one
    row: the list of its fields, obtained by splitting at ";".
    """
    text = pathlib.Path("items.csv").read_bytes().decode()
    return [record.split(";") for record in text.splitlines()]
# Typeset the HTML article "springer.html" as a 2-column report.
import pathlib
import fitz
from Reports import *

# the overall report object; "a4-l" selects the paper format
report = Report(mediabox=fitz.paper_rect("a4-l"))

# the article text is provided as HTML
HTML = pathlib.Path("springer.html").read_bytes().decode()
textblock = Block(html=HTML, report=report)

# a single section: the text block in 2 columns, starting on a new page
layout = Options(cols=2, format=report.mediabox, newpage=True)
report.sections = [[textblock, layout]]

# generate the report and save it under the given name
report.run("output.pdf")
https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/DejaVuSansCondensed.ttf -------------------------------------------------------------------------------- /reporting/examples/user-fonts/filmfestival.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/filmfestival.db -------------------------------------------------------------------------------- /reporting/examples/user-fonts/kenpixel.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/kenpixel.ttf -------------------------------------------------------------------------------- /reporting/examples/user-fonts/output-dejavu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/output-dejavu.pdf -------------------------------------------------------------------------------- /reporting/examples/user-fonts/output-kenpixel.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/examples/user-fonts/output-kenpixel.pdf -------------------------------------------------------------------------------- /reporting/pymupdf-reporting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/pymupdf-reporting.pdf -------------------------------------------------------------------------------- 
/reporting/pymupdf-reporting.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/reporting/pymupdf-reporting.pptx -------------------------------------------------------------------------------- /shapes/piechart1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/piechart1.pdf -------------------------------------------------------------------------------- /shapes/piechart1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/piechart1.png -------------------------------------------------------------------------------- /shapes/piechart2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/piechart2.pdf -------------------------------------------------------------------------------- /shapes/symbol-list.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/shapes/symbol-list.pdf -------------------------------------------------------------------------------- /shapes/symbol-list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Created on Sun Dec 9 08:34:06 2018 5 | 6 | @author: Jorj 7 | @license: GNU AFFERO GPL V3+ 8 | 9 | Create a list of available symbols defined in shapes_and_symbols.py 10 | 11 | This also demonstrates an example usage: how these symbols could be used 12 | as bullet-point symbols in some 
import fitz
import shapes_and_symbols as sas

print(fitz.__doc__)

# pairs of (symbol drawing function, description text)
tlist = [
    (sas.arrow, "arrow (easy)"),
    (sas.caro, "caro (easy)"),
    (sas.clover, "clover (easy)"),
    (sas.diamond, "diamond (easy)"),
    (sas.dontenter, "do not enter (medium)"),
    (sas.frowney, "frowney (medium)"),
    (sas.hand, "hand (complex)"),
    (sas.heart, "heart (easy)"),
    (sas.pencil, "pencil (very complex)"),
    (sas.smiley, "smiley (easy)"),
]

r = fitz.Rect(50, 50, 100, 100)  # first rect to contain a symbol
d = fitz.Rect(0, r.height + 10, 0, r.height + 10)  # displacement to next rect
p = (15, -r.height * 0.2)  # offset of the description text

# one rectangle per symbol, each displaced by "d" from its predecessor
rlist = [r]
while len(rlist) < len(tlist):
    rlist.append(rlist[-1] + d)

doc = fitz.open()  # create empty PDF
page = doc.new_page()  # create an empty page
img = page.new_shape()  # start a Shape (canvas)

for (draw_symbol, description), rect in zip(tlist, rlist):
    draw_symbol(img, rect)  # execute symbol creation
    # description text is positioned relative to the bottom-right corner
    img.insert_text(rect.br + p, description, fontsize=rect.height / 1.2)

# store everything to the page's /Contents object
img.commit()

doc.save(__file__.replace(".py", ".pdf"))  # save the PDF
3 | 4 | Just use the new [Page](https://pymupdf.readthedocs.io/en/latest/page.html) method [`find_tables()`](https://pymupdf.readthedocs.io/en/latest/page.html#Page.find_tables) to obtain an object that contains all detected tables on the page in a list. 5 | 6 | You can iterate over these table objects to find details about their headers, table cells and their content. A growing number of example scripts shows how to do this and how to pass the extracted information downstream to pandas DataFrames and Excel, CSV or JSON files. 7 | 8 | The following examples have been collected since 2023-08-20: 9 | 10 | * `find_tables.ipynb` (Jupyter notebook) reads a 1-page PDF with Chinese text and two tables. 11 | * `join-tables.ipynb` (Jupyter notebook) reads a multi-page PDF and joins the parts of a table that has been fragmented across these pages. 12 | * `compare-xps-pdf.ipynb` (Jupyter notebook) confirms support of PyMuPDF's table feature for general documents (comparison XPS vs. PDF). 13 | -------------------------------------------------------------------------------- /table-analysis/XPS-table.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/XPS-table.pdf -------------------------------------------------------------------------------- /table-analysis/XPS-table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/XPS-table.xlsx -------------------------------------------------------------------------------- /table-analysis/XPS-table.xps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/XPS-table.xps
-------------------------------------------------------------------------------- /table-analysis/chinese-table.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/chinese-table.pdf -------------------------------------------------------------------------------- /table-analysis/input1-bbox.json: -------------------------------------------------------------------------------- 1 | [ 2 | 0.0, 3 | 83.22763061523438, 4 | 612.0, 5 | 390.90350341796875 6 | ] -------------------------------------------------------------------------------- /table-analysis/input1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/input1.pdf -------------------------------------------------------------------------------- /table-analysis/input2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/input2.pdf -------------------------------------------------------------------------------- /table-analysis/national-capitals.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/table-analysis/national-capitals.pdf -------------------------------------------------------------------------------- /table-analysis/show_image.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility function for showing images. 3 | 4 | Intended to be imported in Jupyter notebooks to display pixmap images. 
def show_image(item, title=""):
    """Render "item" at a fixed resolution and show it with matplotlib.

    Args:
        item: any object offering a "get_pixmap" method that accepts a
            "dpi" keyword (e.g. a PyMuPDF page).
        title: optional string placed above the image.

    The sample buffer of the pixmap is wrapped as a height x width x 3
    array of unsigned bytes, so the pixmap must deliver 3 bytes per pixel.
    The axes are scaled by 72 / DPI, with the vertical axis starting at
    the top.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    resolution = 150  # constant rendering resolution (DPI)

    pixmap = item.get_pixmap(dpi=resolution)
    shape = [pixmap.h, pixmap.w, 3]
    pixels = np.ndarray(shape, dtype=np.uint8, buffer=pixmap.samples_mv)

    # image extent: pixel counts scaled by 72 / DPI
    right = pixmap.w * 72 / resolution
    bottom = pixmap.h * 72 / resolution

    plt.figure(dpi=resolution)  # the figure uses the same DPI
    plt.title(title)
    plt.imshow(pixels, extent=(0, right, bottom, 0))
| %%EOF 50 | -------------------------------------------------------------------------------- /text-extraction/1page-text.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/1page-text.jpg -------------------------------------------------------------------------------- /text-extraction/1page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/1page.pdf -------------------------------------------------------------------------------- /text-extraction/Dart-text.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/Dart-text.jpg -------------------------------------------------------------------------------- /text-extraction/Dart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/Dart.pdf -------------------------------------------------------------------------------- /text-extraction/PDF2Text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created on Sun Jul 12 07:00:00 2015 4 | 5 | @author: Jorj McKie 6 | Copyright (c) 2015-2021 Jorj X. McKie 7 | 8 | The license of this program is governed by GNU AGPL 3.0. 9 | See the "COPYING" file of this repository. 10 | 11 | This is an example for using the Python binding PyMuPDF of MuPDF. 12 | 13 | This program extracts the text of any supported input document and writes it 14 | to a text file named input-filename + ".txt". 
def main(*args):
    """Extract the text of a document and write it to "<filename>.txt".

    Args:
        *args: optional. If given, args[0] is the input file name;
            otherwise the name is taken from sys.argv[1].

    The text of every page is written UTF-8 encoded, followed by a form
    feed (byte value 12). Both the document and the output file are closed
    on exit, also when an error occurs while extracting.
    """
    filename = args[0] if args else sys.argv[1]
    ofile = filename + ".txt"

    # open the document first: a failing open must not leave an empty output
    with fitz.open(filename) as doc, open(ofile, "wb") as fout:
        for page in doc:
            fout.write(page.get_text().encode("utf-8") + bytes((12,)))
def main(*args):
    """Write the text blocks of a document to "<filename>.txt".

    Args:
        *args: optional. If given, args[0] is the input file name;
            otherwise the name is taken from sys.argv[1].

    For every page, the items returned by get_text("blocks") are sorted by
    their second, then their first entry (vertical, then horizontal
    coordinate of the top-left corner, per the script's description), and
    entry 4 of each block (its text) is written UTF-8 encoded.
    Both the document and the output file are closed on exit, also when an
    error occurs while extracting.
    """
    filename = args[0] if args else sys.argv[1]
    ofile = filename + ".txt"

    # open the document first: a failing open must not leave an empty output
    with fitz.open(filename) as doc, open(ofile, "wb") as fout:
        for page in doc:
            blocks = page.get_text("blocks")
            blocks.sort(key=lambda b: (b[1], b[0]))
            for b in blocks:
                fout.write(b[4].encode("utf-8"))
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/extend-dicts.pdf -------------------------------------------------------------------------------- /text-extraction/extend-dicts.py: -------------------------------------------------------------------------------- 1 | """ 2 | PyMuPDF demo script 3 | 4 | Show how to extend the standard "dict" and "rawdict" text extraction outputs 5 | with information from the Page method "get_texttrace()": 6 | * Sequence number ("seqno") 7 | * Type (stroke, fill, hidden) 8 | * Opacity 9 | """ 10 | 11 | import fitz 12 | import time 13 | 14 | doc = fitz.open("extend-dicts.pdf") 15 | page = doc[0] 16 | char_dict = {} 17 | t0 = time.perf_counter() 18 | for span in page.get_texttrace(): 19 | seqno = span["seqno"] 20 | stype = span["type"] 21 | opacity = span["opacity"] 22 | for char in span["chars"]: 23 | origin = char[2] 24 | char_dict[origin] = (seqno, stype, opacity) 25 | 26 | t1 = time.perf_counter() 27 | print(f"Number of characters detected {len(char_dict.keys())}.") 28 | 29 | text_blocks = page.get_text("dict", flags=fitz.TEXTFLAGS_TEXT)["blocks"] 30 | t2 = time.perf_counter() 31 | for b in text_blocks: 32 | for l in b["lines"]: 33 | for s in l["spans"]: 34 | origin = s["origin"] 35 | val = char_dict.get(s["origin"]) 36 | if val is None: # a previous span has all this info 37 | s["seqno"] = seqno 38 | s["opacity"] = opacity 39 | s["type"] = stype 40 | continue 41 | seqno, stype, opacity = val 42 | s["seqno"] = seqno 43 | s["opacity"] = opacity 44 | s["type"] = stype 45 | 46 | t3 = time.perf_counter() 47 | print("Timings:") 48 | print(f"Make texttrace dictionary: {t1-t0}") 49 | print(f"Extend standard dictionary: {t3-t2}") 50 | -------------------------------------------------------------------------------- /text-extraction/garbled-text.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/garbled-text.jpg -------------------------------------------------------------------------------- /text-extraction/garbled.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/garbled.pdf -------------------------------------------------------------------------------- /text-extraction/invoice-simple.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/invoice-simple.pdf -------------------------------------------------------------------------------- /text-extraction/layout-demo1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/layout-demo1.pdf -------------------------------------------------------------------------------- /text-extraction/lookup-keywords.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility 3 | -------- 4 | This demo script shows how to extract key-value pairs from a page with a 5 | "predictable" layout, as it can be found in invoices and other formalized 6 | documents. 7 | 8 | In such cases, a text extraction based on "words" leads to results that 9 | are both simple and fast, and avoids using regular expressions. 10 | 11 | The example analyzes an invoice and extracts the date, invoice number, and 12 | various amounts. 
13 | 14 | Because of the sort, correct values for each keyword will be found if the 15 | value's boundary box bottom is not higher than that of the keyword. 16 | So it could just as well be on the next line. The only condition is, that 17 | no other text exists in between. 18 | 19 | Please note that the code works unchanged also for other supported document 20 | types, such as XPS or EPUB, etc. 21 | """ 22 | 23 | import fitz 24 | 25 | doc = fitz.open("invoice-simple.pdf") # example document 26 | page = doc[0] # first page 27 | words = page.get_text("words", sort=True) # extract sorted words 28 | 29 | for i, word in enumerate(words): 30 | # information items will be found prefixed with their "key" 31 | text = word[4] 32 | if text == "DATE:": # the following word will be the date! 33 | date = words[i + 1][4] 34 | print("Invoice date:", date) 35 | elif text == "Subtotal": 36 | subtotal = words[i + 1][4] 37 | print("Subtotal:", subtotal) 38 | elif text == "Tax": 39 | tax = words[i + 1][4] 40 | print("Tax:", tax) 41 | elif text == "INVOICE": 42 | inv_number = words[i + 2][4] # skip the "#" sign 43 | print("Invoice number:", inv_number) 44 | elif text == "BALANCE": 45 | balance = words[i + 2][4] # skip the word "DUE" 46 | print("Balance due:", balance) 47 | -------------------------------------------------------------------------------- /text-extraction/shadows.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/shadows.pdf -------------------------------------------------------------------------------- /text-extraction/textmaker.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/textmaker.pdf -------------------------------------------------------------------------------- 
/text-extraction/textmaker2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/text-extraction/textmaker2.pdf -------------------------------------------------------------------------------- /text-extraction/textmaker2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates a PDF page to demonstrate capabilities and limitations of various 3 | text extraction methods. 4 | Reads "textmaker.pdf" created previously, then extracts every single 5 | character and arbitrarily re-arranges the character list. 6 | Then makes a new PDF page and writes each character of the shuffled list 7 | to the same position it had on the original page. 8 | The resulting PDF looks exactly like the original, but refuses to deliver 9 | meaningful results for all conventional text extraction methods. 10 | Also if you try to copy-paste with PDF viewers like Adobe Acrobat, 11 | Foxit Reader, PDF XChange, ... the result will be complete garbage. 12 | Evince on Linux is not as bad. I don't know how OSX tools would compare. 13 | 14 | The only possible solution to recover the text is layout preservation. 
15 | """ 16 | import fitz 17 | import random 18 | 19 | font = fitz.Font("cjk") # use same font for output 20 | doc = fitz.open("textmaker.pdf") 21 | page = doc[0] 22 | w = page.rect.width 23 | h = page.rect.height 24 | chars = [] # save extracted characters here 25 | for b in page.get_text("rawdict")["blocks"]: 26 | for l in b["lines"]: 27 | for s in l["spans"]: 28 | for c in s["chars"]: 29 | chars.append(c) 30 | doc.close() 31 | doc = fitz.open() # make new PDF 32 | page = doc.new_page(width=w, height=h) # new page with the old dimensions 33 | random.shuffle(chars) # arbitrarily re-order characters 34 | tw = fitz.TextWriter(page.rect) 35 | # write the re-ordered characters to the page 36 | for c in chars: 37 | tw.append(c["origin"], c["c"], font=font) 38 | tw.write_text(page) 39 | doc.subset_fonts() 40 | doc.ez_save(__file__.replace(".py", ".pdf")) 41 | -------------------------------------------------------------------------------- /textbox-extraction/search.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textbox-extraction/search.pdf -------------------------------------------------------------------------------- /textbox-extraction/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textbox-extraction/search.png -------------------------------------------------------------------------------- /textbox-extraction/textbox-extract-2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script showing how to select only text that is contained in a given rectangle 3 | on a page. 4 | 5 | We use "page.get_textbox", which is available since PyMuPDF v1.18.0. 
6 | The decision on what will be included is made by character, so while much 7 | simpler to use than the other script in this folder, it will ignore word 8 | integrity and cut through any overlaps. 9 | 10 | There also is no logic that maintains natural reading order, so text will 11 | appear as stored in the document. 12 | 13 | """ 14 | import fitz 15 | 16 | doc = fitz.open("search.pdf") # any supported document type 17 | page = doc[0] # we want text from this page 18 | 19 | """ 20 | ------------------------------------------------------------------------------- 21 | Identify the rectangle. 22 | ------------------------------------------------------------------------------- 23 | """ 24 | rect = page.first_annot.rect # this annot has been prepared for us! 25 | # Now we have the rectangle --------------------------------------------------- 26 | 27 | print(page.get_textbox(rect)) 28 | -------------------------------------------------------------------------------- /textwriter/cff-test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/cff-test.pdf -------------------------------------------------------------------------------- /textwriter/demo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/demo.pdf -------------------------------------------------------------------------------- /textwriter/demo.py: -------------------------------------------------------------------------------- 1 | import fitz, os 2 | 3 | thisdir = lambda f: os.path.join(os.path.dirname(__file__), f) 4 | thisfile = os.path.abspath(__file__) 5 | outfile = thisfile.replace(".py", ".pdf") 6 | 7 | font1 = fitz.Font("helv") 8 | font2 = fitz.Font("tiro") 9 | doc = fitz.open() 10 | page = doc.new_page() 11 | point 
= fitz.Point(50, 72) 12 | matrix = fitz.Matrix(-20) 13 | 14 | wrt1 = fitz.TextWriter(page.rect, color=(0, 0, 1)) 15 | wrt2 = fitz.TextWriter(page.rect, color=(1, 0, 0)) 16 | 17 | _, last = wrt1.append(point, "This text changes color,", font1, 11) 18 | _, last = wrt2.append(last, " font and fontsize", font2, 18) 19 | _, last = wrt1.append(last, " several", font1, 11) 20 | _, last = wrt2.append(last, " times!", font2, 24) 21 | 22 | # output both text writers on current page in arbitrary sequence 23 | wrt1.write_text(page, morph=(point, matrix)) # using the same morph parameter 24 | wrt2.write_text(page, morph=(point, matrix)) # also preserves the joint text. 25 | 26 | # make a new page 27 | page = doc.new_page() 28 | rect = wrt1.text_rect | wrt2.text_rect # join rect of blue and red text 29 | # make new rectangle from it, rotated by 90 degrees 30 | nrect = fitz.Rect( 31 | rect.tl, # same top-left, but width and height exchanged 32 | rect.x0 + rect.height, 33 | rect.y0 + rect.width, 34 | ) 35 | 36 | # use the page method for joint rotated output 37 | page.write_text(rect=nrect, writers=(wrt1, wrt2), rotate=90) 38 | 39 | # one more time with rotation by 270 degrees 40 | nrect += ( 41 | 2 * nrect.width, # identical copy somewhat shifted to the right 42 | 0, 43 | 2 * nrect.width, 44 | 0, 45 | ) 46 | page.write_text(rect=nrect, writers=(wrt1, wrt2), rotate=-90) 47 | 48 | # more outputs with 45 degrees 49 | page = doc.new_page() 50 | page.write_text( 51 | rect=page.rect, 52 | writers=(wrt1, wrt2), 53 | color=(0.2, 0.6, 1), 54 | rotate=-45, # or recoloring 55 | ) 56 | page.write_text( 57 | rect=page.rect, 58 | writers=(wrt1, wrt2), 59 | opacity=0.5, # can be used for watermarking 60 | rotate=45, 61 | ) 62 | doc.save( 63 | outfile, 64 | garbage=4, # makes sense here to combine identical binary data 65 | deflate=True, 66 | ) 67 | -------------------------------------------------------------------------------- /textwriter/new-annots-tw-0.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/new-annots-tw-0.pdf -------------------------------------------------------------------------------- /textwriter/test-droid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/test-droid.pdf -------------------------------------------------------------------------------- /textwriter/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/test.pdf -------------------------------------------------------------------------------- /textwriter/textwriter-textbox.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/textwriter/textwriter-textbox.pdf -------------------------------------------------------------------------------- /word&line-marking/mark-lines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/mark-lines.png -------------------------------------------------------------------------------- /word&line-marking/mark-lines.py: -------------------------------------------------------------------------------- 1 | """ 2 | A PyMuPDF demo script for highlighting lines of text. 3 | 4 | This requires 3 parameters: 5 | - start: point where marking should start - upper bound 6 | - stop: point where marking should stop - lower bound 7 | - clip: rectangle for further limiting width of lines. 
This can be used when 8 | page text is organized in columns: then we must prevent inclusion of 9 | text portions from the wrong columns. 10 | 11 | The parameters are optional in the following sense: 12 | If 'start' is None, the top-left point of 'clip' is used. 13 | If 'stop' is None, the bottom-right point of 'clip' is used. 14 | If 'clip' is None, the page rectangle is used 15 | 16 | Our example page has 3 text columns, and we luckily know that our text is 17 | located in the left column. We also know unique text strings which help us 18 | find the start and stop points. 19 | """ 20 | import fitz 21 | 22 | doc = fitz.open("search.pdf") # the document 23 | page = doc[0] # the page 24 | 25 | # determine start point 26 | rl = page.search_for("im vorfeld solch ") # use a unique string on the page 27 | # we might want to check that len(rl) == 1 here 28 | start = rl[0].tl # top-left point 29 | 30 | # determine stop point 31 | rl = page.search_for("stark aus.") # use a unique string 32 | # again, possibly check len(rl) == 1 33 | stop = rl[0].br # bottom-right point 34 | 35 | # we need a clip rectangle, because the page has 3 text columns! 
36 | clip = page.rect # start with page rectangle 37 | width = clip.width # take the width and limit it 38 | clip.x1 = width * 0.35 # to about one third to get the left column 39 | 40 | page.add_highlight_annot(start=start, stop=stop, clip=clip) 41 | # ------------------------------------------------------------ 42 | # underlining and strike-through work in the same way: 43 | # ------------------------------------------------------------ 44 | # page.add_underline_annot(start=start, stop=stop, clip=clip) 45 | # page.add_strikeout_annot(start=start, stop=stop, clip=clip) 46 | # page.add_squiggly_annot(start=start, stop=stop, clip=clip) 47 | 48 | doc.save(__file__.replace(".py", ".pdf")) 49 | -------------------------------------------------------------------------------- /word&line-marking/mark-lines2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/mark-lines2.jpg -------------------------------------------------------------------------------- /word&line-marking/mark-lines2.py: -------------------------------------------------------------------------------- 1 | import fitz 2 | 3 | """ 4 | This marks a longer, unique sentence on the page. 5 | The parameters 'start', 'stop' and 'clip' are fully computed from the 6 | returned hit rectangles. 7 | """ 8 | doc = fitz.open("search.pdf") 9 | page = doc[0] 10 | 11 | # Search for this text. It is shown with hyphens on the page, which we can 12 | # simply delete for our search. Line breaks can be handled like spaces. 
13 | text1 = ( 14 | "Erklären ließe sich die Veränderung, wenn Beteigeuze einen", 15 | "Materieauswurf ins All geschleudert hat, der einen Teil", 16 | "der Strahlung abfängt, meinen die Forscher der", 17 | "Europäischen Südsternwarte ESO.", 18 | ) 19 | 20 | rl = page.search_for( 21 | " ".join(text1), # reconstruct full sentence for searching 22 | ) 23 | 24 | # You should check success here! 25 | start = rl[0].tl # top-left of first rectangle 26 | stop = rl[-1].br # bottom-right of last rectangle 27 | clip = fitz.Rect() # build clip as union of the hit rectangles 28 | for r in rl: 29 | clip |= r 30 | 31 | page.add_highlight_annot( 32 | start=start, 33 | stop=stop, 34 | clip=clip, 35 | ) 36 | 37 | doc.save(__file__.replace(".py", ".pdf"), garbage=3, deflate=True) 38 | -------------------------------------------------------------------------------- /word&line-marking/mark-words.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/mark-words.pdf -------------------------------------------------------------------------------- /word&line-marking/search.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pymupdf/PyMuPDF-Utilities/4d266de74be4c4d6dfb9925007b0d1a3818bf78a/word&line-marking/search.pdf --------------------------------------------------------------------------------