├── src └── pdfje │ ├── py.typed │ ├── vendor │ └── __init__.py │ ├── typeset │ ├── __init__.py │ ├── hyphens.py │ ├── layout.py │ ├── parse.py │ ├── state.py │ ├── firstfit.py │ └── knuth_plass.py │ ├── layout │ ├── __init__.py │ ├── rule.py │ ├── pages.py │ ├── common.py │ └── paragraph.py │ ├── fonts │ ├── __init__.py │ └── common.py │ ├── __init__.py │ ├── compat.py │ ├── units.py │ ├── resources.py │ ├── page.py │ ├── document.py │ └── atoms.py ├── tests ├── __init__.py ├── layout │ ├── __init__.py │ ├── test_rule.py │ ├── test_common.py │ └── test_paragraph.py ├── typeset │ ├── __init__.py │ ├── test_hyphens.py │ └── test_state.py ├── test_units.py ├── test_page.py ├── test_draw.py ├── test_atoms.py ├── conftest.py ├── test_fonts.py ├── test_common.py ├── common.py └── test_style.py ├── docs ├── changelog.rst ├── requirements.txt ├── index.rst ├── Makefile ├── examples.rst ├── conf.py └── api.rst ├── sample.png ├── resources ├── fonts │ ├── CrimsonText-Bold.ttf │ ├── CrimsonText-Italic.ttf │ ├── CrimsonText-Regular.ttf │ ├── DejaVuSansCondensed.ttf │ ├── CrimsonText-BoldItalic.ttf │ ├── DejaVuSansCondensed-Bold.ttf │ ├── DejaVuSansCondensed-Oblique.ttf │ ├── DejaVuSansCondensed-BoldOblique.ttf │ ├── CrimsonText-License.txt │ └── DejaVuLicense.txt ├── Core14_AFMs │ ├── MustRead.html │ ├── ZapfDingbats.afm │ └── Symbol.afm ├── optimal_vs_firstfit.py ├── scripts │ └── parse_afm.py └── sample.py ├── pytest.ini ├── dependabot.yml ├── .github ├── dependabot.yml └── workflows │ └── tests.yml ├── .readthedocs.yml ├── mypy.ini ├── Makefile ├── LICENSE ├── pyproject.toml ├── tox.ini ├── .gitignore ├── CHANGELOG.rst ├── examples ├── book.py └── multicolumn.py └── README.rst /src/pdfje/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/pdfje/vendor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/layout/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/typeset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/pdfje/typeset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/sample.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx<8.3 2 | furo~=2025.12.19 3 | sphinx-toolbox~=4.1.0 4 | sphinx-autodoc-typehints~=3.2 5 | -------------------------------------------------------------------------------- /resources/fonts/CrimsonText-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-Bold.ttf -------------------------------------------------------------------------------- /resources/fonts/CrimsonText-Italic.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-Italic.ttf -------------------------------------------------------------------------------- /resources/fonts/CrimsonText-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-Regular.ttf -------------------------------------------------------------------------------- /resources/fonts/DejaVuSansCondensed.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed.ttf -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --benchmark-disable 3 | markers = 4 | slow: marks tests as slow (deselect with '-m "not slow"') 5 | -------------------------------------------------------------------------------- /resources/fonts/CrimsonText-BoldItalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-BoldItalic.ttf -------------------------------------------------------------------------------- /resources/fonts/DejaVuSansCondensed-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed-Bold.ttf -------------------------------------------------------------------------------- /resources/fonts/DejaVuSansCondensed-Oblique.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed-Oblique.ttf -------------------------------------------------------------------------------- /resources/fonts/DejaVuSansCondensed-BoldOblique.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed-BoldOblique.ttf -------------------------------------------------------------------------------- /dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /src/pdfje/layout/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from .common import Block 4 | from .paragraph import LinebreakParams, Paragraph 5 | from .rule import Rule 6 | 7 | __all__ = ["Block", "Paragraph", "Rule", "LinebreakParams"] 8 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | Contents 4 | ======== 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | 9 | tutorial.rst 10 | examples.rst 11 | api.rst 12 | changelog.rst 13 | 14 | Indices and tables 15 | ================== 16 | 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | builder: html 5 | configuration: docs/conf.py 6 | fail_on_warning: true 7 | 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | 13 | python: 14 | install: 15 | - requirements: docs/requirements.txt 16 | - method: pip 17 | path: . 18 | -------------------------------------------------------------------------------- /tests/test_units.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pdfje.units import cm, inch, mm, pc, pt 4 | 5 | from .common import approx 6 | 7 | 8 | def test_units(): 9 | assert inch(1) == approx(72) 10 | assert cm(1) == approx(28.34645669291339) 11 | assert mm(1) == approx(2.8346456692913386) 12 | assert pc(1) == approx(12) 13 | assert pt(1) == 1 14 | -------------------------------------------------------------------------------- /src/pdfje/fonts/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from .builtins import courier, helvetica, symbol, times_roman, zapf_dingbats 4 | from .common import BuiltinTypeface, TrueType 5 | 6 | __all__ = [ 7 | "helvetica", 8 | "times_roman", 9 | "courier", 10 | "symbol", 11 | "zapf_dingbats", 12 | "BuiltinTypeface", 13 | "TrueType", 14 | ] 15 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | disallow_untyped_defs = True 3 | warn_redundant_casts = True 4 | 
warn_unused_ignores = True 5 | warn_unreachable = True 6 | enable_error_code = redundant-expr 7 | 8 | [mypy-tests.*] 9 | check_untyped_defs = True 10 | disallow_untyped_defs = False 11 | warn_unreachable = True 12 | 13 | [mypy-fontTools.*] 14 | ignore_missing_imports = True 15 | 16 | [mypy-hypothesis.*] 17 | ignore_missing_imports = True 18 | 19 | [mypy-pyphen.*] 20 | ignore_missing_imports = True 21 | 22 | [mypy-pdfje.vendor.*] 23 | ignore_errors = True 24 | -------------------------------------------------------------------------------- /src/pdfje/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from .common import RGB, XY, black, blue, cyan, lime, magenta, red, yellow 4 | from .document import Document 5 | from .layout.pages import AutoPage 6 | from .page import Column, Page 7 | 8 | __version__ = __import__("importlib.metadata").metadata.version(__name__) 9 | 10 | __all__ = [ 11 | # document & pages 12 | "Document", 13 | "Page", 14 | "Column", 15 | "AutoPage", 16 | # helpers 17 | "red", 18 | "lime", 19 | "blue", 20 | "black", 21 | "yellow", 22 | "magenta", 23 | "cyan", 24 | # common 25 | "RGB", 26 | "XY", 27 | ] 28 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Quiz 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | Examples 4 | ======== 5 | 6 | The code for the examples can be found in the ``examples/`` directory. 7 | 8 | 📚 A book 9 | ~~~~~~~~~ 10 | 11 | This example shows: 12 | 13 | - Creating single pages and autogenerated ones 14 | - Page numbering 15 | - Simple graphics 16 | - Custom font 17 | 18 | .. collapse:: Source code (click to expand) 19 | 20 | .. literalinclude :: ../examples/book.py 21 | 22 | .. _multi-column: 23 | 24 | 📰 Multiple columns 25 | ~~~~~~~~~~~~~~~~~~~ 26 | 27 | This example shows the flexibility of the layout engine. 28 | 29 | .. collapse:: Source code (click to expand) 30 | 31 | .. literalinclude :: ../examples/multicolumn.py 32 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - "**" 7 | push: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 18 | 19 | steps: 20 | - uses: actions/checkout@v1 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install "tox<5" tox-gh-actions "poetry>=1.7,<1.8" 29 | - name: Test with tox 30 | run: tox 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: 
init 2 | init: 3 | poetry install 4 | pip install -r docs/requirements.txt 5 | 6 | .PHONY: clean 7 | clean: 8 | rm -rf .coverage .hypothesis .mypy_cache .pytest_cache .tox *.egg-info 9 | rm -rf dist 10 | find . | grep -E "(__pycache__|docs_.*$$|\.pyc|\.pyo$$)" | xargs rm -rf 11 | 12 | .PHONY: isort 13 | isort: 14 | isort . 15 | 16 | .PHONY: format 17 | format: 18 | black . 19 | 20 | .PHONY: fix 21 | fix: isort format 22 | 23 | .PHONY: lint 24 | lint: 25 | flake8 . 26 | 27 | .PHONY: mypy 28 | mypy: 29 | mypy --pretty --strict src examples/ 30 | mypy --pretty tests/ 31 | 32 | .PHONY: test 33 | test: 34 | pytest --cov=pdfje 35 | 36 | .PHONY: docs 37 | docs: 38 | @touch docs/api.rst 39 | make -C docs/ html 40 | 41 | .PHONY: publish 42 | publish: 43 | rm -rf dist/* 44 | poetry build 45 | twine upload dist/* 46 | -------------------------------------------------------------------------------- /tests/layout/test_rule.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from pdfje import XY 6 | from pdfje.layout import Rule 7 | from pdfje.layout.common import ColumnFill 8 | from pdfje.page import Column 9 | from pdfje.resources import Resources 10 | from pdfje.style import StyleFull 11 | 12 | STYLE = StyleFull.DEFAULT 13 | 14 | COLUMNS = [ 15 | col := ColumnFill(Column(XY(80, 40), 205, 210), (), 20), 16 | ColumnFill(Column(XY(350, 40), 195, 190), (), 110), 17 | ColumnFill(Column(XY(350, 40), 200, 200), (), 90), 18 | ] 19 | 20 | 21 | @pytest.mark.skip(reason="not yet implemented") 22 | def test_into_columns_skipped_because_of_break(): 23 | r = Rule(margin=(12, 0, 10, 0)) 24 | filled = list(r.into_columns(Resources(), STYLE, iter(COLUMNS))) 25 | assert len(filled) == 1 26 | assert filled[0] is COLUMNS[0] 27 | -------------------------------------------------------------------------------- /tests/typeset/test_hyphens.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from pdfje.typeset.hyphens import ( 6 | HAS_PYPHEN, 7 | default_hyphenator, 8 | never_hyphenate, 9 | parse_hyphenator, 10 | ) 11 | 12 | 13 | def test_default_hyphenation(): 14 | assert list(default_hyphenator("beautiful")) == ["beau", "ti", "ful"] 15 | 16 | 17 | class TestParseHyphenator: 18 | @pytest.mark.skipif(not HAS_PYPHEN, reason="pyphen not installed") 19 | def test_pyphen(self): 20 | from pyphen import Pyphen 21 | 22 | p = Pyphen(lang="nl_NL") 23 | h = parse_hyphenator(p) 24 | result = h("beautiful") 25 | assert hasattr(result, "__iter__") 26 | assert list(result) == ["beau", "ti", "ful"] 27 | 28 | def test_none(self): 29 | assert parse_hyphenator(None) is never_hyphenate 30 | -------------------------------------------------------------------------------- /src/pdfje/compat.py: -------------------------------------------------------------------------------- 1 | "Compatibility layer for various Python versions" 2 | from __future__ import annotations 3 | 4 | import sys 5 | from itertools import tee 6 | from typing import TYPE_CHECKING, Callable, Iterable, Iterator, TypeVar 7 | 8 | __all__ = ["pairwise", "cache"] 9 | 10 | 11 | if sys.version_info < (3, 10) or TYPE_CHECKING: # pragma: no cover 12 | T = TypeVar("T") 13 | 14 | def pairwise(i: Iterable[T]) -> Iterator[tuple[T, T]]: 15 | a, b = tee(i) 16 | next(b, None) 17 | return zip(a, b) 18 | 19 | else: 20 | from itertools import pairwise 21 | 22 | 23 | if sys.version_info < (3, 9) or TYPE_CHECKING: # pragma: no cover 24 | from functools import lru_cache 25 | 26 | _Tcall = TypeVar("_Tcall", bound=Callable[..., object]) 27 | 28 | def cache(func: _Tcall) -> _Tcall: 29 | return lru_cache(maxsize=None)(func) # type: ignore 30 | 31 | else: 32 | from functools import cache 33 | -------------------------------------------------------------------------------- 
/resources/Core14_AFMs/MustRead.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Core 14 AFM Files - ReadMe 7 | 8 | 9 | 10 | or 11 | 12 | 13 | 14 | 15 | 16 |
This file and the 14 PostScript(R) AFM files it accompanies may be used, copied, and distributed for any purpose and without charge, with or without modification, provided that all copyright notices are retained; that the AFM files are not distributed without this file; that all modifications to this file or any of the AFM files are prominently noted in the modified file(s); and that this paragraph is not modified. Adobe Systems has no responsibility or obligation to support the use of the AFM files. Col
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/test_page.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pdfje import XY, Column, Page 4 | from pdfje.draw import Circle 5 | from pdfje.units import A5 6 | 7 | from .common import approx 8 | 9 | 10 | class TestPage: 11 | def test_default_column(self): 12 | p = Page(size=A5) 13 | assert len(p.columns) == 1 14 | assert p.columns[0].width < A5.x 15 | assert p.columns[0].height < A5.y 16 | 17 | def test_one_column_by_margins(self): 18 | [column] = Page(size=A5, margin=(20, 30)).columns 19 | assert column.origin.x == approx(30) 20 | assert column.origin.y == approx(20) 21 | assert column.width == approx(A5.x - 60) 22 | assert column.height == approx(A5.y - 40) 23 | 24 | def test_add(self): 25 | p = Page() 26 | p2 = p.add(Circle((0, 0), 10)) 27 | assert p == Page() 28 | assert p2 == Page((Circle((0, 0), 10),)) 29 | 30 | 31 | class TestColumn: 32 | def test_init(self): 33 | assert Column((1, 2), 3, 4) == Column(XY(1, 2), 3, 4) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 - 2023 Arie Bovenberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/pdfje/units.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from functools import partial 4 | from operator import mul 5 | from typing import Callable 6 | 7 | from .common import XY, Pt 8 | 9 | __all__ = [ 10 | "inch", 11 | "cm", 12 | "mm", 13 | "pc", 14 | "pt", 15 | "Pt", 16 | ] 17 | 18 | inch: Callable[[float], Pt] = partial(mul, 72) 19 | inch.__doc__ = "Convert inches to points" 20 | cm: Callable[[float], Pt] = partial(mul, 28.346456692913385) 21 | cm.__doc__ = "Convert centimeters to points" 22 | mm: Callable[[float], Pt] = partial(mul, 2.8346456692913385) 23 | mm.__doc__ = "Convert millimeters to points" 24 | pc: Callable[[float], Pt] = partial(mul, 12) 25 | pc.__doc__ = "Convert picas to points" 26 | 27 | 28 | def pt(x: float) -> Pt: 29 | "No-op conversion. Can be used to make units explicit." 
30 | return x 31 | 32 | 33 | A0 = XY(2380, 3368) 34 | "A0 paper size" 35 | A1 = XY(1684, 2380) 36 | "A1 paper size" 37 | A2 = XY(1190, 1684) 38 | "A2 paper size" 39 | A3 = XY(842, 1190) 40 | "A3 paper size" 41 | A4 = XY(595, 842) 42 | "A4 paper size" 43 | A5 = XY(420, 595) 44 | "A5 paper size" 45 | A6 = XY(297, 420) 46 | "A6 paper size" 47 | letter = XY(612, 792) 48 | "Letter paper size" 49 | legal = XY(612, 1008) 50 | "Legal paper size" 51 | tabloid = XY(792, 1224) 52 | "Tabloid paper size" 53 | ledger = tabloid.flip() 54 | "Ledger paper size, same as tabloid landscape" 55 | -------------------------------------------------------------------------------- /tests/test_draw.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pdfje.common import XY 4 | from pdfje.draw import Polyline, Rect 5 | 6 | NA = NotImplemented 7 | 8 | 9 | class TestPolyline: 10 | def test_empty(self): 11 | assert b"".join(Polyline([]).render(NA, NA)) == b"" 12 | 13 | def test_one_point(self): 14 | assert ( 15 | b"".join(Polyline([(2, 3)]).render(NA, NA)) 16 | == b"2 3 m 0 0 0 RG S\n" 17 | ) 18 | 19 | def test_several_points(self): 20 | assert ( 21 | b"".join(Polyline([(2, 3), XY(4, 5), (6, 7)]).render(NA, NA)) 22 | == b"2 3 m 4 5 l 6 7 l 0 0 0 RG S\n" 23 | ) 24 | 25 | def test_several_points_closed(self): 26 | assert ( 27 | b"".join( 28 | Polyline([(2, 3), XY(4, 5), (6, 7)], close=True).render(NA, NA) 29 | ) 30 | == b"2 3 m 4 5 l 6 7 l 0 0 0 RG s\n" 31 | ) 32 | 33 | 34 | class TestRect: 35 | def test_init(self): 36 | assert Rect((2, 3), 4, 5).origin == XY(2, 3) 37 | assert Rect((2, 3), 4, 5).width == 4 38 | assert Rect((2, 3), 4, 5).height == 5 39 | 40 | def test_render(self): 41 | assert ( 42 | b"".join(Rect((2, 3), 4, 5).render(NA, NA)) 43 | == b"2 3 4 5 re 0 0 0 RG S\n" 44 | ) 45 | 46 | def test_invisible(self): 47 | assert ( 48 | b"".join(Rect((2, 3), 4, 5, stroke=None).render(NA, NA)) 49 | == b"2 3 4 5 re 
n\n" 50 | ) 51 | -------------------------------------------------------------------------------- /tests/test_atoms.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | from hypothesis import given 5 | from hypothesis.strategies import binary 6 | 7 | from pdfje.atoms import HexString, _escape, sanitize_name 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "string,expect", 12 | [ 13 | (b"", b""), 14 | ( 15 | b"!\"$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU" 16 | b"VWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 17 | b"!\"$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU" 18 | b"VWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 19 | ), 20 | (b"foo\r# a\x00b\n\tc", b"foo#23#20abc"), 21 | ], 22 | ) 23 | def test_sanitize_name(string, expect): 24 | assert sanitize_name(string) == expect 25 | 26 | 27 | @pytest.mark.parametrize( 28 | "string,expect", 29 | [ 30 | (b"", b""), 31 | (b"\x00\x02a9~!kbn[]'/?", b"\x00\x02a9~!kbn[]'/?"), 32 | (b"a\\b\\", b"a\\\\b\\\\"), 33 | (b"a\t\nb\f\b", b"a\\t\\nb\\f\\b"), 34 | ], 35 | ) 36 | def test_escape_string(string, expect): 37 | assert _escape(string) == expect 38 | 39 | 40 | @pytest.mark.slow 41 | @given(binary()) 42 | def test_escape_fuzzing(bytestr): 43 | assert len(_escape(bytestr)) >= len(bytestr) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | "string,expect", 48 | [ 49 | (b"", b"<>"), 50 | (b"\x00\xa9b Z", b"<00A962205A>"), 51 | (b"<>", b"<3C3E>"), 52 | ], 53 | ) 54 | def test_hex_string(string, expect): 55 | assert b"".join(HexString(string).write()).upper() == expect 56 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jun 13 22:58:12 2017. 
6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | from __future__ import annotations 17 | 18 | # -- Project information ----------------------------------------------------- 19 | import importlib.metadata 20 | 21 | metadata = importlib.metadata.metadata("pdfje") 22 | 23 | project = metadata["Name"] 24 | author = metadata["Author"] 25 | version = metadata["Version"] 26 | release = metadata["Version"] 27 | 28 | 29 | # -- General configuration ------------------------------------------------ 30 | 31 | extensions = [ 32 | "sphinx.ext.autodoc", 33 | "sphinx.ext.intersphinx", 34 | "sphinx.ext.napoleon", 35 | "sphinx.ext.viewcode", 36 | "sphinx_toolbox.collapse", 37 | "sphinx_autodoc_typehints", 38 | ] 39 | templates_path = ["_templates"] 40 | source_suffix = ".rst" 41 | 42 | master_doc = "index" 43 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 44 | 45 | # -- Options for HTML output ---------------------------------------------- 46 | 47 | autodoc_member_order = "bysource" 48 | html_theme = "furo" 49 | highlight_language = "python3" 50 | pygments_style = "default" 51 | intersphinx_mapping = { 52 | "python": ("https://docs.python.org/3", None), 53 | "pyphen": ("https://doc.courtbouillon.org/pyphen/stable/", None), 54 | } 55 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API reference 4 | ============= 5 | 6 | Unless otherwise noted, all classes are immutable. 7 | 8 | pdfje 9 | ----- 10 | 11 | .. automodule:: pdfje 12 | :members: 13 | 14 | pdfje.style 15 | ----------- 16 | 17 | .. automodule:: pdfje.style 18 | :members: 19 | 20 | .. 
autodata:: pdfje.style.bold 21 | .. autodata:: pdfje.style.italic 22 | .. autodata:: pdfje.style.regular 23 | 24 | pdfje.layout 25 | ------------ 26 | 27 | .. automodule:: pdfje.layout 28 | :members: 29 | 30 | pdfje.draw 31 | ---------- 32 | 33 | .. automodule:: pdfje.draw 34 | :members: 35 | 36 | pdfje.fonts 37 | ----------- 38 | 39 | .. autodata:: pdfje.fonts.helvetica 40 | .. autodata:: pdfje.fonts.times_roman 41 | .. autodata:: pdfje.fonts.courier 42 | .. autodata:: pdfje.fonts.symbol 43 | .. autodata:: pdfje.fonts.zapf_dingbats 44 | 45 | .. automodule:: pdfje.fonts 46 | :members: 47 | 48 | 49 | pdfje.units 50 | ----------- 51 | 52 | .. automodule:: pdfje.units 53 | :members: 54 | 55 | 56 | **Page sizes** 57 | 58 | Below are common page sizes. 59 | Because the page size is a :class:`~pdfje.XY` object, you can use 60 | ``x`` and ``y`` attributes to get the width and height of a page size. 61 | The landscape variants can be obtained by calling :meth:`~pdfje.XY.flip`. 62 | 63 | .. code-block:: python 64 | 65 | from pdfje.units import A4 66 | 67 | A4.x # 595 68 | A4.y # 842 69 | A4.flip() # XY(842, 595) -- the landscape variant 70 | A4 / 2 # XY(297.5, 421) -- point at the center of the page 71 | 72 | .. autodata:: pdfje.units.A0 73 | .. autodata:: pdfje.units.A1 74 | .. autodata:: pdfje.units.A2 75 | .. autodata:: pdfje.units.A3 76 | .. autodata:: pdfje.units.A4 77 | .. autodata:: pdfje.units.A5 78 | .. autodata:: pdfje.units.A6 79 | .. autodata:: pdfje.units.letter 80 | .. autodata:: pdfje.units.legal 81 | .. autodata:: pdfje.units.tabloid 82 | .. 
autodata:: pdfje.units.ledger 83 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from pdfje.fonts.common import TrueType 8 | 9 | RESOURCES = Path(__file__).parent / "../resources" 10 | 11 | 12 | def pytest_addoption(parser): 13 | parser.addoption("--output-path", help="Output path for PDF files") 14 | parser.addoption( 15 | "--runslow", action="store_true", default=False, help="run slow tests" 16 | ) 17 | 18 | 19 | @pytest.fixture 20 | def outfile(tmpdir, request) -> Path: 21 | base = Path(request.config.getoption("--output-path") or tmpdir) 22 | base.mkdir(exist_ok=True) 23 | func = request.function 24 | return ( 25 | base 26 | / "-".join( 27 | [func.__module__[6:], func.__qualname__] # remove "tests." prefix 28 | ).replace(".", "-") 29 | ).with_suffix(".pdf") 30 | 31 | 32 | def pytest_collection_modifyitems(config, items): 33 | if config.getoption("--runslow"): 34 | # --runslow given in cli: do not skip slow tests 35 | return 36 | skip_slow = pytest.mark.skip(reason="need --runslow option to run") 37 | for item in items: 38 | if "slow" in item.keywords: 39 | item.add_marker(skip_slow) 40 | 41 | 42 | @pytest.fixture(scope="session") 43 | def dejavu() -> TrueType: 44 | return TrueType( 45 | RESOURCES / "fonts/DejaVuSansCondensed.ttf", 46 | RESOURCES / "fonts/DejaVuSansCondensed-Bold.ttf", 47 | RESOURCES / "fonts/DejaVuSansCondensed-Oblique.ttf", 48 | RESOURCES / "fonts/DejaVuSansCondensed-BoldOblique.ttf", 49 | ) 50 | 51 | 52 | @pytest.fixture(scope="session") 53 | def crimson() -> TrueType: 54 | return TrueType( 55 | RESOURCES / "fonts/CrimsonText-Regular.ttf", 56 | RESOURCES / "fonts/CrimsonText-Bold.ttf", 57 | RESOURCES / "fonts/CrimsonText-Italic.ttf", 58 | RESOURCES / "fonts/CrimsonText-BoldItalic.ttf", 59 | ) 60 | 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pdfje" 3 | version = "0.6.1" 4 | description = "Write beautiful PDFs in declarative Python" 5 | authors = ["Arie Bovenberg "] 6 | license = "MIT" 7 | classifiers = [ 8 | "Programming Language :: Python :: 3.8", 9 | "Programming Language :: Python :: 3.9", 10 | "Programming Language :: Python :: 3.10", 11 | "Programming Language :: Python :: 3.11", 12 | "Programming Language :: Python :: 3.12", 13 | ] 14 | packages = [ 15 | { include = "pdfje", from = "src" }, 16 | ] 17 | documentation = "https://pdfje.readthedocs.io" 18 | readme = "README.rst" 19 | include = ["CHANGELOG.rst", "README.rst"] 20 | repository = "https://github.com/ariebovenberg/pdfje" 21 | keywords = ["pdf"] 22 | 23 | [tool.poetry.dependencies] 24 | python = ">=3.8.1,<4.0" 25 | fonttools = {version="^4.38.0", optional=true} 26 | pyphen = {version=">=0.13.0", optional=true} 27 | 28 | [tool.poetry.extras] 29 | fonts = ["fonttools"] 30 | hyphens = ["pyphen"] 31 | 32 | [tool.poetry.group.test.dependencies] 33 | pytest = ">=7.0.1,<9.0.0" 34 | pytest-cov = ">=4,<6" 35 | pytest-benchmark = "^4.0.0" 36 | hypothesis = "^6.68.2" 37 | 38 | [tool.poetry.group.typecheck.dependencies] 39 | mypy = "^1.0.0" 40 | 41 | [tool.poetry.group.linting.dependencies] 42 | black = "^24" 43 | flake8 = ">=6,<8" 44 | isort = "^5.7.0" 45 | slotscheck = ">=0.17,<0.20" 46 | 47 | 48 | [tool.black] 49 | line-length = 79 50 | include = '\.pyi?$' 51 | exclude = ''' 52 | /( 53 | \.eggs 54 | | \.git 55 | | \.mypy_cache 56 | | \.tox 57 | | \.venv 58 | | _build 59 | | build 60 | | dist 61 | )/ 62 | ''' 63 | 64 | [tool.isort] 65 | line_length = 79 66 | profile = 'black' 67 | add_imports = ['from __future__ import annotations'] 68 | 69 | [tool.slotscheck] 70 | strict-imports = true 71 | require-superclass = true 72 | require-subclass = 
@final
@add_slots
@dataclass(frozen=True, init=False)
class Rule(Block):
    """A :class:`Block` that draws a horizontal line"""

    color: RGB
    margin: Sides

    def __init__(
        self,
        color: RGB | HexColor = black,
        margin: SidesLike = Sides(6, 0, 6, 0),
    ) -> None:
        # The dataclass is frozen, so the parsed values are stored through
        # the ``setattr_frozen`` helper instead of plain assignment.
        setattr_frozen(self, "color", RGB.parse(color))
        setattr_frozen(self, "margin", Sides.parse(margin))

    def into_columns(
        self, _: Resources, __: StyleFull, cs: Iterator[ColumnFill]
    ) -> Iterator[ColumnFill]:
        """Place the rule in the first column that has room for it.

        The rule occupies ``top + bottom`` margin of vertical space. If the
        current column has less free height than that, the column is yielded
        unchanged and the rule is placed in the next column taken from
        ``cs``. The resources and style arguments are not used.
        """
        col = next(cs)
        top, right, bottom, left = self.margin
        if (height := top + bottom) > col.height_free:
            # There is not enough room for the rule in the current column.
            # Yield the column and start a new one.
            yield col
            # Without this, the column just yielded would be yielded a
            # second time below, with the rule overflowing it.
            col = next(cs)
        # The line is drawn ``top`` below the column's current cursor and
        # inset horizontally by the left/right margins.
        y = col.box.origin.y + col.height_free - top
        x = col.box.origin.x + left
        yield col.add(
            ShapedRule(
                XY(x, y),
                XY(col.box.origin.x + col.box.width - right, y),
                self.color,
                height,
            ),
        )


@add_slots
@dataclass(frozen=True)
class ShapedRule(Shaped):
    """A rule whose end points have been fixed within a column."""

    start: XY
    end: XY
    color: RGB
    # Vertical space the rule takes up in its column (top + bottom margin);
    # it is not used when drawing the line itself.
    height: Pt

    def render(self, _: XY, __: Pt) -> Streamable:
        """Yield the PDF operators stroking a line from ``start`` to ``end``.

        The position and width arguments are ignored: the end points were
        already computed when the rule was laid out.
        """
        # move-to (m), line-to (l), set RGB stroke color (RG), stroke (S)
        yield b"%g %g m %g %g l %g %g %g RG S\n" % (
            *self.start,
            *self.end,
            *self.color,
        )
if HAS_PYPHEN:

    def _pyphenate(p: Pyphen, txt: str) -> Iterable[str]:
        """Split ``txt`` at the hyphenation positions reported by ``p``.

        Returns the text unsplit (as a one-element tuple) when there are
        no positions.
        """
        breakpoints = p.positions(txt)
        if not breakpoints:
            return (txt,)
        # Pair up consecutive boundaries (0, p1), (p1, p2), ..., (pn, None)
        # and slice the text with each pair.
        boundaries = chain((0,), breakpoints, (None,))
        return map(txt.__getitem__, starmap(slice, pairwise(boundaries)))

    def parse_hyphenator(p: HyphenatorLike) -> Hyphenator:
        """Turn a hyphenator-like value into a plain hyphenator callable."""
        if p is None:
            return never_hyphenate
        if isinstance(p, Pyphen):
            return partial(_pyphenate, p)
        return p

    default_hyphenator: Hyphenator = partial(_pyphenate, Pyphen(lang="en_US"))

else:  # pragma: no cover
    # pyphen is not installed: fall back to the vendored hyphenator.
    from ..vendor.hyphenate import hyphenate_word

    default_hyphenator = hyphenate_word

    def parse_hyphenator(p: HyphenatorLike) -> Hyphenator:
        """Turn a hyphenator-like value into a plain hyphenator callable."""
        if p is None:
            return never_hyphenate
        return p  # type: ignore
--only typecheck --only test -E fonts -E hyphens 31 | 32 | commands= 33 | mypy --pretty --strict src/ examples/ 34 | mypy --pretty tests/ 35 | 36 | [testenv:style] 37 | commands_pre= 38 | poetry install -n -v --no-root --only linting -E fonts -E hyphens 39 | commands= 40 | black --check --diff src/ tests/ 41 | flake8 src/ tests/ 42 | 43 | [testenv:isort] 44 | commands_pre= 45 | poetry install -n -v --no-root --only linting -E fonts -E hyphens 46 | commands= 47 | isort --check-only --diff src/ tests/ 48 | 49 | [testenv:slotscheck] 50 | commands_pre= 51 | poetry install -n -v --no-root --only linting -E fonts -E hyphens 52 | commands= 53 | slotscheck -m pdfje 54 | 55 | [testenv:docs] 56 | basepython=python3.11 57 | deps= 58 | -rdocs/requirements.txt 59 | commands= 60 | sphinx-build -W -d "{toxworkdir}/docs_doctree" docs "{toxworkdir}/docs_out" \ 61 | --color -bhtml 62 | python -c 'import pathlib; print("documentation available under " \ 63 | + (pathlib.Path(r"{toxworkdir}") / "docs_out" / "index.html").as_uri())' 64 | 65 | [coverage:run] 66 | branch=True 67 | [coverage:report] 68 | fail_under=99 69 | exclude_lines= 70 | pragma: no cover 71 | raise NotImplementedError 72 | def __repr__ 73 | @overload 74 | ^\s+def.*: \.\.\. 
75 | if TYPE_CHECKING: 76 | omit=*/pdfje/vendor/* 77 | 78 | [gh-actions] 79 | python = 80 | 3.8: py38 81 | 3.9: py39 82 | 3.10: py310 83 | 3.11: py311, style, isort, docs, minimal 84 | 3.12: py312, typecheck, slotscheck 85 | 86 | [flake8] 87 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,.tox,src/pdfje/vendor 88 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | pip-wheel-metadata/ 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | /.idea 104 | 105 | .vim 106 | 107 | ### macOS ### 108 | # General 109 | .DS_Store 110 | .AppleDouble 111 | .LSOverride 112 | 113 | # Icon must end with two \r 114 | Icon 115 | 116 | # Thumbnails 117 | ._* 118 | 119 | # Files that might appear in the root of a volume 120 | .DocumentRevisions-V100 121 | .fseventsd 122 | .Spotlight-V100 123 | .TemporaryItems 124 | .Trashes 125 | .VolumeIcon.icns 126 | .com.apple.timemachine.donotpresent 127 | 128 | # Directories potentially created on remote AFP share 129 | .AppleDB 130 | .AppleDesktop 131 | Network Trash Folder 132 | Temporary Items 133 | .apdisk 134 | 135 | .envrc 136 | *.csv 137 | *.pdf 138 | output/ 139 | -------------------------------------------------------------------------------- /src/pdfje/layout/pages.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses 
@final
@add_slots
@dataclass(frozen=True, init=False)
class AutoPage:
    """Automatically lays out content on multiple pages.

    Parameters
    ----------
    content: ~typing.Iterable[~pdfje.Block | str] | ~pdfje.Block | str
        The content to lay out on the pages. Can be parsed from single string
        or block.
    template: ~pdfje.Page | ~typing.Callable[[int], ~pdfje.Page]
        A page to use as a template for the layout. If a callable is given,
        it is called with the page number as the only argument to generate
        the page. Defaults to the default :class:`Page`.

    """

    content: Iterable[str | Block]
    template: Callable[[int], Page]

    def __init__(
        self,
        content: str | Block | Iterable[Block | str],
        template: Page | Callable[[int], Page] = always(Page()),
    ) -> None:
        # A lone string or block is wrapped in a list; a string becomes
        # a paragraph first.
        if isinstance(content, (str, Block)):
            content = [_as_block(content)]
        setattr_frozen(self, "content", content)

        # A fixed page is turned into a callable returning that same page.
        make_page = always(template) if isinstance(template, Page) else template
        setattr_frozen(self, "template", make_page)

    def render(
        self, r: Resources, s: StyleFull, pnum: int, /
    ) -> Iterator[RenderedPage]:
        """Lay out the content and yield the rendered pages in order.

        Pages are generated from the template, numbered from ``pnum``
        onwards. Each block fills columns of the pending pages; pages
        completed by a block are rendered right away. The page still open
        after the last block is rendered at the end, including its columns
        that were not filled.
        """
        templates = map(self.template, count(pnum))
        pending: Iterator[PageFill] = map(PageFill.new, templates)
        for item in self.content:
            filler = partial(_as_block(item).into_columns, r, s)
            pending, completed = fill_pages(pending, filler)
            yield from (
                pg.base.fill(r, s, flatten(pg.done)) for pg in completed
            )

        tail = next(pending)
        yield tail.base.fill(r, s, flatten(chain(tail.done, tail.todo)))


def _as_block(b: str | Block) -> Block:
    """Wrap a plain string in a paragraph; return blocks unchanged."""
    if isinstance(b, str):
        return Paragraph(b)
    return b
@add_slots
@dataclass(frozen=True, eq=False)
class Resources:
    """Keeps track of PDF resources within a document, such as fonts"""

    _builtins: dict[tuple[ASCII, bool, bool], BuiltinFont] = field(
        default_factory=dict
    )
    _subsets: dict[tuple[TrueType, bool, bool], Subset] = field(
        default_factory=dict
    )
    _next_subset_index: Iterator[int] = field(default_factory=count.__call__)

    def _numbered_subsets(
        self, first_id: atoms.ObjectID
    ) -> Iterator[tuple[Subset, int]]:
        # Pair every subset with the first object ID of its slot; slots are
        # OBJS_PER_EMBEDDED_FONT apart, starting at ``first_id``.
        return zip(
            self._subsets.values(),
            count(first_id, step=OBJS_PER_EMBEDDED_FONT),
        )

    def to_objects(self, first_id: atoms.ObjectID) -> Iterable[atoms.Object]:
        """Yield the objects of every embedded font subset, numbered from
        ``first_id``."""
        for subset, obj_id in self._numbered_subsets(first_id):
            yield from subset.to_objects(obj_id)

    def to_atoms(self, first_id: atoms.ObjectID) -> atoms.Dictionary:
        """Build the dictionary with a single ``Font`` entry listing all fonts.

        Builtin fonts are listed first with their own resource value;
        embedded subsets follow as references to the object IDs that
        :meth:`to_objects` assigns for the same ``first_id``.
        """
        builtin_entries = [
            (font.id, font.to_resource()) for font in self._builtins.values()
        ]
        subset_entries = [
            (subset.id, atoms.Ref(obj_id))
            for subset, obj_id in self._numbered_subsets(first_id)
        ]
        fonts = atoms.Dictionary(*builtin_entries, *subset_entries)
        return atoms.Dictionary((b"Font", fonts))

    def font(self, f: Typeface, bold: bool, italic: bool) -> Font:
        """Return the font for this typeface and style, registering it on
        first use."""
        if not isinstance(f, BuiltinTypeface):
            key = (f, bold, italic)
            if key not in self._subsets:
                # First request for this combination: create a new subset
                # named after the next free index (F0, F1, ...).
                self._subsets[key] = Subset.new(
                    b"F%i" % next(self._next_subset_index),
                    f.font(bold, italic),
                )
            return self._subsets[key]
        return self._builtins.setdefault(
            (f.regular.name, bold, italic), f.font(bold, italic)
        )
-------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pdfje.atoms import LiteralStr, Real 4 | from pdfje.typeset.state import NO_OP, Passage, splitlines 5 | from pdfje.typeset.words import _encode_kerning 6 | 7 | from ..common import BIG, BLUE, FONT, GREEN, RED 8 | 9 | 10 | class TestSplitlines: 11 | def test_empty(self): 12 | result = splitlines(iter([])) 13 | assert next(result, None) is None 14 | 15 | def test_no_breaks(self): 16 | result = splitlines( 17 | iter( 18 | [ 19 | Passage(RED, "Beautiful "), 20 | Passage(BLUE, "is better "), 21 | Passage(GREEN, "than ugly."), 22 | ] 23 | ) 24 | ) 25 | assert list(next(result)) == [ 26 | Passage(RED, "Beautiful "), 27 | Passage(BLUE, "is better "), 28 | Passage(GREEN, "than ugly."), 29 | ] 30 | 31 | def test_breaks(self): 32 | result = splitlines( 33 | iter( 34 | [ 35 | Passage(RED, "Beautiful "), 36 | Passage(BLUE, "is better "), 37 | Passage(GREEN, "than\nugly.\r\n\n"), 38 | Passage(RED, "Explicit is "), 39 | Passage(BIG, "better than \nimplicit. \n"), 40 | ] 41 | ) 42 | ) 43 | assert list(next(result)) == [ 44 | Passage(RED, "Beautiful "), 45 | Passage(BLUE, "is better "), 46 | Passage(GREEN, "than"), 47 | ] 48 | assert list(next(result)) == [Passage(NO_OP, "ugly.")] 49 | assert list(next(result)) == [Passage(NO_OP, "")] 50 | assert list(next(result)) == [ 51 | Passage(NO_OP, ""), 52 | Passage(RED, "Explicit is "), 53 | Passage(BIG, "better than "), 54 | ] 55 | assert list(next(result)) == [Passage(NO_OP, "implicit. 
")] 56 | assert list(next(result)) == [Passage(NO_OP, "")] 57 | 58 | 59 | class TestEncodeKerning: 60 | def test_typical(self): 61 | assert list( 62 | _encode_kerning("abcdefg", [(1, -20), (2, -30), (6, -40)], FONT) 63 | ) == [ 64 | LiteralStr(b"a"), 65 | Real(20), 66 | LiteralStr(b"b"), 67 | Real(30), 68 | LiteralStr(b"cdef"), 69 | Real(40), 70 | LiteralStr(b"g"), 71 | ] 72 | 73 | def test_kern_first_char(self): 74 | assert list( 75 | _encode_kerning("abcdefg", [(0, -20), (2, -30)], FONT) 76 | ) == [ 77 | Real(20), 78 | LiteralStr(b"ab"), 79 | Real(30), 80 | LiteralStr(b"cdefg"), 81 | ] 82 | 83 | def test_no_kern(self): 84 | assert list(_encode_kerning("abcdefg", [], FONT)) == [ 85 | LiteralStr(b"abcdefg") 86 | ] 87 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 0.6.1 (2023-11-13) 5 | ------------------ 6 | 7 | - 🐍 Official Python 3.12 compatibility 8 | 9 | 0.6.0 (2023-08-15) 10 | ------------------ 11 | 12 | **Added** 13 | 14 | - 🧮 Paragraphs can be optimally typeset using the Knuth-Plass line 15 | breaking algorithm. Use the ``optimal`` argument for this. 16 | - 🛟 Paragraphs support automatically avoiding orphaned lines with 17 | ``avoid_orphans`` argument. 18 | 19 | **Breaking** 20 | 21 | - 📊 In the rare case that a paragraphs contains different text sizes, 22 | all lines now rendered with the same leading. 23 | This is more consistent and allows for faster layouting. 24 | 25 | **Fixed** 26 | 27 | - 🐍 Fix compatibility with Python 3.8 and 3.9 28 | 29 | 0.5.0 (2023-05-07) 30 | ------------------ 31 | 32 | **Breaking** 33 | 34 | - 🪆 Expose most classes from submodules instead of root 35 | (e.g. ``pdfje.Rect`` becomes ``pdfje.draw.Rect``). 36 | The new locations can be found in the API documentation. 37 | - 🏷️ ``Rule`` ``padding`` attribute renamed to ``margin``. 
38 | 39 | **Added** 40 | 41 | - 📰 Support for horizontal alignment and justification of text. 42 | - 🫸 Support for indenting the first line of a paragraph. 43 | - ✂️ Automatic hyphenation of text. 44 | 45 | 0.4.0 (2023-04-10) 46 | ------------------ 47 | 48 | A big release with lots of new features and improvements. 49 | Most importantly, the page layout engine is now complete and 50 | can be used to create multi-page/column documents. 51 | 52 | **Added** 53 | 54 | - 📖 Automatic layout of multi-style text into lines, columns, and pages 55 | - 🔬 Automatic kerning for supported fonts 56 | - 🖌️ Support for drawing basic shapes 57 | - 🎨 Additional text styling options 58 | - 📦 Make fonttools dependency optional 59 | - 📏 Horizontal rule element 60 | 61 | **Documentation** 62 | 63 | - 🧑‍🏫 Add a tutorial and examples 64 | - 📋 Polished docstrings in public API 65 | 66 | **Performance** 67 | 68 | - ⛳️ Document pages and fonts are now written in one efficient pass 69 | 70 | **Breaking** 71 | 72 | - 🌅 Drop Python 3.7 support 73 | 74 | 0.3.0 (2022-12-02) 75 | ------------------ 76 | 77 | **Added** 78 | 79 | - 🍰 Documents can be created directly from string input 80 | - 🪜 Support for explicit newlines in text 81 | - 📢 ``Document.write()`` supports paths, file-like objects and iterator output 82 | - ✅ Improved PDF spec compliance 83 | 84 | **Changed** 85 | 86 | - 📚 Text is now positioned automatically within a page 87 | 88 | 0.2.0 (2022-12-01) 89 | ------------------ 90 | 91 | **Added** 92 | 93 | - 🖌️ Different builtin fonts can be selected 94 | - 📥 Truetype fonts can be embedded 95 | - 🌏 Support for non-ASCII text 96 | - 📐 Pages can be rotated 97 | - 🤏 Compression is applied to keep filesize small 98 | 99 | 0.1.0 (2022-11-02) 100 | ------------------ 101 | 102 | **Added** 103 | 104 | - 💬 Support basic ASCII text on different pages 105 | 106 | 0.0.1 (2022-10-28) 107 | ------------------ 108 | 109 | **Added** 110 | 111 | - 🌱 Write a valid, minimal, empty PDF file 112 | 
@add_slots
@dataclass(frozen=True)
class ShapedText(Shaped):
    """Typeset lines of text, ready to be rendered at a position."""

    lines: Sequence[Line]
    lead: Pt
    align: Align
    height: Pt

    def render(self, pos: XY, width: Pt) -> Iterator[bytes]:
        """Yield the content stream for the lines, starting at ``pos``."""
        initial = self.pre_state()
        return render_text(
            pos, initial, width, self.lines, self.lead, self.align
        )

    def end_state(self) -> State | None:
        """State of the last word, or ``None`` if there are no words."""
        # Lines may (in rare cases) be empty, so search backwards for the
        # first line that actually has a word.
        for line in reversed(self.lines):
            for word in reversed(line.words):
                return word.state
        return None

    def pre_state(self) -> State | None:
        """State of the first word, or ``None`` if there are no words."""
        # Lines may (in rare cases) be empty, so search forwards for the
        # first line that actually has a word.
        for line in self.lines:
            for word in line.words:
                return word.state
        return None


def render_text(
    pos: XY,
    state: State | None,
    prev_width: Pt,
    lines: Iterable[Line],
    lead: Pt,
    align: Align,
) -> Iterator[bytes]:
    """Yield a complete ``BT`` ... ``ET`` text object for the given lines.

    The text position is set to ``pos``, followed by the initial state
    (if any) and the lines as emitted by the renderer matching ``align``.
    """
    yield b"BT\n%g %g Td\n" % pos.astuple()
    if state:
        yield from state
    renderer = _pick_renderer(align.value)
    yield from renderer(lines, lead, prev_width)
    yield b"ET\n"


@fix_abstract_properties
class Line(Streamable):
    """Interface of a single typeset line."""

    __slots__ = ()

    # The words on this line (a line may be empty).
    @property
    @abc.abstractmethod
    def words(self) -> Sequence[WordLike]: ...

    # The width of this line, used to position centered and
    # right-aligned text.
    @property
    @abc.abstractmethod
    def width(self) -> Pt: ...


def _render_left(lines: Iterable[Line], lead: Pt, _: Pt) -> Iterator[bytes]:
    """Render left-aligned lines; the width argument is not needed."""
    # Set the leading once (TL), then start every line with T*.
    yield b"%g TL\n" % lead
    for line in lines:
        yield b"T*\n"
        yield from line


def _render_centered(
    lines: Iterable[Line], lead: Pt, prev_width: Pt
) -> Iterator[bytes]:
    """Render lines centered within the given width."""
    for line in lines:
        # Half the difference with the previous width (initially the
        # width passed in) is the horizontal TD offset for this line.
        shift = (prev_width - line.width) / 2
        yield b"%g %g TD\n" % (shift, -lead)
        yield from line
        prev_width = line.width


def _render_right(
    lines: Iterable[Line], lead: Pt, prev_width: Pt
) -> Iterator[bytes]:
    """Render lines right-aligned within the given width."""
    for line in lines:
        # The full difference with the previous width (initially the
        # width passed in) is the horizontal TD offset for this line.
        shift = prev_width - line.width
        yield b"%g %g TD\n" % (shift, -lead)
        yield from line
        prev_width = line.width
def main() -> None:
    "Generate a PDF with the content of The Great Gatsby"
    # The title page comes first, followed by one auto-layout section
    # per chapter.
    chapter_pages = [
        AutoPage(blocks, template=create_page) for blocks in chapters()
    ]
    book = Document([TITLE_PAGE] + chapter_pages, style=CRIMSON)
    book.write("book.pdf")


def create_page(num: int) -> Page:
    "Return the base page with the page number ``num`` added to it"
    # The number is centered horizontally, 20mm from the bottom
    page_number = Text(
        (PAGESIZE.x / 2, mm(20)), str(num), Style(size=10), align="center"
    )
    return BASEPAGE.add(page_number)
def chapters() -> Iterable[Sequence[Paragraph | Rule]]:
    "Book content grouped by chapters"
    current: list[Paragraph | Rule] = [Paragraph("Chapter I\n", HEADING)]
    # The first paragraph after a chapter heading is not indented
    indent = 0
    for text in PARAGRAPHS:
        stripped = text.strip()
        if stripped in _CHAPTER_NUMERALS:
            # A chapter numeral on its own closes the chapter collected
            # so far and opens the next one with a fresh heading.
            yield current
            current = [Paragraph(f"Chapter {stripped}\n", HEADING)]
            indent = 0
            continue
        if text.startswith("------"):
            # A dashed line in the source text becomes a horizontal rule
            current.append(Rule("#aaaaaa", (20, 10, 10)))
            continue
        current.append(
            Paragraph(
                text, Style(line_spacing=1.2), align="justify", indent=indent
            )
        )
        indent = 15
    yield current
17 | HAS_FONTTOOLS = False 18 | 19 | 20 | def _make_subset(cids) -> Subset: 21 | pytest.importorskip("fontTools") 22 | return Subset( 23 | b"F0", 24 | NotImplemented, 25 | lambda _: 1, 26 | cids, 27 | NotImplemented, 28 | None, 29 | ) 30 | 31 | 32 | _EXAMPLE_KERNINGTABLE: KerningTable = dictget( 33 | { 34 | ("x", "y"): -40, 35 | ("a", "b"): -60, 36 | (" ", "a"): -20, 37 | ("a", " "): -10, 38 | ("z", " "): -10, 39 | }, 40 | 0, 41 | ) 42 | 43 | 44 | class TestKern: 45 | def test_empty(self): 46 | assert list(kern(_EXAMPLE_KERNINGTABLE, "", " ")) == [] 47 | 48 | def test_no_kerning_needed(self): 49 | assert list(kern(_EXAMPLE_KERNINGTABLE, "basdfzyx", " ")) == [] 50 | 51 | def test_lots_of_kerning(self): 52 | assert list(kern(_EXAMPLE_KERNINGTABLE, "aaababaxyz", " ")) == [ 53 | (0, -20), 54 | (3, -60), 55 | (5, -60), 56 | (8, -40), 57 | ] 58 | 59 | def test_lots_of_kerning_no_init(self): 60 | assert list(kern(_EXAMPLE_KERNINGTABLE, "aaababaxyz", None)) == [ 61 | (3, -60), 62 | (5, -60), 63 | (8, -40), 64 | ] 65 | 66 | def test_one_letter(self): 67 | assert list(kern(_EXAMPLE_KERNINGTABLE, "a", " ")) == [ 68 | (0, -20), 69 | ] 70 | 71 | 72 | class TestEncodeEmbeddedSubset: 73 | def test_empty(self): 74 | assert _make_subset({}).encode("") == b"" 75 | 76 | def test_ascii(self): 77 | assert ( 78 | _make_subset( 79 | {ord("a"): 1, ord("b"): 4, ord("\n"): 0xFFFE}, 80 | ).encode("ab\n") 81 | == b"\x00\x01\x00\x04\xff\xfe" 82 | ) 83 | 84 | def test_exotic_unicode(self): 85 | assert ( 86 | _make_subset( 87 | {ord("🌵"): 9, ord("𫄸"): 0xD900, ord("𒀗"): 0xFFFE} 88 | ).encode( 89 | "🌵𫄸𒀗", 90 | ) 91 | == b"\x00\x09\xd9\x00\xff\xfe" 92 | ) 93 | 94 | def test_long_string(self, benchmark): 95 | count = 10_000 96 | rand = Random(0) 97 | string = "".join(map(chr, rand.sample(range(0x10FFFF), k=count))) 98 | cids = list(range(count)) 99 | rand.shuffle(cids) 100 | cmap = dict(zip(map(ord, string), cids)) 101 | assert len(benchmark(_make_subset(cmap).encode, string)) == 2 * len( 102 | 
string 103 | ) 104 | 105 | 106 | def test_true_type_init(): 107 | t = TrueType( 108 | Path(__file__).parent / "../resources/fonts/Roboto-Regular.ttf", 109 | str(Path(__file__).parent / "../resources/fonts/Roboto-Bold.ttf"), 110 | Path(__file__).parent / "../resources/fonts/Roboto-Italic.ttf", 111 | Path(__file__).parent / "../resources/fonts/Roboto-BoldItalic.ttf", 112 | ) 113 | assert isinstance(t.bold, Path) 114 | 115 | 116 | @pytest.mark.skipif(HAS_FONTTOOLS, reason="fontTools installed") 117 | def test_fonttools_notimplemented(): 118 | with pytest.raises(NotImplementedError): 119 | _make_subset({}).encode("") 120 | 121 | 122 | class TestUTF16BEHex: 123 | def test_one_byte(self, benchmark): 124 | assert benchmark(_utf16be_hex, ord("a")) == b"0061" 125 | 126 | def test_two_bytes(self, benchmark): 127 | assert benchmark(_utf16be_hex, ord("∫")) == b"222B" 128 | 129 | def test_four_bytes(self, benchmark): 130 | assert benchmark(_utf16be_hex, ord("🌵")) == b"D83CDF35" 131 | -------------------------------------------------------------------------------- /tests/layout/test_common.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | from typing import Iterable, Iterator 5 | 6 | from pdfje import XY, Column, Page 7 | from pdfje.common import Pt, add_slots 8 | from pdfje.layout.common import ColumnFill, PageFill, Shaped, fill_pages 9 | from pdfje.units import A3, A4 10 | 11 | PAGES = [ 12 | PageFill( 13 | Page(size=A3), 14 | (ColumnFill(Column(XY(40, 40), 190, 180), (), 100),), 15 | (ColumnFill(Column(XY(300, 40), 200, 200), (), 10),), 16 | ), 17 | PageFill(Page(size=A4), (), ()), 18 | PageFill( 19 | Page(size=A3.flip()), 20 | ( 21 | ColumnFill(Column(XY(80, 40), 205, 210), (), 100), 22 | ColumnFill(Column(XY(350, 40), 195, 190), (), 100), 23 | ), 24 | (), 25 | ), 26 | PageFill( 27 | Page(size=A4.flip()), 28 | ( 29 | ColumnFill(Column(XY(40, 40), 210, 170), 
(), 100), 30 | ColumnFill(Column(XY(300, 40), 195, 160), (), 100), 31 | ), 32 | (), 33 | ), 34 | ] 35 | 36 | 37 | @add_slots 38 | @dataclass(frozen=True) 39 | class _DummyShapedBlock(Shaped): 40 | content: bytes 41 | height: Pt 42 | 43 | def render(self, pos: XY, width: Pt) -> Iterable[bytes]: 44 | raise NotImplementedError() 45 | 46 | 47 | class TestFillPages: 48 | def test_empty(self): 49 | pages, filled = fill_pages(iter(PAGES), lambda _: iter(())) 50 | assert list(pages) == PAGES 51 | assert list(filled) == [] 52 | 53 | def test_fills_one_page_partially(self): 54 | def dummy_filler(cs: Iterator[ColumnFill]) -> Iterator[ColumnFill]: 55 | yield next(cs).add(_DummyShapedBlock(b"dummy content", 40)) 56 | 57 | pages, filled = fill_pages(iter(PAGES), dummy_filler) 58 | assert list(pages) == [ 59 | PageFill( 60 | Page(size=A3), 61 | ( 62 | ColumnFill( 63 | Column(XY(40, 40), 190, 180), 64 | ( 65 | ( 66 | XY(40, 140), 67 | _DummyShapedBlock(b"dummy content", 40), 68 | ), 69 | ), 70 | 60, 71 | ), 72 | ), 73 | (ColumnFill(Column(XY(300, 40), 200, 200), (), 10),), 74 | ), 75 | *PAGES[1:], 76 | ] 77 | assert list(filled) == [] 78 | 79 | def test_fills_multiple_pages(self): 80 | def dummy_filler(cs: Iterator[ColumnFill]) -> Iterator[ColumnFill]: 81 | for char in "abc": 82 | yield next(cs).add(_DummyShapedBlock(char.encode(), 40)) 83 | next(cs) # it's important we test consuming one more than yielded 84 | 85 | pages, filled = fill_pages(iter(PAGES), dummy_filler) 86 | assert list(pages) == [ 87 | PageFill( 88 | Page(size=A3.flip()), 89 | ( 90 | ColumnFill( 91 | Column(XY(350, 40), 195, 190), 92 | ((XY(350, 140), _DummyShapedBlock(b"c", 40)),), 93 | 60, 94 | ), 95 | ), 96 | ( 97 | ColumnFill( 98 | Column(XY(80, 40), 205, 210), 99 | ((XY(80, 140), _DummyShapedBlock(b"b", 40)),), 100 | 60, 101 | ), 102 | ), 103 | ), 104 | *PAGES[3:], 105 | ] 106 | assert filled == [ 107 | PageFill( 108 | Page(size=A3), 109 | (), 110 | ( 111 | ColumnFill(Column(XY(300, 40), 200, 200), (), 10), 
class Block(abc.ABC):
    """Base class for block elements that can be laid out in a column
    by :class:`~pdfje.AutoPage`.
    """

    # No instance attributes are declared at this level; the empty slots keep
    # this base class from adding a per-instance ``__dict__``.
    __slots__ = ()

    # Fill the given columns with this block's content. It may consume as many
    # columns as it needs to determine how to render itself. It should only
    # yield columns that are actually filled -- which may be fewer than it
    # consumed (e.g. if it needed to look ahead).
    #
    # Why take an iterator of columns instead of being fed one column at a
    # time? Because a block may need to consume multiple columns to render
    # itself, before starting to yield completed columns.
    #
    # All three arguments are positional-only:
    #   res   -- the ``Resources`` object passed through to the block
    #   style -- the ``StyleFull`` the block is laid out with
    #   cs    -- the columns that are available to be filled, in order
    @abc.abstractmethod
    def into_columns(
        self, res: Resources, style: StyleFull, cs: Iterator[ColumnFill], /
    ) -> Iterator[ColumnFill]: ...
def _fill_into(
    filled: Iterator[ColumnFill], doc: Iterator[PageFill]
) -> tuple[Iterator[PageFill], Sequence[PageFill]]:
    """Put already-filled columns back onto the pages they belong to.

    ``filled`` yields columns in the order they were filled. Each page taken
    from ``doc`` receives at most as many of them as it has ``todo`` columns;
    these are appended to its ``done`` columns and removed from ``todo``.

    Returns a 2-tuple:

    * an iterator over the pages that can still receive content. It starts
      with the last page that was touched, with its most recently filled
      column reopened so that following content can continue in it,
      followed by the untouched rest of ``doc``;
    * the pages before that one, which are complete.

    If ``filled`` is empty, ``doc`` is returned untouched together with an
    empty list.
    """
    # NOTE(review): ``peek`` is defined in ``..common``; from its use here it
    # presumably returns ``(first_item, iterator_still_yielding_it)`` and
    # raises StopIteration on an empty iterator -- confirm.
    try:
        _, filled = peek(filled)
    except StopIteration:
        return doc, []  # no content to add

    completed: list[PageFill] = []
    for page in doc:  # pragma: no branch
        # Take at most one filled column per column this page still has open.
        page_cols = list(islice(filled, len(page.todo)))
        completed.append(
            PageFill(
                page.base,
                # Columns that did not receive content stay open.
                page.todo[len(page_cols) :],  # noqa
                (*page.done, *page_cols),
            )
        )
        try:
            _, filled = peek(filled)
        except StopIteration:
            break  # no more content -- wrap things up

    # The last page is not complete yet: take it out of ``completed``, reopen
    # its last filled column and put it back in front of the remaining pages.
    # NOTE(review): if ``doc`` runs out before ``filled`` does, the loop ends
    # without ``break`` and leftover filled columns are not placed; with an
    # empty ``doc`` the ``pop()`` below raises IndexError.
    return prepend(completed.pop().reopen_most_recent_column(), doc), completed
def linecounts(filled: list[ColumnFill]) -> list[int]:
    """Return, per column, the total number of lines over all its blocks."""
    counts: list[int] = []
    for column in filled:
        total = 0
        for _, shaped in column.blocks:
            total += len(shaped.lines)  # type: ignore
        counts.append(total)
    return counts
plaintext(filled).strip() == LOREM_IPSUM.replace("\n", " ") 109 | 110 | def test_column_lookahead(self, res: Resources): 111 | cols = [ 112 | ColumnFill(Column(XY(80, 40), 400, 800), (), 100), 113 | ColumnFill(Column(XY(350, 40), 400, 100), (), 100), 114 | ColumnFill(Column(XY(350, 40), 300, 50), (), 50), 115 | ColumnFill(Column(XY(350, 40), 320, 32), (), 32), 116 | ColumnFill(Column(XY(350, 40), 300, 800), (), 800), 117 | ] 118 | p = Paragraph(LOREM_IPSUM, optimal=True) 119 | filled = list(p.into_columns(res, STYLE, iter(cols))) 120 | assert len(filled) == 5 121 | assert linecounts(filled) == [6, 6, 3, 2, 29] 122 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from pdfje.common import RGB, XY, Sides 6 | 7 | from .common import approx 8 | 9 | 10 | class TestXY: 11 | def test_basics(self): 12 | xy = XY(1, 2) 13 | assert xy.x == 1 14 | assert xy.y == 2 15 | assert xy.astuple() == (1, 2) 16 | 17 | def test_is_sequence(self): 18 | xy = XY(1, 2) 19 | assert xy[0] == 1 20 | assert xy[1] == 2 21 | 22 | with pytest.raises(IndexError): 23 | xy[2] 24 | 25 | assert list(xy) == [1, 2] 26 | assert len(xy) == 2 27 | assert xy.index(1) == 0 28 | assert 2 in xy 29 | assert xy.count(1) == 1 30 | assert list(reversed(xy)) == [2, 1] 31 | 32 | def test_parse(self): 33 | assert XY.parse((1, 3)) == XY(1, 3) 34 | assert XY.parse(XY(1, 3)) == XY(1, 3) 35 | 36 | def test_division(self): 37 | assert XY(1, 2) / 2 == XY(0.5, 1) 38 | 39 | with pytest.raises(TypeError, match="operand"): 40 | XY(1, 2) / "foo" # type: ignore[operator] 41 | 42 | def test_flip(self): 43 | assert XY(1, 2).flip() == XY(2, 1) 44 | 45 | def test_add_coordinates(self): 46 | assert XY(1, 2).add_x(3) == XY(4, 2) 47 | assert XY(1, 2).add_y(3) == XY(1, 5) 48 | 49 | def test_subtract(self): 50 | assert XY(1, 2) - XY(3, 
class TestRGB:
    """Checks for the ``RGB`` color triple."""

    def test_basics(self):
        """Channels are available by name and as a plain tuple."""
        color = RGB(1, 0.5, 0)
        assert color.astuple() == (1, 0.5, 0)
        assert color.red == 1
        assert color.green == 0.5
        assert color.blue == 0

    def test_is_sequence(self):
        """An ``RGB`` supports the usual read-only sequence operations."""
        color = RGB(1, 0.5, 0)

        # Whole-sequence views first ...
        assert len(color) == 3
        assert list(color) == [1, 0.5, 0]
        assert list(reversed(color)) == [0, 0.5, 1]

        # ... then item access, including the out-of-range case.
        for position, expected in enumerate((1, 0.5, 0)):
            assert color[position] == expected
        with pytest.raises(IndexError):
            color[3]

        # Membership and lookup helpers.
        assert 0.5 in color
        assert color.index(1) == 0
        assert color.count(1) == 1

    def test_parse(self):
        """Tuples and hex strings are accepted; anything else is rejected."""
        assert RGB.parse((1, 0.5, 0)) == RGB(1, 0.5, 0)

        from_hex = RGB.parse("#a044e9")
        assert from_hex.red == approx(160 / 255)
        assert from_hex.green == approx(68 / 255)
        assert from_hex.blue == approx(233 / 255)

        with pytest.raises(AssertionError, match="RGB"):
            RGB.parse(object())  # type: ignore
| sides[4] 123 | 124 | assert list(sides) == [1, 2, 3, 4] 125 | assert len(sides) == 4 126 | assert sides.index(1) == 0 127 | assert 2 in sides 128 | assert sides.count(1) == 1 129 | assert list(reversed(sides)) == [4, 3, 2, 1] 130 | 131 | def test_parse(self): 132 | assert Sides.parse(1) == Sides(1, 1, 1, 1) 133 | assert Sides.parse((1, 2)) == Sides(1, 2, 1, 2) 134 | assert Sides.parse((1, 2, 3)) == Sides(1, 2, 3, 2) 135 | assert Sides.parse((1, 2, 3, 4)) == Sides(1, 2, 3, 4) 136 | assert Sides.parse(Sides(1, 2, 3, 4)) == Sides(1, 2, 3, 4) 137 | 138 | with pytest.raises(TypeError, match="sides"): 139 | Sides.parse((20, 30, 25, 35, 40)) # type: ignore[arg-type] 140 | 141 | with pytest.raises(TypeError, match="sides"): 142 | Sides.parse("foo") # type: ignore[arg-type] 143 | -------------------------------------------------------------------------------- /resources/fonts/CrimsonText-License.txt: -------------------------------------------------------------------------------- 1 | Copyright 2010 The Crimson Text Project Authors (https://github.com/googlefonts/Crimson) 2 | 3 | This Font Software is licensed under the SIL Open Font License, Version 1.1. 4 | This license is copied below, and is also available with a FAQ at: 5 | http://scripts.sil.org/OFL 6 | 7 | 8 | ----------------------------------------------------------- 9 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 10 | ----------------------------------------------------------- 11 | 12 | PREAMBLE 13 | The goals of the Open Font License (OFL) are to stimulate worldwide 14 | development of collaborative font projects, to support the font creation 15 | efforts of academic and linguistic communities, and to provide a free and 16 | open framework in which fonts may be shared and improved in partnership 17 | with others. 18 | 19 | The OFL allows the licensed fonts to be used, studied, modified and 20 | redistributed freely as long as they are not sold by themselves. 
The 21 | fonts, including any derivative works, can be bundled, embedded, 22 | redistributed and/or sold with any software provided that any reserved 23 | names are not used by derivative works. The fonts and derivatives, 24 | however, cannot be released under any other type of license. The 25 | requirement for fonts to remain under this license does not apply 26 | to any document created using the fonts or their derivatives. 27 | 28 | DEFINITIONS 29 | "Font Software" refers to the set of files released by the Copyright 30 | Holder(s) under this license and clearly marked as such. This may 31 | include source files, build scripts and documentation. 32 | 33 | "Reserved Font Name" refers to any names specified as such after the 34 | copyright statement(s). 35 | 36 | "Original Version" refers to the collection of Font Software components as 37 | distributed by the Copyright Holder(s). 38 | 39 | "Modified Version" refers to any derivative made by adding to, deleting, 40 | or substituting -- in part or in whole -- any of the components of the 41 | Original Version, by changing formats or by porting the Font Software to a 42 | new environment. 43 | 44 | "Author" refers to any designer, engineer, programmer, technical 45 | writer or other person who contributed to the Font Software. 46 | 47 | PERMISSION & CONDITIONS 48 | Permission is hereby granted, free of charge, to any person obtaining 49 | a copy of the Font Software, to use, study, copy, merge, embed, modify, 50 | redistribute, and sell modified and unmodified copies of the Font 51 | Software, subject to the following conditions: 52 | 53 | 1) Neither the Font Software nor any of its individual components, 54 | in Original or Modified Versions, may be sold by itself. 55 | 56 | 2) Original or Modified Versions of the Font Software may be bundled, 57 | redistributed and/or sold with any software, provided that each copy 58 | contains the above copyright notice and this license. 
These can be 59 | included either as stand-alone text files, human-readable headers or 60 | in the appropriate machine-readable metadata fields within text or 61 | binary files as long as those fields can be easily viewed by the user. 62 | 63 | 3) No Modified Version of the Font Software may use the Reserved Font 64 | Name(s) unless explicit written permission is granted by the corresponding 65 | Copyright Holder. This restriction only applies to the primary font name as 66 | presented to the users. 67 | 68 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font 69 | Software shall not be used to promote, endorse or advertise any 70 | Modified Version, except to acknowledge the contribution(s) of the 71 | Copyright Holder(s) and the Author(s) or with their explicit written 72 | permission. 73 | 74 | 5) The Font Software, modified or unmodified, in part or in whole, 75 | must be distributed entirely under this license, and must not be 76 | distributed under any other license. The requirement for fonts to 77 | remain under this license does not apply to any document created 78 | using the Font Software. 79 | 80 | TERMINATION 81 | This license becomes null and void if any of the above conditions are 82 | not met. 83 | 84 | DISCLAIMER 85 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF 87 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 88 | OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE 89 | COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 90 | INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL 91 | DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 92 | FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM 93 | OTHER DEALINGS IN THE FONT SOFTWARE. 
def _parse_simple_words(
    txt: str, pos: Pos, state: State, prev: Char | None
) -> Generator[WordLike, None, str | Word]:
    """Split ``txt``, starting at ``pos``, into words that share one state.

    Yields a :class:`Word` for every ``_WORD_RE`` match that is followed by
    more text. What remains is handed back as the generator's *return*
    value (to be picked up with ``yield from``):

    * a ``str`` with the unterminated tail of ``txt`` when the text does not
      end on a word boundary (this is all of ``txt[pos:]`` if there is no
      match at all) -- the caller has to complete that word itself;
    * the final :class:`Word` when the last match ends exactly at the end
      of ``txt``.

    ``prev`` is the character preceding ``pos`` (or ``None``); it is passed
    on to ``Word.new`` and updated to the last character of each word.
    """
    assert pos < len(txt)
    ms = _WORD_RE.finditer(txt, pos)
    try:
        next_match = next(ms)
    except StopIteration:
        return txt[pos:]

    # Walk the matches pairwise so the last match stays available in
    # ``next_match`` after the loop: it needs different handling below.
    for match, next_match in pairwise(prepend(next_match, ms)):
        word = match.group()
        yield Word.new(word, state, prev)
        prev = word[-1]

    final_word = Word.new(next_match.group(), state, prev)
    pos = next_match.end()
    if pos < len(txt):
        # Text remains after the last boundary: an unfinished word.
        yield final_word
        return txt[pos:]
    else:
        return final_word
def _fold_commands(
    it: Iterator[Passage], state: State
) -> tuple[Command, str | None, State]:
    """Consume passages up to and including the first one that has text.

    Every consumed passage's command is applied to ``state`` and collected.
    Returns the squashed commands, the text of the passage that ended the
    scan (``None`` if ``it`` ran out first) and the resulting state.
    """
    pending: list[Command] = []
    text: str | None = None
    for passage in it:
        command = passage.cmd
        pending.append(command)
        state = command.apply(state)
        if passage.txt:
            text = passage.txt
            break
    return Chain.squash(pending), text, state
templates 18 | template=lambda i: TEMPLATES[i % 3], 19 | ) 20 | ] 21 | ).write("multicolumn.pdf") 22 | 23 | 24 | STYLES = [Style(size=10), "#225588" | italic, Style(size=15, font=times_roman)] 25 | TEMPLATES = [ 26 | # A one-column page 27 | Page(size=A6, margin=mm(15)), 28 | # A two-column page 29 | Page( 30 | columns=[ 31 | Column( 32 | (inch(1), inch(1)), 33 | width=(A4.x / 2) - inch(1.25), 34 | height=A4.y - inch(2), 35 | ), 36 | Column( 37 | (A4.x / 2 + inch(0.25), inch(1)), 38 | width=(A4.x / 2) - inch(1.25), 39 | height=A4.y - inch(2), 40 | ), 41 | ] 42 | ), 43 | # A page with three arbitrary columns 44 | Page( 45 | size=A3.flip(), 46 | columns=[ 47 | Column( 48 | (inch(1), inch(1)), 49 | width=(A3.y / 4), 50 | height=A3.x - inch(2), 51 | ), 52 | Column( 53 | (A3.y / 4 + inch(1.5), inch(5)), 54 | width=(A3.y / 2) - inch(1.25), 55 | height=A3.x - inch(8), 56 | ), 57 | Column( 58 | ((A3.y * 0.8) + inch(0.25), inch(4)), 59 | width=(A3.y / 10), 60 | height=inch(5), 61 | ), 62 | ], 63 | ), 64 | ] 65 | 66 | 67 | LOREM_IPSUM = """\ 68 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. \ 69 | Integer sed aliquet justo. Donec eu ultricies velit, porta pharetra massa. \ 70 | Ut non augue a urna iaculis vulputate ut sit amet sem. \ 71 | Nullam lectus felis, rhoncus sed convallis a, egestas semper risus. \ 72 | Fusce gravida metus non vulputate vestibulum. \ 73 | Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere \ 74 | cubilia curae; Donec placerat suscipit velit. \ 75 | Mauris tincidunt lorem a eros eleifend tincidunt. \ 76 | Maecenas faucibus imperdiet massa quis pretium. Integer in lobortis nisi. \ 77 | Mauris at odio nec sem volutpat aliquam. Aliquam erat volutpat. \ 78 | 79 | Fusce at vehicula justo. Vestibulum eget viverra velit. \ 80 | Vivamus et nisi pulvinar, elementum lorem nec, volutpat leo. \ 81 | Aliquam erat volutpat. Sed tristique quis arcu vitae vehicula. \ 82 | Morbi egestas vel diam eget dapibus. 
Donec sit amet lorem turpis. \ 83 | Maecenas ultrices nunc vitae enim scelerisque tempus. \ 84 | Maecenas aliquet dui non hendrerit viverra. \ 85 | Aliquam fringilla, est sit amet gravida convallis, elit ipsum efficitur orci, \ 86 | eget convallis neque nunc nec lorem. Nam nisl sem, \ 87 | tristique a ultrices sed, finibus id enim. 88 | 89 | Etiam vel dolor ultricies, gravida felis in, vestibulum magna. \ 90 | In diam ex, elementum ut massa a, facilisis sollicitudin lacus. \ 91 | Integer lacus ante, ullamcorper ac mauris eget, rutrum facilisis velit. \ 92 | Mauris eu enim efficitur, malesuada ipsum nec, sodales enim. \ 93 | Nam ac tortor velit. Suspendisse ut leo a felis aliquam dapibus ut a justo. \ 94 | Vestibulum sed commodo tortor. Sed vitae enim ipsum. \ 95 | Duis pellentesque dui et ipsum suscipit, in semper odio dictum. \ 96 | 97 | Sed in fermentum leo. Donec maximus suscipit metus. \ 98 | Nulla convallis tortor mollis urna maximus mattis. \ 99 | Sed aliquet leo ac sem aliquam, et ultricies mauris maximus. \ 100 | Cras orci ex, fermentum nec purus non, molestie venenatis odio. \ 101 | Etiam vitae sollicitudin nisl. Sed a ullamcorper velit. \ 102 | 103 | Aliquam congue aliquet eros scelerisque hendrerit. Vestibulum quis ante ex. \ 104 | Fusce venenatis mauris dolor, nec mattis libero pharetra feugiat. \ 105 | Pellentesque habitant morbi tristique senectus et netus et malesuada \ 106 | fames ac turpis egestas. Cras vitae nisl molestie augue finibus lobortis. \ 107 | In hac habitasse platea dictumst. Maecenas rutrum interdum urna, \ 108 | ut finibus tortor facilisis ac. Donec in fringilla mi. \ 109 | Sed molestie accumsan nisi at mattis. \ 110 | Integer eget orci nec urna finibus porta. \ 111 | Sed eu dui vel lacus pulvinar blandit sed a urna. \ 112 | Quisque lacus arcu, mattis vel rhoncus hendrerit, dapibus sed massa. \ 113 | Vivamus sed massa est. In hac habitasse platea dictumst. \ 114 | Nullam volutpat sapien quis tincidunt sagittis. 
def flatten_newlines(txt: str) -> str:
    """Undo hard line wrapping while keeping paragraph breaks.

    Every double newline is treated as a paragraph separator and becomes a
    single newline; every newline left inside a paragraph becomes a space.
    """
    paragraphs = txt.split("\n\n")
    unwrapped = [paragraph.replace("\n", " ") for paragraph in paragraphs]
    return "\n".join(unwrapped)
task of placing upon record that severe reasoning from cause to effect 121 | which is really the only notable feature about the thing.” 122 | 123 | “It seems to me that I have done you full justice in the matter,” I 124 | remarked with some coldness, for I was repelled by the egotism which I 125 | had more than once observed to be a strong factor in my friend’s 126 | singular character. 127 | 128 | 129 | 130 | 131 | """ 132 | ), 133 | ], 134 | align="justify", 135 | indent=18, 136 | optimal=LinebreakParams( 137 | tolerance=3, 138 | hyphen_penalty=1000, 139 | ), 140 | avoid_orphans=False, 141 | ), 142 | ] 143 | 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /src/pdfje/typeset/state.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import abc 4 | import re 5 | from dataclasses import dataclass, field, replace 6 | from typing import Collection, Iterable, Iterator, NamedTuple 7 | 8 | from ..common import ( 9 | RGB, 10 | NonEmptyIterator, 11 | Pos, 12 | Pt, 13 | Streamable, 14 | add_slots, 15 | flatten, 16 | prepend, 17 | setattr_frozen, 18 | ) 19 | from ..fonts.common import Font 20 | from .hyphens import Hyphenator 21 | 22 | _next_newline = re.compile(r"(?:\r\n|\n)").search 23 | 24 | 25 | class Command(Streamable): 26 | __slots__ = () 27 | 28 | @abc.abstractmethod 29 | def apply(self, s: State, /) -> State: ... 
@add_slots
@dataclass(frozen=True)
class Chain(Command):
    """A command made up of several commands, applied in order."""

    # The commands to run, first to last.
    items: Collection[Command]

    def apply(self, s: State) -> State:
        """Feed ``s`` through every command and return the final state."""
        state = s
        for command in self.items:
            state = command.apply(state)
        return state

    def __iter__(self) -> Iterator[bytes]:
        """Yield the bytes of all contained commands, one after another."""
        return flatten(self.items)

    @staticmethod
    def squash(it: Iterable[Command]) -> Command:
        """Collapse commands so that at most one per concrete type remains.

        For each type the *last* command wins; the surviving commands are
        ordered by the first appearance of their type. Returns ``NO_OP``
        for no commands, the command itself when only one type remains,
        and a :class:`Chain` otherwise.
        """
        latest = {}
        for command in it:
            # A later command of the same type overrides the earlier one
            latest[type(command)] = command
        if not latest:
            return NO_OP
        if len(latest) == 1:
            (only,) = latest.values()
            return only
        return Chain(latest.values())
@add_slots
@dataclass(frozen=True)
class State(Streamable):
    """The text state (see PDF 32000-1:2008, table 105).

    Iterating over a state yields the bytes of the font and color
    commands that establish it.
    """

    font: Font
    size: Pt
    color: RGB
    line_spacing: float
    hyphens: Hyphenator

    # Derived as ``size * line_spacing`` and cached after construction.
    # It is excluded from comparisons and cannot be passed to ``__init__``.
    lead: Pt = field(init=False, compare=False)

    def __post_init__(self) -> None:
        leading = self.size * self.line_spacing
        setattr_frozen(self, "lead", leading)

    def __iter__(self) -> Iterator[bytes]:
        for command in (SetFont(self.font, self.size), SetColor(self.color)):
            yield from command

    def kerns_with(self, other: State, /) -> bool:
        """Whether ``other`` has the same font and the same size."""
        same_font = self.font == other.font
        return same_font and self.size == other.size
def max_lead(s: Iterable[Passage], state: State) -> Pt:
    """Return the largest leading among the passages that contain text.

    Every passage's command is applied to the running state, but the
    resulting leading only counts when the passage has non-empty text.
    If no passage has text, the leading of the final state is returned.
    """
    # FUTURE: commands are also applied elsewhere, so repeating that work
    # here is perhaps a bit wasteful
    tallest = 0.0
    for command, text in s:
        state = command.apply(state)
        # Leading only matters if text is actually set with it
        if text and state.lead > tallest:
            tallest = state.lead
    # Nothing to go on: fall back to the leading of the state itself
    return tallest if tallest else state.lead
@add_slots
@dataclass(frozen=True)
class RenderedPage:
    """A page ready to be written out as PDF objects."""

    rotate: Rotation
    size: XY
    stream: Streamable

    def to_atoms(self, i: atoms.ObjectID) -> Iterable[atoms.Object]:
        """Yield the two objects that make up this page.

        The page dictionary is emitted under object ID ``i``. Its
        ``Contents`` entry refers to ``i + 1``, the ID under which the
        content stream is emitted next.
        """
        contents_id = i + 1
        # The media box spans from the origin to the page size
        media_box = atoms.Array(map(atoms.Real, [0, 0, *self.size]))
        page_dict = atoms.Dictionary(
            (b"Type", atoms.Name(b"Page")),
            (b"Parent", atoms.Ref(OBJ_ID_PAGETREE)),
            (b"MediaBox", media_box),
            (b"Contents", atoms.Ref(contents_id)),
            (b"Resources", atoms.Ref(OBJ_ID_RESOURCES)),
            (b"Rotate", atoms.Int(self.rotate)),
        )
        yield i, page_dict
        yield contents_id, atoms.Stream(self.stream)
def _column(page: XY, margin: SidesLike) -> Column:
    """Build the single column that is left after removing the margins.

    The parsed margin unpacks as (top, right, bottom, left). The column
    starts at the (left, bottom) margin offsets and takes up the page
    size minus the margins on either side.
    """
    sides = Sides.parse(margin)
    top, right, bottom, left = sides
    width = page.x - left - right
    height = page.y - top - bottom
    return Column(XY(left, bottom), width, height)
]) 48 | 49 | 50 | note 51 | ---- 52 | A document must contain at least one page to be valid 53 | """ 54 | 55 | pages: Iterable[Page | AutoPage] 56 | style: Style 57 | 58 | def __init__( 59 | self, 60 | content: Iterable[Page | AutoPage] | str | Block | None = None, 61 | style: StyleLike = Style.EMPTY, 62 | ) -> None: 63 | if content is None: 64 | content = [Page()] 65 | elif isinstance(content, str): 66 | content = [AutoPage([Paragraph(content)])] 67 | elif isinstance(content, Block): 68 | content = [AutoPage([content])] 69 | 70 | setattr_frozen(self, "pages", content) 71 | setattr_frozen(self, "style", Style.parse(style)) 72 | 73 | @overload 74 | def write(self) -> Iterator[bytes]: ... 75 | 76 | @overload 77 | def write(self, target: os.PathLike[str] | str | IO[bytes]) -> None: ... 78 | 79 | def write( # type: ignore[return] 80 | self, target: os.PathLike[str] | str | IO[bytes] | None = None 81 | ) -> Iterator[bytes] | None: 82 | """Write the document to a given target. If no target is given, 83 | outputs the binary PDF content iteratively. See examples below. 84 | 85 | Parameters 86 | ---------- 87 | target: ~os.PathLike | str | ~typing.IO[bytes] | None 88 | The target to write to. If not given, the PDF content is returned 89 | as an iterator. 90 | 91 | Returns 92 | ------- 93 | ~typing.Iterator[bytes] | None 94 | 95 | Examples 96 | -------- 97 | 98 | String, :class:`~pathlib.Path`, or :class:`~os.PathLike` target: 99 | 100 | >>> doc.write("myfolder/foo.pdf") 101 | >>> doc.write(Path.home() / "documents/foo.pdf") 102 | 103 | Files and file-like objects: 104 | 105 | >>> with open("my/file.pdf", 'wb') as f: 106 | ... doc.write(f) 107 | >>> doc.write(b:= BytesIO()) 108 | 109 | Iterator output is useful for streaming PDF contents. Below is 110 | an example of an HTTP request using the ``httpx`` library. 111 | 112 | >>> httpx.post("https://mysite.foo/upload", content=doc.write(), 113 | ... 
def _doc_objects(
    items: Iterable[Page | AutoPage], style: StyleFull
) -> Iterator[atoms.Object]:
    """Yield all PDF objects of a document, in writing order.

    First the objects of every rendered page, then the objects produced by
    ``Resources.to_objects``, and finally the objects from
    ``_write_headers`` (catalog, page tree, and resources dictionary).

    Raises
    ------
    RuntimeError
        If ``items`` renders no pages at all. As this is a generator, the
        error is raised during iteration, not when the function is called.
    """
    res = Resources()
    # Both start at 0 so that "no pages were rendered" can be detected after
    # the loop, which only rebinds them if there is at least one page.
    obj_id = pagenum = 0
    # FUTURE: the scoping of `pagenum` is a bit tricky here. Find a better
    # way to do this -- or add a specific test.
    # The generator expression below is evaluated lazily and reads the
    # *current* value of `pagenum`, which this loop rebinds per rendered page.
    # Each item is therefore rendered with the number of the pages emitted
    # so far, plus one. NOTE(review): this relies on `flatten` pulling from
    # the generator lazily -- confirm in `common`.
    # The counters are deliberately listed before the page stream in `zip`:
    # once the pages run out the loop variables are not rebound, so `pagenum`
    # and `obj_id` keep the values of the last page.
    for pagenum, obj_id, page in zip(
        count(1),
        count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE),
        flatten(p.render(res, style, pagenum + 1) for p in items),
    ):
        yield from page.to_atoms(obj_id)

    if not pagenum:
        raise RuntimeError(
            "Cannot write PDF document without at least one page"
        )
    # The first object ID after the ones taken by the last page
    first_font_id = obj_id + _OBJS_PER_PAGE

    yield from res.to_objects(first_font_id)
    yield from _write_headers(
        # The number of pages, derived from the last page's object ID
        (obj_id - _OBJ_ID_FIRST_PAGE) // _OBJS_PER_PAGE + 1,
        res.to_atoms(first_font_id),
    )
step=_OBJS_PER_PAGE), 181 | num_pages, 182 | ), 183 | ) 184 | ), 185 | ), 186 | (b"Count", atoms.Int(num_pages)), 187 | ), 188 | ) 189 | yield OBJ_ID_RESOURCES, resources 190 | -------------------------------------------------------------------------------- /src/pdfje/fonts/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import abc 4 | from dataclasses import dataclass, field 5 | from itertools import chain, count 6 | from pathlib import Path 7 | from typing import TYPE_CHECKING, Iterable, Tuple, Union, final 8 | 9 | from .. import atoms 10 | from ..atoms import ASCII 11 | from ..common import ( 12 | Char, 13 | Func, 14 | Pos, 15 | Pt, 16 | add_slots, 17 | fix_abstract_properties, 18 | setattr_frozen, 19 | ) 20 | from ..compat import pairwise 21 | 22 | FontID = bytes # unique, internal identifier assigned to a font within a PDF 23 | GlyphPt = float # length unit in glyph space 24 | TEXTSPACE_TO_GLYPHSPACE = 1000 # See PDF32000-1:2008 (9.7.3) 25 | 26 | 27 | @fix_abstract_properties 28 | class Font(abc.ABC): 29 | """A specific font within a typeface""" 30 | 31 | __slots__ = () 32 | 33 | @property 34 | @abc.abstractmethod 35 | def id(self) -> FontID: ... 36 | 37 | # It's worth caching this value, as it is used often 38 | @property 39 | @abc.abstractmethod 40 | def spacewidth(self) -> GlyphPt: ... 41 | 42 | @property 43 | @abc.abstractmethod 44 | def encoding_width(self) -> int: 45 | """The number of bytes assigned to each character when encoding""" 46 | 47 | @abc.abstractmethod 48 | def encode(self, s: str, /) -> bytes: ... 49 | 50 | @abc.abstractmethod 51 | def width(self, s: str, /) -> Pt: 52 | """The total width of the given string (excluding kerning)""" 53 | 54 | @staticmethod 55 | @abc.abstractmethod 56 | def charwidth(c: Char, /) -> GlyphPt: ... 57 | 58 | @abc.abstractmethod 59 | def kern(self, s: str, /, prev: Char | None) -> Iterable[Kern]: ... 
@final
@add_slots
@dataclass(frozen=True, init=False)
class TrueType:
    """A TrueType typeface to embed in a PDF, given as four ``.ttf`` files.

    Parameters
    ----------
    regular
        The regular (i.e. non-bold, non-italic) .ttf file
    bold
        The bold .ttf file
    italic
        The italic .ttf file
    bold_italic
        The bold italic .ttf file

    """

    regular: Path
    bold: Path
    italic: Path
    bold_italic: Path

    def __init__(
        self,
        regular: Path | str,
        bold: Path | str,
        italic: Path | str,
        bold_italic: Path | str,
    ) -> None:
        # Strings are accepted for convenience; everything is stored as Path
        for attribute, location in (
            ("regular", regular),
            ("bold", bold),
            ("italic", italic),
            ("bold_italic", bold_italic),
        ):
            setattr_frozen(self, attribute, Path(location))

    # This method cannot be defined in the class body, as it would cause a
    # circular import. The implementation is patched into the class
    # in the `style` module.
    if TYPE_CHECKING:  # pragma: no cover
        from ..common import HexColor
        from ..style import Style, StyleLike

        def __or__(self, _: StyleLike, /) -> Style: ...

        def __ror__(self, _: HexColor, /) -> Style: ...

    def font(self, bold: bool, italic: bool) -> Path:
        """Return the file for the requested bold/italic combination."""
        if bold and italic:
            return self.bold_italic
        if bold:
            return self.bold
        if italic:
            return self.italic
        return self.regular
@final
@add_slots
@dataclass(frozen=True, eq=False)
class BuiltinFont(Font):
    """A font referenced by name as a ``Type1`` font resource.

    Text is encoded as cp1252, matching the ``WinAnsiEncoding`` declared
    in :meth:`to_resource`. Instances compare by identity (``eq=False``).
    """

    name: ASCII
    id: FontID
    # Width of a single character, in glyph space units
    charwidth: Func[Char, GlyphPt] = field(repr=False)
    # Kerning adjustment per character pair; None if there is no kerning
    kerning: KerningTable | None = field(repr=False)
    # Cached ``charwidth(" ")``. It is a glyph space value, the same unit
    # that ``Font.spacewidth`` declares.
    spacewidth: GlyphPt = field(init=False, repr=False)

    # cp1252 assigns one byte to every character
    encoding_width = 1

    def __post_init__(self) -> None:
        setattr_frozen(self, "spacewidth", self.charwidth(" "))

    def width(self, s: str) -> Pt:
        """The total width of ``s``, excluding kerning.

        The glyph space widths of the characters are summed and divided by
        ``TEXTSPACE_TO_GLYPHSPACE``.
        """
        return sum(map(self.charwidth, s)) / TEXTSPACE_TO_GLYPHSPACE

    @staticmethod
    def encode(s: str) -> bytes:
        """Encode ``s`` as cp1252, replacing characters it cannot express."""
        # FUTURE: normalize unicode to allow better unicode representation
        return s.encode("cp1252", errors="replace")

    def kern(self, s: str, /, prev: Char | None) -> Iterable[Kern]:
        """The kerning adjustments within ``s``, optionally preceded
        by the character ``prev``. Empty if there is no kerning table."""
        return kern(self.kerning, s, prev) if self.kerning else ()

    def charkern(self, a: Char, b: Char) -> GlyphPt:
        """The kerning between ``a`` and ``b``; 0 without kerning table."""
        return self.kerning((a, b)) if self.kerning else 0

    def to_resource(self) -> atoms.Dictionary:
        """The font dictionary describing this font."""
        return atoms.Dictionary(
            (b"Type", atoms.Name(b"Font")),
            (b"Subtype", atoms.Name(b"Type1")),
            (b"BaseFont", atoms.Name(self.name)),
            (b"Encoding", atoms.Name(b"WinAnsiEncoding")),
        )
KerningTable = Func[Tuple[Char, Char], GlyphPt] 193 | Kern = Tuple[Pos, GlyphPt] 194 | 195 | 196 | def kern( 197 | table: KerningTable, 198 | s: str, 199 | prev: Char | None, 200 | ) -> Iterable[Kern]: 201 | for i, pair in zip( 202 | count(not prev), 203 | pairwise(chain(prev, s) if prev else s), 204 | ): 205 | if space := table(pair): 206 | yield (i, space) 207 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 🌷 pdfje 2 | ======== 3 | 4 | .. image:: https://img.shields.io/pypi/v/pdfje.svg?style=flat-square&color=blue 5 | :target: https://pypi.python.org/pypi/pdfje 6 | 7 | .. image:: https://img.shields.io/pypi/pyversions/pdfje.svg?style=flat-square 8 | :target: https://pypi.python.org/pypi/pdfje 9 | 10 | .. image:: https://img.shields.io/pypi/l/pdfje.svg?style=flat-square&color=blue 11 | :target: https://pypi.python.org/pypi/pdfje 12 | 13 | .. image:: https://img.shields.io/badge/mypy-strict-forestgreen?style=flat-square 14 | :target: https://mypy.readthedocs.io/en/stable/command_line.html#cmdoption-mypy-strict 15 | 16 | .. image:: https://img.shields.io/badge/coverage-99%25-forestgreen?style=flat-square 17 | :target: https://github.com/ariebovenberg/pdfje 18 | 19 | .. image:: https://img.shields.io/github/actions/workflow/status/ariebovenberg/pdfje/tests.yml?branch=main&style=flat-square 20 | :target: https://github.com/ariebovenberg/pdfje 21 | 22 | .. image:: https://img.shields.io/readthedocs/pdfje.svg?style=flat-square 23 | :target: http://pdfje.readthedocs.io/ 24 | 25 | .. 26 | 27 | **pdf·je** [`🔉 `_ PDF·yuh] (noun) Dutch for 'small PDF' 28 | 29 | **Write beautiful PDFs in declarative Python.** 30 | 31 | Features 32 | -------- 33 | 34 | What makes **pdfje** stand out from the other PDF writers? 
Here are some of the highlights: 35 | 36 | 🧩 Declarative API 37 | ~~~~~~~~~~~~~~~~~~ 38 | 39 | In most PDF writers, you create empty objects and 40 | then mutate them with methods like ``addText()``, 41 | all while changing the state with methods like ``setFont()``. 42 | **Pdfje** is different. You describe the document you want to write, 43 | and pdfje takes care of the details. No state to manage, no mutations. 44 | This makes your code easier to reuse and reason about. 45 | 46 | .. code-block:: python 47 | 48 | from pdfje import Document 49 | Document("Olá Mundo!").write("hello.pdf") 50 | 51 | See `the tutorial `_ 52 | for a complete overview of features, including: 53 | 54 | - Styling text including font, size, and color 55 | - Automatic layout of text into one or more columns 56 | - Builtin and embedded fonts 57 | - Drawing basic shapes 58 | 59 | See the roadmap_ for supported features. 60 | 61 | 📖 Decent typography 62 | ~~~~~~~~~~~~~~~~~~~~ 63 | 64 | Legibility counts. Good typography is a key part of that. 65 | **Pdfje** supports several features to make your documents look great: 66 | 67 | - Visually pleasing linebreaks, using the `same basic principles as LaTeX `_ 68 | - Automatic `kerning `_ using available font metrics 69 | - Avoiding `widows and orphans `_ by moving 70 | lines between columns or pages. 71 | 72 | .. image:: https://github.com/ariebovenberg/pdfje/raw/main/sample.png 73 | :alt: Sample document with two columns of text 74 | 75 | 🎈 Small footprint 76 | ~~~~~~~~~~~~~~~~~~ 77 | 78 | The PDF format supports many features, but most of the time you only need a few. 79 | Why install many dependencies — just to write a simple document? 80 | Not only is **pdfje** pure-Python, it allows you to 81 | install only the dependencies you need. 82 | 83 | .. code-block:: bash 84 | 85 | pip install pdfje # no dependencies 86 | pip install pdfje[fonts, hyphens] # embedded fonts and improved hyphenation 87 | 88 | .. 
_roadmap: 89 | 90 | Roadmap 91 | ------- 92 | 93 | **Pdfje** has basic functionality, 94 | but is not yet feature-complete. 95 | Until the 1.0 version, the API may change with minor releases. 96 | 97 | Features: 98 | 99 | ✅ = implemented, 🚧 = may be planned, ❌ = not planned 100 | 101 | - Typesetting 102 | - ✅ Automatic kerning 103 | - ✅ Wrapping text into lines, columns, and pages 104 | - ✅ Page sizes 105 | - ✅ Centering text 106 | - ✅ Justification 107 | - ✅ Hyphenation 108 | - ✅ Move lines between columns/pages to avoid widows/orphans 109 | - ✅ Tex-style line breaking 110 | - 🚧 Headings (which stick to their paragraphs) 111 | - 🚧 Indentation 112 | - 🚧 Keeping layout elements together 113 | - 🚧 Loosening paragraphs to avoid orphans/widows 114 | - 🚧 Broader unicode support in text wrapping 115 | - Drawing operations 116 | - ✅ Lines 117 | - ✅ Rectangles 118 | - ✅ Circles, ellipses 119 | - 🚧 Arbitrary paths, fills, and strokes 120 | - Text styling 121 | - ✅ Font and size 122 | - ✅ Embedded fonts 123 | - ✅ Colors 124 | - ✅ Bold, italic 125 | - 🚧 Underline and strikethrough 126 | - 🚧 Superscript and subscript 127 | - ❌ Complex fill patterns 128 | - 🚧 Images 129 | - 🚧 Bookmarks and links 130 | - 🚧 Tables 131 | - 🚧 Bullet/numbered lists 132 | - 🚧 Inline markup with Markdown (Commonmark/MyST) 133 | - ❌ Emoji 134 | - ❌ Tables of contents 135 | - ❌ Forms 136 | - ❌ Annotations 137 | 138 | Versioning and compatibility policy 139 | ----------------------------------- 140 | 141 | **Pdfje** follows semantic versioning. 142 | Until the 1.0 version, the API may change with minor releases. 143 | Breaking changes will be announced in the changelog. 144 | Since the API is fully typed, your typechecker and/or IDE 145 | will help you adjust to any API changes. 146 | 147 | License 148 | ------- 149 | 150 | This library is licensed under the terms of the MIT license. 
151 | It also includes short scripts from other projects (see ``pdfje/vendor``), 152 | which are either also MIT licensed, or in the public domain. 153 | 154 | Contributing 155 | ------------ 156 | 157 | Here are some useful tips for developing in the ``pdfje`` codebase itself: 158 | 159 | - Install dependencies with ``poetry install``. 160 | - To write output files during tests, use ``pytest --output-path=`` 161 | - To also run more comprehensive but 'slow' tests, use ``pytest --runslow`` 162 | 163 | Acknowledgements 164 | ---------------- 165 | 166 | **pdfje** is inspired by the following projects. 167 | If you're looking for a PDF writer, you may want to check them out as well: 168 | 169 | - `python-typesetting `_ 170 | - `fpdf2 `_ 171 | - `ReportLab `_ 172 | - `WeasyPrint `_ 173 | - `borb `_ 174 | - `wkhtmltopdf `_ 175 | - `pydyf `_ 176 | -------------------------------------------------------------------------------- /resources/scripts/parse_afm.py: -------------------------------------------------------------------------------- 1 | """Script to extract font metrics from .afm files. 
def print_kern() -> None:
    """Print the kerning pairs of the AFM file given as first argument.

    Pairs are keyed by the characters that ``NAMES`` maps the glyph names
    to, and printed one per line in sorted order. A pair is skipped if
    either of its glyph names does not appear in ``NAMES``.
    """
    metrics = AFM(sys.argv[1])
    pairs = {}
    for (first, second), amount in metrics._kerning.items():
        # Ignore characters we can't encode cp1252 anyway
        if first in NAMES and second in NAMES:
            pairs[NAMES[first], NAMES[second]] = amount
    for chars, amount in sorted(pairs.items()):
        print(f"{chars}: {amount},")
"agrave": "à", 104 | "ampersand": "&", 105 | "aring": "å", 106 | "asciicircum": "^", 107 | "asciitilde": "~", 108 | "asterisk": "*", 109 | "at": "@", 110 | "atilde": "ã", 111 | "b": "b", 112 | "backslash": "\\", 113 | "bar": "|", 114 | "braceleft": "{", 115 | "braceright": "}", 116 | "bracketleft": "[", 117 | "bracketright": "]", 118 | "breve": " ̆", 119 | "brokenbar": "¦", 120 | "bullet": "•", 121 | "c": "c", 122 | "caron": "ˇ", 123 | "ccedilla": "ç", 124 | "cedilla": " ̧", 125 | "cent": "¢", 126 | "circumflex": "ˆ", 127 | "colon": ":", 128 | "comma": ",", 129 | "copyright": "©", 130 | "currency": "¤", 131 | "d": "d", 132 | "dagger": "†", 133 | "daggerdbl": "‡", 134 | "degree": "°", 135 | "dieresis": " ̈", 136 | "divide": "÷", 137 | "dollar": "$", 138 | "dotaccent": " ̇", 139 | "dotlessi": "ı", 140 | "e": "e", 141 | "eacute": "é", 142 | "ecircumflex": "ê", 143 | "edieresis": "ë", 144 | "egrave": "è", 145 | "eight": "8", 146 | "ellipsis": "…", 147 | "emdash": "—", 148 | "endash": "–", 149 | "equal": "=", 150 | "eth": "ð", 151 | "exclam": "!", 152 | "exclamdown": "¡", 153 | "f": "f", 154 | "fi": "fi", 155 | "five": "5", 156 | "fl": "fl", 157 | "florin": "ƒ", 158 | "four": "4", 159 | "fraction": "⁄", 160 | "g": "g", 161 | "germandbls": "ß", 162 | "grave": "`", 163 | "greater": ">", 164 | "guillemotleft": "«", 165 | "guillemotright": "»", 166 | "guilsinglleft": "‹", 167 | "guilsinglright": "›", 168 | "h": "h", 169 | "hungarumlaut": " ̋", 170 | "hyphen": "-", 171 | "i": "i", 172 | "iacute": "í", 173 | "icircumflex": "î", 174 | "idieresis": "ï", 175 | "igrave": "ì", 176 | "j": "j", 177 | "k": "k", 178 | "l": "l", 179 | "less": "<", 180 | "logicalnot": "¬", 181 | "lslash": "ł", 182 | "m": "m", 183 | "macron": " ̄", 184 | "minus": "−", 185 | "mu": "μ", 186 | "multiply": "×", 187 | "n": "n", 188 | "nine": "9", 189 | "ntilde": "ñ", 190 | "numbersign": "#", 191 | "o": "o", 192 | "oacute": "ó", 193 | "ocircumflex": "ô", 194 | "odieresis": "ö", 195 | "oe": "œ", 196 | "ogonek": 
" ̨", 197 | "ograve": "ò", 198 | "one": "1", 199 | "onehalf": "½", 200 | "onequarter": "¼", 201 | "onesuperior": "¹", 202 | "ordfeminine": "ª", 203 | "ordmasculine": "º", 204 | "oslash": "ø", 205 | "otilde": "õ", 206 | "p": "p", 207 | "paragraph": "¶", 208 | "parenleft": "(", 209 | "parenright": ")", 210 | "percent": "%", 211 | "period": ".", 212 | "periodcentered": "·", 213 | "perthousand": "‰", 214 | "plus": "+", 215 | "plusminus": "±", 216 | "q": "q", 217 | "question": "?", 218 | "questiondown": "¿", 219 | "quotedbl": '"', 220 | "quotedblbase": "„", 221 | "quotedblleft": "“", 222 | "quotedblright": "”", 223 | "quoteleft": "‘", 224 | "quoteright": "’", 225 | "quotesinglbase": "‚", 226 | "quotesingle": "'", 227 | "r": "r", 228 | "registered": "®", 229 | "ring": " ̊", 230 | "s": "s", 231 | "scaron": "š", 232 | "section": "§", 233 | "semicolon": ";", 234 | "seven": "7", 235 | "six": "6", 236 | "slash": "/", 237 | "space": " ", 238 | "sterling": "£", 239 | "t": "t", 240 | "thorn": "þ", 241 | "three": "3", 242 | "threequarters": "¾", 243 | "threesuperior": "³", 244 | "tilde": " ̃", 245 | "trademark": "™", 246 | "two": "2", 247 | "twosuperior": "²", 248 | "u": "u", 249 | "uacute": "ú", 250 | "ucircumflex": "û", 251 | "udieresis": "ü", 252 | "ugrave": "ù", 253 | "underscore": "_", 254 | "v": "v", 255 | "w": "w", 256 | "x": "x", 257 | "y": "y", 258 | "yacute": "ý", 259 | "ydieresis": "ÿ", 260 | "yen": "¥", 261 | "z": "z", 262 | "zcaron": "ž", 263 | "zero": "0", 264 | } 265 | 266 | ZAPF_NAMES = {} 267 | 268 | 269 | # sanity checks 270 | assert len(set(NAMES.values())) == len(NAMES) 271 | # for char in NAMES.values(): 272 | # char.encode("cp1252") 273 | 274 | 275 | if __name__ == "__main__": 276 | # print_widths() 277 | print_kern() 278 | -------------------------------------------------------------------------------- /resources/sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from 
def flatten_newlines(txt: str) -> str:
    """Unwrap hard-wrapped text while keeping paragraph breaks.

    Paragraphs are the pieces of ``txt`` separated by a blank line (two
    consecutive newline characters). Every newline left inside a paragraph
    is turned into a single space, after which the paragraphs are joined
    back together with one newline each.
    """
    paragraphs = txt.split("\n\n")
    unwrapped = [" ".join(paragraph.split("\n")) for paragraph in paragraphs]
    return "\n".join(unwrapped)
It is pleasant to me to observe, 70 | Watson, that you have so far grasped this truth that in these little 71 | records of our cases which you have been good enough to draw up, and, I 72 | am bound to say, occasionally to embellish, you have given prominence 73 | not so much to the many """ 74 | ), 75 | Span("causes célèbres", italic), 76 | flatten_newlines( 77 | """ and sensational trials in 78 | which I have figured but rather to those incidents which may have been 79 | trivial in themselves, but which have given room for those faculties of 80 | deduction and of logical synthesis which I have made my special 81 | province.” 82 | 83 | “And yet,” said I, smiling, “I cannot quite hold myself absolved from 84 | the charge of sensationalism which has been urged against my records.” 85 | 86 | “You have erred, perhaps,” he observed, taking up a glowing cinder with 87 | the tongs and lighting with it the long cherry-wood pipe which was wont 88 | to replace his clay when he was in a disputatious rather than a 89 | meditative mood—“you have erred perhaps in attempting to put colour and 90 | life into each of your statements instead of confining yourself to the 91 | task of placing upon record that severe reasoning from cause to effect 92 | which is really the only notable feature about the thing.” 93 | 94 | “It seems to me that I have done you full justice in the matter,” I 95 | remarked with some coldness, for I was repelled by the egotism which I 96 | had more than once observed to be a strong factor in my friend’s 97 | singular character. 98 | 99 | “No, it is not selfishness or conceit,” said he, answering, as was his 100 | wont, my thoughts rather than my words. “If I claim full justice for my 101 | art, it is because it is an impersonal thing—a thing beyond myself. 102 | Crime is common. Logic is rare. Therefore it is upon the logic rather 103 | than upon the crime that you should dwell. 
You have degraded what 104 | should have been a course of lectures into a series of tales.” 105 | 106 | It was a cold morning of the early spring, and we sat after breakfast 107 | on either side of a cheery fire in the old room at Baker Street. A 108 | thick fog rolled down between the lines of dun-coloured houses, and the 109 | opposing windows loomed like dark, shapeless blurs through the heavy 110 | yellow wreaths. Our gas was lit and shone on the white cloth and 111 | glimmer of china and metal, for the table had not been cleared yet. 112 | Sherlock Holmes had been silent all the morning, dipping continuously 113 | into the advertisement columns of a succession of papers until at last, 114 | having apparently given up his search, he had emerged in no very sweet 115 | temper to lecture me upon my literary shortcomings. 116 | 117 | “At the same time,” he remarked after a pause, during which he had sat 118 | puffing at his long pipe and gazing down into the fire, “you can hardly 119 | be open to a charge of sensationalism, for out of these cases which you 120 | have been so kind as to interest yourself in, a fair proportion do not 121 | treat of crime, in its legal sense, at all. The small matter in which I 122 | endeavoured to help the King of Bohemia, the singular experience of 123 | Miss Mary Sutherland, the problem connected with the man with the 124 | twisted lip, and the incident of the noble bachelor, were all matters 125 | which are outside the pale of the law. But in avoiding the sensational, 126 | I fear that you may have bordered on the trivial.” 127 | 128 | “The end may have been so,” I answered, “but the methods I hold to have 129 | been novel and of interest.” 130 | 131 | “Pshaw, my dear fellow, what do the public, the great unobservant 132 | public, who could hardly tell a weaver by his tooth or a compositor by 133 | his left thumb, care about the finer shades of analysis and deduction! 
134 | But, indeed, if you are trivial, I cannot blame you, for the days of 135 | the great cases are past. Man, or at least criminal man, has lost all 136 | enterprise and originality. As to my own little practice, it seems to 137 | be degenerating into an agency for recovering lost lead pencils and 138 | giving advice to young ladies from boarding-schools. I think that I 139 | have touched bottom at last, however. This note I had this morning 140 | marks my zero-point, I fancy. Read it!” He tossed a crumpled letter 141 | across to me. 142 | """ 143 | ), 144 | ], 145 | align="justify", 146 | indent=18, 147 | optimal=LinebreakParams( 148 | tolerance=1, 149 | hyphen_penalty=1000, 150 | ), 151 | avoid_orphans=False, 152 | ) 153 | 154 | 155 | if __name__ == "__main__": 156 | main() 157 | -------------------------------------------------------------------------------- /src/pdfje/typeset/firstfit.py: -------------------------------------------------------------------------------- 1 | "A simple first-fit line wrapping algorithm." 
def shape(
    words: Iterator[WordLike],
    columns: Iterator[XY],
    allow_empty: bool,
    lead: Pt,
    avoid_orphans: bool,
    align: Align,
) -> Iterator[ShapedText]:
    """Wrap ``words`` over ``columns`` using the first-fit strategy.

    The lines are produced by ``_shape_avoid_orphans`` when
    ``avoid_orphans`` is set, and by ``_shape_simple`` otherwise. Each group
    of lines they yield is wrapped in a :class:`ShapedText` together with
    ``lead``, ``align`` and the value ``len(lines) * lead``.

    For justified alignment every line is first passed through
    ``Line.justify``; for all other alignments the lines are used as-is.
    The result is lazy: nothing is typeset until it is iterated.
    """
    fill = _shape_avoid_orphans if avoid_orphans else _shape_simple
    boxes = fill(words, columns, allow_empty, lead)
    if align is Align.JUSTIFY:
        return (
            ShapedText(
                [Line.justify(ln) for ln in lines],
                lead,
                align,
                len(lines) * lead,
            )
            for lines in boxes
        )
    return (
        ShapedText(lines, lead, align, len(lines) * lead) for lines in boxes
    )
68 | assert ws_undo_prev is not None 69 | ws_undo_prev, _branch = tee(ws_undo_prev) 70 | _, _lines_new, ws_undo = take_box(_branch, col, False, lead) 71 | if len(_lines_new) == 1: 72 | break # our attempt to fix the orphan failed. We're done. 73 | else: 74 | lines = lines_prev[:-1] 75 | ws = ws_undo_prev 76 | # case: we're done, but no (fixable) orphan. 77 | else: 78 | break 79 | 80 | yield lines_prev 81 | yield lines 82 | 83 | 84 | # filling is a lot simpler if we don't avoid orphaned lines. 85 | def _shape_simple( 86 | ws: Iterator[WordLike] | None, 87 | columns: Iterator[XY], 88 | allow_empty: bool, 89 | lead: Pt, 90 | ) -> Iterator[Sequence[Line]]: 91 | for col in columns: # pragma: no branch 92 | ws, lines, _ = take_box(ws, col, allow_empty, lead) 93 | yield lines 94 | if not ws: 95 | return 96 | allow_empty = False 97 | 98 | 99 | class _FilledBox(NamedTuple): 100 | rest: NonEmptyIterator[WordLike] | None 101 | lines: Sequence[Line] 102 | rest_incl_lastline: NonEmptyIterator[WordLike] | None 103 | 104 | 105 | def take_box( 106 | queue: NonEmptyIterator[WordLike] | None, 107 | space: XY, 108 | allow_empty: bool, 109 | lead: Pt, 110 | ) -> _FilledBox: 111 | width, height = space 112 | max_line_count: float = height // lead or not allow_empty 113 | lines: list[Line] = [] 114 | queue_prev = queue 115 | while queue and len(lines) < max_line_count: 116 | # OPTIMIZE: it'd be more efficient to only 'tee' on the last line 117 | queue, queue_prev = tee(queue) 118 | queue, ln = take_line(queue, width) 119 | lines.append(ln) 120 | return _FilledBox(queue, lines, queue_prev) 121 | 122 | 123 | def take_line( 124 | ws: NonEmptyIterator[WordLike], width: Pt 125 | ) -> tuple[NonEmptyIterator[WordLike] | None, Line]: 126 | space = width 127 | content: list[WordLike] = [] 128 | 129 | for word in ws: 130 | if word.pruned_width() > space: 131 | break 132 | 133 | space -= word.width 134 | content.append(word) 135 | else: 136 | # i.e. 
@add_slots
@dataclass(frozen=True)
class Line(_Line):
    """A single line of words as produced by the first-fit algorithm."""

    # The words placed on this line, in order.
    words: tuple[WordLike, ...]
    # The width taken up by the content of the line.
    width: Pt
    # The width left over on the line (0 after justification).
    space: Pt

    def justify(self) -> Line:
        """Return this line with its leftover space spread over the words.

        The leftover space is divided by the summed font size of the words
        that have a tail, and each word's tail is extended by that amount
        times the word's own font size. If the division is by zero, the
        line is returned unchanged.
        """
        try:
            # Extra width per unit of font size needed to fill the line.
            stretch_per_size = self.space / sum(
                word.state.size for word in self.words if word.tail
            )
        except ZeroDivisionError:
            # No word breaks to stretch, so there is nothing to justify.
            return self
        stretched = tuple(
            word.extend_tail(stretch_per_size * word.state.size)
            for word in self.words
        )
        return Line(stretched, self.width + self.space, 0)

    def __iter__(self) -> Iterator[bytes]:
        """Yield the encoded content of the line.

        Each word yields its own output and hands back the content that is
        still pending; whatever is pending after the last word is rendered
        with ``render_kerned``.
        """
        pending: Iterable[Real | LiteralStr] = ()
        for word in self.words:
            pending = yield from word.encode_into_line(pending)
        yield from render_kerned(pending)
44 | consecutive_hyphen_penalty 45 | The penalty for placing hyphens on consecutive lines. If increasing 46 | this value does not appear to work, try increasing the tolerance. 47 | fitness_diff_penalty 48 | The penalty for very tight and very loose lines following each other. 49 | """ 50 | 51 | tolerance: float = 1 52 | hyphen_penalty: float = 1000 53 | consecutive_hyphen_penalty: float = 1000 54 | fitness_diff_penalty: float = 1000 55 | 56 | DEFAULT: ClassVar["LinebreakParams"] 57 | 58 | 59 | LinebreakParams.DEFAULT = LinebreakParams() 60 | 61 | 62 | @final 63 | @add_slots 64 | @dataclass(frozen=True, init=False) 65 | class Paragraph(Block, StyledMixin): 66 | """A :class:`Block` that renders a paragraph of text. 67 | 68 | Parameters 69 | ---------- 70 | content 71 | The text to render. Can be a string, or a nested :class:`~pdfje.Span`. 72 | style 73 | The style to render the text with. 74 | See :ref:`tutorial