├── src
└── pdfje
│ ├── py.typed
│ ├── vendor
│ └── __init__.py
│ ├── typeset
│ ├── __init__.py
│ ├── hyphens.py
│ ├── layout.py
│ ├── parse.py
│ ├── state.py
│ ├── firstfit.py
│ └── knuth_plass.py
│ ├── layout
│ ├── __init__.py
│ ├── rule.py
│ ├── pages.py
│ ├── common.py
│ └── paragraph.py
│ ├── fonts
│ ├── __init__.py
│ └── common.py
│ ├── __init__.py
│ ├── compat.py
│ ├── units.py
│ ├── resources.py
│ ├── page.py
│ ├── document.py
│ └── atoms.py
├── tests
├── __init__.py
├── layout
│ ├── __init__.py
│ ├── test_rule.py
│ ├── test_common.py
│ └── test_paragraph.py
├── typeset
│ ├── __init__.py
│ ├── test_hyphens.py
│ └── test_state.py
├── test_units.py
├── test_page.py
├── test_draw.py
├── test_atoms.py
├── conftest.py
├── test_fonts.py
├── test_common.py
├── common.py
└── test_style.py
├── docs
├── changelog.rst
├── requirements.txt
├── index.rst
├── Makefile
├── examples.rst
├── conf.py
└── api.rst
├── sample.png
├── resources
├── fonts
│ ├── CrimsonText-Bold.ttf
│ ├── CrimsonText-Italic.ttf
│ ├── CrimsonText-Regular.ttf
│ ├── DejaVuSansCondensed.ttf
│ ├── CrimsonText-BoldItalic.ttf
│ ├── DejaVuSansCondensed-Bold.ttf
│ ├── DejaVuSansCondensed-Oblique.ttf
│ ├── DejaVuSansCondensed-BoldOblique.ttf
│ ├── CrimsonText-License.txt
│ └── DejaVuLicense.txt
├── Core14_AFMs
│ ├── MustRead.html
│ ├── ZapfDingbats.afm
│ └── Symbol.afm
├── optimal_vs_firstfit.py
├── scripts
│ └── parse_afm.py
└── sample.py
├── pytest.ini
├── dependabot.yml
├── .github
├── dependabot.yml
└── workflows
│ └── tests.yml
├── .readthedocs.yml
├── mypy.ini
├── Makefile
├── LICENSE
├── pyproject.toml
├── tox.ini
├── .gitignore
├── CHANGELOG.rst
├── examples
├── book.py
└── multicolumn.py
└── README.rst
/src/pdfje/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/pdfje/vendor/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/layout/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/typeset/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/pdfje/typeset/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../CHANGELOG.rst
2 |
--------------------------------------------------------------------------------
/sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/sample.png
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx<8.3
2 | furo~=2025.12.19
3 | sphinx-toolbox~=4.1.0
4 | sphinx-autodoc-typehints~=3.2
5 |
--------------------------------------------------------------------------------
/resources/fonts/CrimsonText-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-Bold.ttf
--------------------------------------------------------------------------------
/resources/fonts/CrimsonText-Italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-Italic.ttf
--------------------------------------------------------------------------------
/resources/fonts/CrimsonText-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-Regular.ttf
--------------------------------------------------------------------------------
/resources/fonts/DejaVuSansCondensed.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed.ttf
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = --benchmark-disable
3 | markers =
4 | slow: marks tests as slow (deselect with '-m "not slow"')
5 |
--------------------------------------------------------------------------------
/resources/fonts/CrimsonText-BoldItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/CrimsonText-BoldItalic.ttf
--------------------------------------------------------------------------------
/resources/fonts/DejaVuSansCondensed-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed-Bold.ttf
--------------------------------------------------------------------------------
/resources/fonts/DejaVuSansCondensed-Oblique.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed-Oblique.ttf
--------------------------------------------------------------------------------
/resources/fonts/DejaVuSansCondensed-BoldOblique.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ariebovenberg/pdfje/HEAD/resources/fonts/DejaVuSansCondensed-BoldOblique.ttf
--------------------------------------------------------------------------------
/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 | directory: "/"
5 | schedule:
6 | interval: daily
7 | time: "04:00"
8 | open-pull-requests-limit: 10
9 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 | directory: "/"
5 | schedule:
6 | interval: daily
7 | time: "04:00"
8 | open-pull-requests-limit: 10
9 |
--------------------------------------------------------------------------------
/src/pdfje/layout/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from .common import Block
4 | from .paragraph import LinebreakParams, Paragraph
5 | from .rule import Rule
6 |
7 | __all__ = ["Block", "Paragraph", "Rule", "LinebreakParams"]
8 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 | Contents
4 | ========
5 |
6 | .. toctree::
7 | :maxdepth: 2
8 |
9 | tutorial.rst
10 | examples.rst
11 | api.rst
12 | changelog.rst
13 |
14 | Indices and tables
15 | ==================
16 |
17 | * :ref:`genindex`
18 | * :ref:`modindex`
19 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sphinx:
4 | builder: html
5 | configuration: docs/conf.py
6 | fail_on_warning: true
7 |
8 | build:
9 | os: ubuntu-22.04
10 | tools:
11 | python: "3.11"
12 |
13 | python:
14 | install:
15 | - requirements: docs/requirements.txt
16 | - method: pip
17 | path: .
18 |
--------------------------------------------------------------------------------
/tests/test_units.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pdfje.units import cm, inch, mm, pc, pt
4 |
5 | from .common import approx
6 |
7 |
8 | def test_units():
9 | assert inch(1) == approx(72)
10 | assert cm(1) == approx(28.34645669291339)
11 | assert mm(1) == approx(2.8346456692913386)
12 | assert pc(1) == approx(12)
13 | assert pt(1) == 1
14 |
--------------------------------------------------------------------------------
/src/pdfje/fonts/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from .builtins import courier, helvetica, symbol, times_roman, zapf_dingbats
4 | from .common import BuiltinTypeface, TrueType
5 |
6 | __all__ = [
7 | "helvetica",
8 | "times_roman",
9 | "courier",
10 | "symbol",
11 | "zapf_dingbats",
12 | "BuiltinTypeface",
13 | "TrueType",
14 | ]
15 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | disallow_untyped_defs = True
3 | warn_redundant_casts = True
4 | warn_unused_ignores = True
5 | warn_unreachable = True
6 | enable_error_code = redundant-expr
7 |
8 | [mypy-tests.*]
9 | check_untyped_defs = True
10 | disallow_untyped_defs = False
11 | warn_unreachable = True
12 |
13 | [mypy-fontTools.*]
14 | ignore_missing_imports = True
15 |
16 | [mypy-hypothesis.*]
17 | ignore_missing_imports = True
18 |
19 | [mypy-pyphen.*]
20 | ignore_missing_imports = True
21 |
22 | [mypy-pdfje.vendor.*]
23 | ignore_errors = True
24 |
--------------------------------------------------------------------------------
/src/pdfje/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from .common import RGB, XY, black, blue, cyan, lime, magenta, red, yellow
4 | from .document import Document
5 | from .layout.pages import AutoPage
6 | from .page import Column, Page
7 |
8 | __version__ = __import__("importlib.metadata").metadata.version(__name__)
9 |
10 | __all__ = [
11 | # document & pages
12 | "Document",
13 | "Page",
14 | "Column",
15 | "AutoPage",
16 | # helpers
17 | "red",
18 | "lime",
19 | "blue",
20 | "black",
21 | "yellow",
22 | "magenta",
23 | "cyan",
24 | # common
25 | "RGB",
26 | "XY",
27 | ]
28 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = Quiz
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
1 | .. _examples:
2 |
3 | Examples
4 | ========
5 |
6 | The code for the examples can be found in the ``examples/`` directory.
7 |
8 | 📚 A book
9 | ~~~~~~~~~
10 |
11 | This example shows:
12 |
13 | - Creating single pages and autogenerated ones
14 | - Page numbering
15 | - Simple graphics
16 | - Custom font
17 |
18 | .. collapse:: Source code (click to expand)
19 |
20 | .. literalinclude :: ../examples/book.py
21 |
22 | .. _multi-column:
23 |
24 | 📰 Multiple columns
25 | ~~~~~~~~~~~~~~~~~~~
26 |
27 | This example shows the flexibility of the layout engine.
28 |
29 | .. collapse:: Source code (click to expand)
30 |
31 | .. literalinclude :: ../examples/multicolumn.py
32 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - "**"
7 | push:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 | strategy:
15 | fail-fast: false
16 | matrix:
17 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
18 |
19 | steps:
20 | - uses: actions/checkout@v1
21 | - name: Set up Python ${{ matrix.python-version }}
22 | uses: actions/setup-python@v2
23 | with:
24 | python-version: ${{ matrix.python-version }}
25 | - name: Install dependencies
26 | run: |
27 | python -m pip install --upgrade pip
28 | pip install "tox<5" tox-gh-actions "poetry>=1.7,<1.8"
29 | - name: Test with tox
30 | run: tox
31 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: init
2 | init:
3 | poetry install
4 | pip install -r docs/requirements.txt
5 |
6 | .PHONY: clean
7 | clean:
8 | rm -rf .coverage .hypothesis .mypy_cache .pytest_cache .tox *.egg-info
9 | rm -rf dist
10 | find . | grep -E "(__pycache__|docs_.*$$|\.pyc|\.pyo$$)" | xargs rm -rf
11 |
12 | .PHONY: isort
13 | isort:
14 | isort .
15 |
16 | .PHONY: format
17 | format:
18 | black .
19 |
20 | .PHONY: fix
21 | fix: isort format
22 |
23 | .PHONY: lint
24 | lint:
25 | flake8 .
26 |
27 | .PHONY: mypy
28 | mypy:
29 | mypy --pretty --strict src examples/
30 | mypy --pretty tests/
31 |
32 | .PHONY: test
33 | test:
34 | pytest --cov=pdfje
35 |
36 | .PHONY: docs
37 | docs:
38 | @touch docs/api.rst
39 | make -C docs/ html
40 |
41 | .PHONY: publish
42 | publish:
43 | rm -rf dist/*
44 | poetry build
45 | twine upload dist/*
46 |
--------------------------------------------------------------------------------
/tests/layout/test_rule.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import pytest
4 |
5 | from pdfje import XY
6 | from pdfje.layout import Rule
7 | from pdfje.layout.common import ColumnFill
8 | from pdfje.page import Column
9 | from pdfje.resources import Resources
10 | from pdfje.style import StyleFull
11 |
12 | STYLE = StyleFull.DEFAULT
13 |
14 | COLUMNS = [
15 | col := ColumnFill(Column(XY(80, 40), 205, 210), (), 20),
16 | ColumnFill(Column(XY(350, 40), 195, 190), (), 110),
17 | ColumnFill(Column(XY(350, 40), 200, 200), (), 90),
18 | ]
19 |
20 |
21 | @pytest.mark.skip(reason="not yet implemented")
22 | def test_into_columns_skipped_because_of_break():
23 | r = Rule(margin=(12, 0, 10, 0))
24 | filled = list(r.into_columns(Resources(), STYLE, iter(COLUMNS)))
25 | assert len(filled) == 1
26 | assert filled[0] is COLUMNS[0]
27 |
--------------------------------------------------------------------------------
/tests/typeset/test_hyphens.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import pytest
4 |
5 | from pdfje.typeset.hyphens import (
6 | HAS_PYPHEN,
7 | default_hyphenator,
8 | never_hyphenate,
9 | parse_hyphenator,
10 | )
11 |
12 |
13 | def test_default_hyphenation():
14 | assert list(default_hyphenator("beautiful")) == ["beau", "ti", "ful"]
15 |
16 |
17 | class TestParseHyphenator:
18 | @pytest.mark.skipif(not HAS_PYPHEN, reason="pyphen not installed")
19 | def test_pyphen(self):
20 | from pyphen import Pyphen
21 |
22 | p = Pyphen(lang="nl_NL")
23 | h = parse_hyphenator(p)
24 | result = h("beautiful")
25 | assert hasattr(result, "__iter__")
26 | assert list(result) == ["beau", "ti", "ful"]
27 |
28 | def test_none(self):
29 | assert parse_hyphenator(None) is never_hyphenate
30 |
--------------------------------------------------------------------------------
/src/pdfje/compat.py:
--------------------------------------------------------------------------------
1 | "Compatibility layer for various Python versions"
2 | from __future__ import annotations
3 |
4 | import sys
5 | from itertools import tee
6 | from typing import TYPE_CHECKING, Callable, Iterable, Iterator, TypeVar
7 |
8 | __all__ = ["pairwise", "cache"]
9 |
10 |
if sys.version_info < (3, 10) or TYPE_CHECKING:  # pragma: no cover
    T = TypeVar("T")

    def pairwise(i: Iterable[T]) -> Iterator[tuple[T, T]]:
        "Backport of ``itertools.pairwise``: pair each item with its successor"
        trailing, leading = tee(i)
        # Advance one copy by a single step so that zipping lines item k up
        # with item k+1; the ``None`` default keeps an empty input from
        # raising ``StopIteration``.
        next(leading, None)
        return zip(trailing, leading)

else:
    from itertools import pairwise
21 |
22 |
if sys.version_info < (3, 9) or TYPE_CHECKING:  # pragma: no cover
    from functools import lru_cache

    _Tcall = TypeVar("_Tcall", bound=Callable[..., object])

    def cache(func: _Tcall) -> _Tcall:
        "Backport of ``functools.cache``: memoize ``func`` with no size limit"
        unbounded = lru_cache(maxsize=None)
        # The wrapper is not literally a ``_Tcall``, hence the ignore.
        return unbounded(func)  # type: ignore

else:
    from functools import cache
33 |
--------------------------------------------------------------------------------
/resources/Core14_AFMs/MustRead.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Core 14 AFM Files - ReadMe
7 |
8 |
9 |
10 | or
11 |
12 |
13 | |
14 | This file and the 14 PostScript(R) AFM files it accompanies may be used, copied, and distributed for any purpose and without charge, with or without modification, provided that all copyright notices are retained; that the AFM files are not distributed without this file; that all modifications to this file or any of the AFM files are prominently noted in the modified file(s); and that this paragraph is not modified. Adobe Systems has no responsibility or obligation to support the use of the AFM files. Col |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tests/test_page.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pdfje import XY, Column, Page
4 | from pdfje.draw import Circle
5 | from pdfje.units import A5
6 |
7 | from .common import approx
8 |
9 |
10 | class TestPage:
11 | def test_default_column(self):
12 | p = Page(size=A5)
13 | assert len(p.columns) == 1
14 | assert p.columns[0].width < A5.x
15 | assert p.columns[0].height < A5.y
16 |
17 | def test_one_column_by_margins(self):
18 | [column] = Page(size=A5, margin=(20, 30)).columns
19 | assert column.origin.x == approx(30)
20 | assert column.origin.y == approx(20)
21 | assert column.width == approx(A5.x - 60)
22 | assert column.height == approx(A5.y - 40)
23 |
24 | def test_add(self):
25 | p = Page()
26 | p2 = p.add(Circle((0, 0), 10))
27 | assert p == Page()
28 | assert p2 == Page((Circle((0, 0), 10),))
29 |
30 |
31 | class TestColumn:
32 | def test_init(self):
33 | assert Column((1, 2), 3, 4) == Column(XY(1, 2), 3, 4)
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2022 - 2023 Arie Bovenberg
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/pdfje/units.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from functools import partial
4 | from operator import mul
5 | from typing import Callable
6 |
7 | from .common import XY, Pt
8 |
9 | __all__ = [
10 | "inch",
11 | "cm",
12 | "mm",
13 | "pc",
14 | "pt",
15 | "Pt",
16 | ]
17 |
18 | inch: Callable[[float], Pt] = partial(mul, 72)
19 | inch.__doc__ = "Convert inches to points"
20 | cm: Callable[[float], Pt] = partial(mul, 28.346456692913385)
21 | cm.__doc__ = "Convert centimeters to points"
22 | mm: Callable[[float], Pt] = partial(mul, 2.8346456692913385)
23 | mm.__doc__ = "Convert millimeters to points"
24 | pc: Callable[[float], Pt] = partial(mul, 12)
25 | pc.__doc__ = "Convert picas to points"
26 |
27 |
28 | def pt(x: float) -> Pt:
29 | "No-op conversion. Can be used to make units explicit."
30 | return x
31 |
32 |
33 | A0 = XY(2380, 3368)
34 | "A0 paper size"
35 | A1 = XY(1684, 2380)
36 | "A1 paper size"
37 | A2 = XY(1190, 1684)
38 | "A2 paper size"
39 | A3 = XY(842, 1190)
40 | "A3 paper size"
41 | A4 = XY(595, 842)
42 | "A4 paper size"
43 | A5 = XY(420, 595)
44 | "A5 paper size"
45 | A6 = XY(297, 420)
46 | "A6 paper size"
47 | letter = XY(612, 792)
48 | "Letter paper size"
49 | legal = XY(612, 1008)
50 | "Legal paper size"
51 | tabloid = XY(792, 1224)
52 | "Tabloid paper size"
53 | ledger = tabloid.flip()
54 | "Ledger paper size, same as tabloid landscape"
55 |
--------------------------------------------------------------------------------
/tests/test_draw.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pdfje.common import XY
4 | from pdfje.draw import Polyline, Rect
5 |
6 | NA = NotImplemented
7 |
8 |
9 | class TestPolyline:
10 | def test_empty(self):
11 | assert b"".join(Polyline([]).render(NA, NA)) == b""
12 |
13 | def test_one_point(self):
14 | assert (
15 | b"".join(Polyline([(2, 3)]).render(NA, NA))
16 | == b"2 3 m 0 0 0 RG S\n"
17 | )
18 |
19 | def test_several_points(self):
20 | assert (
21 | b"".join(Polyline([(2, 3), XY(4, 5), (6, 7)]).render(NA, NA))
22 | == b"2 3 m 4 5 l 6 7 l 0 0 0 RG S\n"
23 | )
24 |
25 | def test_several_points_closed(self):
26 | assert (
27 | b"".join(
28 | Polyline([(2, 3), XY(4, 5), (6, 7)], close=True).render(NA, NA)
29 | )
30 | == b"2 3 m 4 5 l 6 7 l 0 0 0 RG s\n"
31 | )
32 |
33 |
34 | class TestRect:
35 | def test_init(self):
36 | assert Rect((2, 3), 4, 5).origin == XY(2, 3)
37 | assert Rect((2, 3), 4, 5).width == 4
38 | assert Rect((2, 3), 4, 5).height == 5
39 |
40 | def test_render(self):
41 | assert (
42 | b"".join(Rect((2, 3), 4, 5).render(NA, NA))
43 | == b"2 3 4 5 re 0 0 0 RG S\n"
44 | )
45 |
46 | def test_invisible(self):
47 | assert (
48 | b"".join(Rect((2, 3), 4, 5, stroke=None).render(NA, NA))
49 | == b"2 3 4 5 re n\n"
50 | )
51 |
--------------------------------------------------------------------------------
/tests/test_atoms.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import pytest
4 | from hypothesis import given
5 | from hypothesis.strategies import binary
6 |
7 | from pdfje.atoms import HexString, _escape, sanitize_name
8 |
9 |
10 | @pytest.mark.parametrize(
11 | "string,expect",
12 | [
13 | (b"", b""),
14 | (
15 | b"!\"$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU"
16 | b"VWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
17 | b"!\"$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTU"
18 | b"VWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
19 | ),
20 | (b"foo\r# a\x00b\n\tc", b"foo#23#20abc"),
21 | ],
22 | )
23 | def test_sanitize_name(string, expect):
24 | assert sanitize_name(string) == expect
25 |
26 |
27 | @pytest.mark.parametrize(
28 | "string,expect",
29 | [
30 | (b"", b""),
31 | (b"\x00\x02a9~!kbn[]'/?", b"\x00\x02a9~!kbn[]'/?"),
32 | (b"a\\b\\", b"a\\\\b\\\\"),
33 | (b"a\t\nb\f\b", b"a\\t\\nb\\f\\b"),
34 | ],
35 | )
36 | def test_escape_string(string, expect):
37 | assert _escape(string) == expect
38 |
39 |
40 | @pytest.mark.slow
41 | @given(binary())
42 | def test_escape_fuzzing(bytestr):
43 | assert len(_escape(bytestr)) >= len(bytestr)
44 |
45 |
46 | @pytest.mark.parametrize(
47 | "string,expect",
48 | [
49 | (b"", b"<>"),
50 | (b"\x00\xa9b Z", b"<00A962205A>"),
51 | (b"<>", b"<3C3E>"),
52 | ],
53 | )
54 | def test_hex_string(string, expect):
55 | assert b"".join(HexString(string).write()).upper() == expect
56 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # documentation build configuration file, created by
5 | # sphinx-quickstart on Tue Jun 13 22:58:12 2017.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | from __future__ import annotations
17 |
18 | # -- Project information -----------------------------------------------------
19 | import importlib.metadata
20 |
21 | metadata = importlib.metadata.metadata("pdfje")
22 |
23 | project = metadata["Name"]
24 | author = metadata["Author"]
25 | version = metadata["Version"]
26 | release = metadata["Version"]
27 |
28 |
29 | # -- General configuration ------------------------------------------------
30 |
31 | extensions = [
32 | "sphinx.ext.autodoc",
33 | "sphinx.ext.intersphinx",
34 | "sphinx.ext.napoleon",
35 | "sphinx.ext.viewcode",
36 | "sphinx_toolbox.collapse",
37 | "sphinx_autodoc_typehints",
38 | ]
39 | templates_path = ["_templates"]
40 | source_suffix = ".rst"
41 |
42 | master_doc = "index"
43 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
44 |
45 | # -- Options for HTML output ----------------------------------------------
46 |
47 | autodoc_member_order = "bysource"
48 | html_theme = "furo"
49 | highlight_language = "python3"
50 | pygments_style = "default"
51 | intersphinx_mapping = {
52 | "python": ("https://docs.python.org/3", None),
53 | "pyphen": ("https://doc.courtbouillon.org/pyphen/stable/", None),
54 | }
55 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. _api:
2 |
3 | API reference
4 | =============
5 |
6 | Unless otherwise noted, all classes are immutable.
7 |
8 | pdfje
9 | -----
10 |
11 | .. automodule:: pdfje
12 | :members:
13 |
14 | pdfje.style
15 | -----------
16 |
17 | .. automodule:: pdfje.style
18 | :members:
19 |
20 | .. autodata:: pdfje.style.bold
21 | .. autodata:: pdfje.style.italic
22 | .. autodata:: pdfje.style.regular
23 |
24 | pdfje.layout
25 | ------------
26 |
27 | .. automodule:: pdfje.layout
28 | :members:
29 |
30 | pdfje.draw
31 | ----------
32 |
33 | .. automodule:: pdfje.draw
34 | :members:
35 |
36 | pdfje.fonts
37 | -----------
38 |
39 | .. autodata:: pdfje.fonts.helvetica
40 | .. autodata:: pdfje.fonts.times_roman
41 | .. autodata:: pdfje.fonts.courier
42 | .. autodata:: pdfje.fonts.symbol
43 | .. autodata:: pdfje.fonts.zapf_dingbats
44 |
45 | .. automodule:: pdfje.fonts
46 | :members:
47 |
48 |
49 | pdfje.units
50 | -----------
51 |
52 | .. automodule:: pdfje.units
53 | :members:
54 |
55 |
56 | **Page sizes**
57 |
58 | Below are common page sizes.
59 | Because the page size is a :class:`~pdfje.XY` object, you can use
60 | ``x`` and ``y`` attributes to get the width and height of a page size.
61 | The landscape variants can be obtained by calling :meth:`~pdfje.XY.flip`.
62 |
63 | .. code-block:: python
64 |
65 | from pdfje.units import A4
66 |
67 | A4.x # 595
68 | A4.y # 842
69 | A4.flip() # XY(842, 595) -- the landscape variant
70 | A4 / 2 # XY(297.5, 421) -- point at the center of the page
71 |
72 | .. autodata:: pdfje.units.A0
73 | .. autodata:: pdfje.units.A1
74 | .. autodata:: pdfje.units.A2
75 | .. autodata:: pdfje.units.A3
76 | .. autodata:: pdfje.units.A4
77 | .. autodata:: pdfje.units.A5
78 | .. autodata:: pdfje.units.A6
79 | .. autodata:: pdfje.units.letter
80 | .. autodata:: pdfje.units.legal
81 | .. autodata:: pdfje.units.tabloid
82 | .. autodata:: pdfje.units.ledger
83 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pathlib import Path
4 |
5 | import pytest
6 |
7 | from pdfje.fonts.common import TrueType
8 |
9 | RESOURCES = Path(__file__).parent / "../resources"
10 |
11 |
12 | def pytest_addoption(parser):
13 | parser.addoption("--output-path", help="Output path for PDF files")
14 | parser.addoption(
15 | "--runslow", action="store_true", default=False, help="run slow tests"
16 | )
17 |
18 |
@pytest.fixture
def outfile(tmpdir, request) -> Path:
    """Return the path of the PDF file the requesting test should write to.

    The file is placed in the directory given by the ``--output-path``
    option or, when that option is not set, in the ``tmpdir`` fixture's
    directory. The file name is built from the test function's module
    and qualified name, with dots replaced by dashes.
    """
    base = Path(request.config.getoption("--output-path") or tmpdir)
    # ``parents=True`` so a nested ``--output-path`` whose parent directories
    # do not exist yet is created instead of raising ``FileNotFoundError``.
    base.mkdir(parents=True, exist_ok=True)
    func = request.function
    return (
        base
        / "-".join(
            [func.__module__[6:], func.__qualname__]  # remove "tests." prefix
        ).replace(".", "-")
    ).with_suffix(".pdf")
30 |
31 |
def pytest_collection_modifyitems(config, items):
    "Mark tests carrying the ``slow`` keyword as skipped unless --runslow is set"
    if not config.getoption("--runslow"):
        # Without --runslow on the command line, slow tests are skipped.
        marker = pytest.mark.skip(reason="need --runslow option to run")
        for test in (entry for entry in items if "slow" in entry.keywords):
            test.add_marker(marker)
40 |
41 |
42 | @pytest.fixture(scope="session")
43 | def dejavu() -> TrueType:
44 | return TrueType(
45 | RESOURCES / "fonts/DejaVuSansCondensed.ttf",
46 | RESOURCES / "fonts/DejaVuSansCondensed-Bold.ttf",
47 | RESOURCES / "fonts/DejaVuSansCondensed-Oblique.ttf",
48 | RESOURCES / "fonts/DejaVuSansCondensed-BoldOblique.ttf",
49 | )
50 |
51 |
52 | @pytest.fixture(scope="session")
53 | def crimson() -> TrueType:
54 | return TrueType(
55 | RESOURCES / "fonts/CrimsonText-Regular.ttf",
56 | RESOURCES / "fonts/CrimsonText-Bold.ttf",
57 | RESOURCES / "fonts/CrimsonText-Italic.ttf",
58 | RESOURCES / "fonts/CrimsonText-BoldItalic.ttf",
59 | )
60 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "pdfje"
3 | version = "0.6.1"
4 | description = "Write beautiful PDFs in declarative Python"
5 | authors = ["Arie Bovenberg "]
6 | license = "MIT"
7 | classifiers = [
8 | "Programming Language :: Python :: 3.8",
9 | "Programming Language :: Python :: 3.9",
10 | "Programming Language :: Python :: 3.10",
11 | "Programming Language :: Python :: 3.11",
12 | "Programming Language :: Python :: 3.12",
13 | ]
14 | packages = [
15 | { include = "pdfje", from = "src" },
16 | ]
17 | documentation = "https://pdfje.readthedocs.io"
18 | readme = "README.rst"
19 | include = ["CHANGELOG.rst", "README.rst"]
20 | repository = "https://github.com/ariebovenberg/pdfje"
21 | keywords = ["pdf"]
22 |
23 | [tool.poetry.dependencies]
24 | python = ">=3.8.1,<4.0"
25 | fonttools = {version="^4.38.0", optional=true}
26 | pyphen = {version=">=0.13.0", optional=true}
27 |
28 | [tool.poetry.extras]
29 | fonts = ["fonttools"]
30 | hyphens = ["pyphen"]
31 |
32 | [tool.poetry.group.test.dependencies]
33 | pytest = ">=7.0.1,<9.0.0"
34 | pytest-cov = ">=4,<6"
35 | pytest-benchmark = "^4.0.0"
36 | hypothesis = "^6.68.2"
37 |
38 | [tool.poetry.group.typecheck.dependencies]
39 | mypy = "^1.0.0"
40 |
41 | [tool.poetry.group.linting.dependencies]
42 | black = "^24"
43 | flake8 = ">=6,<8"
44 | isort = "^5.7.0"
45 | slotscheck = ">=0.17,<0.20"
46 |
47 |
48 | [tool.black]
49 | line-length = 79
50 | include = '\.pyi?$'
51 | exclude = '''
52 | /(
53 | \.eggs
54 | | \.git
55 | | \.mypy_cache
56 | | \.tox
57 | | \.venv
58 | | _build
59 | | build
60 | | dist
61 | )/
62 | '''
63 |
64 | [tool.isort]
65 | line_length = 79
66 | profile = 'black'
67 | add_imports = ['from __future__ import annotations']
68 |
69 | [tool.slotscheck]
70 | strict-imports = true
71 | require-superclass = true
72 | require-subclass = true
73 | exclude-modules = "^pdfje\\.vendor.*"
74 |
75 | [build-system]
76 | requires = ["poetry-core>=1.1.0"]
77 | build-backend = "poetry.core.masonry.api"
78 |
--------------------------------------------------------------------------------
/src/pdfje/layout/rule.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import dataclass
4 | from typing import Iterator, final
5 |
6 | from ..common import (
7 | RGB,
8 | XY,
9 | HexColor,
10 | Pt,
11 | Sides,
12 | SidesLike,
13 | Streamable,
14 | add_slots,
15 | black,
16 | setattr_frozen,
17 | )
18 | from ..resources import Resources
19 | from ..style import StyleFull
20 | from .common import Block, ColumnFill, Shaped
21 |
22 |
@final
@add_slots
@dataclass(frozen=True, init=False)
class Rule(Block):
    """A :class:`Block` that draws a horizontal line"""

    # Stroke color of the line (parsed from an RGB value or hex string).
    color: RGB
    # Space around the line, unpacked as (top, right, bottom, left).
    margin: Sides

    def __init__(
        self,
        color: RGB | HexColor = black,
        margin: SidesLike = Sides(6, 0, 6, 0),
    ) -> None:
        setattr_frozen(self, "color", RGB.parse(color))
        setattr_frozen(self, "margin", Sides.parse(margin))

    def into_columns(
        self, _: Resources, __: StyleFull, cs: Iterator[ColumnFill]
    ) -> Iterator[ColumnFill]:
        """Place the rule in the first column that has room for it.

        Takes the next column from ``cs``. If the rule's vertical margins
        do not fit in that column's free height, the column is yielded
        unchanged and the rule is placed in the following column instead.
        The column that received the rule is yielded last.
        """
        col = next(cs)
        top, right, bottom, left = self.margin
        if (height := top + bottom) > col.height_free:
            # There is not enough room for the rule in the current column.
            # Yield the column and start a new one.
            yield col
            # Without this, the rule would be added to the column that was
            # just found to be too full (and that column emitted twice).
            col = next(cs)
        y = col.box.origin.y + col.height_free - top
        x = col.box.origin.x + left
        yield col.add(
            ShapedRule(
                XY(x, y),
                XY(col.box.origin.x + col.box.width - right, y),
                self.color,
                height,
            ),
        )
59 |
60 |
@add_slots
@dataclass(frozen=True)
class ShapedRule(Shaped):
    """A horizontal rule with its final coordinates, ready to be drawn."""

    start: XY
    end: XY
    color: RGB
    height: Pt

    def render(self, _: XY, __: Pt) -> Streamable:
        """Yield the PDF operators that stroke the line in its color."""
        operands = (*self.start, *self.end, *self.color)
        stroke = b"%g %g m %g %g l %g %g %g RG S\n" % operands
        yield stroke
75 |
--------------------------------------------------------------------------------
/src/pdfje/typeset/hyphens.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from functools import partial
4 | from itertools import chain, starmap
5 | from typing import TYPE_CHECKING, Callable, Iterable, Union
6 |
7 | from ..compat import pairwise
8 |
9 | Hyphenator = Callable[[str], Iterable[str]]
10 | " hyphenation -> hy phen ation "
11 |
12 |
def never_hyphenate(txt: str) -> Iterable[str]:
    """Hyphenator that never splits: returns the word as a single part."""
    whole_word = (txt,)
    return whole_word
15 |
16 |
# The confusing logic here is to avoid importing pyphen if it's not
# installed, and also keeping the type checker happy -- complicated
# by the fact that pyphen has no type annotations.
if TYPE_CHECKING:
    # Type checkers only see this branch: a minimal typed stand-in that
    # declares just the parts of ``Pyphen`` used in this module.

    class Pyphen:
        def __init__(self, lang: str) -> None: ...

        def positions(self, txt: str) -> Iterable[int]: ...

    HAS_PYPHEN = True

    HyphenatorLike = Hyphenator | Pyphen | None

else:
    # At runtime, use the real pyphen if it can be imported. The alias
    # ``HyphenatorLike`` only mentions ``Pyphen`` when the import succeeded.
    try:
        from pyphen import Pyphen
    except ImportError:  # pragma: no cover
        HAS_PYPHEN = False
        HyphenatorLike = Union[Hyphenator, None]
    else:
        HAS_PYPHEN = True
        HyphenatorLike = Union[Hyphenator, Pyphen, None]
40 |
41 |
if HAS_PYPHEN:

    def parse_hyphenator(p: HyphenatorLike) -> Hyphenator:
        """Turn a hyphenator-like value into a plain hyphenator callable.

        ``None`` disables hyphenation, a ``Pyphen`` instance is wrapped,
        and anything else is returned unchanged.
        """
        if p is None:
            return never_hyphenate
        if isinstance(p, Pyphen):
            return partial(_pyphenate, p)
        return p

    def _pyphenate(p: Pyphen, txt: str) -> Iterable[str]:
        """Split ``txt`` at the positions reported by ``p.positions``."""
        breakpoints = p.positions(txt)
        if not breakpoints:
            return (txt,)
        # Consecutive boundaries (0, b1, b2, ..., None) become the slices
        # txt[0:b1], txt[b1:b2], ..., txt[bn:].
        boundaries = chain((0,), breakpoints, (None,))
        pieces = starmap(slice, pairwise(boundaries))
        return map(txt.__getitem__, pieces)

    default_hyphenator: Hyphenator = partial(_pyphenate, Pyphen(lang="en_US"))

else:  # pragma: no cover
    from ..vendor.hyphenate import hyphenate_word

    default_hyphenator = hyphenate_word

    def parse_hyphenator(p: HyphenatorLike) -> Hyphenator:
        """Turn a hyphenator-like value into a plain hyphenator callable."""
        if p is None:
            return never_hyphenate
        return p  # type: ignore
70 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | isolated_build = true
3 | envlist = py{38,39,310,311,312},style,docs,minimal,typecheck,isort,slotscheck
4 | [testenv]
5 | allowlist_externals =
6 | poetry
7 | setenv=
8 | POETRY_VIRTUALENVS_CREATE=false
9 | commands_pre=
10 | poetry install -n -v --no-root --only test -E fonts -E hyphens
11 | extras=
12 | fonts
13 | hyphens
14 | commands=
15 | pytest
16 |
17 | [testenv:py312]
18 | commands =
19 | pytest -v --cov=pdfje {posargs}
20 |
21 | [testenv:minimal]
22 | commands_pre=
23 | poetry install -n -v --no-root --only test
24 | commands=
25 | pytest {posargs}
26 | extras=
27 |
28 | [testenv:typecheck]
29 | commands_pre=
30 | poetry install -n -v --no-root --only typecheck --only test -E fonts -E hyphens
31 |
32 | commands=
33 | mypy --pretty --strict src/ examples/
34 | mypy --pretty tests/
35 |
36 | [testenv:style]
37 | commands_pre=
38 | poetry install -n -v --no-root --only linting -E fonts -E hyphens
39 | commands=
40 | black --check --diff src/ tests/
41 | flake8 src/ tests/
42 |
43 | [testenv:isort]
44 | commands_pre=
45 | poetry install -n -v --no-root --only linting -E fonts -E hyphens
46 | commands=
47 | isort --check-only --diff src/ tests/
48 |
49 | [testenv:slotscheck]
50 | commands_pre=
51 | poetry install -n -v --no-root --only linting -E fonts -E hyphens
52 | commands=
53 | slotscheck -m pdfje
54 |
55 | [testenv:docs]
56 | basepython=python3.11
57 | deps=
58 | -rdocs/requirements.txt
59 | commands=
60 | sphinx-build -W -d "{toxworkdir}/docs_doctree" docs "{toxworkdir}/docs_out" \
61 | --color -bhtml
62 | python -c 'import pathlib; print("documentation available under " \
63 | + (pathlib.Path(r"{toxworkdir}") / "docs_out" / "index.html").as_uri())'
64 |
65 | [coverage:run]
66 | branch=True
67 | [coverage:report]
68 | fail_under=99
69 | exclude_lines=
70 | pragma: no cover
71 | raise NotImplementedError
72 | def __repr__
73 | @overload
74 | ^\s+def.*: \.\.\.
75 | if TYPE_CHECKING:
76 | omit=*/pdfje/vendor/*
77 |
78 | [gh-actions]
79 | python =
80 | 3.8: py38
81 | 3.9: py39
82 | 3.10: py310
83 | 3.11: py311, style, isort, docs, minimal
84 | 3.12: py312, typecheck, slotscheck
85 |
86 | [flake8]
87 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,.tox,src/pdfje/vendor
88 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | pip-wheel-metadata/
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 |
58 | # Flask stuff:
59 | instance/
60 | .webassets-cache
61 |
62 | # Scrapy stuff:
63 | .scrapy
64 |
65 | # Sphinx documentation
66 | docs/_build/
67 |
68 | # PyBuilder
69 | target/
70 |
71 | # Jupyter Notebook
72 | .ipynb_checkpoints
73 |
74 | # pyenv
75 | .python-version
76 |
77 | # celery beat schedule file
78 | celerybeat-schedule
79 |
80 | # SageMath parsed files
81 | *.sage.py
82 |
83 | # dotenv
84 | .env
85 |
86 | # virtualenv
87 | .venv
88 | venv/
89 | ENV/
90 |
91 | # Spyder project settings
92 | .spyderproject
93 | .spyproject
94 |
95 | # Rope project settings
96 | .ropeproject
97 |
98 | # mkdocs documentation
99 | /site
100 |
101 | # mypy
102 | .mypy_cache/
103 | /.idea
104 |
105 | .vim
106 |
107 | ### macOS ###
108 | # General
109 | .DS_Store
110 | .AppleDouble
111 | .LSOverride
112 |
113 | # Icon must end with two \r
114 | Icon
115 |
116 | # Thumbnails
117 | ._*
118 |
119 | # Files that might appear in the root of a volume
120 | .DocumentRevisions-V100
121 | .fseventsd
122 | .Spotlight-V100
123 | .TemporaryItems
124 | .Trashes
125 | .VolumeIcon.icns
126 | .com.apple.timemachine.donotpresent
127 |
128 | # Directories potentially created on remote AFP share
129 | .AppleDB
130 | .AppleDesktop
131 | Network Trash Folder
132 | Temporary Items
133 | .apdisk
134 |
135 | .envrc
136 | *.csv
137 | *.pdf
138 | output/
139 |
--------------------------------------------------------------------------------
/src/pdfje/layout/pages.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import dataclass
4 | from functools import partial
5 | from itertools import chain, count
6 | from typing import Callable, Iterable, Iterator, final
7 |
8 | from ..common import add_slots, always, flatten, setattr_frozen
9 | from ..page import Page, RenderedPage
10 | from ..resources import Resources
11 | from ..style import StyleFull
12 | from .common import Block, PageFill, fill_pages
13 | from .paragraph import Paragraph
14 |
15 |
@final
@add_slots
@dataclass(frozen=True, init=False)
class AutoPage:
    """Automatically lays out content on multiple pages.

    Parameters
    ----------
    content: ~typing.Iterable[~pdfje.Block | str] | ~pdfje.Block | str
        The content to lay out on the pages. Can be parsed from single string
        or block.
    template: ~pdfje.Page | ~typing.Callable[[int], ~pdfje.Page]
        A page to use as a template for the layout. If a callable is given,
        it is called with the page number as the only argument to generate
        the page. Defaults to the default :class:`Page`.

    """

    content: Iterable[str | Block]
    template: Callable[[int], Page]

    def __init__(
        self,
        content: str | Block | Iterable[Block | str],
        template: Page | Callable[[int], Page] = always(Page()),
    ) -> None:
        # Normalize a lone string or block into a one-element list.
        blocks: Iterable[str | Block]
        if isinstance(content, str):
            blocks = [Paragraph(content)]
        elif isinstance(content, Block):
            blocks = [content]
        else:
            blocks = content
        setattr_frozen(self, "content", blocks)

        # A fixed page becomes a callable that ignores the page number.
        make_page = always(template) if isinstance(template, Page) else template
        setattr_frozen(self, "template", make_page)

    def render(
        self, r: Resources, s: StyleFull, pnum: int, /
    ) -> Iterator[RenderedPage]:
        """Lay out all content, yielding pages numbered from ``pnum``."""
        templates = map(self.template, count(pnum))
        pages: Iterator[PageFill] = map(PageFill.new, templates)
        for block in map(_as_block, self.content):
            filler = partial(block.into_columns, r, s)
            pages, completed = fill_pages(pages, filler)
            for page in completed:
                yield page.base.fill(r, s, flatten(page.done))

        # The page still being filled is emitted with all of its columns.
        tail = next(pages)
        remaining = chain(tail.done, tail.todo)
        yield tail.base.fill(r, s, flatten(remaining))
67 |
68 |
69 | def _as_block(b: str | Block) -> Block:
70 | return Paragraph(b) if isinstance(b, str) else b
71 |
--------------------------------------------------------------------------------
/src/pdfje/resources.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import dataclass, field
4 | from itertools import chain, count
5 | from typing import Iterable, Iterator
6 |
7 | from . import atoms
8 | from .atoms import ASCII
9 | from .common import add_slots
10 | from .fonts import BuiltinTypeface, TrueType
11 | from .fonts.common import BuiltinFont, Font, Typeface
12 | from .fonts.embed import OBJS_PER_EMBEDDED_FONT, Subset
13 |
14 |
@add_slots
@dataclass(frozen=True, eq=False)
class Resources:
    """Keeps track of PDF resources within a document, such as fonts"""

    _builtins: dict[tuple[ASCII, bool, bool], BuiltinFont] = field(
        default_factory=dict
    )
    _subsets: dict[tuple[TrueType, bool, bool], Subset] = field(
        default_factory=dict
    )
    _next_subset_index: Iterator[int] = field(default_factory=count.__call__)

    def to_objects(self, first_id: atoms.ObjectID) -> Iterable[atoms.Object]:
        """Yield the objects of every embedded font subset.

        Each subset gets an ID range starting at ``first_id`` and spaced
        ``OBJS_PER_EMBEDDED_FONT`` apart.
        """
        object_ids = count(first_id, step=OBJS_PER_EMBEDDED_FONT)
        for subset, obj_id in zip(self._subsets.values(), object_ids):
            yield from subset.to_objects(obj_id)

    def to_atoms(self, first_id: atoms.ObjectID) -> atoms.Dictionary:
        """Build the resource dictionary with one ``Font`` entry per font.

        Builtin fonts come first, followed by references to the embedded
        subsets (numbered the same way as in :meth:`to_objects`).
        """
        builtin_entries = (
            (font.id, font.to_resource()) for font in self._builtins.values()
        )
        object_ids = count(first_id, step=OBJS_PER_EMBEDDED_FONT)
        subset_entries = (
            (subset.id, atoms.Ref(obj_id))
            for subset, obj_id in zip(self._subsets.values(), object_ids)
        )
        fonts = atoms.Dictionary(*builtin_entries, *subset_entries)
        return atoms.Dictionary((b"Font", fonts))

    def font(self, f: Typeface, bold: bool, italic: bool) -> Font:
        """Return the font for this typeface/style, registering it if new."""
        if isinstance(f, BuiltinTypeface):
            builtin_key = (f.regular.name, bold, italic)
            return self._builtins.setdefault(builtin_key, f.font(bold, italic))

        subset_key = (f, bold, italic)
        subset = self._subsets.get(subset_key)
        if subset is None:
            # First use of this variant: give it the next "F<n>" name.
            name = b"F%i" % next(self._next_subset_index)
            subset = self._subsets[subset_key] = Subset.new(
                name, f.font(bold, italic)
            )
        return subset
71 |
--------------------------------------------------------------------------------
/tests/typeset/test_state.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pdfje.atoms import LiteralStr, Real
4 | from pdfje.typeset.state import NO_OP, Passage, splitlines
5 | from pdfje.typeset.words import _encode_kerning
6 |
7 | from ..common import BIG, BLUE, FONT, GREEN, RED
8 |
9 |
class TestSplitlines:
    """``splitlines`` groups a stream of passages into lines."""

    def test_empty(self):
        lines = splitlines(iter([]))
        assert next(lines, None) is None

    def test_no_breaks(self):
        passages = [
            Passage(RED, "Beautiful "),
            Passage(BLUE, "is better "),
            Passage(GREEN, "than ugly."),
        ]
        lines = splitlines(iter(passages))
        first_line = list(next(lines))
        assert first_line == [
            Passage(RED, "Beautiful "),
            Passage(BLUE, "is better "),
            Passage(GREEN, "than ugly."),
        ]

    def test_breaks(self):
        passages = [
            Passage(RED, "Beautiful "),
            Passage(BLUE, "is better "),
            Passage(GREEN, "than\nugly.\r\n\n"),
            Passage(RED, "Explicit is "),
            Passage(BIG, "better than \nimplicit. \n"),
        ]
        # One entry per line that is expected to come out, in order.
        expected_lines = [
            [
                Passage(RED, "Beautiful "),
                Passage(BLUE, "is better "),
                Passage(GREEN, "than"),
            ],
            [Passage(NO_OP, "ugly.")],
            [Passage(NO_OP, "")],
            [
                Passage(NO_OP, ""),
                Passage(RED, "Explicit is "),
                Passage(BIG, "better than "),
            ],
            [Passage(NO_OP, "implicit. ")],
            [Passage(NO_OP, "")],
        ]
        lines = splitlines(iter(passages))
        for expected in expected_lines:
            assert list(next(lines)) == expected
57 |
58 |
class TestEncodeKerning:
    """``_encode_kerning`` interleaves text chunks with kerning offsets."""

    def test_typical(self):
        kerning = [(1, -20), (2, -30), (6, -40)]
        encoded = list(_encode_kerning("abcdefg", kerning, FONT))
        assert encoded == [
            LiteralStr(b"a"),
            Real(20),
            LiteralStr(b"b"),
            Real(30),
            LiteralStr(b"cdef"),
            Real(40),
            LiteralStr(b"g"),
        ]

    def test_kern_first_char(self):
        kerning = [(0, -20), (2, -30)]
        encoded = list(_encode_kerning("abcdefg", kerning, FONT))
        assert encoded == [
            Real(20),
            LiteralStr(b"ab"),
            Real(30),
            LiteralStr(b"cdefg"),
        ]

    def test_no_kern(self):
        encoded = list(_encode_kerning("abcdefg", [], FONT))
        assert encoded == [LiteralStr(b"abcdefg")]
87 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | Changelog
2 | =========
3 |
4 | 0.6.1 (2023-11-13)
5 | ------------------
6 |
7 | - 🐍 Official Python 3.12 compatibility
8 |
9 | 0.6.0 (2023-08-15)
10 | ------------------
11 |
12 | **Added**
13 |
14 | - 🧮 Paragraphs can be optimally typeset using the Knuth-Plass line
15 | breaking algorithm. Use the ``optimal`` argument for this.
16 | - 🛟 Paragraphs support automatically avoiding orphaned lines with
17 | ``avoid_orphans`` argument.
18 |
19 | **Breaking**
20 |
21 | - 📊 In the rare case that a paragraph contains different text sizes,
22 | all lines are now rendered with the same leading.
23 | This is more consistent and allows for faster layouting.
24 |
25 | **Fixed**
26 |
27 | - 🐍 Fix compatibility with Python 3.8 and 3.9
28 |
29 | 0.5.0 (2023-05-07)
30 | ------------------
31 |
32 | **Breaking**
33 |
34 | - 🪆 Expose most classes from submodules instead of root
35 | (e.g. ``pdfje.Rect`` becomes ``pdfje.draw.Rect``).
36 | The new locations can be found in the API documentation.
37 | - 🏷️ ``Rule`` ``padding`` attribute renamed to ``margin``.
38 |
39 | **Added**
40 |
41 | - 📰 Support for horizontal alignment and justification of text.
42 | - 🫸 Support for indenting the first line of a paragraph.
43 | - ✂️ Automatic hyphenation of text.
44 |
45 | 0.4.0 (2023-04-10)
46 | ------------------
47 |
48 | A big release with lots of new features and improvements.
49 | Most importantly, the page layout engine is now complete and
50 | can be used to create multi-page/column documents.
51 |
52 | **Added**
53 |
54 | - 📖 Automatic layout of multi-style text into lines, columns, and pages
55 | - 🔬 Automatic kerning for supported fonts
56 | - 🖌️ Support for drawing basic shapes
57 | - 🎨 Additional text styling options
58 | - 📦 Make fonttools dependency optional
59 | - 📏 Horizontal rule element
60 |
61 | **Documentation**
62 |
63 | - 🧑🏫 Add a tutorial and examples
64 | - 📋 Polished docstrings in public API
65 |
66 | **Performance**
67 |
68 | - ⛳️ Document pages and fonts are now written in one efficient pass
69 |
70 | **Breaking**
71 |
72 | - 🌅 Drop Python 3.7 support
73 |
74 | 0.3.0 (2022-12-02)
75 | ------------------
76 |
77 | **Added**
78 |
79 | - 🍰 Documents can be created directly from string input
80 | - 🪜 Support for explicit newlines in text
81 | - 📢 ``Document.write()`` supports paths, file-like objects and iterator output
82 | - ✅ Improved PDF spec compliance
83 |
84 | **Changed**
85 |
86 | - 📚 Text is now positioned automatically within a page
87 |
88 | 0.2.0 (2022-12-01)
89 | ------------------
90 |
91 | **Added**
92 |
93 | - 🖌️ Different builtin fonts can be selected
94 | - 📥 Truetype fonts can be embedded
95 | - 🌏 Support for non-ASCII text
96 | - 📐 Pages can be rotated
97 | - 🤏 Compression is applied to keep filesize small
98 |
99 | 0.1.0 (2022-11-02)
100 | ------------------
101 |
102 | **Added**
103 |
104 | - 💬 Support basic ASCII text on different pages
105 |
106 | 0.0.1 (2022-10-28)
107 | ------------------
108 |
109 | **Added**
110 |
111 | - 🌱 Write a valid, minimal, empty PDF file
112 |
--------------------------------------------------------------------------------
/src/pdfje/typeset/layout.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import abc
4 | from dataclasses import dataclass
5 | from typing import Callable, Iterable, Iterator, Sequence
6 |
7 | from pdfje.typeset.words import WordLike
8 |
9 | from ..common import (
10 | XY,
11 | Align,
12 | Pt,
13 | Streamable,
14 | add_slots,
15 | fix_abstract_properties,
16 | )
17 | from ..layout.common import Shaped # FUTURE: fix this near-circular dependency
18 | from .state import State
19 |
20 |
@add_slots
@dataclass(frozen=True)
class ShapedText(Shaped):
    """Lines of typeset text, ready to be rendered at a position."""

    lines: Sequence[Line]
    lead: Pt
    align: Align
    height: Pt

    def render(self, pos: XY, width: Pt) -> Iterator[bytes]:
        """Return the content stream of this text, placed at ``pos``."""
        initial = self.pre_state()
        return render_text(
            pos, initial, width, self.lines, self.lead, self.align
        )

    def end_state(self) -> State | None:
        """State of the last word, or ``None`` if there are no words."""
        # Lines may (in rare cases) be empty, so keep walking backwards
        # until a line that actually holds a word is found.
        for line in reversed(self.lines):
            for word in reversed(line.words):
                return word.state
        return None

    def pre_state(self) -> State | None:
        """State of the first word, or ``None`` if there are no words."""
        # Lines may (in rare cases) be empty, so skip ahead to the first
        # line that actually holds a word.
        for line in self.lines:
            for word in line.words:
                return word.state
        return None
49 |
50 |
def render_text(
    pos: XY,
    state: State | None,
    prev_width: Pt,
    lines: Iterable[Line],
    lead: Pt,
    align: Align,
) -> Iterator[bytes]:
    """Yield a complete ``BT``/``ET`` text object for the given lines.

    The text position is set to ``pos``, then the initial ``state`` (if
    any) is emitted, followed by the lines as produced by the renderer
    selected through ``align``.
    """
    yield b"BT\n%g %g Td\n" % pos.astuple()
    if state:
        yield from state
    render_lines = _pick_renderer(align.value)
    yield from render_lines(lines, lead, prev_width)
    yield b"ET\n"
63 |
64 |
@fix_abstract_properties
class Line(Streamable):
    """Abstract base for one line of typeset text.

    Being a ``Streamable``, a line is iterated by the renderers in this
    module to obtain its output.
    """

    __slots__ = ()

    # The words on this line; ``ShapedText`` reads each word's ``state``.
    @property
    @abc.abstractmethod
    def words(self) -> Sequence[WordLike]: ...

    # Width of the line; used to compute centered/right-aligned offsets.
    @property
    @abc.abstractmethod
    def width(self) -> Pt: ...
76 |
77 |
78 | def _render_left(lines: Iterable[Line], lead: Pt, _: Pt) -> Iterator[bytes]:
79 | yield b"%g TL\n" % lead
80 | for ln in lines:
81 | yield b"T*\n"
82 | yield from ln
83 |
84 |
85 | def _render_centered(
86 | lines: Iterable[Line], lead: Pt, prev_width: Pt
87 | ) -> Iterator[bytes]:
88 | for ln in lines:
89 | yield b"%g %g TD\n" % ((prev_width - ln.width) / 2, -lead)
90 | yield from ln
91 | prev_width = ln.width
92 |
93 |
94 | def _render_right(
95 | lines: Iterable[Line], lead: Pt, prev_width: Pt
96 | ) -> Iterator[bytes]:
97 | for ln in lines:
98 | yield b"%g %g TD\n" % ((prev_width - ln.width), -lead)
99 | yield from ln
100 | prev_width = ln.width
101 |
102 |
# Maps an integer to the matching line renderer; ``render_text`` calls it
# with ``align.value``. The list order presumably mirrors the values of
# ``Align`` (left, center, right, justify) -- TODO confirm against the enum.
_pick_renderer: Callable[
    [int], Callable[[Iterable[Line], Pt, Pt], Iterable[bytes]]
] = [
    _render_left,
    _render_centered,
    _render_right,
    _render_left,  # justified lines are already stretched, so left-align.
].__getitem__
111 |
--------------------------------------------------------------------------------
/examples/book.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | from pathlib import Path
5 | from typing import Iterable, Sequence
6 |
7 | from pdfje import XY, AutoPage, Document, Page
8 | from pdfje.draw import Ellipse, Rect, Text
9 | from pdfje.fonts import TrueType
10 | from pdfje.layout import Paragraph, Rule
11 | from pdfje.style import Style
12 | from pdfje.units import inch, mm
13 |
14 |
def main() -> None:
    "Generate a PDF with the content of The Great Gatsby"
    # One auto-paginated section per chapter, preceded by the title page.
    chapter_pages = [
        AutoPage(blocks, template=create_page) for blocks in chapters()
    ]
    book = Document([TITLE_PAGE] + chapter_pages, style=CRIMSON)
    book.write("book.pdf")
22 |
23 |
def create_page(num: int) -> Page:
    "Return the base page with the page number ``num`` added"
    # The number is centered horizontally, near the bottom of the page
    position = (PAGESIZE.x / 2, mm(20))
    page_number = Text(position, str(num), Style(size=10), align="center")
    return BASEPAGE.add(page_number)
31 |
32 |
# Page dimensions: 5 by 8 inches.
PAGESIZE = XY(inch(5), inch(8))
# Template shared by all content pages; ``create_page`` adds the page
# number on top of it.
BASEPAGE = Page(
    [
        # The title in small text at the top of the page
        Text(
            (PAGESIZE.x / 2, PAGESIZE.y - mm(10)),
            "The Great Gatsby",
            Style(size=10, italic=True),
            align="center",
        ),
    ],
    size=PAGESIZE,
    margin=(mm(20), mm(20), mm(25)),
)

# Style used for the "Chapter ..." headings.
HEADING = Style(size=20, bold=True, line_spacing=3.5)

# The cover page: two shapes with the title and author drawn over them.
TITLE_PAGE = Page(
    [
        # Some nice shapes
        Rect(
            (PAGESIZE.x / 2 - 200, 275),  # use page dimensions to center it
            width=400,
            height=150,
            fill="#99aaff",
            stroke=None,
        ),
        Ellipse((PAGESIZE.x / 2, 350), 300, 100, fill="#22d388"),
        # The title and author on top of the shapes
        Text(
            (PAGESIZE.x / 2, 380),
            "The Great Gatsby",
            Style(size=30, bold=True),
            align="center",
        ),
        Text(
            (PAGESIZE.x / 2, 335),
            "F. Scott Fitzgerald",
            Style(size=14, italic=True),
            align="center",
        ),
    ],
    size=PAGESIZE,
)
# The document typeface, given as regular, bold, italic and bold-italic
# font files (paths are relative to this script).
CRIMSON = TrueType(
    Path(__file__).parent / "../resources/fonts/CrimsonText-Regular.ttf",
    Path(__file__).parent / "../resources/fonts/CrimsonText-Bold.ttf",
    Path(__file__).parent / "../resources/fonts/CrimsonText-Italic.ttf",
    Path(__file__).parent / "../resources/fonts/CrimsonText-BoldItalic.ttf",
)


# A paragraph consisting solely of one of these numerals starts a chapter.
_CHAPTER_NUMERALS = set("I II III IV V VI VII VIII IX X".split())
86 |
87 |
def chapters() -> Iterable[Sequence[Paragraph | Rule]]:
    "Book content grouped by chapters"
    current: list[Paragraph | Rule] = [Paragraph("Chapter I\n", HEADING)]
    # The first paragraph after a heading is not indented; later ones are.
    first_line_indent = 0
    for raw in PARAGRAPHS:
        numeral = raw.strip()
        if numeral in _CHAPTER_NUMERALS:
            # A bare numeral closes the current chapter and opens the next
            yield current
            current = [Paragraph(f"Chapter {numeral}\n", HEADING)]
            first_line_indent = 0
            continue
        if raw.startswith("------"):
            current.append(Rule("#aaaaaa", (20, 10, 10)))
            continue
        body_style = Style(line_spacing=1.2)
        current.append(
            Paragraph(
                raw, body_style, align="justify", indent=first_line_indent
            )
        )
        first_line_indent = 15
    yield current
107 |
108 |
# The book text split into paragraphs on blank lines, with the line breaks
# inside each paragraph replaced by spaces.
# NOTE(review): the fixed slice offsets presumably cut away front and back
# matter of the text file -- confirm against the actual file.
PARAGRAPHS = [
    m.replace("\n", " ")
    for m in re.split(
        r"\n\n",
        (
            Path(__file__).parent / "../resources/books/the-great-gatsby.txt"
        ).read_text()[1374:-18415],
    )
]

# Only generate the PDF when run as a script, not on import.
if __name__ == "__main__":
    main()
121 |
--------------------------------------------------------------------------------
/tests/test_fonts.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pathlib import Path
4 | from random import Random
5 |
6 | import pytest
7 |
8 | from pdfje.common import dictget
9 | from pdfje.fonts.common import KerningTable, TrueType, kern
10 | from pdfje.fonts.embed import Subset, _utf16be_hex
11 |
# Detect whether the optional fontTools dependency can be imported;
# the flag is used below to skip a test when it is installed.
try:
    import fontTools  # noqa

    HAS_FONTTOOLS = True
except ImportError:
    HAS_FONTTOOLS = False
18 |
19 |
def _make_subset(cids) -> Subset:
    """Build a ``Subset`` named ``F0`` that uses ``cids`` for encoding.

    The calling test is skipped if fontTools cannot be imported. The
    remaining constructor arguments are placeholders.
    """
    pytest.importorskip("fontTools")
    args = (
        b"F0",
        NotImplemented,
        lambda _: 1,
        cids,
        NotImplemented,
        None,
    )
    return Subset(*args)
30 |
31 |
# Kerning table used by the tests below: maps a pair of adjacent
# characters to an adjustment; ``dictget`` is given 0 as the fallback
# for pairs that are not listed.
_EXAMPLE_KERNINGTABLE: KerningTable = dictget(
    {
        ("x", "y"): -40,
        ("a", "b"): -60,
        (" ", "a"): -20,
        ("a", " "): -10,
        ("z", " "): -10,
    },
    0,
)
42 |
43 |
class TestKern:
    """``kern`` yields ``(index, adjustment)`` pairs for a string."""

    def test_empty(self):
        pairs = list(kern(_EXAMPLE_KERNINGTABLE, "", " "))
        assert pairs == []

    def test_no_kerning_needed(self):
        pairs = list(kern(_EXAMPLE_KERNINGTABLE, "basdfzyx", " "))
        assert pairs == []

    def test_lots_of_kerning(self):
        pairs = list(kern(_EXAMPLE_KERNINGTABLE, "aaababaxyz", " "))
        assert pairs == [
            (0, -20),
            (3, -60),
            (5, -60),
            (8, -40),
        ]

    def test_lots_of_kerning_no_init(self):
        # Without a preceding character, nothing is kerned at index 0.
        pairs = list(kern(_EXAMPLE_KERNINGTABLE, "aaababaxyz", None))
        assert pairs == [
            (3, -60),
            (5, -60),
            (8, -40),
        ]

    def test_one_letter(self):
        pairs = list(kern(_EXAMPLE_KERNINGTABLE, "a", " "))
        assert pairs == [(0, -20)]
70 |
71 |
class TestEncodeEmbeddedSubset:
    """``Subset.encode`` turns text into two bytes per character."""

    def test_empty(self):
        subset = _make_subset({})
        assert subset.encode("") == b""

    def test_ascii(self):
        cmap = {ord("a"): 1, ord("b"): 4, ord("\n"): 0xFFFE}
        encoded = _make_subset(cmap).encode("ab\n")
        assert encoded == b"\x00\x01\x00\x04\xff\xfe"

    def test_exotic_unicode(self):
        cmap = {ord("🌵"): 9, ord("𫄸"): 0xD900, ord("𒀗"): 0xFFFE}
        encoded = _make_subset(cmap).encode("🌵𫄸𒀗")
        assert encoded == b"\x00\x09\xd9\x00\xff\xfe"

    def test_long_string(self, benchmark):
        size = 10_000
        rng = Random(0)
        # Distinct random code points, each mapped to a shuffled CID.
        codepoints = rng.sample(range(0x10FFFF), k=size)
        text = "".join(map(chr, codepoints))
        shuffled_cids = list(range(size))
        rng.shuffle(shuffled_cids)
        cmap = dict(zip(codepoints, shuffled_cids))
        subset = _make_subset(cmap)
        encoded = benchmark(subset.encode, text)
        assert len(encoded) == 2 * len(text)
104 |
105 |
def test_true_type_init():
    """A font given as a plain string ends up as a ``Path`` attribute."""
    here = Path(__file__).parent
    regular = here / "../resources/fonts/Roboto-Regular.ttf"
    bold_as_str = str(here / "../resources/fonts/Roboto-Bold.ttf")
    italic = here / "../resources/fonts/Roboto-Italic.ttf"
    bold_italic = here / "../resources/fonts/Roboto-BoldItalic.ttf"
    typeface = TrueType(regular, bold_as_str, italic, bold_italic)
    assert isinstance(typeface.bold, Path)
114 |
115 |
# Expects building/encoding a subset to raise NotImplementedError; the test
# is skipped whenever fontTools is installed.
# NOTE(review): _make_subset() itself calls pytest.importorskip("fontTools"),
# so without fontTools this test looks like it is skipped before the
# assertion is reached -- confirm it can actually run in the "minimal" env.
@pytest.mark.skipif(HAS_FONTTOOLS, reason="fontTools installed")
def test_fonttools_notimplemented():
    with pytest.raises(NotImplementedError):
        _make_subset({}).encode("")
120 |
121 |
class TestUTF16BEHex:
    """``_utf16be_hex`` gives the upper-case hex of a code point's
    UTF-16BE encoding."""

    def test_one_byte(self, benchmark):
        encoded = benchmark(_utf16be_hex, ord("a"))
        assert encoded == b"0061"

    def test_two_bytes(self, benchmark):
        encoded = benchmark(_utf16be_hex, ord("∫"))
        assert encoded == b"222B"

    def test_four_bytes(self, benchmark):
        encoded = benchmark(_utf16be_hex, ord("🌵"))
        assert encoded == b"D83CDF35"
131 |
--------------------------------------------------------------------------------
/tests/layout/test_common.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import dataclass
4 | from typing import Iterable, Iterator
5 |
6 | from pdfje import XY, Column, Page
7 | from pdfje.common import Pt, add_slots
8 | from pdfje.layout.common import ColumnFill, PageFill, Shaped, fill_pages
9 | from pdfje.units import A3, A4
10 |
# Page fixtures shared by the tests below. Each ``PageFill`` is given a
# page followed by two tuples of ``ColumnFill``.
# NOTE(review): the two tuples presumably are the columns still to fill and
# the columns already done, and the last ``ColumnFill`` argument the free
# height -- confirm against ``pdfje.layout.common``.
PAGES = [
    PageFill(
        Page(size=A3),
        (ColumnFill(Column(XY(40, 40), 190, 180), (), 100),),
        (ColumnFill(Column(XY(300, 40), 200, 200), (), 10),),
    ),
    PageFill(Page(size=A4), (), ()),
    PageFill(
        Page(size=A3.flip()),
        (
            ColumnFill(Column(XY(80, 40), 205, 210), (), 100),
            ColumnFill(Column(XY(350, 40), 195, 190), (), 100),
        ),
        (),
    ),
    PageFill(
        Page(size=A4.flip()),
        (
            ColumnFill(Column(XY(40, 40), 210, 170), (), 100),
            ColumnFill(Column(XY(300, 40), 195, 160), (), 100),
        ),
        (),
    ),
]
35 |
36 |
@add_slots
@dataclass(frozen=True)
class _DummyShapedBlock(Shaped):
    """Minimal ``Shaped`` stand-in: some content plus a fixed height."""

    content: bytes
    height: Pt

    def render(self, pos: XY, width: Pt) -> Iterable[bytes]:
        # Rendering is never exercised by these tests.
        raise NotImplementedError
45 |
46 |
class TestFillPages:
    """Tests for ``fill_pages`` using the module-level ``PAGES`` fixture."""

    def test_empty(self):
        # A filler that yields nothing leaves every page untouched
        pages, filled = fill_pages(iter(PAGES), lambda _: iter(()))
        assert list(pages) == PAGES
        assert list(filled) == []

    def test_fills_one_page_partially(self):
        def dummy_filler(cs: Iterator[ColumnFill]) -> Iterator[ColumnFill]:
            yield next(cs).add(_DummyShapedBlock(b"dummy content", 40))

        pages, filled = fill_pages(iter(PAGES), dummy_filler)
        # The column that received the block is handed back as `todo` on the
        # first remaining page, with 40 points less free height (100 -> 60).
        assert list(pages) == [
            PageFill(
                Page(size=A3),
                (
                    ColumnFill(
                        Column(XY(40, 40), 190, 180),
                        (
                            (
                                XY(40, 140),
                                _DummyShapedBlock(b"dummy content", 40),
                            ),
                        ),
                        60,
                    ),
                ),
                (ColumnFill(Column(XY(300, 40), 200, 200), (), 10),),
            ),
            *PAGES[1:],
        ]
        # No page was completed
        assert list(filled) == []

    def test_fills_multiple_pages(self):
        def dummy_filler(cs: Iterator[ColumnFill]) -> Iterator[ColumnFill]:
            for char in "abc":
                yield next(cs).add(_DummyShapedBlock(char.encode(), 40))
            next(cs)  # it's important we test consuming one more than yielded

        pages, filled = fill_pages(iter(PAGES), dummy_filler)
        # Remaining pages start with the page holding the last filled column
        # ("c"), which is reopened as `todo`; "b" stays in `done`.
        assert list(pages) == [
            PageFill(
                Page(size=A3.flip()),
                (
                    ColumnFill(
                        Column(XY(350, 40), 195, 190),
                        ((XY(350, 140), _DummyShapedBlock(b"c", 40)),),
                        60,
                    ),
                ),
                (
                    ColumnFill(
                        Column(XY(80, 40), 205, 210),
                        ((XY(80, 140), _DummyShapedBlock(b"b", 40)),),
                        60,
                    ),
                ),
            ),
            *PAGES[3:],
        ]
        # Completed pages: the first page (with "a" appended to its `done`
        # columns) and the column-less second page.
        assert filled == [
            PageFill(
                Page(size=A3),
                (),
                (
                    ColumnFill(Column(XY(300, 40), 200, 200), (), 10),
                    ColumnFill(
                        Column(XY(40, 40), 190, 180),
                        ((XY(40, 140), _DummyShapedBlock(b"a", 40)),),
                        60,
                    ),
                ),
            ),
            PageFill(Page(size=A4), (), ()),
        ]
121 |
--------------------------------------------------------------------------------
/src/pdfje/layout/common.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import abc
4 | from dataclasses import dataclass
5 | from itertools import islice, tee
6 | from typing import Callable, Iterator, Sequence
7 |
8 | from ..common import (
9 | XY,
10 | Streamable,
11 | add_slots,
12 | fix_abstract_properties,
13 | flatten,
14 | peek,
15 | prepend,
16 | )
17 | from ..page import Column, Page
18 | from ..resources import Resources
19 | from ..style import StyleFull
20 | from ..units import Pt
21 |
# Names exported by ``from <this module> import *``
__all__ = [
    "Block",
]
25 |
26 |
class Block(abc.ABC):
    """Base class for block elements that can be laid out in a column
    by :class:`~pdfje.AutoPage`.
    """

    __slots__ = ()

    # Fill the given columns with this block's content. It may consume as many
    # columns as it needs to determine how to render itself. It should only
    # yield columns that are actually filled -- which may be fewer than it
    # consumed (e.g. if it needed to look ahead).
    #
    # Why does it take an iterator of columns instead of receiving them one
    # at a time? Because a block may need to consume multiple columns to
    # render itself, before starting to yield completed columns.
    #
    # All parameters are positional-only:
    #   res   -- the Resources object passed in by the caller
    #   style -- the StyleFull passed in by the caller
    #   cs    -- the columns available for filling, in order
    @abc.abstractmethod
    def into_columns(
        self, res: Resources, style: StyleFull, cs: Iterator[ColumnFill], /
    ) -> Iterator[ColumnFill]: ...
45 |
46 |
@fix_abstract_properties
class Shaped(abc.ABC):
    """Interface for content with a known height that can be rendered.

    ``ColumnFill`` stores instances together with a position, subtracts
    ``height`` from its free space, and calls ``render`` when streamed.
    """

    __slots__ = ()

    # Produce the output for this item at position `pos`, given the width
    # of the column it was placed in.
    #
    # FUTURE: remove width from this interface. It can be set
    # on this object itself.
    @abc.abstractmethod
    def render(self, pos: XY, width: Pt) -> Streamable: ...

    # Vertical space this item takes up in a column
    @property
    @abc.abstractmethod
    def height(self) -> Pt: ...
59 |
60 |
@add_slots
@dataclass(frozen=True)
class ColumnFill(Streamable):
    """A column together with the shaped blocks placed in it so far."""

    box: Column
    blocks: Sequence[tuple[XY, Shaped]]
    height_free: Pt

    @staticmethod
    def new(col: Column) -> ColumnFill:
        """Create an empty fill with the column's full height available."""
        return ColumnFill(box=col, blocks=[], height_free=col.height)

    def add(self, s: Shaped) -> ColumnFill:
        """Return a new fill with ``s`` placed at the current cursor.

        The free height of the result is reduced by ``s.height``.
        """
        placed = (self.cursor(), s)
        remaining = self.height_free - s.height
        return ColumnFill(self.box, (*self.blocks, placed), remaining)

    def cursor(self) -> XY:
        """Return the column origin shifted along y by the free height."""
        origin = self.box.origin
        return origin.add_y(self.height_free)

    def __iter__(self) -> Iterator[bytes]:
        # Stream every placed block, rendered at its stored position
        column_width = self.box.width
        for position, shaped in self.blocks:
            yield from shaped.render(position, column_width)
85 |
86 |
# A callable that takes the available columns and returns the filled ones
_ColumnFiller = Callable[[Iterator[ColumnFill]], Iterator[ColumnFill]]
88 |
89 |
@add_slots
@dataclass(frozen=True)
class PageFill:
    """A page plus the state of its columns: still to fill, and filled."""

    base: Page
    todo: Sequence[ColumnFill]  # in the order they will be filled
    done: Sequence[ColumnFill]  # most recently filled last

    def reopen_most_recent_column(self) -> PageFill:
        """Return a copy where the last ``done`` column is first in ``todo``."""
        latest = self.done[-1]
        earlier = self.done[:-1]
        return PageFill(self.base, (latest, *self.todo), earlier)

    @staticmethod
    def new(page: Page) -> PageFill:
        """Create the initial state for ``page``: every column is empty."""
        fresh = [ColumnFill.new(col) for col in page.columns]
        return PageFill(page, fresh, ())
103 |
104 |
def fill_pages(
    doc: Iterator[PageFill], f: _ColumnFiller
) -> tuple[Iterator[PageFill], Sequence[PageFill]]:
    """Run the filler ``f`` over the open columns of the pages in ``doc``.

    Returns the remaining pages and the pages that were completed.
    """
    # One copy of the page stream feeds the filler with columns, the other
    # is used to put the filled columns back into their pages.
    for_result, for_columns = tee(doc)
    open_columns = flatten(
        page.todo for page in for_columns  # pragma: no branch
    )
    return _fill_into(f(open_columns), for_result)
112 |
113 |
def _fill_into(
    filled: Iterator[ColumnFill], doc: Iterator[PageFill]
) -> tuple[Iterator[PageFill], Sequence[PageFill]]:
    """Distribute the filled columns over the pages of ``doc``.

    Returns a tuple of (remaining pages, completed pages). The page that
    received the last filled column is not counted as completed: that column
    is reopened and the page is put back in front of the remaining pages.
    """
    # NOTE(review): `peek` is defined elsewhere; from its use here it appears
    # to return (first item, iterator that still includes it) and to raise
    # StopIteration on an empty iterator -- confirm.
    try:
        _, filled = peek(filled)
    except StopIteration:
        return doc, []  # no content to add

    completed: list[PageFill] = []
    for page in doc:  # pragma: no branch
        # Take at most as many filled columns as this page has open
        page_cols = list(islice(filled, len(page.todo)))
        completed.append(
            PageFill(
                page.base,
                page.todo[len(page_cols) :],  # noqa
                (*page.done, *page_cols),
            )
        )
        try:
            _, filled = peek(filled)
        except StopIteration:
            break  # no more content -- wrap things up

    # NOTE(review): if `doc` is exhausted before any page is consumed,
    # `completed` is empty and pop() raises IndexError. The pragma above
    # suggests `doc` is assumed not to run out -- confirm against callers.
    return prepend(completed.pop().reopen_most_recent_column(), doc), completed
138 |
--------------------------------------------------------------------------------
/tests/layout/test_paragraph.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import pytest
4 |
5 | from pdfje import red
6 | from pdfje.common import XY, Align
7 | from pdfje.layout.common import ColumnFill
8 | from pdfje.layout.paragraph import LinebreakParams, Paragraph
9 | from pdfje.page import Column
10 | from pdfje.resources import Resources
11 | from pdfje.style import Style
12 | from pdfje.vendor.hyphenate import hyphenate_word
13 |
14 | from ..common import LOREM_IPSUM, plaintext
15 |
# Style passed to ``into_columns`` by the tests in this module
STYLE = Style(italic=True, color=red, hyphens=hyphenate_word).setdefault()
17 |
18 |
@pytest.fixture
def res() -> Resources:
    """Provide a newly constructed ``Resources`` to each test using it."""
    return Resources()
22 |
23 |
def test_paragraph_init():
    """Check constructor defaults and normalization of ``Paragraph``."""
    # A bare string equals the same text in a list plus these explicit values
    assert Paragraph("Hello world") == Paragraph(
        ["Hello world"],
        style=Style.EMPTY,
        align=Align.LEFT,
        indent=0,
        avoid_orphans=True,
        optimal=LinebreakParams(
            tolerance=1,
            hyphen_penalty=1000,
            consecutive_hyphen_penalty=1000,
            fitness_diff_penalty=1000,
        ),
    )
    # String shorthands: a hex color as style, an alignment name, and
    # `optimal=False` comparing equal to `optimal=None`
    assert Paragraph(
        "Hello world", style="#003311", align="center", indent=2, optimal=False
    ) == Paragraph(
        ["Hello world"],
        style=Style(color="#003311"),
        align=Align.CENTER,
        indent=2,
        avoid_orphans=True,
        optimal=None,
    )
48 |
49 |
@plaintext.register
def _(f: ColumnFill) -> str:
    """Return the plain text of all blocks in a column.

    Non-empty text gets one trailing space appended.
    """
    # It isn't always valid to assume a space character between columns, but
    # it's good enough for the test data.
    pieces = [plaintext(shaped) for _, shaped in f.blocks]
    text = "".join(pieces).strip()
    return f"{text} " if text else text
58 |
59 |
def linecounts(filled: list[ColumnFill]) -> list[int]:
    """Return, for each column, the number of lines summed over its blocks."""
    counts: list[int] = []
    for column in filled:
        total = 0
        for _, para in column.blocks:
            total += len(para.lines)  # type: ignore
        counts.append(total)
    return counts
65 |
66 |
class TestParagraphFill:
    """Tests for ``Paragraph.into_columns`` over hand-built columns."""

    def test_empty(self, res: Resources):
        cols = [
            ColumnFill(Column(XY(80, 40), 205, 210), (), 105),
            ColumnFill(Column(XY(350, 40), 195, 190), (), 110),
            ColumnFill(Column(XY(350, 40), 200, 200), (), 90),
        ]
        p = Paragraph("", optimal=False)
        filled = list(p.into_columns(res, STYLE, iter(cols)))
        # An empty paragraph still yields exactly one column, without text
        assert len(filled) == 1
        assert plaintext(filled) == ""

    def test_everything_fits_on_one_page(self, res: Resources):
        cols = [
            ColumnFill(Column(XY(80, 40), 400, 800), (), 800),
            ColumnFill(Column(XY(350, 40), 405, 750), (), 750),
            ColumnFill(Column(XY(350, 40), 300, 780), (), 780),
            ColumnFill(Column(XY(350, 40), 300, 780), (), 780),
        ]
        p = Paragraph(LOREM_IPSUM, optimal=False)
        filled = list(p.into_columns(res, STYLE, iter(cols)))
        # Only the first column is yielded, and it holds the complete text
        assert len(filled) == 1
        assert plaintext(filled).strip() == LOREM_IPSUM.replace("\n", " ")

    @pytest.mark.parametrize("optimal", [False, True])
    @pytest.mark.parametrize("avoid_orphans", [True, False])
    def test_spread_across_pages(
        self, res: Resources, avoid_orphans: bool, optimal: bool
    ):
        # The first two columns have little free height (100 and 50)
        cols = [
            ColumnFill(Column(XY(80, 40), 400, 800), (), 100),
            ColumnFill(Column(XY(350, 40), 150, 50), (), 50),
            ColumnFill(Column(XY(350, 40), 300, 780), (), 780),
            ColumnFill(Column(XY(350, 40), 300, 780), (), 780),
        ]
        p = Paragraph(
            LOREM_IPSUM, avoid_orphans=avoid_orphans, optimal=optimal
        )
        filled = list(p.into_columns(res, STYLE, iter(cols)))
        # The same split is expected for every parameter combination
        assert len(filled) == 3
        assert linecounts(filled) == [6, 3, 41]
        assert plaintext(filled).strip() == LOREM_IPSUM.replace("\n", " ")

    def test_column_lookahead(self, res: Resources):
        cols = [
            ColumnFill(Column(XY(80, 40), 400, 800), (), 100),
            ColumnFill(Column(XY(350, 40), 400, 100), (), 100),
            ColumnFill(Column(XY(350, 40), 300, 50), (), 50),
            ColumnFill(Column(XY(350, 40), 320, 32), (), 32),
            ColumnFill(Column(XY(350, 40), 300, 800), (), 800),
        ]
        p = Paragraph(LOREM_IPSUM, optimal=True)
        filled = list(p.into_columns(res, STYLE, iter(cols)))
        # All five columns are used
        assert len(filled) == 5
        assert linecounts(filled) == [6, 6, 3, 2, 29]
122 |
--------------------------------------------------------------------------------
/tests/test_common.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import pytest
4 |
5 | from pdfje.common import RGB, XY, Sides
6 |
7 | from .common import approx
8 |
9 |
class TestXY:
    """Tests for the ``XY`` coordinate pair."""

    def test_basics(self):
        xy = XY(1, 2)
        assert xy.x == 1
        assert xy.y == 2
        assert xy.astuple() == (1, 2)

    def test_is_sequence(self):
        # XY supports indexing, iteration, len() and the other sequence methods
        xy = XY(1, 2)
        assert xy[0] == 1
        assert xy[1] == 2

        with pytest.raises(IndexError):
            xy[2]

        assert list(xy) == [1, 2]
        assert len(xy) == 2
        assert xy.index(1) == 0
        assert 2 in xy
        assert xy.count(1) == 1
        assert list(reversed(xy)) == [2, 1]

    def test_parse(self):
        # Both a plain tuple and an existing XY are accepted
        assert XY.parse((1, 3)) == XY(1, 3)
        assert XY.parse(XY(1, 3)) == XY(1, 3)

    def test_division(self):
        assert XY(1, 2) / 2 == XY(0.5, 1)

        with pytest.raises(TypeError, match="operand"):
            XY(1, 2) / "foo"  # type: ignore[operator]

    def test_flip(self):
        assert XY(1, 2).flip() == XY(2, 1)

    def test_add_coordinates(self):
        assert XY(1, 2).add_x(3) == XY(4, 2)
        assert XY(1, 2).add_y(3) == XY(1, 5)

    def test_subtract(self):
        # The right operand may be an XY or a plain tuple
        assert XY(1, 2) - XY(3, 4) == XY(-2, -2)
        assert XY(1, 2) - (3, 4) == XY(-2, -2)

        with pytest.raises(TypeError, match="operand"):
            XY(1, 2) - "foo"  # type: ignore[operator]

    def test_add(self):
        # The right operand may be an XY or a plain tuple
        assert XY(1, 2) + XY(3, 4) == XY(4, 6)
        assert XY(1, 2) + (3, 4) == XY(4, 6)

        with pytest.raises(TypeError, match="operand"):
            XY(1, 2) + "foo"  # type: ignore[operator]

    def test_multiply(self):
        assert XY(1, 2) * 3 == XY(3, 6)

        with pytest.raises(TypeError, match="operand"):
            XY(1, 2) * {}  # type: ignore[operator]
68 |
69 |
class TestRGB:
    """Tests for the ``RGB`` color triple."""

    def test_basics(self):
        rgb = RGB(1, 0.5, 0)
        assert rgb.red == 1
        assert rgb.green == 0.5
        assert rgb.blue == 0
        assert rgb.astuple() == (1, 0.5, 0)

    def test_is_sequence(self):
        # RGB supports indexing, iteration, len() and the other sequence methods
        rgb = RGB(1, 0.5, 0)
        assert rgb[0] == 1
        assert rgb[1] == 0.5
        assert rgb[2] == 0

        with pytest.raises(IndexError):
            rgb[3]

        assert list(rgb) == [1, 0.5, 0]
        assert len(rgb) == 3
        assert rgb.index(1) == 0
        assert 0.5 in rgb
        assert rgb.count(1) == 1
        assert list(reversed(rgb)) == [0, 0.5, 1]

    def test_parse(self):
        assert RGB.parse((1, 0.5, 0)) == RGB(1, 0.5, 0)
        # A hex string is expected to yield each channel divided by 255
        parsed = RGB.parse("#a044e9")
        assert parsed.red == approx(160 / 255)
        assert parsed.green == approx(68 / 255)
        assert parsed.blue == approx(233 / 255)

        # Unsupported input is expected to fail with an AssertionError
        with pytest.raises(AssertionError, match="RGB"):
            RGB.parse(object())  # type: ignore
103 |
104 |
class TestSides:
    """Tests for the four-valued ``Sides`` (top, right, bottom, left)."""

    def test_basics(self):
        sides = Sides(1, 2, 3, 4)
        assert sides.top == 1
        assert sides.right == 2
        assert sides.bottom == 3
        assert sides.left == 4
        assert sides.astuple() == (1, 2, 3, 4)

    def test_is_sequence(self):
        # Sides supports indexing, iteration, len() and other sequence methods
        sides = Sides(1, 2, 3, 4)
        assert sides[0] == 1
        assert sides[1] == 2
        assert sides[2] == 3
        assert sides[3] == 4

        with pytest.raises(IndexError):
            sides[4]

        assert list(sides) == [1, 2, 3, 4]
        assert len(sides) == 4
        assert sides.index(1) == 0
        assert 2 in sides
        assert sides.count(1) == 1
        assert list(reversed(sides)) == [4, 3, 2, 1]

    def test_parse(self):
        # One value applies to all sides; two are (top/bottom, right/left);
        # three are (top, right/left, bottom); four are taken as given.
        assert Sides.parse(1) == Sides(1, 1, 1, 1)
        assert Sides.parse((1, 2)) == Sides(1, 2, 1, 2)
        assert Sides.parse((1, 2, 3)) == Sides(1, 2, 3, 2)
        assert Sides.parse((1, 2, 3, 4)) == Sides(1, 2, 3, 4)
        assert Sides.parse(Sides(1, 2, 3, 4)) == Sides(1, 2, 3, 4)

        # More than four values, or a non-numeric string, are rejected
        with pytest.raises(TypeError, match="sides"):
            Sides.parse((20, 30, 25, 35, 40))  # type: ignore[arg-type]

        with pytest.raises(TypeError, match="sides"):
            Sides.parse("foo")  # type: ignore[arg-type]
143 |
--------------------------------------------------------------------------------
/resources/fonts/CrimsonText-License.txt:
--------------------------------------------------------------------------------
1 | Copyright 2010 The Crimson Text Project Authors (https://github.com/googlefonts/Crimson)
2 |
3 | This Font Software is licensed under the SIL Open Font License, Version 1.1.
4 | This license is copied below, and is also available with a FAQ at:
5 | http://scripts.sil.org/OFL
6 |
7 |
8 | -----------------------------------------------------------
9 | SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
10 | -----------------------------------------------------------
11 |
12 | PREAMBLE
13 | The goals of the Open Font License (OFL) are to stimulate worldwide
14 | development of collaborative font projects, to support the font creation
15 | efforts of academic and linguistic communities, and to provide a free and
16 | open framework in which fonts may be shared and improved in partnership
17 | with others.
18 |
19 | The OFL allows the licensed fonts to be used, studied, modified and
20 | redistributed freely as long as they are not sold by themselves. The
21 | fonts, including any derivative works, can be bundled, embedded,
22 | redistributed and/or sold with any software provided that any reserved
23 | names are not used by derivative works. The fonts and derivatives,
24 | however, cannot be released under any other type of license. The
25 | requirement for fonts to remain under this license does not apply
26 | to any document created using the fonts or their derivatives.
27 |
28 | DEFINITIONS
29 | "Font Software" refers to the set of files released by the Copyright
30 | Holder(s) under this license and clearly marked as such. This may
31 | include source files, build scripts and documentation.
32 |
33 | "Reserved Font Name" refers to any names specified as such after the
34 | copyright statement(s).
35 |
36 | "Original Version" refers to the collection of Font Software components as
37 | distributed by the Copyright Holder(s).
38 |
39 | "Modified Version" refers to any derivative made by adding to, deleting,
40 | or substituting -- in part or in whole -- any of the components of the
41 | Original Version, by changing formats or by porting the Font Software to a
42 | new environment.
43 |
44 | "Author" refers to any designer, engineer, programmer, technical
45 | writer or other person who contributed to the Font Software.
46 |
47 | PERMISSION & CONDITIONS
48 | Permission is hereby granted, free of charge, to any person obtaining
49 | a copy of the Font Software, to use, study, copy, merge, embed, modify,
50 | redistribute, and sell modified and unmodified copies of the Font
51 | Software, subject to the following conditions:
52 |
53 | 1) Neither the Font Software nor any of its individual components,
54 | in Original or Modified Versions, may be sold by itself.
55 |
56 | 2) Original or Modified Versions of the Font Software may be bundled,
57 | redistributed and/or sold with any software, provided that each copy
58 | contains the above copyright notice and this license. These can be
59 | included either as stand-alone text files, human-readable headers or
60 | in the appropriate machine-readable metadata fields within text or
61 | binary files as long as those fields can be easily viewed by the user.
62 |
63 | 3) No Modified Version of the Font Software may use the Reserved Font
64 | Name(s) unless explicit written permission is granted by the corresponding
65 | Copyright Holder. This restriction only applies to the primary font name as
66 | presented to the users.
67 |
68 | 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
69 | Software shall not be used to promote, endorse or advertise any
70 | Modified Version, except to acknowledge the contribution(s) of the
71 | Copyright Holder(s) and the Author(s) or with their explicit written
72 | permission.
73 |
74 | 5) The Font Software, modified or unmodified, in part or in whole,
75 | must be distributed entirely under this license, and must not be
76 | distributed under any other license. The requirement for fonts to
77 | remain under this license does not apply to any document created
78 | using the Font Software.
79 |
80 | TERMINATION
81 | This license becomes null and void if any of the above conditions are
82 | not met.
83 |
84 | DISCLAIMER
85 | THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
87 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
88 | OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
89 | COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90 | INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
91 | DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
92 | FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
93 | OTHER DEALINGS IN THE FONT SOFTWARE.
94 |
--------------------------------------------------------------------------------
/src/pdfje/typeset/parse.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import re
4 | from typing import Generator, Iterable, Iterator
5 |
6 | from ..common import Char, Pos, prepend
7 | from ..compat import pairwise
8 | from ..fonts.common import TEXTSPACE_TO_GLYPHSPACE
9 | from .state import Chain, Command, Passage, State
10 | from .words import MixedSlug, Slug, TrailingSpace, Word, WordLike
11 |
# Matches the shortest run of characters that ends in a break opportunity:
# one or more spaces, a hyphen-minus, a zero width space, or an em dash.
# The break characters are part of the match.
#
# NOTE(review): the last alternative (a lookahead for an em dash followed by
# a word character) looks unreachable, because the plain em dash alternative
# before it matches at the same position first -- confirm the intent.
#
# FUTURE: expand to support the full unicode spec,
# see https://unicode.org/reports/tr14/.
_WORD_RE = re.compile(
    r"(.*?( +|-|\N{ZERO WIDTH SPACE}|\N{EM DASH}|(?=\N{EM DASH}\w)))"
)
17 |
18 |
def into_words(
    it: Iterable[Passage], state: State
) -> tuple[Command, Iterator[WordLike]]:
    """Split passages into the leading command and an iterator of words.

    The commands of all passages up to and including the first one with
    text are folded into the returned command. If no passage has text,
    the word iterator is empty.
    """
    passages = iter(it)
    cmd, txt, state = _fold_commands(passages, state)
    words = _parse(passages, state, txt) if txt else iter(())
    return cmd, words
25 |
26 |
def _parse(
    it: Iterable[Passage], state: State, txt: str | None
) -> Iterator[WordLike]:
    """Yield the words of ``txt`` followed by those of the passages in ``it``.

    ``state`` is the state that applies to ``txt``. The command of each
    later passage is attached to the last word before it, and applied to
    the state used for the text after it.
    """
    it = iter(it)
    prev: Char | None = None
    pos = 0

    while txt:
        # Yields every word but the last; `last` is either the final complete
        # word, or a string fragment that did not end in a break.
        last = yield from _parse_simple_words(txt, pos, state, prev)
        if isinstance(last, str):
            # The fragment may continue in the following passages
            last, txt, pos = _complete_word(it, last, state, prev)
            state = last.state
            if txt is None:
                # No passage with a break was found: this is the final word
                yield last
                return
            elif pos < len(txt):
                # Text remains in the passage where the word ended
                yield last
                continue
        try:
            cmd, txt = next(it)
        except StopIteration:
            yield last
            return
        # The next passage's command is carried by the word preceding it
        yield last.with_cmd(cmd)
        state = cmd.apply(state)
        pos = 0
53 |
54 |
def _parse_simple_words(
    txt: str, pos: Pos, state: State, prev: Char | None
) -> Generator[WordLike, None, str | Word]:
    """Yield the words found in ``txt`` from ``pos``, all in one state.

    ``prev`` is passed to ``Word.new`` for the first word; each later word
    receives the last character of the word before it.

    The generator's return value is what remains at the end of the text:

    - a ``Word`` if the text ends exactly on a break; this word is *not*
      yielded, so the caller decides what to do with it.
    - a ``str`` holding the trailing text that does not end in a break.
    """
    assert pos < len(txt)
    ms = _WORD_RE.finditer(txt, pos)
    try:
        next_match = next(ms)
    except StopIteration:
        # No break opportunity at all: everything from `pos` is a fragment
        return txt[pos:]

    # Walk the matches in pairs so the final match is held back. If there
    # is only one match, the loop body never runs and `next_match` still
    # refers to that first match.
    for match, next_match in pairwise(prepend(next_match, ms)):
        word = match.group()
        yield Word.new(word, state, prev)
        prev = word[-1]

    final_word = Word.new(next_match.group(), state, prev)
    pos = next_match.end()
    if pos < len(txt):
        yield final_word
        return txt[pos:]
    else:
        return final_word
78 |
79 |
def _complete_word(
    it: Iterator[Passage], head: str, state: State, prev: Char | None
) -> tuple[Word, str | None, Pos]:
    """Finish the word starting with ``head`` using the passages in ``it``.

    Passages are consumed until one contains a break. Returns a tuple of:

    - the completed word
    - the text of the passage in which the word ended, or ``None`` if the
      passages ran out first
    - the position in that text where the word ended (0 if there is none)
    """
    parts: list[tuple[Command, str]] = []
    has_trailing_space = False
    st: Passage | None
    for st in it:
        if match := _WORD_RE.search(st.txt):
            word = match.group()
            if word.endswith(" "):
                # Remembered here; represented separately further below
                has_trailing_space = True
                word = word[:-1]
            parts.append((st.cmd, word))
            pos = match.end()
            break
        # No break in this passage: all of its text belongs to the word
        parts.append((st.cmd, st.txt))
    else:
        # Ran out of passages without finding a break
        pos = 0
        st = None
    if not parts:
        # A common case -- i.e. no space after the last word of a paragraph
        return Word.new(head, state, prev), st, pos

    # SIMPLIFICATION: for now, we don't hyphenate words that are split across
    # multiple styles. This because it's a rare case, and it's non-trivial
    # to implement.

    seg = Slug.new(head, state, prev)
    prev = seg.last()
    segments: list[tuple[Slug, Command]] = []
    cmds = []
    for cmd, txt in parts:
        new_state = cmd.apply(state)
        # Drop the previous character when the two states don't kern together
        prev = prev if state.kerns_with(new_state) else None
        state = new_state
        cmds.append(cmd)
        if txt:
            # Close the current segment, paired with all commands collected
            # since it was started, then start the next one.
            segments.append((seg, Chain.squash(cmds)))
            cmds.clear()
            seg = Slug.new(txt, state, prev)
            prev = txt[-1]

    # The final segment takes whatever commands came after its text
    segments.append((seg, Chain.squash(cmds)))

    trailing_space = None
    if has_trailing_space:
        # Built from the font and size of the state at the end of the word
        trailing_space = TrailingSpace(
            state.font.spacewidth / TEXTSPACE_TO_GLYPHSPACE * state.size,
            state.font.charkern(prev, " ") if prev else 0,
            state.size,
        )

    return (
        Word((MixedSlug(tuple(segments), state),), trailing_space, state),
        st.txt if st else None,
        pos,
    )
137 |
138 |
def _fold_commands(
    it: Iterator[Passage], state: State
) -> tuple[Command, str | None, State]:
    """Consume passages up to and including the first one that has text.

    Returns the squashed commands of the consumed passages, the text of
    the last one (``None`` if no passage had text), and the state after
    applying those commands.
    """
    pending: list[Command] = []
    text: str | None = None
    for passage in it:
        cmd = passage.cmd
        pending.append(cmd)
        state = cmd.apply(state)
        if passage.txt:
            text = passage.txt
            break
    return Chain.squash(pending), text, state
149 |
--------------------------------------------------------------------------------
/examples/multicolumn.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pdfje import AutoPage, Column, Document, Page
4 | from pdfje.fonts import times_roman
5 | from pdfje.layout import Paragraph
6 | from pdfje.style import Style, italic
7 | from pdfje.units import A3, A4, A6, inch, mm
8 |
9 |
def main() -> None:
    """Generate a PDF with differently styled text laid out in
    various columns.
    """
    # Repeat the same text in different styles
    paragraphs = [Paragraph(LOREM_IPSUM, style) for style in STYLES * 3]
    # Cycle through the three page templates
    pages = AutoPage(paragraphs, template=lambda i: TEMPLATES[i % 3])
    Document([pages]).write("multicolumn.pdf")
22 |
23 |
# The styles the example text is rendered in, one paragraph per style
STYLES = [Style(size=10), "#225588" | italic, Style(size=15, font=times_roman)]
# The page templates that `main` cycles through by page index
TEMPLATES = [
    # A one-column page
    Page(size=A6, margin=mm(15)),
    # A two-column page
    Page(
        columns=[
            Column(
                (inch(1), inch(1)),
                width=(A4.x / 2) - inch(1.25),
                height=A4.y - inch(2),
            ),
            Column(
                (A4.x / 2 + inch(0.25), inch(1)),
                width=(A4.x / 2) - inch(1.25),
                height=A4.y - inch(2),
            ),
        ]
    ),
    # A page with three arbitrary columns
    # (the page size is flipped, so A3.y is used for x and A3.x for y)
    Page(
        size=A3.flip(),
        columns=[
            Column(
                (inch(1), inch(1)),
                width=(A3.y / 4),
                height=A3.x - inch(2),
            ),
            Column(
                (A3.y / 4 + inch(1.5), inch(5)),
                width=(A3.y / 2) - inch(1.25),
                height=A3.x - inch(8),
            ),
            Column(
                ((A3.y * 0.8) + inch(0.25), inch(4)),
                width=(A3.y / 10),
                height=inch(5),
            ),
        ],
    ),
]
65 |
66 |
# Placeholder text. A trailing backslash joins a line with the next one, so
# newlines only remain where a line has no trailing backslash.
LOREM_IPSUM = """\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. \
Integer sed aliquet justo. Donec eu ultricies velit, porta pharetra massa. \
Ut non augue a urna iaculis vulputate ut sit amet sem. \
Nullam lectus felis, rhoncus sed convallis a, egestas semper risus. \
Fusce gravida metus non vulputate vestibulum. \
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere \
cubilia curae; Donec placerat suscipit velit. \
Mauris tincidunt lorem a eros eleifend tincidunt. \
Maecenas faucibus imperdiet massa quis pretium. Integer in lobortis nisi. \
Mauris at odio nec sem volutpat aliquam. Aliquam erat volutpat. \

Fusce at vehicula justo. Vestibulum eget viverra velit. \
Vivamus et nisi pulvinar, elementum lorem nec, volutpat leo. \
Aliquam erat volutpat. Sed tristique quis arcu vitae vehicula. \
Morbi egestas vel diam eget dapibus. Donec sit amet lorem turpis. \
Maecenas ultrices nunc vitae enim scelerisque tempus. \
Maecenas aliquet dui non hendrerit viverra. \
Aliquam fringilla, est sit amet gravida convallis, elit ipsum efficitur orci, \
eget convallis neque nunc nec lorem. Nam nisl sem, \
tristique a ultrices sed, finibus id enim.

Etiam vel dolor ultricies, gravida felis in, vestibulum magna. \
In diam ex, elementum ut massa a, facilisis sollicitudin lacus. \
Integer lacus ante, ullamcorper ac mauris eget, rutrum facilisis velit. \
Mauris eu enim efficitur, malesuada ipsum nec, sodales enim. \
Nam ac tortor velit. Suspendisse ut leo a felis aliquam dapibus ut a justo. \
Vestibulum sed commodo tortor. Sed vitae enim ipsum. \
Duis pellentesque dui et ipsum suscipit, in semper odio dictum. \

Sed in fermentum leo. Donec maximus suscipit metus. \
Nulla convallis tortor mollis urna maximus mattis. \
Sed aliquet leo ac sem aliquam, et ultricies mauris maximus. \
Cras orci ex, fermentum nec purus non, molestie venenatis odio. \
Etiam vitae sollicitudin nisl. Sed a ullamcorper velit. \

Aliquam congue aliquet eros scelerisque hendrerit. Vestibulum quis ante ex. \
Fusce venenatis mauris dolor, nec mattis libero pharetra feugiat. \
Pellentesque habitant morbi tristique senectus et netus et malesuada \
fames ac turpis egestas. Cras vitae nisl molestie augue finibus lobortis. \
In hac habitasse platea dictumst. Maecenas rutrum interdum urna, \
ut finibus tortor facilisis ac. Donec in fringilla mi. \
Sed molestie accumsan nisi at mattis. \
Integer eget orci nec urna finibus porta. \
Sed eu dui vel lacus pulvinar blandit sed a urna. \
Quisque lacus arcu, mattis vel rhoncus hendrerit, dapibus sed massa. \
Vivamus sed massa est. In hac habitasse platea dictumst. \
Nullam volutpat sapien quis tincidunt sagittis. \
"""
116 |
# Write the example PDF only when this file is run as a script
if __name__ == "__main__":
    main()
119 |
--------------------------------------------------------------------------------
/resources/optimal_vs_firstfit.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import replace
4 | from pathlib import Path
5 |
6 | from pdfje import XY, AutoPage, Column, Document, Page
7 | from pdfje.draw import Text
8 | from pdfje.fonts import TrueType
9 | from pdfje.layout import Paragraph
10 | from pdfje.layout.paragraph import LinebreakParams
11 | from pdfje.style import Span, Style, italic
12 | from pdfje.units import inch, mm
13 |
14 |
def main() -> None:
    """Write a PDF showing each paragraph of ``content`` twice: first as
    defined, then again with ``optimal=False``.
    """
    firstfit = (replace(p, optimal=False) for p in content)
    paragraphs = [*content, *firstfit]
    document = Document(
        [AutoPage(paragraphs, template=TEMPLATE)],
        style=CRIMSON,
    )
    document.write("optimal-vs-firstfit.pdf")
25 |
26 |
# Page dimensions and the margin used to position both columns
PAGESIZE = XY(inch(10), inch(8))
MARGIN = mm(16)
# Page template: two side-by-side columns, each half a page wide minus a
# margin on either side, with a label centered above each.
TEMPLATE = Page(
    [
        # The title in small text at the top of the page
        Text(
            (PAGESIZE.x / 4, PAGESIZE.y - mm(5)),
            "Optimal",
            Style(size=12, bold=True),
            align="center",
        ),
        Text(
            (PAGESIZE.x * 0.75, PAGESIZE.y - mm(5)),
            "Fast",
            Style(size=12, bold=True),
            align="center",
        ),
    ],
    size=PAGESIZE,
    columns=[
        Column(
            (MARGIN, MARGIN),
            PAGESIZE.x / 2 - MARGIN * 2,
            PAGESIZE.y - MARGIN * 2,
        ),
        Column(
            (PAGESIZE.x / 2 + MARGIN, MARGIN),
            PAGESIZE.x / 2 - MARGIN * 2,
            PAGESIZE.y - MARGIN * 2,
        ),
    ],
)
59 |
# The Crimson Text font files: regular, bold, italic and bold-italic.
# NOTE(review): the paths go up one directory and back into "resources", so
# they only resolve if this script's directory has a sibling (or is itself)
# named "resources" -- confirm this is intended.
CRIMSON = TrueType(
    Path(__file__).parent / "../resources/fonts/CrimsonText-Regular.ttf",
    Path(__file__).parent / "../resources/fonts/CrimsonText-Bold.ttf",
    Path(__file__).parent / "../resources/fonts/CrimsonText-Italic.ttf",
    Path(__file__).parent / "../resources/fonts/CrimsonText-BoldItalic.ttf",
)
66 |
67 |
def flatten_newlines(txt: str) -> str:
    """Turn single newlines into spaces and double newlines into single ones.

    The text is split on blank lines; within each part every newline is
    replaced by a space, and the parts are joined by one newline.
    """
    paragraphs = txt.split("\n\n")
    single_line = [part.replace("\n", " ") for part in paragraphs]
    return "\n".join(single_line)
70 |
71 |
# Extract from https://www.gutenberg.org/ebooks/1661
#
# Two justified paragraphs with different line breaking parameters. The
# first one mixes plain strings with italic spans.
content = [
    Paragraph(
        [
            flatten_newlines(
                """\
“To the man who loves art for its own sake,” remarked Sherlock
Holmes, tossing aside the advertisement sheet of"""
            ),
            Span(" The Daily Telegraph", italic),
            flatten_newlines(
                """, “it is
frequently in its least important and lowliest manifestations that the
keenest pleasure is to be derived. It is pleasant to me to observe,
Watson, that you have so far grasped this truth that in these little
records of our cases which you have been good enough to draw up, and, I
am bound to say, occasionally to embellish, you have given prominence
not so much to the many """
            ),
            Span("causes célèbres", italic),
            flatten_newlines(
                """ and sensational trials in
which I have figured but rather to those incidents which may have been
trivial in themselves, but which have given room for those faculties of
deduction and of logical synthesis which I have made my special
province.”"""
            ),
        ],
        align="justify",
        indent=0,
        optimal=LinebreakParams(
            tolerance=1,
            hyphen_penalty=0,
        ),
        avoid_orphans=False,
    ),
    Paragraph(
        [
            flatten_newlines(
                """\
“And yet,” said I, smiling, “I cannot quite hold myself absolved from
the charge of sensationalism which has been urged against my records.”

“You have erred, perhaps,” he observed, taking up a glowing cinder with
the tongs and lighting with it the long cherry-wood pipe which was wont
to replace his clay when he was in a disputatious rather than a
meditative mood—“you have erred perhaps in attempting to put colour and
life into each of your statements instead of confining yourself to the
task of placing upon record that severe reasoning from cause to effect
which is really the only notable feature about the thing.”

“It seems to me that I have done you full justice in the matter,” I
remarked with some coldness, for I was repelled by the egotism which I
had more than once observed to be a strong factor in my friend’s
singular character.




"""
            ),
        ],
        align="justify",
        indent=18,
        optimal=LinebreakParams(
            tolerance=3,
            hyphen_penalty=1000,
        ),
        avoid_orphans=False,
    ),
]
143 |
144 |
# Only run the example when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
147 |
--------------------------------------------------------------------------------
/src/pdfje/typeset/state.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import abc
4 | import re
5 | from dataclasses import dataclass, field, replace
6 | from typing import Collection, Iterable, Iterator, NamedTuple
7 |
8 | from ..common import (
9 | RGB,
10 | NonEmptyIterator,
11 | Pos,
12 | Pt,
13 | Streamable,
14 | add_slots,
15 | flatten,
16 | prepend,
17 | setattr_frozen,
18 | )
19 | from ..fonts.common import Font
20 | from .hyphens import Hyphenator
21 |
22 | _next_newline = re.compile(r"(?:\r\n|\n)").search
23 |
24 |
class Command(Streamable):
    """Abstract base for commands that transform a text :class:`State`.

    Besides being ``Streamable``, every command must implement
    :meth:`apply`, which maps one state onto the next.
    """

    __slots__ = ()

    # Return the state that results from applying this command to ``s``.
    @abc.abstractmethod
    def apply(self, s: State, /) -> State: ...
30 |
31 |
@add_slots
@dataclass(frozen=True)
class _NoOp(Command):
    """A command that changes nothing and emits no bytes."""

    def apply(self, s: State) -> State:
        # The state is returned unchanged.
        return s

    def __iter__(self) -> Iterator[bytes]:
        # Nothing is written to the stream.
        return iter(())


# Shared instance, used wherever a command is required but nothing
# needs to change.
NO_OP = _NoOp()
43 |
44 |
@add_slots
@dataclass(frozen=True)
class Chain(Command):
    """A sequence of commands that behaves as a single command.

    Applying the chain applies each item in order; streaming it yields the
    bytes of each item in order.
    """

    items: Collection[Command]

    def apply(self, s: State) -> State:
        """Apply every contained command to ``s`` in order."""
        for c in self.items:
            s = c.apply(s)
        return s

    def __iter__(self) -> Iterator[bytes]:
        return flatten(self.items)

    @staticmethod
    def squash(it: Iterable[Command]) -> Command:
        """Combine commands, keeping only the last command of each type.

        Returns ``NO_OP`` when ``it`` is empty, the command itself when only
        one type remains, and a :class:`Chain` otherwise. The remaining
        commands are ordered by the first occurrence of their type.
        """
        # A later command of the same type overrides an earlier one.
        by_type = {type(i): i for i in it}
        if not by_type:
            return NO_OP
        if len(by_type) == 1:
            return next(iter(by_type.values()))
        # Materialize the values: a live ``dict.values()`` view compares by
        # identity (so two equal chains would never compare equal) and keeps
        # the temporary dict alive for as long as the chain exists.
        return Chain(tuple(by_type.values()))
67 |
68 |
@add_slots
@dataclass(frozen=True)
class SetFont(Command):
    """Command that selects a font and font size."""

    font: Font
    size: Pt

    def apply(self, s: State) -> State:
        # Font and size are always replaced together.
        return replace(s, font=self.font, size=self.size)

    def __iter__(self) -> Iterator[bytes]:
        # Emits the ``Tf`` operator with the font's id and the size.
        yield b"/%b %g Tf\n" % (self.font.id, self.size)
80 |
81 |
@add_slots
@dataclass(frozen=True)
class SetLineSpacing(Command):
    """Command that changes the line spacing stored in the state."""

    value: float

    def apply(self, s: State) -> State:
        return replace(s, line_spacing=self.value)

    def __iter__(self) -> Iterator[bytes]:
        # We don't actually emit anything here,
        # because its value is already used to calculate the leading space
        # on a per-line basis.
        return iter(())
95 |
96 |
@add_slots
@dataclass(frozen=True)
class SetColor(Command):
    """Command that changes the color stored in the state."""

    value: RGB

    def apply(self, s: State) -> State:
        return replace(s, color=self.value)

    def __iter__(self) -> Iterator[bytes]:
        # Emits the ``rg`` operator with the three color components.
        yield b"%g %g %g rg\n" % self.value.astuple()
107 |
108 |
@add_slots
@dataclass(frozen=True)
class SetHyphens(Command):
    """Command that changes the hyphenator stored in the state."""

    value: Hyphenator

    def apply(self, s: State) -> State:
        return replace(s, hyphens=self.value)

    def __iter__(self) -> Iterator[bytes]:
        # hyphenation behavior is not written to the PDF stream itself,
        # but rather used in the text layout algorithm.
        return iter(())
121 |
122 |
@add_slots
@dataclass(frozen=True)
class State(Streamable):
    """Text state, see PDF 32000-1:2008, table 105"""

    font: Font
    size: Pt
    color: RGB
    line_spacing: float
    hyphens: Hyphenator

    # Derived in ``__post_init__`` as ``size * line_spacing``; it is not an
    # init argument and is excluded from comparisons.
    lead: Pt = field(init=False, compare=False)  # cached calculation

    def __iter__(self) -> Iterator[bytes]:
        # Only font/size and color are emitted when streaming the state.
        yield from SetFont(self.font, self.size)
        yield from SetColor(self.color)

    def __post_init__(self) -> None:
        # The dataclass is frozen, so the cached value is set via the helper.
        setattr_frozen(self, "lead", self.size * self.line_spacing)

    def kerns_with(self, other: State, /) -> bool:
        """Whether ``other`` has the same font and the same size."""
        return self.font == other.font and self.size == other.size
145 |
146 |
# NOTE: the result must be consumed in order, similar to itertools.groupby
def splitlines(it: Iterable[Passage]) -> Iterator[NonEmptyIterator[Passage]]:
    """Split a stream of passages into groups at newlines.

    Each yielded group is an iterator over the passages of one line. Newlines
    (``\\r\\n`` or ``\\n``) are removed from the text. A passage containing a
    newline is cut in two: the part before the newline closes the current
    group, the part after it opens the next one.

    All groups draw from the same underlying iterator, so a group must be
    fully consumed before the next one is requested.
    """
    it = iter(it)
    try:
        # Holds at most one entry: the passage (and the offset within its
        # text) at which the next group starts.
        transition: list[tuple[Passage, Pos]] = [(next(it), 0)]
    except StopIteration:
        # No passages at all: no groups either.
        return

    def _group() -> NonEmptyIterator[Passage]:
        psg, pos = transition.pop()
        for psg in prepend(psg, it):
            if (newline := _next_newline(psg.txt, pos)) is None:
                # No (further) newline in this passage: emit what is left.
                # A passage resumed mid-text (pos > 0) gets NO_OP instead of
                # its own command.
                yield Passage(NO_OP, psg.txt[pos:]) if pos else psg
                pos = 0
            else:
                # Emit the text up to the newline, then record where the
                # next group has to resume (just past the newline).
                yield Passage(
                    NO_OP if pos else psg.cmd,
                    psg.txt[pos : newline.start()],  # noqa
                )
                transition.append((psg, newline.end()))
                return

    # A group only leaves an entry behind when it ended at a newline;
    # once the input runs out without one, the loop ends.
    while transition:
        yield _group()
171 |
172 |
class Passage(NamedTuple):
    """A piece of text together with a command."""

    cmd: Command  # applied to the state before the text is considered
    txt: str
176 |
177 |
def max_lead(s: Iterable[Passage], state: State) -> Pt:
    """Return the largest ``lead`` among the passages that contain text.

    The command of every passage is applied to ``state`` in order. Passages
    with empty text still update the state, but their lead is not counted.
    If no lead was counted, the lead of the final state is returned.
    """
    # FUTURE: we apply commands elsewhere, so doing it also here
    # is perhaps a bit wasteful
    tallest = 0.0
    for command, text in s:
        state = command.apply(state)
        # Only count leading if there is actually text with this value
        if not text:
            continue
        if state.lead > tallest:
            tallest = state.lead
    # If there's no text to go on, use the state's default
    return tallest if tallest else state.lead
189 |
--------------------------------------------------------------------------------
/src/pdfje/page.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import abc
4 | from dataclasses import dataclass
5 | from itertools import chain
6 | from operator import methodcaller
7 | from typing import Iterable, Iterator, Literal, Sequence, final
8 |
9 | from . import atoms
10 | from .atoms import OBJ_ID_PAGETREE, OBJ_ID_RESOURCES
11 | from .common import (
12 | XY,
13 | Sides,
14 | SidesLike,
15 | Streamable,
16 | add_slots,
17 | flatten,
18 | setattr_frozen,
19 | )
20 | from .resources import Resources
21 | from .style import StyleFull
22 | from .units import A4, Pt, inch
23 |
24 | Rotation = Literal[0, 90, 180, 270]
25 |
26 |
class Drawing(abc.ABC):
    """Base class for all drawing operations which can be put on
    a :class:`~pdfje.Page`."""

    __slots__ = ()

    # Produce the streamable content of this drawing, given the document's
    # resources and the style to draw with.
    @abc.abstractmethod
    def render(self, r: Resources, s: StyleFull, /) -> Streamable: ...
35 |
36 |
@final
@add_slots
@dataclass(frozen=True, init=False)
class Column:
    """A column to lay out block elements in.

    Parameters
    ----------
    origin
        The bottom left corner of the column. Can be parsed from a 2-tuple.
    width
        The width of the column. Must be larger than 0.
    height
        The height of the column.

    """

    origin: XY
    width: Pt
    height: Pt

    def __init__(
        self, origin: XY | tuple[Pt, Pt], width: Pt, height: Pt
    ) -> None:
        # The dataclass is frozen, so attributes are set through the helper.
        setattr_frozen(self, "origin", XY.parse(origin))
        setattr_frozen(self, "width", width)
        setattr_frozen(self, "height", height)
        # NOTE(review): this check is skipped when Python runs with ``-O``,
        # and the height is not checked at all.
        assert self.width > 0
65 |
66 |
@add_slots
@dataclass(frozen=True)
class RenderedPage:
    """A page whose content has been rendered to a stream."""

    rotate: Rotation
    size: XY
    stream: Streamable

    def to_atoms(self, i: atoms.ObjectID) -> Iterable[atoms.Object]:
        """Yield the two PDF objects that make up this page.

        The page dictionary gets object ID ``i``; the content stream it
        refers to gets ``i + 1``.
        """
        yield i, atoms.Dictionary(
            (b"Type", atoms.Name(b"Page")),
            (b"Parent", atoms.Ref(OBJ_ID_PAGETREE)),
            (b"MediaBox", atoms.Array(map(atoms.Real, [0, 0, *self.size]))),
            (b"Contents", atoms.Ref(i + 1)),
            (b"Resources", atoms.Ref(OBJ_ID_RESOURCES)),
            (b"Rotate", atoms.Int(self.rotate)),
        )
        yield i + 1, atoms.Stream(self.stream)
84 |
85 |
@final
@add_slots
@dataclass(frozen=True, init=False)
class Page:
    """A single page within a document. Contains drawings at given positions.

    Example
    -------

    .. code-block:: python

        from pdfje import Page, Line, Rect, Text, A5
        title_page = Page([
            Text((100, 200), "My awesome story"),
            Line((100, 100), (200, 100)),
            Rect((50, 50), width=200, height=300),
        ], size=A5)

    Parameters
    ----------
    content
        The drawings to render on the page.
    size
        The size of the page in points. Common page sizes are available
        as constants:

        .. code-block:: python

            from pdfje.units import A4, A5, A6, letter, legal, tabloid

    rotate
        The rotation of the page in degrees.
    margin
        The margin around the page in points, used for layout.
        Can be a single value, or a 2, 3 or 4-tuple following the CSS
        shorthand convention. see https://www.w3schools.com/css/css_margin.asp
    columns
        The columns to use for laying out the content.
        If not given, the content is laid out in a single column
        based on the page size and margin.

    """

    content: Iterable[Drawing]
    size: XY
    rotate: Rotation
    columns: Sequence[Column]

    def __init__(
        self,
        content: Iterable[Drawing] = (),
        size: XY | tuple[Pt, Pt] = A4,
        rotate: Rotation = 0,
        margin: SidesLike = Sides.parse(inch(1)),
        columns: Sequence[Column] = (),
    ) -> None:
        size = XY.parse(size)
        setattr_frozen(self, "content", content)
        setattr_frozen(self, "rotate", rotate)
        # The margin itself is not stored: it is only used to derive the
        # default single column when no columns are given.
        setattr_frozen(self, "columns", columns or [_column(size, margin)])
        setattr_frozen(self, "size", size)

    def add(self, d: Drawing, /) -> Page:
        """Create a new page with the given drawing added

        Parameters
        ----------
        d
            The drawing to add to the page
        """
        # The existing columns are passed along explicitly.
        return Page(
            (*self.content, d), self.size, self.rotate, columns=self.columns
        )

    def render(
        self, r: Resources, s: StyleFull, pnum: int, /
    ) -> Iterator[RenderedPage]:
        """Yield this page as a single rendered page.

        ``pnum`` is accepted but not used by this implementation.
        """
        yield RenderedPage(
            self.rotate,
            self.size,
            flatten(map(methodcaller("render", r, s), self.content)),
        )

    def fill(
        self, r: Resources, s: StyleFull, extra: Iterable[bytes]
    ) -> RenderedPage:
        """Render this page with ``extra`` appended after its own content."""
        return RenderedPage(
            self.rotate,
            self.size,
            chain(
                flatten(map(methodcaller("render", r, s), self.content)),
                extra,
            ),
        )
180 |
181 |
def _column(page: XY, margin: SidesLike) -> Column:
    """Build the single column that remains after subtracting the margins.

    The column starts at the (left, bottom) margin offsets and spans the
    page size minus the margins on both sides of each axis.
    """
    top, right, bottom, left = Sides.parse(margin)
    width = page.x - left - right
    height = page.y - top - bottom
    return Column(XY(left, bottom), width, height)
187 |
--------------------------------------------------------------------------------
/src/pdfje/document.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import os
4 | from dataclasses import dataclass
5 | from itertools import count, islice
6 | from pathlib import Path
7 | from typing import IO, Iterable, Iterator, final, overload
8 |
9 | from . import atoms
10 | from .atoms import OBJ_ID_PAGETREE, OBJ_ID_RESOURCES
11 | from .common import add_slots, flatten, setattr_frozen
12 | from .layout import Block, Paragraph
13 | from .layout.pages import AutoPage
14 | from .page import Page
15 | from .resources import Resources
16 | from .style import Style, StyleFull, StyleLike
17 |
18 | _OBJ_ID_FIRST_PAGE: atoms.ObjectID = OBJ_ID_RESOURCES + 1
19 | _OBJS_PER_PAGE = 2
20 |
21 |
@final
@add_slots
@dataclass(frozen=True, init=False)
class Document:
    """a PDF Document

    Parameters
    ----------

    content
        The content of the document.
    style
        Change the default style of the document.

    Examples
    --------

    Below are some examples of creating documents.

    >>> Document()  # the minimal PDF -- one empty page
    >>> Document("hello world")  # a document with pages of text
    >>> Document([  # document with explicit pages
    ...     Page(...),
    ...     AutoPage([LOREM_IPSUM, ZEN_OF_PYTHON]),
    ...     Page(),
    ... ])


    note
    ----
    A document must contain at least one page to be valid
    """

    pages: Iterable[Page | AutoPage]
    style: Style

    def __init__(
        self,
        content: Iterable[Page | AutoPage] | str | Block | None = None,
        style: StyleLike = Style.EMPTY,
    ) -> None:
        # Normalize the shorthand inputs to an iterable of pages.
        if content is None:
            # No content: a single empty page.
            content = [Page()]
        elif isinstance(content, str):
            # Plain text: one paragraph on automatically generated pages.
            content = [AutoPage([Paragraph(content)])]
        elif isinstance(content, Block):
            # A single block: wrapped in automatically generated pages.
            content = [AutoPage([content])]

        setattr_frozen(self, "pages", content)
        setattr_frozen(self, "style", Style.parse(style))

    @overload
    def write(self) -> Iterator[bytes]: ...

    @overload
    def write(self, target: os.PathLike[str] | str | IO[bytes]) -> None: ...

    def write(  # type: ignore[return]
        self, target: os.PathLike[str] | str | IO[bytes] | None = None
    ) -> Iterator[bytes] | None:
        """Write the document to a given target. If no target is given,
        outputs the binary PDF content iteratively. See examples below.

        Parameters
        ----------
        target: ~os.PathLike | str | ~typing.IO[bytes] | None
            The target to write to. If not given, the PDF content is returned
            as an iterator.

        Returns
        -------
        ~typing.Iterator[bytes] | None

        Examples
        --------

        String, :class:`~pathlib.Path`, or :class:`~os.PathLike` target:

        >>> doc.write("myfolder/foo.pdf")
        >>> doc.write(Path.home() / "documents/foo.pdf")

        Files and file-like objects:

        >>> with open("my/file.pdf", 'wb') as f:
        ...     doc.write(f)
        >>> doc.write(b:= BytesIO())

        Iterator output is useful for streaming PDF contents. Below is
        an example of an HTTP request using the ``httpx`` library.

        >>> httpx.post("https://mysite.foo/upload", content=doc.write(),
        ...            headers={"Content-Type": "application/pdf"})
        """
        if target is None:
            return self._write_iter()
        elif isinstance(target, (str, os.PathLike)):
            self._write_to_path(Path(os.fspath(target)))
        else:  # i.e. IO[bytes]
            target.writelines(self._write_iter())

    def _write_iter(self) -> Iterator[bytes]:
        # Missing style values are filled in with defaults before rendering.
        return atoms.write(_doc_objects(self.pages, self.style.setdefault()))

    def _write_to_path(self, p: Path) -> None:
        # The file is opened in binary mode and closed when writing is done.
        with p.open("wb") as wfile:
            wfile.writelines(self.write())
128 |
129 |
def _doc_objects(
    items: Iterable[Page | AutoPage], style: StyleFull
) -> Iterator[atoms.Object]:
    """Yield all PDF objects of a document.

    The objects of every rendered page come first, followed by the objects
    of the resources and finally the header objects (catalog, page tree and
    resources dictionary).

    Raises
    ------
    RuntimeError
        If rendering the items did not produce a single page.
    """
    res = Resources()
    obj_id = pagenum = 0
    # FUTURE: the scoping of `pagenum` is a bit tricky here. Find a better
    # way to do this -- or add a specific test.
    # The generator expression below is evaluated lazily: each time the next
    # item is rendered, `pagenum` holds the number of pages produced so far,
    # so `pagenum + 1` is the number of the item's first page.
    for pagenum, obj_id, page in zip(
        count(1),
        count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE),
        flatten(p.render(res, style, pagenum + 1) for p in items),
    ):
        yield from page.to_atoms(obj_id)

    # `pagenum` is still 0 if the loop body never ran.
    if not pagenum:
        raise RuntimeError(
            "Cannot write PDF document without at least one page"
        )
    # The objects after the last page's objects belong to the resources.
    first_font_id = obj_id + _OBJS_PER_PAGE

    yield from res.to_objects(first_font_id)
    yield from _write_headers(
        # The number of pages, recovered from the last page's object ID.
        (obj_id - _OBJ_ID_FIRST_PAGE) // _OBJS_PER_PAGE + 1,
        res.to_atoms(first_font_id),
    )
155 |
156 |
# The catalog object: it has a fixed object ID and points at the page tree.
_CATALOG_OBJ = (
    atoms.OBJ_ID_CATALOG,
    atoms.Dictionary(
        (b"Type", atoms.Name(b"Catalog")),
        (b"Pages", atoms.Ref(OBJ_ID_PAGETREE)),
    ),
)
164 |
165 |
def _write_headers(
    num_pages: int, resources: atoms.Dictionary
) -> Iterable[atoms.Object]:
    """Yield the catalog, the page tree, and the resources object.

    The page tree refers to ``num_pages`` page objects, whose IDs start at
    ``_OBJ_ID_FIRST_PAGE`` and are spaced ``_OBJS_PER_PAGE`` apart.
    """
    yield _CATALOG_OBJ

    page_ids = islice(
        count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE), num_pages
    )
    page_tree = atoms.Dictionary(
        (b"Type", atoms.Name(b"Pages")),
        (b"Kids", atoms.Array(map(atoms.Ref, page_ids))),
        (b"Count", atoms.Int(num_pages)),
    )
    yield OBJ_ID_PAGETREE, page_tree

    yield OBJ_ID_RESOURCES, resources
190 |
--------------------------------------------------------------------------------
/src/pdfje/fonts/common.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import abc
4 | from dataclasses import dataclass, field
5 | from itertools import chain, count
6 | from pathlib import Path
7 | from typing import TYPE_CHECKING, Iterable, Tuple, Union, final
8 |
9 | from .. import atoms
10 | from ..atoms import ASCII
11 | from ..common import (
12 | Char,
13 | Func,
14 | Pos,
15 | Pt,
16 | add_slots,
17 | fix_abstract_properties,
18 | setattr_frozen,
19 | )
20 | from ..compat import pairwise
21 |
22 | FontID = bytes # unique, internal identifier assigned to a font within a PDF
23 | GlyphPt = float # length unit in glyph space
24 | TEXTSPACE_TO_GLYPHSPACE = 1000 # See PDF32000-1:2008 (9.7.3)
25 |
26 |
@fix_abstract_properties
class Font(abc.ABC):
    """A specific font within a typeface"""

    __slots__ = ()

    # The identifier of this font within a PDF.
    @property
    @abc.abstractmethod
    def id(self) -> FontID: ...

    # It's worth caching this value, as it is used often
    @property
    @abc.abstractmethod
    def spacewidth(self) -> GlyphPt: ...

    @property
    @abc.abstractmethod
    def encoding_width(self) -> int:
        """The number of bytes assigned to each character when encoding"""

    # Encode the given string to bytes for this font.
    @abc.abstractmethod
    def encode(self, s: str, /) -> bytes: ...

    @abc.abstractmethod
    def width(self, s: str, /) -> Pt:
        """The total width of the given string (excluding kerning)"""

    # The width of a single character, in glyph space units.
    @staticmethod
    @abc.abstractmethod
    def charwidth(c: Char, /) -> GlyphPt: ...

    # The kerning adjustments within `s`, each as a (position, amount) pair.
    # `prev` is the character preceding `s`, if there is one.
    @abc.abstractmethod
    def kern(self, s: str, /, prev: Char | None) -> Iterable[Kern]: ...

    # The kerning adjustment between the two given characters.
    @abc.abstractmethod
    def charkern(self, a: Char, b: Char, /) -> GlyphPt: ...
63 |
64 |
@final
@add_slots
@dataclass(frozen=True, init=False)
class TrueType:
    """A TrueType font to be embedded in a PDF

    Parameters
    ----------
    regular
        The regular (i.e. non-bold, non-italic) .ttf file
    bold
        The bold .ttf file
    italic
        The italic .ttf file
    bold_italic
        The bold italic .ttf file

    """

    regular: Path
    bold: Path
    italic: Path
    bold_italic: Path

    def __init__(
        self,
        regular: Path | str,
        bold: Path | str,
        italic: Path | str,
        bold_italic: Path | str,
    ) -> None:
        # Every argument is normalized to a Path. The class is frozen,
        # so the attributes are set through the helper.
        for attr, location in (
            ("regular", regular),
            ("bold", bold),
            ("italic", italic),
            ("bold_italic", bold_italic),
        ):
            setattr_frozen(self, attr, Path(location))

    # This method cannot be defined in the class body, as it would cause a
    # circular import. The implementation is patched into the class
    # in the `style` module.
    if TYPE_CHECKING:  # pragma: no cover
        from ..common import HexColor
        from ..style import Style, StyleLike

        def __or__(self, _: StyleLike, /) -> Style: ...

        def __ror__(self, _: HexColor, /) -> Style: ...

    def font(self, bold: bool, italic: bool) -> Path:
        """Return the font file for the given bold/italic combination."""
        if bold and italic:
            return self.bold_italic
        if bold:
            return self.bold
        if italic:
            return self.italic
        return self.regular
117 |
118 |
@final
@add_slots
@dataclass(frozen=True, repr=False)
class BuiltinTypeface:
    """A typeface that is built into the PDF renderer."""

    regular: BuiltinFont
    bold: BuiltinFont
    italic: BuiltinFont
    bold_italic: BuiltinFont

    # This method cannot be defined in the class body, as it would cause a
    # circular import. The implementation is patched into the class
    # in the `style` module.
    if TYPE_CHECKING:  # pragma: no cover
        from ..common import HexColor
        from ..style import Style, StyleLike

        def __or__(self, _: StyleLike, /) -> Style: ...

        def __ror__(self, _: HexColor, /) -> Style: ...

    def __repr__(self) -> str:
        # The typeface is identified by the name of its regular font.
        regular_name = self.regular.name.decode()
        return f"{self.__class__.__name__}({regular_name})"

    def font(self, bold: bool, italic: bool) -> BuiltinFont:
        """Return the font for the given bold/italic combination."""
        if bold and italic:
            return self.bold_italic
        if bold:
            return self.bold
        if italic:
            return self.italic
        return self.regular
149 |
150 |
151 | Typeface = Union[BuiltinTypeface, TrueType]
152 |
153 |
@final
@add_slots
@dataclass(frozen=True, eq=False)
class BuiltinFont(Font):
    """A font that is referenced by name instead of being embedded."""

    name: ASCII
    id: FontID
    charwidth: Func[Char, GlyphPt] = field(repr=False)
    # Kerning lookup for pairs of characters, or None if there is none.
    kerning: KerningTable | None = field(repr=False)
    # Derived in ``__post_init__`` from the width of the space character.
    spacewidth: Pt = field(init=False, repr=False)

    # Encoded text uses one byte per character.
    encoding_width = 1

    def __post_init__(self) -> None:
        setattr_frozen(self, "spacewidth", self.charwidth(" "))

    def width(self, s: str) -> Pt:
        # Character widths are in glyph space; the sum is scaled down by
        # the text-space-to-glyph-space factor.
        return sum(map(self.charwidth, s)) / TEXTSPACE_TO_GLYPHSPACE

    @staticmethod
    def encode(s: str) -> bytes:
        # Characters outside cp1252 are replaced rather than raising.
        # FUTURE: normalize unicode to allow better unicode representation
        return s.encode("cp1252", errors="replace")

    def kern(self, s: str, /, prev: Char | None) -> Iterable[Kern]:
        # Without a kerning table there are no adjustments.
        return kern(self.kerning, s, prev) if self.kerning else ()

    def charkern(self, a: Char, b: Char) -> GlyphPt:
        # Without a kerning table the adjustment is 0.
        return self.kerning((a, b)) if self.kerning else 0

    def to_resource(self) -> atoms.Dictionary:
        """The font dictionary describing this font in the PDF."""
        return atoms.Dictionary(
            (b"Type", atoms.Name(b"Font")),
            (b"Subtype", atoms.Name(b"Type1")),
            (b"BaseFont", atoms.Name(self.name)),
            (b"Encoding", atoms.Name(b"WinAnsiEncoding")),
        )
190 |
191 |
192 | KerningTable = Func[Tuple[Char, Char], GlyphPt]
193 | Kern = Tuple[Pos, GlyphPt]
194 |
195 |
196 | def kern(
197 | table: KerningTable,
198 | s: str,
199 | prev: Char | None,
200 | ) -> Iterable[Kern]:
201 | for i, pair in zip(
202 | count(not prev),
203 | pairwise(chain(prev, s) if prev else s),
204 | ):
205 | if space := table(pair):
206 | yield (i, space)
207 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | 🌷 pdfje
2 | ========
3 |
4 | .. image:: https://img.shields.io/pypi/v/pdfje.svg?style=flat-square&color=blue
5 | :target: https://pypi.python.org/pypi/pdfje
6 |
7 | .. image:: https://img.shields.io/pypi/pyversions/pdfje.svg?style=flat-square
8 | :target: https://pypi.python.org/pypi/pdfje
9 |
10 | .. image:: https://img.shields.io/pypi/l/pdfje.svg?style=flat-square&color=blue
11 | :target: https://pypi.python.org/pypi/pdfje
12 |
13 | .. image:: https://img.shields.io/badge/mypy-strict-forestgreen?style=flat-square
14 | :target: https://mypy.readthedocs.io/en/stable/command_line.html#cmdoption-mypy-strict
15 |
16 | .. image:: https://img.shields.io/badge/coverage-99%25-forestgreen?style=flat-square
17 | :target: https://github.com/ariebovenberg/pdfje
18 |
19 | .. image:: https://img.shields.io/github/actions/workflow/status/ariebovenberg/pdfje/tests.yml?branch=main&style=flat-square
20 | :target: https://github.com/ariebovenberg/pdfje
21 |
22 | .. image:: https://img.shields.io/readthedocs/pdfje.svg?style=flat-square
23 | :target: http://pdfje.readthedocs.io/
24 |
25 | ..
26 |
27 | **pdf·je** [`🔉 `_ PDF·yuh] (noun) Dutch for 'small PDF'
28 |
29 | **Write beautiful PDFs in declarative Python.**
30 |
31 | Features
32 | --------
33 |
34 | What makes **pdfje** stand out from the other PDF writers? Here are some of the highlights:
35 |
36 | 🧩 Declarative API
37 | ~~~~~~~~~~~~~~~~~~
38 |
39 | In most PDF writers, you create empty objects and
40 | then mutate them with methods like ``addText()``,
41 | all while changing the state with methods like ``setFont()``.
42 | **Pdfje** is different. You describe the document you want to write,
43 | and pdfje takes care of the details. No state to manage, no mutations.
44 | This makes your code easier to reuse and reason about.
45 |
46 | .. code-block:: python
47 |
48 | from pdfje import Document
49 | Document("Olá Mundo!").write("hello.pdf")
50 |
51 | See `the tutorial `_
52 | for a complete overview of features, including:
53 |
54 | - Styling text including font, size, and color
55 | - Automatic layout of text into one or more columns
56 | - Builtin and embedded fonts
57 | - Drawing basic shapes
58 |
59 | See the roadmap_ for supported features.
60 |
61 | 📖 Decent typography
62 | ~~~~~~~~~~~~~~~~~~~~
63 |
64 | Legibility counts. Good typography is a key part of that.
65 | **Pdfje** supports several features to make your documents look great:
66 |
67 | - Visually pleasing linebreaks, using the `same basic principles as LaTeX `_
68 | - Automatic `kerning `_ using available font metrics
69 | - Avoiding `widows and orphans `_ by moving
70 | lines between columns or pages.
71 |
72 | .. image:: https://github.com/ariebovenberg/pdfje/raw/main/sample.png
73 | :alt: Sample document with two columns of text
74 |
75 | 🎈 Small footprint
76 | ~~~~~~~~~~~~~~~~~~
77 |
78 | The PDF format supports many features, but most of the time you only need a few.
79 | Why install many dependencies — just to write a simple document?
80 | Not only is **pdfje** pure-Python, it allows you to
81 | install only the dependencies you need.
82 |
83 | .. code-block:: bash
84 |
85 | pip install pdfje # no dependencies
86 | pip install 'pdfje[fonts,hyphens]' # embedded fonts and improved hyphenation
87 |
88 | .. _roadmap:
89 |
90 | Roadmap
91 | -------
92 |
93 | **Pdfje** has basic functionality,
94 | but is not yet feature-complete.
95 | Until the 1.0 version, the API may change with minor releases.
96 |
97 | Features:
98 |
99 | ✅ = implemented, 🚧 = may be planned, ❌ = not planned
100 |
101 | - Typesetting
102 | - ✅ Automatic kerning
103 | - ✅ Wrapping text into lines, columns, and pages
104 | - ✅ Page sizes
105 | - ✅ Centering text
106 | - ✅ Justification
107 | - ✅ Hyphenation
108 | - ✅ Move lines between columns/pages to avoid widows/orphans
109 | - ✅ TeX-style line breaking
110 | - 🚧 Headings (which stick to their paragraphs)
111 | - 🚧 Indentation
112 | - 🚧 Keeping layout elements together
113 | - 🚧 Loosening paragraphs to avoid orphans/widows
114 | - 🚧 Broader unicode support in text wrapping
115 | - Drawing operations
116 | - ✅ Lines
117 | - ✅ Rectangles
118 | - ✅ Circles, ellipses
119 | - 🚧 Arbitrary paths, fills, and strokes
120 | - Text styling
121 | - ✅ Font and size
122 | - ✅ Embedded fonts
123 | - ✅ Colors
124 | - ✅ Bold, italic
125 | - 🚧 Underline and strikethrough
126 | - 🚧 Superscript and subscript
127 | - ❌ Complex fill patterns
128 | - 🚧 Images
129 | - 🚧 Bookmarks and links
130 | - 🚧 Tables
131 | - 🚧 Bullet/numbered lists
132 | - 🚧 Inline markup with Markdown (Commonmark/MyST)
133 | - ❌ Emoji
134 | - ❌ Tables of contents
135 | - ❌ Forms
136 | - ❌ Annotations
137 |
138 | Versioning and compatibility policy
139 | -----------------------------------
140 |
141 | **Pdfje** follows semantic versioning.
142 | Until the 1.0 version, the API may change with minor releases.
143 | Breaking changes will be announced in the changelog.
144 | Since the API is fully typed, your typechecker and/or IDE
145 | will help you adjust to any API changes.
146 |
147 | License
148 | -------
149 |
150 | This library is licensed under the terms of the MIT license.
151 | It also includes short scripts from other projects (see ``pdfje/vendor``),
152 | which are either also MIT licensed, or in the public domain.
153 |
154 | Contributing
155 | ------------
156 |
157 | Here are some useful tips for developing in the ``pdfje`` codebase itself:
158 |
159 | - Install dependencies with ``poetry install``.
160 | - To write output files during tests, use ``pytest --output-path=<directory>``
161 | - To also run more comprehensive but 'slow' tests, use ``pytest --runslow``
162 |
163 | Acknowledgements
164 | ----------------
165 |
166 | **pdfje** is inspired by the following projects.
167 | If you're looking for a PDF writer, you may want to check them out as well:
168 |
169 | - `python-typesetting `_
170 | - `fpdf2 `_
171 | - `ReportLab `_
172 | - `WeasyPrint `_
173 | - `borb `_
174 | - `wkhtmltopdf `_
175 | - `pydyf `_
176 |
--------------------------------------------------------------------------------
/resources/scripts/parse_afm.py:
--------------------------------------------------------------------------------
1 | """Script to extract font metrics from .afm files.
2 |
3 | Usage: `python parse_afm.py <path-to-afm-file>`
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | import sys
9 |
10 | from fontTools.afmLib import AFM
11 |
12 |
def print_widths() -> None:
    """Print the character widths of the AFM file given on the command line.

    One ``<code point>: <width>,`` line is printed per character in
    ``NAMES``, ordered by code point.
    """
    afm = AFM(sys.argv[1])
    widths = {}
    for name, char in NAMES.items():
        # Use the character without its surrounding whitespace, unless it
        # consists of whitespace only.
        codepoint = ord(char.strip() or char)
        widths[codepoint] = afm[name][1]
    for codepoint, width in sorted(widths.items()):
        print(f"{codepoint}: {width},")
20 |
21 |
def print_kern() -> None:
    """Print the kerning pairs of the AFM file given on the command line.

    One ``<character pair>: <value>,`` line is printed per kerning pair,
    ordered by character pair.
    """
    afm = AFM(sys.argv[1])
    pairs = {}
    for (first, second), value in afm._kerning.items():
        # Ignore characters we can't encode cp1252 anyway
        if first in NAMES and second in NAMES:
            pairs[NAMES[first], NAMES[second]] = value
    for pair, value in sorted(pairs.items()):
        print(f"{pair}: {value},")
32 |
33 |
# Maps the glyph names used as keys in the AFM tables (see ``print_kern``)
# to the text they stand for. Glyphs that are absent from this table are
# skipped when the tables are printed. The values must be unique; this is
# asserted right after the table.
# NOTE(review): some values (e.g. "Ł" for "Lslash") cannot be encoded in
# cp1252 -- presumably why the cp1252 check further down is commented out.
# NOTE(review): several accent entries ("acute", "breve", "cedilla", ...) are
# a space followed by a combining mark, and "fi"/"fl" are two letters each --
# confirm these are the intended values rather than the single spacing
# characters/ligatures.
NAMES = {
    "A": "A",
    "AE": "Æ",
    "Aacute": "Á",
    "Acircumflex": "Â",
    "Adieresis": "Ä",
    "Agrave": "À",
    "Aring": "Å",
    "Atilde": "Ã",
    "B": "B",
    "C": "C",
    "Ccedilla": "Ç",
    "D": "D",
    "E": "E",
    "Eacute": "É",
    "Ecircumflex": "Ê",
    "Edieresis": "Ë",
    "Egrave": "È",
    "Eth": "Ð",
    "Euro": "€",
    "F": "F",
    "G": "G",
    "H": "H",
    "I": "I",
    "Iacute": "Í",
    "Icircumflex": "Î",
    "Idieresis": "Ï",
    "Igrave": "Ì",
    "J": "J",
    "K": "K",
    "L": "L",
    "Lslash": "Ł",
    "M": "M",
    "N": "N",
    "Ntilde": "Ñ",
    "O": "O",
    "OE": "Œ",
    "Oacute": "Ó",
    "Ocircumflex": "Ô",
    "Odieresis": "Ö",
    "Ograve": "Ò",
    "Oslash": "Ø",
    "Otilde": "Õ",
    "P": "P",
    "Q": "Q",
    "R": "R",
    "S": "S",
    "Scaron": "Š",
    "T": "T",
    "Thorn": "Þ",
    "U": "U",
    "Uacute": "Ú",
    "Ucircumflex": "Û",
    "Udieresis": "Ü",
    "Ugrave": "Ù",
    "V": "V",
    "W": "W",
    "X": "X",
    "Y": "Y",
    "Yacute": "Ý",
    "Ydieresis": "Ÿ",
    "Z": "Z",
    "Zcaron": "Ž",
    "a": "a",
    "aacute": "á",
    "acircumflex": "â",
    "acute": " ́",
    "adieresis": "ä",
    "ae": "æ",
    "agrave": "à",
    "ampersand": "&",
    "aring": "å",
    "asciicircum": "^",
    "asciitilde": "~",
    "asterisk": "*",
    "at": "@",
    "atilde": "ã",
    "b": "b",
    "backslash": "\\",
    "bar": "|",
    "braceleft": "{",
    "braceright": "}",
    "bracketleft": "[",
    "bracketright": "]",
    "breve": " ̆",
    "brokenbar": "¦",
    "bullet": "•",
    "c": "c",
    "caron": "ˇ",
    "ccedilla": "ç",
    "cedilla": " ̧",
    "cent": "¢",
    "circumflex": "ˆ",
    "colon": ":",
    "comma": ",",
    "copyright": "©",
    "currency": "¤",
    "d": "d",
    "dagger": "†",
    "daggerdbl": "‡",
    "degree": "°",
    "dieresis": " ̈",
    "divide": "÷",
    "dollar": "$",
    "dotaccent": " ̇",
    "dotlessi": "ı",
    "e": "e",
    "eacute": "é",
    "ecircumflex": "ê",
    "edieresis": "ë",
    "egrave": "è",
    "eight": "8",
    "ellipsis": "…",
    "emdash": "—",
    "endash": "–",
    "equal": "=",
    "eth": "ð",
    "exclam": "!",
    "exclamdown": "¡",
    "f": "f",
    "fi": "fi",
    "five": "5",
    "fl": "fl",
    "florin": "ƒ",
    "four": "4",
    "fraction": "⁄",
    "g": "g",
    "germandbls": "ß",
    "grave": "`",
    "greater": ">",
    "guillemotleft": "«",
    "guillemotright": "»",
    "guilsinglleft": "‹",
    "guilsinglright": "›",
    "h": "h",
    "hungarumlaut": " ̋",
    "hyphen": "-",
    "i": "i",
    "iacute": "í",
    "icircumflex": "î",
    "idieresis": "ï",
    "igrave": "ì",
    "j": "j",
    "k": "k",
    "l": "l",
    "less": "<",
    "logicalnot": "¬",
    "lslash": "ł",
    "m": "m",
    "macron": " ̄",
    "minus": "−",
    "mu": "μ",
    "multiply": "×",
    "n": "n",
    "nine": "9",
    "ntilde": "ñ",
    "numbersign": "#",
    "o": "o",
    "oacute": "ó",
    "ocircumflex": "ô",
    "odieresis": "ö",
    "oe": "œ",
    "ogonek": " ̨",
    "ograve": "ò",
    "one": "1",
    "onehalf": "½",
    "onequarter": "¼",
    "onesuperior": "¹",
    "ordfeminine": "ª",
    "ordmasculine": "º",
    "oslash": "ø",
    "otilde": "õ",
    "p": "p",
    "paragraph": "¶",
    "parenleft": "(",
    "parenright": ")",
    "percent": "%",
    "period": ".",
    "periodcentered": "·",
    "perthousand": "‰",
    "plus": "+",
    "plusminus": "±",
    "q": "q",
    "question": "?",
    "questiondown": "¿",
    "quotedbl": '"',
    "quotedblbase": "„",
    "quotedblleft": "“",
    "quotedblright": "”",
    "quoteleft": "‘",
    "quoteright": "’",
    "quotesinglbase": "‚",
    "quotesingle": "'",
    "r": "r",
    "registered": "®",
    "ring": " ̊",
    "s": "s",
    "scaron": "š",
    "section": "§",
    "semicolon": ";",
    "seven": "7",
    "six": "6",
    "slash": "/",
    "space": " ",
    "sterling": "£",
    "t": "t",
    "thorn": "þ",
    "three": "3",
    "threequarters": "¾",
    "threesuperior": "³",
    "tilde": " ̃",
    "trademark": "™",
    "two": "2",
    "twosuperior": "²",
    "u": "u",
    "uacute": "ú",
    "ucircumflex": "û",
    "udieresis": "ü",
    "ugrave": "ù",
    "underscore": "_",
    "v": "v",
    "w": "w",
    "x": "x",
    "y": "y",
    "yacute": "ý",
    "ydieresis": "ÿ",
    "yen": "¥",
    "z": "z",
    "zcaron": "ž",
    "zero": "0",
}
265 |
# Empty name table; nothing in the visible part of this script reads it.
# NOTE(review): presumably a placeholder for the ZapfDingbats glyph names.
ZAPF_NAMES = {}


# Sanity check: no two glyph names may map to the same text, otherwise
# entries keyed by the mapped text (as in ``print_kern``) would collide.
assert len(NAMES) == len(set(NAMES.values()))
# for char in NAMES.values():
#     char.encode("cp1252")


if __name__ == "__main__":
    # Only the kerning table is printed; the width table is switched off.
    # print_widths()
    print_kern()
278 |
--------------------------------------------------------------------------------
/resources/sample.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pathlib import Path
4 |
5 | from pdfje import XY, AutoPage, Column, Document, Page
6 | from pdfje.draw import Text
7 | from pdfje.fonts import TrueType
8 | from pdfje.layout import Paragraph
9 | from pdfje.layout.paragraph import LinebreakParams
10 | from pdfje.style import Span, Style, italic
11 | from pdfje.units import inch, mm
12 |
13 |
def main() -> None:
    """Lay out the sample chapter and write it to ``sample.pdf``.

    The chapter is flowed over pages built from ``TEMPLATE``, with
    ``CRIMSON`` passed as the document style.
    """
    pages = [AutoPage(chapter, template=TEMPLATE)]
    document = Document(pages, style=CRIMSON)
    document.write("sample.pdf")
18 |
19 |
# Page size: 8 by 3.6 inches.
PAGESIZE = XY(inch(8), inch(3.6))
# Page template used for every page of the sample (see ``main``): a running
# title plus two text columns.
TEMPLATE = Page(
    [
        # The title in small text at the top of the page
        Text(
            # Position: half the page width across, and 5 mm less than
            # the page height.
            (PAGESIZE.x / 2, PAGESIZE.y - mm(5)),
            "The Adventures of Sherlock Holmes",
            Style(size=10, italic=True),
            align="center",
        ),
    ],
    size=PAGESIZE,
    # Two columns, one in each half of the page. The first argument puts a
    # column 15 mm in from the left edge of its half and 15 mm up; the other
    # two are half the page width and the full page height, each less 30 mm.
    # NOTE(review): presumably (origin, width, height), i.e. 15 mm margins
    # around each column -- confirm against ``Column``.
    columns=[
        Column(
            (mm(15), mm(15)),
            PAGESIZE.x / 2 - mm(30),
            PAGESIZE.y - mm(30),
        ),
        Column(
            (PAGESIZE.x / 2 + mm(15), mm(15)),
            PAGESIZE.x / 2 - mm(30),
            PAGESIZE.y - mm(30),
        ),
    ],
)
45 |
# The Crimson Text typeface. The font files are passed in the order
# regular, bold, italic, bold-italic, each resolved relative to this file.
CRIMSON = TrueType(
    *(
        Path(__file__).parent / relative_path
        for relative_path in (
            "../resources/fonts/CrimsonText-Regular.ttf",
            "../resources/fonts/CrimsonText-Bold.ttf",
            "../resources/fonts/CrimsonText-Italic.ttf",
            "../resources/fonts/CrimsonText-BoldItalic.ttf",
        )
    )
)
52 |
53 |
def flatten_newlines(txt: str) -> str:
    """Unwrap hard-wrapped text, keeping blank lines as line breaks.

    The text is cut at every blank line (two consecutive newlines). Inside
    each piece the remaining newlines become single spaces, and the pieces
    are then joined back together with one newline each.
    """
    pieces = txt.split("\n\n")
    unwrapped = [" ".join(piece.split("\n")) for piece in pieces]
    return "\n".join(unwrapped)
56 |
57 |
# Extract from https://www.gutenberg.org/ebooks/1661
# The content alternates plain strings with italic ``Span``s. The plain
# pieces go through ``flatten_newlines``, so the hard line wraps of the
# source text turn into spaces and only its blank lines remain as newlines.
chapter = Paragraph(
    [
        flatten_newlines(
            """“To the man who loves art for its own sake,” remarked Sherlock
Holmes, tossing aside the advertisement sheet of"""
        ),
        Span(" The Daily Telegraph", italic),
        flatten_newlines(
            """, “it is
frequently in its least important and lowliest manifestations that the
keenest pleasure is to be derived. It is pleasant to me to observe,
Watson, that you have so far grasped this truth that in these little
records of our cases which you have been good enough to draw up, and, I
am bound to say, occasionally to embellish, you have given prominence
not so much to the many """
        ),
        Span("causes célèbres", italic),
        flatten_newlines(
            """ and sensational trials in
which I have figured but rather to those incidents which may have been
trivial in themselves, but which have given room for those faculties of
deduction and of logical synthesis which I have made my special
province.”

“And yet,” said I, smiling, “I cannot quite hold myself absolved from
the charge of sensationalism which has been urged against my records.”

“You have erred, perhaps,” he observed, taking up a glowing cinder with
the tongs and lighting with it the long cherry-wood pipe which was wont
to replace his clay when he was in a disputatious rather than a
meditative mood—“you have erred perhaps in attempting to put colour and
life into each of your statements instead of confining yourself to the
task of placing upon record that severe reasoning from cause to effect
which is really the only notable feature about the thing.”

“It seems to me that I have done you full justice in the matter,” I
remarked with some coldness, for I was repelled by the egotism which I
had more than once observed to be a strong factor in my friend’s
singular character.

“No, it is not selfishness or conceit,” said he, answering, as was his
wont, my thoughts rather than my words. “If I claim full justice for my
art, it is because it is an impersonal thing—a thing beyond myself.
Crime is common. Logic is rare. Therefore it is upon the logic rather
than upon the crime that you should dwell. You have degraded what
should have been a course of lectures into a series of tales.”

It was a cold morning of the early spring, and we sat after breakfast
on either side of a cheery fire in the old room at Baker Street. A
thick fog rolled down between the lines of dun-coloured houses, and the
opposing windows loomed like dark, shapeless blurs through the heavy
yellow wreaths. Our gas was lit and shone on the white cloth and
glimmer of china and metal, for the table had not been cleared yet.
Sherlock Holmes had been silent all the morning, dipping continuously
into the advertisement columns of a succession of papers until at last,
having apparently given up his search, he had emerged in no very sweet
temper to lecture me upon my literary shortcomings.

“At the same time,” he remarked after a pause, during which he had sat
puffing at his long pipe and gazing down into the fire, “you can hardly
be open to a charge of sensationalism, for out of these cases which you
have been so kind as to interest yourself in, a fair proportion do not
treat of crime, in its legal sense, at all. The small matter in which I
endeavoured to help the King of Bohemia, the singular experience of
Miss Mary Sutherland, the problem connected with the man with the
twisted lip, and the incident of the noble bachelor, were all matters
which are outside the pale of the law. But in avoiding the sensational,
I fear that you may have bordered on the trivial.”

“The end may have been so,” I answered, “but the methods I hold to have
been novel and of interest.”

“Pshaw, my dear fellow, what do the public, the great unobservant
public, who could hardly tell a weaver by his tooth or a compositor by
his left thumb, care about the finer shades of analysis and deduction!
But, indeed, if you are trivial, I cannot blame you, for the days of
the great cases are past. Man, or at least criminal man, has lost all
enterprise and originality. As to my own little practice, it seems to
be degenerating into an agency for recovering lost lead pencils and
giving advice to young ladies from boarding-schools. I think that I
have touched bottom at last, however. This note I had this morning
marks my zero-point, I fancy. Read it!” He tossed a crumpled letter
across to me.
"""
        ),
    ],
    align="justify",
    # NOTE(review): presumably the first-line indent, in points -- confirm
    # against ``Paragraph``.
    indent=18,
    # Settings for the optimum-fit line breaking algorithm.
    optimal=LinebreakParams(
        tolerance=1,
        hyphen_penalty=1000,
    ),
    avoid_orphans=False,
)
153 |
154 |
# Generate the sample PDF only when this file is run as a script.
if __name__ == "__main__":
    main()
157 |
--------------------------------------------------------------------------------
/src/pdfje/typeset/firstfit.py:
--------------------------------------------------------------------------------
1 | "A simple first-fit line wrapping algorithm."
2 | from __future__ import annotations
3 |
4 | from dataclasses import dataclass
5 | from itertools import tee
6 | from typing import Iterable, Iterator, NamedTuple, Sequence
7 |
8 | from ..atoms import LiteralStr, Real
9 | from ..common import XY, Align, NonEmptyIterator, Pt, add_slots, prepend
10 | from .layout import Line as _Line
11 | from .layout import ShapedText
12 | from .words import WordLike, render_kerned
13 |
14 |
def shape(
    words: Iterator[WordLike],
    columns: Iterator[XY],
    allow_empty: bool,
    lead: Pt,
    avoid_orphans: bool,
    align: Align,
) -> Iterator[ShapedText]:
    """Wrap ``words`` into lines and distribute them over ``columns``.

    Returns a lazy iterator with one ``ShapedText`` per column that was
    filled. ``avoid_orphans`` selects the column-filling strategy. When
    ``align`` is ``Align.JUSTIFY``, every line is passed through
    ``Line.justify`` before being wrapped up.
    """
    fill_columns = _shape_avoid_orphans if avoid_orphans else _shape_simple
    boxes = fill_columns(words, columns, allow_empty, lead)
    if align is Align.JUSTIFY:
        return (
            ShapedText(
                [Line.justify(ln) for ln in lines],
                lead,
                align,
                len(lines) * lead,
            )
            for lines in boxes
        )
    return (
        ShapedText(lines, lead, align, len(lines) * lead) for lines in boxes
    )
37 |
38 |
def _shape_avoid_orphans(
    ws: Iterator[WordLike] | None,
    columns: Iterator[XY],
    allow_empty: bool,
    lead: Pt,
) -> Iterator[Sequence[Line]]:
    """Fill successive columns with lines, trying not to strand single lines.

    Yields the lines of each column that was consumed, in order. Two
    situations get special treatment:

    - If the first column takes exactly one line while words remain, and
      ``allow_empty`` is set, that column is left empty and the paragraph
      starts over in the next column.
    - If the paragraph ends with a single line in a column, the last line
      of the previous column is moved over to join it, provided the
      previous column has more than two lines, the current column is at
      least two leads tall, and the move actually produces more than one
      line.
    """
    col = next(columns)
    ws, lines, ws_undo = take_box(ws, col, allow_empty, lead)
    # In case of an avoidable orphan, start over
    if ws and len(lines) == 1 and allow_empty:
        # Rewind to the words as they were before that single line was
        # taken, and leave the first column empty.
        ws = ws_undo
        lines = ()
    elif not ws:
        # No words are left: everything went into the first column.
        yield lines
        return

    col = next(columns)
    while True:
        # At this point `lines` belongs to the previous column and has not
        # been yielded yet, so it can still be shortened below.
        lines_prev = lines
        ws_undo_prev = ws_undo
        # Beyond the first column, a column is never allowed to stay empty.
        ws, lines, ws_undo = take_box(ws, col, False, lead)
        # case: paragraph not done. Continue to next column.
        if ws:
            yield lines_prev
            col = next(columns)
        # case: a potentially fixable orphan
        # (`col.y` is the column height, going by how `take_box` unpacks
        # its box into width and height.)
        elif len(lines) == 1 and len(lines_prev) > 2 and col.y >= lead * 2:
            # FUTURE: optimize the case where the column widths are the same,
            # and we don't need to re-typeset the last line.
            assert ws_undo_prev is not None
            # Trial run on a copy of the words, starting from the previous
            # column's last line, to see how many lines that gives here.
            ws_undo_prev, _branch = tee(ws_undo_prev)
            _, _lines_new, ws_undo = take_box(_branch, col, False, lead)
            if len(_lines_new) == 1:
                break  # our attempt to fix the orphan failed. We're done.
            else:
                # Take the last line off the previous column; the next loop
                # iteration typesets this same column again from that line.
                lines = lines_prev[:-1]
                ws = ws_undo_prev
        # case: we're done, but no (fixable) orphan.
        else:
            break

    # Emit the final two columns.
    yield lines_prev
    yield lines
82 |
83 |
84 | # filling is a lot simpler if we don't avoid orphaned lines.
def _shape_simple(
    ws: Iterator[WordLike] | None,
    columns: Iterator[XY],
    allow_empty: bool,
    lead: Pt,
) -> Iterator[Sequence[Line]]:
    """Fill the columns one after another until the words run out.

    Yields the lines placed in each column. ``allow_empty`` only applies to
    the first column; every later column is filled with it switched off.
    """
    for box in columns:  # pragma: no branch
        filled = take_box(ws, box, allow_empty, lead)
        ws = filled.rest
        # Only the very first column gets the caller's `allow_empty`.
        allow_empty = False
        yield filled.lines
        if not ws:
            break
97 |
98 |
class _FilledBox(NamedTuple):
    """The outcome of filling one box with lines, as built by ``take_box``."""

    # Words still to be placed after this box, or None once all words have
    # been consumed.
    rest: NonEmptyIterator[WordLike] | None
    # The lines placed in the box, top to bottom.
    lines: Sequence[Line]
    # The words as they stood just before the last of `lines` was taken
    # (the unchanged input if no line was taken). Lets a caller undo the
    # last line.
    rest_incl_lastline: NonEmptyIterator[WordLike] | None
103 |
104 |
def take_box(
    queue: NonEmptyIterator[WordLike] | None,
    space: XY,
    allow_empty: bool,
    lead: Pt,
) -> _FilledBox:
    """Fill a box of size ``space`` with lines taken from ``queue``.

    Lines are ``lead`` apart, so at most ``height // lead`` of them are
    taken. If not even one line fits, a single line is placed anyway unless
    ``allow_empty`` is set. The result also carries the words as they were
    before the last line was taken, so a caller can undo that line.
    """
    width, height = space
    max_line_count: float = height // lead
    if not max_line_count:
        # Not even one line fits: force a single line (True counts as 1)
        # unless the box may stay empty (False counts as 0).
        max_line_count = not allow_empty
    placed: list[Line] = []
    before_last_line = queue
    while queue and len(placed) < max_line_count:
        # OPTIMIZE: it'd be more efficient to only 'tee' on the last line
        queue, before_last_line = tee(queue)
        queue, line = take_line(queue, width)
        placed.append(line)
    return _FilledBox(
        rest=queue,
        lines=placed,
        rest_incl_lastline=before_last_line,
    )
121 |
122 |
def take_line(
    ws: NonEmptyIterator[WordLike], width: Pt
) -> tuple[NonEmptyIterator[WordLike] | None, Line]:
    """Take words from ``ws`` to build one line for a column ``width`` wide.

    Returns a ``(rest, line)`` pair. ``rest`` holds the words that remain,
    or is ``None`` when this line used up the last of them. A line that
    ends the words is created with zero leftover space; any other line
    records the space it left unused.
    """
    # Width still available on this line.
    space = width
    content: list[WordLike] = []

    for word in ws:
        # Fit is judged on the pruned width, but the full width is
        # subtracted, so `space` can dip below zero here.
        if word.pruned_width() > space:
            break

        space -= word.width
        content.append(word)
    else:
        # i.e. this is the last line of the paragraph
        return (None, Line(tuple(content), width - space, 0))

    # `word` is the first word that did not fit. Ask it to split for the
    # space that is left; the part that does not fit goes back in front of
    # the remaining words.
    last_word, dangling = word.hyphenate(space)
    ws = prepend(dangling, ws)
    if last_word:
        space -= last_word.width
        content.append(last_word)
    elif content and (extra_space := content[-1].prunable_space()):
        # No part of the word fits, so the previous word ends the line:
        # swap it for its pruned form and reclaim the space that frees up.
        content[-1] = content[-1].pruned()
        space += extra_space
    elif not content:
        # We force placing at least a minimal word fragment to avoid
        # infinitely waiting for enough width.
        # This shouldn't occur in practice often, where the column
        # width is much larger than the longest word segment.
        word, leftover = next(ws).minimal_box()
        if leftover:
            ws = prepend(leftover, ws)
        else:
            # An extra check is needed to tell whether this is the last
            # word in the paragraph.
            try:
                ws = prepend(next(ws), ws)
            except StopIteration:
                return (None, Line((word,), word.width, 0))
        content = [word]
        # May leave `space` negative when the fragment is wider than the
        # line.
        space -= word.width

    return (ws, Line(tuple(content), width - space, space))
166 |
167 |
@add_slots
@dataclass(frozen=True)
class Line(_Line):
    """A line of words, the width they occupy, and the space left over."""

    words: tuple[WordLike, ...]
    width: Pt
    space: Pt

    def justify(self) -> Line:
        """Return a line whose word tails absorb the leftover ``space``.

        The space is shared out in proportion to the font size at each word
        break. The line itself is returned when the division by the total
        break size fails, i.e. when there are no word breaks to stretch.
        """
        # Total font size over all words that end in a word break.
        break_weight = sum(w.state.size for w in self.words if w.tail)
        try:
            # The additional width per word break, weighted by the font
            # size, which is needed to justify the text.
            width_per_break = self.space / break_weight
        except ZeroDivisionError:
            return self  # No word breaks means no justification.
        stretched = tuple(
            w.extend_tail(width_per_break * w.state.size) for w in self.words
        )
        return Line(stretched, self.width + self.space, 0)

    def __iter__(self) -> Iterator[bytes]:
        """Yield the encoded bytes that make up this line."""
        # Each word yields its own output and hands back the content that
        # is still pending; that is passed on to the next word.
        pending: Iterable[Real | LiteralStr] = ()
        for word in self.words:
            pending = yield from word.encode_into_line(pending)
        yield from render_kerned(pending)
198 |
--------------------------------------------------------------------------------
/src/pdfje/layout/paragraph.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from dataclasses import dataclass
4 | from functools import partial
5 | from itertools import tee
6 | from typing import (
7 | ClassVar,
8 | Iterable,
9 | Iterator,
10 | Literal,
11 | Protocol,
12 | Sequence,
13 | cast,
14 | final,
15 | )
16 |
17 | from ..common import XY, Align, Pt, add_slots, advance, prepend, setattr_frozen
18 | from ..resources import Resources
19 | from ..style import Span, Style, StyledMixin, StyleFull, StyleLike
20 | from ..typeset import firstfit, optimum
21 | from ..typeset.layout import ShapedText
22 | from ..typeset.parse import into_words
23 | from ..typeset.state import Passage, State, max_lead, splitlines
24 | from ..typeset.words import WordLike, indent_first
25 | from .common import Block, ColumnFill
26 |
27 |
@add_slots
@dataclass(frozen=True)
class LinebreakParams:
    """Parameters for tweaking the optimum-fit algorithm.

    Parameters
    ----------
    tolerance
        The tolerance for the stretch of each line.
        If no feasible solution is found, the tolerance is increased until
        there is one.
        Increase the tolerance if you want to avoid hyphenation
        at the cost of more stretching and longer runtime.
    hyphen_penalty
        The penalty for hyphenating a word. If increasing this value does
        not result in fewer hyphens, try increasing the tolerance.
    consecutive_hyphen_penalty
        The penalty for placing hyphens on consecutive lines. If increasing
        this value does not appear to work, try increasing the tolerance.
    fitness_diff_penalty
        The penalty for very tight and very loose lines following each other.
    """

    tolerance: float = 1
    hyphen_penalty: float = 1000
    consecutive_hyphen_penalty: float = 1000
    fitness_diff_penalty: float = 1000

    # An instance with all-default settings. Only declared here; the value
    # is assigned right after the class body, because the class cannot be
    # instantiated from inside its own definition.
    DEFAULT: ClassVar["LinebreakParams"]


# Fill in the class-level default declared above.
LinebreakParams.DEFAULT = LinebreakParams()
60 |
61 |
62 | @final
63 | @add_slots
64 | @dataclass(frozen=True, init=False)
65 | class Paragraph(Block, StyledMixin):
66 | """A :class:`Block` that renders a paragraph of text.
67 |
68 | Parameters
69 | ----------
70 | content
71 | The text to render. Can be a string, or a nested :class:`~pdfje.Span`.
72 | style
73 | The style to render the text with.
74 | See :ref:`tutorial