├── .editorconfig ├── .github ├── FUNDING.yml └── workflows │ └── ci.yaml ├── LICENSE ├── Makefile ├── README.md ├── pagebreak.lua └── test ├── expected.adoc ├── expected.html ├── expected.ms ├── expected.no-form-feed.html ├── expected.typst ├── input.md ├── test-adoc.yaml ├── test-html.yaml ├── test-ms.yaml ├── test-no-form-feed.html.yaml └── test-typst.yaml /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | [*] 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | 13 | [Makefile] 14 | indent_style = tab 15 | 16 | [*.lua] 17 | indent_style = space 18 | indent_size = 2 19 | # Code should stay below 80 characters per line. 20 | max_line_length = 80 21 | 22 | [*.md] 23 | # Text with 60 to 66 characters per line is said to be the easiest 24 | # to read. 25 | max_line_length = 66 26 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [tarleb] 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | # Run on all pull requests that change code. 5 | pull_request: 6 | paths-ignore: 7 | - 'README.md' 8 | - LICENSE 9 | - .editorconfig 10 | # Run every time a code change is pushed. 11 | push: 12 | paths-ignore: 13 | - 'README.md' 14 | - LICENSE 15 | - .editorconfig 16 | # Test if things still work each Tuesday morning. 17 | # This way we will catch incompatible pandoc changes in a timely 18 | # manner. 
19 | schedule: 20 | # At 4:33am each Tuesday 21 | - cron: '33 4 * * 2' 22 | 23 | jobs: 24 | test: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | fail-fast: true 28 | matrix: 29 | pandoc: 30 | - latest 31 | # The oldest version that's guaranteed to be supported. 32 | # Older versions down to 2.17 should work, too, but aren't 33 | # tested, as there have been syntax changes in the generated 34 | # output. 35 | - 3.7.0.1 36 | 37 | container: 38 | image: pandoc/core:${{ matrix.pandoc }} 39 | 40 | steps: 41 | - name: Checkout 42 | uses: actions/checkout@v4 43 | 44 | - name: Install dependencies 45 | run: apk add make 46 | 47 | - name: Test 48 | run: make test 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright © 2017-2025 Albert Krewinkel, Benct Philip Jonsson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
# Lua filter(s) under test; listed as a prerequisite of every test target.
FILTER_FILE := $(wildcard *.lua)
# Tools can be overridden from the command line or the environment,
# e.g. `make DIFF='diff -u' PANDOC=/opt/pandoc/bin/pandoc test`.
DIFF ?= diff
PANDOC ?= pandoc

# Run the complete test suite.
test: test-adoc test-html test-ms test-typst test-no-form-feed.html

# The typst test converts *to* typst, so it must check for the typst
# writer (output format), not the reader.  The test is skipped silently
# when the pandoc binary in use cannot write typst.
test-typst: $(FILTER_FILE) test/input.md test/test-typst.yaml
	@if $(PANDOC) --list-output-formats | grep -q typst; then \
		$(PANDOC) --defaults test/test-typst.yaml | \
			$(DIFF) test/expected.typst -; \
	fi

# Generic test: run pandoc with the defaults file test/test-<name>.yaml
# and compare the result with test/expected.<name>.
test-%: $(FILTER_FILE) test/input.md test/test-%.yaml
	@$(PANDOC) --defaults test/test-$*.yaml | \
		$(DIFF) test/expected.$* -

# Only targets with explicit rules are declared phony.  The goals that
# are matched by the `test-%` pattern rule must NOT be listed here, as
# make skips the implicit rule search for phony targets.
.PHONY: test test-typst
18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pagebreak 2 | ========= 3 | 4 | This filter converts paragraps containing only the LaTeX 5 | `\newpage` or `\pagebreak` command into appropriate pagebreak 6 | markup for other formats. The command must be the only contents 7 | of a raw TeX block in order to be recognized. I.e., for Markdown 8 | the following is sufficient: 9 | 10 | Paragraph before page break 11 | 12 | \newpage 13 | 14 | Paragraph after page break 15 | 16 | Fully supported output formats are: 17 | 18 | - AsciiDoc / Asciidoctor 19 | - ConTeXt 20 | - Docx 21 | - EPUB 22 | - groff ms 23 | - Typst 24 | - HTML 25 | - LaTeX 26 | 27 | ODT is supported, but requires additional settings in the 28 | reference document (Configuration). 29 | 30 | In all other formats, the page break is represented using the 31 | form feed character. 
32 | 33 | Note that not all input formats support the `raw_tex` format 34 | extension, which is required to use the filter in the default 35 | configuration. Enable the `break-on.plaintext-command` option to 36 | use this filter if `raw_tex` is unavailable. 37 | 38 | Usage 39 | ----- 40 | 41 | The filter modifies the internal document representation; it can 42 | be used with many publishing systems that are based on pandoc. 43 | 44 | ### Plain pandoc 45 | 46 | Pass the filter to pandoc via the `--lua-filter` (or `-L`) command 47 | line option. 48 | 49 | pandoc --lua-filter pagebreak.lua ... 50 | 51 | ### Quarto / R Markdown 52 | 53 | The use of this filter with either Quarto or R Markdown is 54 | *discouraged*, as both systems ship with their own fork of this 55 | extension. Use those instead. 56 | 57 | Note also that Quarto allows using the [`{{ pagebreak }}` 58 | short-code][shortcode] to insert page breaks. 59 | 60 | [shortcode]: https://quarto.org/docs/authoring/markdown-basics.html#page-breaks 61 | 62 | Configuration 63 | ------------- 64 | 65 | The filter can be configured through the `pagebreak` metadata 66 | field. 67 | 68 | Example config in YAML frontmatter: 69 | 70 | ``` yaml 71 | --- 72 | pagebreak: 73 | break-on: 74 | # Treat paragraphs that contain just a form feed 75 | # character as pagebreak markers. 76 | form-feed: true 77 | # Allow plaintext commands, i.e., respect LaTeX newpage 78 | # commands even if they are not in a raw TeX block. 79 | plaintext-command: true 80 | 81 | # Use a div with this class instead of hard-coded CSS 82 | html-class: 'page-break' 83 | 84 | # ODT style used for pagebreak paragraphs 85 | odt-style: 'Pagebreak' 86 | --- 87 | ``` 88 | 89 | Currently supported options: 90 | 91 | - `break-on.form-feed`: boolean value indicating whether 92 | the filter should replace paragraphs that contain nothing but 93 | form feed characters with page breaks. 
Enabling this option can have 94 | a significant performance impact for large documents and is 95 | therefore *disabled by default*. 96 | 97 | - `break-on.plaintext-command`: boolean value that controls 98 | whether paragraphs with LaTeX commands should be interpreted as 99 | pagebreak markers. Enabling this option may impact performance, 100 | so it is *disabled* by default. 101 | 102 | - `html-class`: If you want to use an HTML class rather than an 103 | inline style, set the value of the metadata key `html-class` or 104 | the environment variable `PANDOC_PAGEBREAK_HTML_CLASS` (the 105 | metadata 'wins' if both are defined) to the name of the class 106 | and use CSS like this: 107 | 108 | ``` css 109 | @media all { 110 | .page-break { display: none; } 111 | } 112 | @media print { 113 | .page-break { display: block; page-break-after: always; } 114 | } 115 | ``` 116 | 117 | Note that this will also be used for EPUB output. 118 | 119 | - `odt-style`: To use with ODT you must create a reference ODT 120 | with a named paragraph style called `Pagebreak` (or whatever you 121 | set the metadata field `odt-style` or the environment variable 122 | `PANDOC_PAGEBREAK_ODT_STYLE` to) and define it as having no 123 | extra space before or after but set it to have a pagebreak after 124 | it. 125 | 126 | (There will be an empty placeholder paragraph, which means some 127 | extra vertical space, and you probably want that space to go at 128 | the bottom of the page before the break rather than at the top 129 | of the page after the break!) 130 | 131 | Alternative syntax 132 | ------------------ 133 | 134 | The form feed character as the only element in a paragraph is 135 | supported as an alternative to the LaTeX syntax described above. 136 | See [Configuration](#configuration) for info on how to enable this 137 | feature. 
138 | -------------------------------------------------------------------------------- /pagebreak.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | pagebreak – convert raw LaTeX page breaks to other formats 3 | 4 | Copyright © 2017-2025 Benct Philip Jonsson, Albert Krewinkel 5 | 6 | Permission to use, copy, modify, and/or distribute this software for any 7 | purpose with or without fee is hereby granted, provided that the above 8 | copyright notice and this permission notice appear in all copies. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | ]] 18 | local stringify = (require 'pandoc.utils').stringify 19 | 20 | --- configs – these are populated in the Meta filter. 21 | local default_pagebreaks = { 22 | asciidoc = '<<<\n\n', 23 | context = '\\page', 24 | epub = '

', 25 | html = '
', 26 | latex = '\\newpage{}', 27 | ms = '.bp', 28 | ooxml = '', 29 | odt = '', 30 | typst = '#pagebreak()\n\n' 31 | } 32 | 33 | local function pagebreak_from_config (config) 34 | local pagebreak = default_pagebreaks 35 | local html_class = config['html-class'] 36 | and stringify(config['html-class']) 37 | or os.getenv 'PANDOC_PAGEBREAK_HTML_CLASS' 38 | if html_class and html_class ~= '' then 39 | pagebreak.html = string.format('
', html_class) 40 | end 41 | 42 | local odt_style = config['odt-style'] 43 | and stringify(config['odt-style']) 44 | or os.getenv 'PANDOC_PAGEBREAK_ODT_STYLE' 45 | if odt_style and odt_style ~= '' then 46 | pagebreak.odt = string.format('', odt_style) 47 | end 48 | 49 | return pagebreak 50 | end 51 | 52 | --- Return a block element causing a page break in the given format. 53 | local function newpage(format, pagebreak) 54 | if format:match 'asciidoc' then 55 | return pandoc.RawBlock('asciidoc', pagebreak.asciidoc) 56 | elseif format == 'context' then 57 | return pandoc.RawBlock('context', pagebreak.context) 58 | elseif format == 'docx' then 59 | return pandoc.RawBlock('openxml', pagebreak.ooxml) 60 | elseif format:match 'epub' then 61 | return pandoc.RawBlock('html', pagebreak.epub) 62 | elseif format:match 'html.*' then 63 | return pandoc.RawBlock('html', pagebreak.html) 64 | elseif format:match 'latex' then 65 | return pandoc.RawBlock('tex', pagebreak.latex) 66 | elseif format:match 'ms' then 67 | return pandoc.RawBlock('ms', pagebreak.ms) 68 | elseif format:match 'odt' then 69 | return pandoc.RawBlock('opendocument', pagebreak.odt) 70 | elseif format:match 'typst' then 71 | return pandoc.RawBlock('typst', pagebreak.typst) 72 | else 73 | -- fall back to insert a form feed character 74 | return pandoc.Para{pandoc.Str '\f'} 75 | end 76 | end 77 | 78 | --- Checks whether the given string contains a LaTeX pagebreak or 79 | --- newpage command. 80 | local function is_newpage_command(command) 81 | return command:match '^\\newpage%{?%}?$' 82 | or command:match '^\\pagebreak%{?%}?$' 83 | end 84 | 85 | -- Returns a filter function for RawBlock elements, checking for LaTeX 86 | -- pagebreak/newpage commands; returns `nil` when the target format is 87 | -- LaTeX. 
local function latex_pagebreak (pagebreak)
  -- Don't do anything if the output is TeX: returning `nil` here means
  -- that no RawBlock handler is installed at all.
  if FORMAT:match 'tex$' then
    return nil
  end
  return function (el)
    -- Check that the block is TeX or LaTeX and contains only \newpage or
    -- \pagebreak. NOTE: the format pattern is unanchored, so any raw
    -- format whose name contains "tex" passes the first test.
    if el.format:match 'tex' and is_newpage_command(el.text) then
      -- `pagebreak` is the format-specific pagebreak block that the
      -- caller created for the targeted output format.
      return pagebreak
    end
    -- otherwise, leave the block unchanged
    return nil
  end
end

--- Checks if a paragraph contains nothing but a form feed character.
-- The single inline must be a `Str`, just as in `plaintext_check`; other
-- inline types that carry a `text` field are not treated as pagebreak
-- markers.
local formfeed_check = function (para)
  return #para.content == 1 and para.content[1].t == 'Str' and
    para.content[1].text == '\f'
end

--- Checks if a paragraph looks like a LaTeX newpage command, i.e., if it
--- consists of a single `Str` whose text is accepted by
--- `is_newpage_command`.
local function plaintext_check (para)
  return #para.content == 1 and para.content[1].t == 'Str' and
    is_newpage_command(para.content[1].text)
end

--- Replaces a paragraph with a pagebreak if one of the `checks` returns
--- true.
-- @param raw_pagebreak  block that is returned in place of a matching
--                       paragraph
-- @param checks         list of predicate functions, each taking a
--                       paragraph
-- @return filter function for paragraphs; it returns nothing for
--         paragraphs that do not satisfy any of the checks.
local function para_pagebreak(raw_pagebreak, checks)
  local is_pb = function (para)
    -- `find_if` yields the first predicate that accepts the paragraph,
    -- or `nil` if none does.
    return checks:find_if(function (pred) return pred(para) end)
  end
  return function (para)
    if is_pb(para) then
      return raw_pagebreak
    end
  end
end

--- Filter function; this is the entrypoint when used as a filter.
-- Options are read from the `pagebreak` metadata field; that field is
-- removed from the document before the document is processed.
--
-- The `html-class` and `odt-style` options are looked up in the
-- `pagebreak` field first and, for backwards compatibility, as top-level
-- metadata fields second.
function Pandoc (doc)
  local config = doc.meta.pagebreak
  -- A non-table value (e.g. `pagebreak: true`) carries no options;
  -- indexing it could raise an error.
  if type(config) ~= 'table' then
    config = {}
  end
  local break_on = config['break-on']
  if type(break_on) ~= 'table' then
    break_on = {}
  end

  -- Only these two keys are read by `pagebreak_from_config`.
  local style_config = {
    ['html-class'] = config['html-class'] or doc.meta['html-class'],
    ['odt-style'] = config['odt-style'] or doc.meta['odt-style'],
  }
  local raw_pagebreak = newpage(FORMAT, pagebreak_from_config(style_config))

  local paragraph_checks = pandoc.List{}
  if break_on['form-feed'] then
    paragraph_checks:insert(formfeed_check)
  end
  if break_on['plaintext-command'] then
    paragraph_checks:insert(plaintext_check)
  end
  -- unset pagebreak options field in the metadata
  doc.meta.pagebreak = nil
  return doc:walk {
    RawBlock = latex_pagebreak(raw_pagebreak),
    -- Paragraphs are only inspected if at least one paragraph check has
    -- been enabled; otherwise no Para handler is installed.
    Para = #paragraph_checks > 0
      and para_pagebreak(raw_pagebreak, paragraph_checks)
      or nil
  }
end
151 | -------------------------------------------------------------------------------- /test/expected.adoc: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec 2 | hendrerit tempor tellus. Donec pretium posuere tellus. 3 | 4 | <<< 5 | 6 | Cum sociis natoque penatibus et magnis dis parturient montes, nascetur 7 | ridiculus mus. Nulla posuere. Donec vitae dolor. 8 | 9 | <<< 10 | 11 | Pellentesque dapibus suscipit ligula. Donec posuere augue in quam. 12 | Suspendisse potenti. 13 | 14 | The following does not mark a pagebreak unless the interpretation of 15 | LaTeX commands in plain paragraphs is enabled. 16 | 17 | <<< 18 | 19 | Cum sociis natoque penatibus et magnis dis parturient montes, nascetur 20 | ridiculus mus. 21 | 22 | Final paragraph without a preceding pagebreak. 23 | -------------------------------------------------------------------------------- /test/expected.html: -------------------------------------------------------------------------------- 1 |

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec hendrerit tempor tellus. Donec pretium posuere tellus.

2 |
3 |

Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nulla posuere. Donec vitae dolor.

4 |
5 |

Pellentesque dapibus suscipit ligula. Donec posuere augue in quam. Suspendisse potenti.

6 |

The following does not mark a pagebreak unless the interpretation of LaTeX commands in plain paragraphs is enabled.

7 |

\pagebreak

8 |

Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.

9 |

Final paragraph without a preceding pagebreak.

10 | -------------------------------------------------------------------------------- /test/expected.ms: -------------------------------------------------------------------------------- 1 | .LP 2 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. 3 | Donec hendrerit tempor tellus. 4 | Donec pretium posuere tellus. 5 | .bp 6 | .PP 7 | Cum sociis natoque penatibus et magnis dis parturient montes, nascetur 8 | ridiculus mus. 9 | Nulla posuere. 10 | Donec vitae dolor. 11 | .bp 12 | .PP 13 | Pellentesque dapibus suscipit ligula. 14 | Donec posuere augue in quam. 15 | Suspendisse potenti. 16 | .PP 17 | The following does not mark a pagebreak unless the interpretation of 18 | LaTeX commands in plain paragraphs is enabled. 19 | .PP 20 | \(rspagebreak 21 | .PP 22 | Cum sociis natoque penatibus et magnis dis parturient montes, nascetur 23 | ridiculus mus. 24 | .PP 25 | Final paragraph without a preceding pagebreak. 26 | -------------------------------------------------------------------------------- /test/expected.no-form-feed.html: -------------------------------------------------------------------------------- 1 |

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec hendrerit tempor tellus. Donec pretium posuere tellus.

2 |
3 |

Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nulla posuere. Donec vitae dolor.

4 |

5 |

Pellentesque dapibus suscipit ligula. Donec posuere augue in quam. Suspendisse potenti.

6 |

The following does not mark a pagebreak unless the interpretation of LaTeX commands in plain paragraphs is enabled.

7 |

\pagebreak

8 |

Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.

9 |

Final paragraph without a preceding pagebreak.

10 | -------------------------------------------------------------------------------- /test/expected.typst: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec 2 | hendrerit tempor tellus. Donec pretium posuere tellus. 3 | 4 | #pagebreak() 5 | 6 | Cum sociis natoque penatibus et magnis dis parturient montes, nascetur 7 | ridiculus mus. Nulla posuere. Donec vitae dolor. 8 | 9 | #pagebreak() 10 | 11 | Pellentesque dapibus suscipit ligula. Donec posuere augue in quam. 12 | Suspendisse potenti. 13 | 14 | The following does not mark a pagebreak unless the interpretation of 15 | LaTeX commands in plain paragraphs is enabled. 16 | 17 | \\pagebreak 18 | 19 | Cum sociis natoque penatibus et magnis dis parturient montes, nascetur 20 | ridiculus mus. 21 | 22 | Final paragraph without a preceding pagebreak. 23 | -------------------------------------------------------------------------------- /test/input.md: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec 2 | hendrerit tempor tellus. Donec pretium posuere tellus. 3 | 4 | \newpage 5 | 6 | Cum sociis natoque penatibus et magnis dis parturient montes, 7 | nascetur ridiculus mus. Nulla posuere. Donec vitae dolor. 8 | 9 | 10 | 11 | Pellentesque dapibus suscipit ligula. Donec posuere augue in 12 | quam. Suspendisse potenti. 13 | 14 | The following does not mark a pagebreak unless the interpretation 15 | of LaTeX commands in plain paragraphs is enabled. 16 | 17 | \\pagebreak 18 | 19 | Cum sociis natoque penatibus et magnis dis parturient montes, 20 | nascetur ridiculus mus. 21 | 22 | Final paragraph without a preceding pagebreak. 
23 | -------------------------------------------------------------------------------- /test/test-adoc.yaml: -------------------------------------------------------------------------------- 1 | input-files: [test/input.md] 2 | to: asciidoc 3 | filters: 4 | - pagebreak.lua 5 | 6 | metadata: 7 | pagebreak: 8 | break-on: 9 | form-feed: true 10 | plaintext-command: true 11 | -------------------------------------------------------------------------------- /test/test-html.yaml: -------------------------------------------------------------------------------- 1 | input-files: [test/input.md] 2 | to: html5 3 | wrap: none 4 | filters: 5 | - pagebreak.lua 6 | 7 | metadata: 8 | pagebreak: 9 | break-on: 10 | form-feed: true 11 | -------------------------------------------------------------------------------- /test/test-ms.yaml: -------------------------------------------------------------------------------- 1 | input-files: [test/input.md] 2 | to: ms 3 | filters: 4 | - pagebreak.lua 5 | 6 | metadata: 7 | pagebreak: 8 | break-on: 9 | form-feed: true 10 | -------------------------------------------------------------------------------- /test/test-no-form-feed.html.yaml: -------------------------------------------------------------------------------- 1 | input-files: [test/input.md] 2 | to: html5 3 | wrap: none 4 | filters: 5 | - pagebreak.lua 6 | -------------------------------------------------------------------------------- /test/test-typst.yaml: -------------------------------------------------------------------------------- 1 | input-files: [test/input.md] 2 | to: typst 3 | filters: 4 | - pagebreak.lua 5 | 6 | metadata: 7 | pagebreak: 8 | break-on: 9 | form-feed: true 10 | --------------------------------------------------------------------------------