├── .gitignore ├── pelican └── plugins │ └── pandoc_reader │ ├── test │ ├── markdown │ │ ├── empty.md │ │ ├── no_metadata_end.md │ │ ├── wrong_metadata_end.md │ │ ├── no_metadata.md │ │ ├── metadata_end_with_leading_spaces.md │ │ ├── metadata_start_with_leading_spaces.md │ │ ├── mathjax_content.md │ │ ├── no_title_in_metadata.md │ │ ├── valid_content.md │ │ ├── valid_content_with_raw_paths.md │ │ ├── valid_content_with_toc.md │ │ ├── reading_time_content.md │ │ ├── valid_content_with_citation.md │ │ └── valid_content_with_citation.bib │ ├── defaults_files │ │ ├── no_input_format.yaml │ │ ├── no_output_format.yaml │ │ ├── valid_second_defaults_file_wrap_none.yaml │ │ ├── valid_defaults.yaml │ │ ├── valid_defaults_file_1.yaml │ │ ├── valid_defaults_file_2.yaml │ │ ├── standalone_true.yaml │ │ ├── invalid_from_input_format.yaml │ │ ├── invalid_to_output_format.yaml │ │ ├── valid_second_defaults_file_writer.yaml │ │ ├── invalid_reader_input_format.yaml │ │ ├── self_contained_true.yaml │ │ ├── to_writer_both_given.yaml │ │ ├── valid_second_defaults_file_reader.yaml │ │ ├── invalid_writer_output_format.yaml │ │ ├── from_reader_both_given.yaml │ │ ├── valid_defaults_with_toc.yaml │ │ ├── valid_first_defaults_file_writer.yaml │ │ ├── valid_first_defaults_file_reader.yaml │ │ ├── valid_first_defaults_file_dup_keys.yaml │ │ ├── valid_defaults_with_citeproc_filter.yaml │ │ ├── valid_second_defaults_file_dup_keys.yaml │ │ └── valid_defaults_with_toc_and_citations.yaml │ ├── test_valid_multiple_default_files.py │ ├── test_invalid_args.py │ ├── test_valid_defaults_files.py │ ├── test_pandoc_availability.py │ ├── test_specific_outputs.py │ ├── test_invalid_metadata.py │ ├── test_valid_args.py │ ├── test_valid_arg_citations.py │ ├── test_invalid_defaults_files.py │ ├── html │ │ └── expected_html.py │ └── test_invalid_multiple_defaults_files.py │ ├── __init__.py │ ├── templates │ └── pandoc-reader-default.html │ ├── filters │ └── wordcount.lua │ └── pandoc_reader.py ├── .github ├── 
FUNDING.yml └── workflows │ └── main.yml ├── .editorconfig ├── CONTRIBUTING.md ├── .pre-commit-config.yaml ├── .cruft.json ├── CHANGELOG.md ├── pyproject.toml ├── tasks.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .pdm-python 2 | pdm.lock 3 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/empty.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: justinmayer 2 | liberapay: pelican 3 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/no_metadata_end.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "No Metadata End" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/__init__.py: -------------------------------------------------------------------------------- 1 | """Importing pandoc_reader package.""" 2 | 3 | from .pandoc_reader import * # noqa: F403,PGH004,RUF100 4 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/wrong_metadata_end.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "No Metadata End" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | ~~~ 6 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/no_metadata.md: -------------------------------------------------------------------------------- 1 | # File that does not contain any metadata 2 | 
3 | This is a file that does not have any metadata. 4 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/metadata_end_with_leading_spaces.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Metadata Block End with Leading Spaces" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/metadata_start_with_leading_spaces.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Metadata Block Start with Leading Spaces" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/mathjax_content.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "MathJax Content" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | $$ 7 | e^{i\theta} = \cos\theta + i \sin\theta. 8 | $$ 9 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/no_title_in_metadata.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: "My Author" 3 | date: "2020-10-16" 4 | --- 5 | This is some valid content that should pass. If it does not pass we will know something is wrong. 6 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/valid_content.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Valid Content" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | This is some valid content that should pass. If it does not pass we will know something is wrong. 
7 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 4 7 | indent_style = space 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.py] 12 | max_line_length = 88 13 | 14 | [*.yml] 15 | indent_size = 2 16 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/no_input_format.yaml: -------------------------------------------------------------------------------- 1 | # no_input_format.yaml 2 | # 3 | # A defaults file that specifies no input format which is invalid. 4 | # 5 | writer: html5 6 | 7 | html-math-method: 8 | method: mathjax 9 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 10 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/no_output_format.yaml: -------------------------------------------------------------------------------- 1 | # no_output_format.yaml 2 | # 3 | # A defaults file that specifies no output format which is invalid. 4 | # 5 | reader: markdown 6 | 7 | html-math-method: 8 | method: mathjax 9 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 10 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_second_defaults_file_wrap_none.yaml: -------------------------------------------------------------------------------- 1 | # valid_second_defaults_file_wrap_none.yaml 2 | # 3 | # A defaults file that just specifies one attribute to be used as a dummy second 4 | # defaults file to test invalid attributes in the first defaults file 5 | # specified.
6 | # 7 | wrap: none 8 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_defaults.yaml: -------------------------------------------------------------------------------- 1 | # valid_defaults.yaml 2 | # 3 | # A defaults file that is valid. 4 | # 5 | reader: markdown+smart 6 | writer: html5 7 | wrap: none 8 | 9 | html-math-method: 10 | method: mathjax 11 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 12 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_defaults_file_1.yaml: -------------------------------------------------------------------------------- 1 | # valid_defaults_file_1.yaml 2 | # 3 | # A defaults file that specifies reader and writer formats and is 4 | # used together with valid_defaults_file_2.yaml to test the valid 5 | # multiple defaults file case. 6 | # 7 | reader: markdown+smart 8 | writer: html5 9 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_defaults_file_2.yaml: -------------------------------------------------------------------------------- 1 | # valid_defaults_file_2.yaml 2 | # 3 | # A defaults file that sets the table-of-contents to true and wrap to none and 4 | # is used together with the valid_defaults_file_1.yaml to test the valid 5 | # multiple defaults file case.
6 | # 7 | table-of-contents: true 8 | wrap: none 9 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/templates/pandoc-reader-default.html: -------------------------------------------------------------------------------- 1 | $meta-json$ 2 | 3 | $if(toc)$ 4 | 10 | $endif$ 11 | $body$ 12 | $for(include-after)$ 13 | $include-after$ 14 | $endfor$ 15 | 16 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/standalone_true.yaml: -------------------------------------------------------------------------------- 1 | # standalone_true.yaml 2 | # 3 | # A defaults file that sets standalone to true which is invalid. 4 | # 5 | reader: markdown+smart 6 | writer: html5 7 | standalone: true 8 | 9 | html-math-method: 10 | method: mathjax 11 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 12 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/invalid_from_input_format.yaml: -------------------------------------------------------------------------------- 1 | # invalid_from_input_format.yaml 2 | # 3 | # A defaults file that sets the from attribute to an invalid input format. 4 | # 5 | from: myinputformat 6 | to: html5 7 | 8 | html-math-method: 9 | method: mathjax 10 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 11 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/invalid_to_output_format.yaml: -------------------------------------------------------------------------------- 1 | # invalid_to_output_format.yaml 2 | # 3 | # A defaults file that sets the to attribute to an invalid output format. 
4 | # 5 | from: markdown+smart 6 | to: myoutputformat 7 | 8 | html-math-method: 9 | method: mathjax 10 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 11 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_second_defaults_file_writer.yaml: -------------------------------------------------------------------------------- 1 | # valid_second_defaults_file_writer.yaml 2 | # 3 | # A defaults file that sets the to attribute and is used with the 4 | # valid_first_defaults_file_writer.yaml that sets the writer attribute. Taken together 5 | # the defaults are invalid as you cannot specify writer and to. 6 | # 7 | to: html5 8 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/invalid_reader_input_format.yaml: -------------------------------------------------------------------------------- 1 | # invalid_reader_input_format.yaml 2 | # 3 | # A defaults file that sets the reader attribute to an invalid input format. 4 | # 5 | reader: myinputformat 6 | writer: html5 7 | 8 | html-math-method: 9 | method: mathjax 10 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 11 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/self_contained_true.yaml: -------------------------------------------------------------------------------- 1 | # self_contained_true.yaml 2 | # 3 | # A defaults file that sets self-contained to true which is invalid. 
4 | # 5 | reader: markdown+smart 6 | writer: html5 7 | self-contained: true 8 | 9 | html-math-method: 10 | method: mathjax 11 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 12 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/to_writer_both_given.yaml: -------------------------------------------------------------------------------- 1 | # to_writer_both_given.yaml 2 | # 3 | # A defaults file that sets both the to and writer attributes which is invalid. 4 | # 5 | reader: markdown+smart 6 | to: html5 7 | writer: html5 8 | 9 | html-math-method: 10 | method: mathjax 11 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 12 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_second_defaults_file_reader.yaml: -------------------------------------------------------------------------------- 1 | # valid_second_defaults_file_reader.yaml 2 | # 3 | # A defaults file that sets a from format and is used together with 4 | # valid_first_defaults_file_reader.yaml that sets the reader attribute. 5 | # Taken together the defaults are invalid as you cannot set from and reader. 6 | # 7 | from: markdown+smart 8 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/invalid_writer_output_format.yaml: -------------------------------------------------------------------------------- 1 | # invalid_writer_output_format.yaml 2 | # 3 | # A defaults file that sets the writer attribute to an invalid output format.
4 | # 5 | reader: markdown+smart 6 | writer: myoutputformat 7 | 8 | html-math-method: 9 | method: mathjax 10 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 11 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/from_reader_both_given.yaml: -------------------------------------------------------------------------------- 1 | # from_reader_both_given.yaml 2 | # 3 | # A defaults file that sets both the reader and from attributes which is invalid. 4 | # 5 | reader: markdown+smart 6 | from: markdown+smart 7 | writer: html5 8 | 9 | html-math-method: 10 | method: mathjax 11 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 12 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_defaults_with_toc.yaml: -------------------------------------------------------------------------------- 1 | # valid_defaults_with_toc.yaml 2 | # 3 | # A defaults file that is valid and sets table-of-contents to true. 4 | # 5 | reader: markdown+smart 6 | writer: html5 7 | wrap: none 8 | 9 | table-of-contents: true 10 | 11 | html-math-method: 12 | method: mathjax 13 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 14 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_first_defaults_file_writer.yaml: -------------------------------------------------------------------------------- 1 | # valid_first_defaults_file_writer.yaml 2 | # 3 | # A defaults file that sets the writer attribute and is used with the 4 | # valid_second_defaults_file_writer.yaml that sets the to attribute. Taken together 5 | # the defaults are invalid as you cannot specify writer and to. 
6 | # 7 | reader: markdown+smart 8 | writer: html5 9 | wrap: none 10 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/valid_content_with_raw_paths.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Valid Content with Fictitious Raw Paths" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | This is some valid content that should pass. If it does not pass we will know something is wrong. 7 | 8 | Our fictitious internal files are available [at]({filename}/path/to/file): 9 | 10 | Our fictitious static files are available [at]({static}/path/to/file): 11 | 12 | Our fictitious attachments are available [at]({attach}path/to/file): 13 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_first_defaults_file_reader.yaml: -------------------------------------------------------------------------------- 1 | # valid_first_defaults_file_reader.yaml 2 | # 3 | # A defaults file that sets reader and writer formats and is used together with 4 | # valid_second_defaults_file_reader.yaml that sets the from attribute. 5 | # Taken together the defaults are invalid as you cannot set from and reader. 6 | # 7 | reader: markdown+smart 8 | writer: html5 9 | 10 | html-math-method: 11 | method: mathjax 12 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 13 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_first_defaults_file_dup_keys.yaml: -------------------------------------------------------------------------------- 1 | # valid_first_defaults_file_dup_keys.yaml 2 | # 3 | # A defaults file that is valid and is used together with 4 | # valid_second_defaults_file_dup_keys.yaml.
Taken together the defaults are 5 | # invalid as they specify the same keys and can cause unexpected output 6 | # if not caught 7 | # 8 | reader: markdown+smart 9 | writer: html5 10 | wrap: none 11 | 12 | html-math-method: 13 | method: mathjax 14 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 15 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_defaults_with_citeproc_filter.yaml: -------------------------------------------------------------------------------- 1 | # valid_defaults_with_citeproc_filter.yaml 2 | # 3 | # A defaults file that is valid and sets citeproc as a filter. 4 | # 5 | reader: markdown+smart 6 | writer: html5 7 | wrap: none 8 | 9 | html-math-method: 10 | method: mathjax 11 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 12 | 13 | csl: "https://www.zotero.org/styles/ieee-with-url" 14 | 15 | metadata: 16 | link-citations: false 17 | reference-section-title: References 18 | 19 | filters: 20 | - citeproc 21 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_second_defaults_file_dup_keys.yaml: -------------------------------------------------------------------------------- 1 | # valid_second_defaults_file_dup_keys.yaml 2 | # 3 | # A defaults file that is valid and is used together with 4 | # valid_first_defaults_file_dup_keys.yaml. 
Taken together the defaults are 5 | # invalid as they specify the same keys and can cause unexpected output 6 | # if not caught 7 | # 8 | reader: markdown+smart 9 | writer: html5 10 | wrap: none 11 | 12 | table-of-contents: true 13 | 14 | html-math-method: 15 | method: mathjax 16 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 17 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. 5 | 6 | To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. 7 | 8 | [existing issues]: https://github.com/pelican-plugins/pandoc-reader/issues 9 | [Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html 10 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/defaults_files/valid_defaults_with_toc_and_citations.yaml: -------------------------------------------------------------------------------- 1 | # valid_defaults_with_toc_and_citations.yaml 2 | # 3 | # A defaults file that is valid and sets table-of-contents to true and enables citations. 
4 | # 5 | reader: markdown+smart 6 | writer: html5 7 | wrap: none 8 | 9 | table-of-contents: true 10 | 11 | html-math-method: 12 | method: mathjax 13 | url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" 14 | 15 | citeproc: true 16 | csl: "https://www.zotero.org/styles/ieee-with-url" 17 | 18 | metadata: 19 | link-citations: false 20 | reference-section-title: References 21 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/valid_content_with_toc.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Valid Content with Table of Contents" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | This is some valid content that should pass. If it does not pass we will know something is wrong. 7 | 8 | ## First Heading 9 | 10 | This should be the first heading in my table of contents. 11 | 12 | ## Second Heading 13 | 14 | This should be the second heading in my table of contents. 15 | 16 | ### First Subheading 17 | 18 | This is a subsection that should be shown as such in the table of contents. 19 | 20 | ### Second Subheading 21 | 22 | This is another subsection that should be shown as such in the table of contents. 23 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/reading_time_content.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Reading time Content" 3 | author: "My Author" 4 | date: "2020-10-16" 5 | --- 6 | 7 | ## What is Lorem Ipsum 8 | 9 | Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. 
It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum. Lorep Ipsum paragragh should be 100 words. 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | ci: 3 | autoupdate_schedule: quarterly 4 | 5 | # See https://pre-commit.com/hooks.html for info on hooks 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v5.0.0 9 | hooks: 10 | - id: check-added-large-files 11 | - id: check-ast 12 | - id: check-case-conflict 13 | - id: check-docstring-first 14 | - id: check-merge-conflict 15 | - id: check-toml 16 | - id: check-yaml 17 | - id: debug-statements 18 | - id: detect-private-key 19 | - id: end-of-file-fixer 20 | - id: forbid-new-submodules 21 | - id: trailing-whitespace 22 | 23 | - repo: https://github.com/astral-sh/ruff-pre-commit 24 | rev: v0.12.2 25 | hooks: 26 | - id: ruff-check 27 | - id: ruff-format 28 | args: ["--check"] 29 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/valid_content_with_citation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Valid Content With Citation 3 | author: My Author 4 | date: 2020-10-16 5 | summary: "But this foundational principle of science has now been called into question by [String Theory](https://www.britannica.com/science/string-theory)." 
6 | --- 7 | ## String Theory 8 | 9 | But this foundational principle of science has now been called into question by [String Theory](https://www.britannica.com/science/string-theory), which is a relative newcomer to theoretical physics, but one that has captured the common imagination, judging by the popular explanations that abound on the Web [@mann2019; @wood2019; @jones2020]. And whether string theory is or is not science, Popper notwithstanding, is an issue that is still up for debate [@siegel2015; @castelvecchi2016; @alves2017; @francis2019]. 10 | -------------------------------------------------------------------------------- /.cruft.json: -------------------------------------------------------------------------------- 1 | { 2 | "template": "https://github.com/getpelican/cookiecutter-pelican-plugin", 3 | "commit": "8966a5d5f055ebfdeb0cf88758436eaf4cc62cd5", 4 | "checkout": null, 5 | "context": { 6 | "cookiecutter": { 7 | "plugin_name": "Pandoc Reader", 8 | "repo_name": "pandoc-reader", 9 | "package_name": "pandoc_reader", 10 | "distribution_name": "pelican-pandoc-reader", 11 | "version": "3.0.0", 12 | "description": "Pelican plugin for converting Pandoc's Markdown variant to HTML.", 13 | "authors": "{name = \"Nandakumar Chandrasekhar\", email = \"navanitachora@gmail.com\"}, {name = \"Justin Mayer\", email = \"entroP@gmail.com\"}", 14 | "keywords": "\"pelican\", \"plugin\", \"markdown\", \"pandoc\"", 15 | "readme": "README.md", 16 | "contributing": "CONTRIBUTING.md", 17 | "license": "GNU Affero General Public License v3|AGPL-3.0", 18 | "repo_url": "https://github.com/pelican-plugins/pandoc-reader", 19 | "dev_status": "5 - Production/Stable", 20 | "tests_exist": true, 21 | "python_version": "~=3.9", 22 | "pelican_version": ">=4.5", 23 | "_template": "https://github.com/getpelican/cookiecutter-pelican-plugin", 24 | "_commit": "8966a5d5f055ebfdeb0cf88758436eaf4cc62cd5" 25 | } 26 | }, 27 | "directory": null 28 | } 29 | 
-------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/filters/wordcount.lua: -------------------------------------------------------------------------------- 1 | -- counts words in a document 2 | 3 | words = 0 4 | characters = 0 5 | characters_and_spaces = 0 6 | process_anyway = false 7 | 8 | wordcount = { 9 | Str = function(el) 10 | -- we don't count a word if it's entirely punctuation: 11 | if el.text:match("%P") then 12 | words = words + 1 13 | end 14 | characters = characters + utf8.len(el.text) 15 | characters_and_spaces = characters_and_spaces + utf8.len(el.text) 16 | end, 17 | 18 | Space = function(el) 19 | characters_and_spaces = characters_and_spaces + 1 20 | end, 21 | 22 | Code = function(el) 23 | _,n = el.text:gsub("%S+","") 24 | words = words + n 25 | text_nospace = el.text:gsub("%s", "") 26 | characters = characters + utf8.len(text_nospace) 27 | characters_and_spaces = characters_and_spaces + utf8.len(el.text) 28 | end, 29 | 30 | CodeBlock = function(el) 31 | _,n = el.text:gsub("%S+","") 32 | words = words + n 33 | text_nospace = el.text:gsub("%s", "") 34 | characters = characters + utf8.len(text_nospace) 35 | characters_and_spaces = characters_and_spaces + utf8.len(el.text) 36 | end 37 | } 38 | 39 | -- check if the `wordcount` variable is set to `process-anyway` 40 | function Meta(meta) 41 | if meta.wordcount and (meta.wordcount=="process-anyway" 42 | or meta.wordcount=="process" or meta.wordcount=="convert") then 43 | process_anyway = true 44 | end 45 | end 46 | 47 | function Pandoc(el) 48 | -- skip metadata, just count body: 49 | pandoc.walk_block(pandoc.Div(el.blocks), wordcount) 50 | print(words .. " words in body") 51 | print(characters .. " characters in body") 52 | print(characters_and_spaces .. 
" characters in body (including spaces)") 53 | if not process_anyway then 54 | os.exit(0) 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_valid_multiple_default_files.py: -------------------------------------------------------------------------------- 1 | """Test using multiple valid defaults files with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.plugins.pandoc_reader.test.html.expected_html import ( 8 | HTML_TOC, 9 | HTML_WITH_HEADINGS, 10 | ) 11 | from pelican.tests.support import get_settings 12 | 13 | DIR_PATH = os.path.dirname(__file__) 14 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 15 | TEST_DEFAULTS_FILES_PATH = os.path.abspath(os.path.join(DIR_PATH, "defaults_files")) 16 | 17 | 18 | class TestValidCasesWithMultipleDefaultsFiles(unittest.TestCase): 19 | def test_multiple_defaults_files_valid_case(self): 20 | """Check that we get the expected output with multiple valid default files.""" 21 | pandoc_defaults_files = [ 22 | os.path.join(TEST_DEFAULTS_FILES_PATH, "valid_defaults_file_1.yaml"), 23 | os.path.join(TEST_DEFAULTS_FILES_PATH, "valid_defaults_file_2.yaml"), 24 | ] 25 | 26 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 27 | 28 | pandoc_reader = PandocReader(settings) 29 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_toc.md") 30 | output, metadata = pandoc_reader.read(source_path) 31 | self.maxDiff = None # pylint: disable=invalid-name 32 | 33 | self.assertEqual(HTML_WITH_HEADINGS, output) 34 | self.assertEqual("Valid Content with Table of Contents", str(metadata["title"])) 35 | self.assertEqual("My Author", str(metadata["author"])) 36 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 37 | self.assertEqual(HTML_TOC, str(metadata["toc"])) 38 | 39 | 40 | if __name__ == 
"__main__": 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | CHANGELOG 2 | ========= 3 | 4 | 3.0.0 - 2024-12-01 5 | ------------------ 6 | 7 | * Dropping Support for Python 3.9 8 | * Updated Markdown Python package 9 | 10 | Contributed by [nandac](https://github.com/nandac) via [PR #42](https://github.com/pelican-plugins/pandoc-reader/pull/42/) 11 | 12 | 13 | 2.1.0 - 2023-11-23 14 | ------------------ 15 | 16 | * Using [wordcount Lua Filter](https://github.com/pandoc/lua-filters/tree/master/wordcount) instead of the [markdown-word-count](https://github.com/gandreadis/markdown-word-count) Python package to calculate word count 17 | 18 | Contributed by [nandac](https://github.com/nandac) via [PR #35](https://github.com/pelican-plugins/pandoc-reader/pull/35/) 19 | 20 | 21 | 2.0.0 - 2023-08-12 22 | ------------------ 23 | 24 | * Dropping support for Python 3.7 25 | * Removed `PANDOC_DEFAULT_FILES` in favour of `PANDOC_DEFAULTS_FILES` 26 | * Upgrade PyYAML and ruamel.yaml to the latest version available 27 | 28 | 1.2.0 - 2022-10-29 29 | ------------------ 30 | 31 | * Deprecating `PANDOC_DEFAULT_FILES` setting in favour of `PANDOC_DEFAULTS_FILES` 32 | * Handle defaults files in a manner consistent with Pandoc's handling of these files 33 | 34 | Contributed by [nandac](https://github.com/nandac) via [PR #25](https://github.com/pelican-plugins/pandoc-reader/pull/25/) 35 | 36 | 37 | 1.1.0 - 2021-02-16 38 | ------------------ 39 | 40 | Add PANDOC_EXECUTABLE_PATH setting to customize `pandoc` executable location 41 | 42 | [nandac](https://github.com/nandac) [PR #19](https://github.com/pelican-plugins/pandoc-reader/pull/19/) 43 | 44 | 1.0.1 - 2021-02-05 45 | ------------------ 46 | 47 | * Raise exception for metadata with leading or trailing whitespace 48 | * Remove citations extension check since Pandoc now enables it by 
default 49 | 50 | 1.0.0 - 2020-12-04 51 | ------------------ 52 | 53 | * Convert to namespace plugin 54 | * Add support for citations, table of contents generation, and reading time calculation 55 | * Add support for Pandoc default files 56 | * Add support for specifying `citeproc` as a filter 57 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/markdown/valid_content_with_citation.bib: -------------------------------------------------------------------------------- 1 | @online{castelvecchi2016, 2 | url = {https://www.nature.com/news/feuding-physicists-turn-to-philosophy-for-help-1.19076}, 3 | title = {{Feuding physicists turn to philosophy for help}}, 4 | titleaddon = {String theory is at the heart of a debate over the integrity of the scientific method itself}, 5 | author = {Davide Castelvecchi}, 6 | date = {2016-01-05}, 7 | urldate = {2020-11-12} 8 | } 9 | 10 | @online{francis2019, 11 | url = {https://www.scientificamerican.com/article/is-string-theory-science/}, 12 | title = {{Falsifiability and physics}}, 13 | titleaddon = {Can a theory that isn’t completely testable still be useful to physics?}, 14 | author = {Matthew R Francis}, 15 | date = {2019-04-23}, 16 | urldate = {2020-11-12} 17 | } 18 | 19 | @online{alves2017, 20 | url = {https://metafact.io/factchecks/30-is-string-theory-falsifiable}, 21 | title = {{Is String theory falsifiable?}}, 22 | titleaddon = {Can a theory that isn’t completely testable still be useful to physics?}, 23 | author = {Rafael Alves Batista and Joel Primack}, 24 | date = {circa 2017}, 25 | urldate = {2020-11-12} 26 | } 27 | 28 | @online{siegel2015, 29 | url = {https://www.forbes.com/sites/startswithabang/2015/12/23/why-string-theory-is-not-science/}, 30 | title = {{Why String Theory Is Not A Scientific Theory}}, 31 | author = {Ethan Siegel}, 32 | date = {2015-12-23}, 33 | urldate = {2020-11-12} 34 | } 35 | 36 | @online{mann2019, 37 | url = 
{https://www.livescience.com/65033-what-is-string-theory.html}, 38 | title = {{What Is String Theory?}}, 39 | author = {Adam Mann}, 40 | date = {2019-03-20}, 41 | urldate = {2020-11-12} 42 | } 43 | 44 | @online{wood2019, 45 | url = {https://www.space.com/17594-string-theory.html}, 46 | title = {{What Is String Theory?}}, 47 | titleaddon = {Reference Article: A simplified explanation and brief history of string theory}, 48 | author = {Charlie Wood}, 49 | date = {2019-07-11}, 50 | urldate = {2020-11-12} 51 | } 52 | 53 | @online{jones2020, 54 | url = {https://www.thoughtco.com/what-is-string-theory-2699363}, 55 | title = {{The Basics of String Theory}}, 56 | author = {Andrew Zimmerman Jones}, 57 | date = {2019-03-02}, 58 | urldate = {2020-11-12} 59 | } 60 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_invalid_args.py: -------------------------------------------------------------------------------- 1 | """Test using invalid arguments and extensions with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.tests.support import get_settings 8 | 9 | DIR_PATH = os.path.dirname(__file__) 10 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 11 | 12 | # These settings will be set in pelicanconf.py by plugin users. 13 | # Appending --wrap=None so that rendered HTML5 does not have new lines (\n) 14 | # which causes tests to fail. 
15 | # See https://pandoc.org/MANUAL.html#general-writer-options 16 | PANDOC_ARGS = ["--mathjax", "--wrap=none"] 17 | PANDOC_EXTENSIONS = ["+smart"] 18 | 19 | 20 | class TestInvalidCasesWithArguments(unittest.TestCase): 21 | """Invalid test cases using Pandoc arguments and extensions.""" 22 | 23 | def test_invalid_standalone_argument(self): 24 | """Check that specifying --standalone raises an exception.""" 25 | pandoc_arguments = ["--standalone"] 26 | settings = get_settings( 27 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=pandoc_arguments 28 | ) 29 | 30 | pandoc_reader = PandocReader(settings) 31 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 32 | 33 | with self.assertRaises(ValueError) as context_manager: 34 | pandoc_reader.read(source_path) 35 | 36 | message = str(context_manager.exception) 37 | self.assertEqual("Argument --standalone is not supported.", message) 38 | 39 | def test_invalid_self_contained_argument(self): 40 | """Check that specifying --self-contained raises an exception.""" 41 | pandoc_arguments = ["--self-contained"] 42 | settings = get_settings( 43 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=pandoc_arguments 44 | ) 45 | 46 | pandoc_reader = PandocReader(settings) 47 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 48 | 49 | with self.assertRaises(ValueError) as context_manager: 50 | pandoc_reader.read(source_path) 51 | 52 | message = str(context_manager.exception) 53 | self.assertEqual("Argument --self-contained is not supported.", message) 54 | 55 | 56 | if __name__ == "__main__": 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_valid_defaults_files.py: -------------------------------------------------------------------------------- 1 | """Test using valid default files with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import 
PandocReader 7 | from pelican.plugins.pandoc_reader.test.html.expected_html import ( 8 | HTML_MATHJAX, 9 | HTML_TOC, 10 | HTML_VALID_TEXT, 11 | HTML_WITH_HEADINGS, 12 | ) 13 | from pelican.tests.support import get_settings 14 | 15 | DIR_PATH = os.path.dirname(__file__) 16 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 17 | TEST_DEFAULTS_FILES_PATH = os.path.abspath(os.path.join(DIR_PATH, "defaults_files")) 18 | 19 | 20 | class TestValidCasesWithDefaultsFiles(unittest.TestCase): 21 | """Valid test cases using defaults files.""" 22 | 23 | def test_valid_file_with_valid_defaults(self): 24 | """Check if we get the expected output specifying valid defaults.""" 25 | pandoc_defaults_files = [ 26 | os.path.join(TEST_DEFAULTS_FILES_PATH, "valid_defaults.yaml") 27 | ] 28 | 29 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 30 | 31 | pandoc_reader = PandocReader(settings) 32 | 33 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 34 | output, metadata = pandoc_reader.read(source_path) 35 | 36 | self.assertEqual(HTML_VALID_TEXT, output) 37 | self.assertEqual("Valid Content", str(metadata["title"])) 38 | self.assertEqual("My Author", str(metadata["author"])) 39 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 40 | 41 | def test_mathjax_with_valid_defaults(self): 42 | """Check if mathematics is rendered correctly with valid defaults.""" 43 | pandoc_defaults_files = [ 44 | os.path.join(TEST_DEFAULTS_FILES_PATH, "valid_defaults.yaml") 45 | ] 46 | 47 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 48 | 49 | pandoc_reader = PandocReader(settings) 50 | 51 | source_path = os.path.join(TEST_CONTENT_PATH, "mathjax_content.md") 52 | output, metadata = pandoc_reader.read(source_path) 53 | 54 | self.assertEqual(HTML_MATHJAX, output) 55 | self.assertEqual("MathJax Content", str(metadata["title"])) 56 | self.assertEqual("My Author", str(metadata["author"])) 57 | 
self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 58 | 59 | def test_toc_with_valid_defaults(self): 60 | """Check if output and table of contents are as expected with valid defaults.""" 61 | pandoc_defaults_files = [ 62 | os.path.join(TEST_DEFAULTS_FILES_PATH, "valid_defaults_with_toc.yaml") 63 | ] 64 | 65 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 66 | pandoc_reader = PandocReader(settings) 67 | 68 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_toc.md") 69 | output, metadata = pandoc_reader.read(source_path) 70 | self.maxDiff = None # pylint: disable=invalid-name 71 | 72 | self.assertEqual(HTML_WITH_HEADINGS, output) 73 | self.assertEqual("Valid Content with Table of Contents", str(metadata["title"])) 74 | self.assertEqual("My Author", str(metadata["author"])) 75 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 76 | self.assertEqual(HTML_TOC, str(metadata["toc"])) 77 | 78 | 79 | if __name__ == "__main__": 80 | unittest.main() 81 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | PYTEST_ADDOPTS: "--color=yes" 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | test: 13 | name: Test - Python ${{ matrix.python-version }} 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | persist-credentials: false 23 | 24 | - name: Set up Python ${{ matrix.python-version }} & PDM 25 | uses: pdm-project/setup-pdm@v4 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | cache: true 29 | cache-dependency-path: ./pyproject.toml 30 | 31 | - name: Install dependencies 32 | run: pdm install 33 | 34 | - name: Install supported Pandoc 3.5 35 | run: | 36 | wget 
https://github.com/jgm/pandoc/releases/download/3.5/pandoc-3.5-1-amd64.deb 37 | sudo dpkg -i pandoc-3.5-1-amd64.deb 38 | 39 | - name: Install unsupported Pandoc 2.10 40 | run: | 41 | wget https://github.com/jgm/pandoc/releases/download/2.10.1/pandoc-2.10.1-linux-amd64.tar.gz 42 | sudo mkdir 2.10 43 | sudo tar xvzf pandoc-2.10.1-linux-amd64.tar.gz --strip-components 1 -C 2.10 44 | 45 | - name: Install unsupported Pandoc 1.19 46 | run: | 47 | wget https://github.com/jgm/pandoc/releases/download/1.19.2.1/pandoc-1.19.2.1-1-amd64.deb 48 | sudo mkdir 1.19 49 | sudo ar p pandoc-1.19.2.1-1-amd64.deb data.tar.gz | sudo tar xvz --strip-components 2 -C 1.19 50 | 51 | - name: Run tests 52 | run: pdm run invoke tests 53 | 54 | lint: 55 | name: Lint 56 | runs-on: ubuntu-latest 57 | 58 | steps: 59 | - uses: actions/checkout@v4 60 | with: 61 | persist-credentials: false 62 | 63 | - name: Validate links in Markdown files 64 | uses: JustinBeckwith/linkinator-action@v1 65 | with: 66 | retry: true 67 | 68 | - name: Set up Python & PDM 69 | uses: pdm-project/setup-pdm@v4 70 | with: 71 | python-version: "3.10" 72 | 73 | - name: Install dependencies 74 | run: pdm install 75 | 76 | - name: Run linters 77 | run: pdm run invoke lint --diff 78 | 79 | deploy: 80 | name: Deploy 81 | environment: Deployment 82 | needs: [test, lint] 83 | runs-on: ubuntu-latest 84 | if: github.ref=='refs/heads/main' && github.event_name!='pull_request' 85 | 86 | permissions: 87 | contents: write 88 | id-token: write 89 | 90 | steps: 91 | - uses: actions/checkout@v4 92 | 93 | - name: Set up Python 94 | uses: actions/setup-python@v5 95 | with: 96 | python-version: "3.10" 97 | 98 | - name: Check release 99 | id: check_release 100 | run: | 101 | python -m pip install autopub[github] 102 | autopub check 103 | 104 | - name: Publish 105 | if: ${{ steps.check_release.outputs.autopub_release=='true' }} 106 | env: 107 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 108 | run: | 109 | autopub prepare 110 | autopub commit 111 | 
autopub build 112 | autopub githubrelease 113 | 114 | - name: Upload package to PyPI 115 | if: ${{ steps.check_release.outputs.autopub_release=='true' }} 116 | uses: pypa/gh-action-pypi-publish@release/v1 117 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_pandoc_availability.py: -------------------------------------------------------------------------------- 1 | """Test if a supported version of the pandoc executable is available.""" 2 | 3 | import os 4 | import shutil 5 | import unittest 6 | 7 | from pelican.plugins.pandoc_reader import PandocReader 8 | from pelican.tests.support import get_settings 9 | 10 | DIR_PATH = os.path.dirname(__file__) 11 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 12 | 13 | # These settings will be set in pelicanconf.py by plugin users. 14 | # Appending --wrap=None so that rendered HTML5 does not have new lines (\n) 15 | # which causes tests to fail. 16 | # See https://pandoc.org/MANUAL.html#general-writer-options 17 | PANDOC_ARGS = ["--mathjax", "--wrap=none"] 18 | PANDOC_EXTENSIONS = ["+smart"] 19 | 20 | 21 | class TestPandocAvailability(unittest.TestCase): 22 | """Test if a supported version of the pandoc executable is available.""" 23 | 24 | def test_pandoc_availability_one(self): 25 | """Check if pandoc executable is available.""" 26 | settings = get_settings( 27 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 28 | PANDOC_ARGS=PANDOC_ARGS, 29 | ) 30 | 31 | pandoc_reader = PandocReader(settings) 32 | source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") 33 | 34 | if not shutil.which("pandoc"): 35 | # Case where pandoc is not available 36 | with self.assertRaises(Exception) as context_manager: 37 | pandoc_reader.read(source_path) 38 | 39 | message = str(context_manager.exception) 40 | self.assertEqual("Could not find Pandoc.
Please install.", message) 41 | else: 42 | self.assertTrue(True) 43 | 44 | def test_pandoc_availability_two(self): 45 | """Check if pandoc executable is available at the given path.""" 46 | settings = get_settings( 47 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 48 | PANDOC_ARGS=PANDOC_ARGS, 49 | PANDOC_EXECUTABLE_PATH="2.11/bin/pandoc", 50 | ) 51 | 52 | pandoc_reader = PandocReader(settings) 53 | source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") 54 | 55 | with self.assertRaises(Exception) as context_manager: 56 | pandoc_reader.read(source_path) 57 | 58 | message = str(context_manager.exception) 59 | self.assertEqual("Could not find Pandoc. Please install.", message) 60 | 61 | def test_pandoc_unsupported_major_version(self): 62 | """Check if the installed pandoc has a supported major version.""" 63 | settings = get_settings( 64 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 65 | PANDOC_ARGS=PANDOC_ARGS, 66 | PANDOC_EXECUTABLE_PATH="1.19/bin/pandoc", 67 | ) 68 | 69 | pandoc_reader = PandocReader(settings) 70 | source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") 71 | 72 | with self.assertRaises(Exception) as context_manager: 73 | pandoc_reader.read(source_path) 74 | 75 | message = str(context_manager.exception) 76 | self.assertEqual("Pandoc version must be 2.11 or higher.", message) 77 | 78 | def test_pandoc_unsupported_minor_version(self): 79 | """Check if the installed pandoc has a supported minor version.""" 80 | settings = get_settings( 81 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 82 | PANDOC_ARGS=PANDOC_ARGS, 83 | PANDOC_EXECUTABLE_PATH="2.10/bin/pandoc", 84 | ) 85 | 86 | pandoc_reader = PandocReader(settings) 87 | source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") 88 | 89 | with self.assertRaises(Exception) as context_manager: 90 | pandoc_reader.read(source_path) 91 | 92 | message = str(context_manager.exception) 93 | self.assertEqual("Pandoc version must be 2.11 or higher.", message) 94 | 95 | 96 | if __name__ == "__main__": 97 | unittest.main() 98 | 
-------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_specific_outputs.py: -------------------------------------------------------------------------------- 1 | """Test reading time and summary output from the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.tests.support import get_settings 8 | 9 | DIR_PATH = os.path.dirname(__file__) 10 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 11 | 12 | # These settings will be set in pelicanconf.py by plugin users. 13 | # Appending --wrap=None so that rendered HTML5 does not have new lines (\n) 14 | # which causes tests to fail. 15 | # See https://pandoc.org/MANUAL.html#general-writer-options 16 | PANDOC_ARGS = ["--mathjax", "--wrap=none"] 17 | PANDOC_EXTENSIONS = ["+smart"] 18 | CALCULATE_READING_TIME = True 19 | FORMATTED_FIELDS = ["summary"] 20 | 21 | 22 | class TestReadingTimeAndSummary(unittest.TestCase): 23 | """Test reading time and summary formatted fields.""" 24 | 25 | def test_default_wpm_reading_time(self): 26 | """Check if 200 words per minute gives us a reading time of 1 minute.""" 27 | settings = get_settings( 28 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 29 | PANDOC_ARGS=PANDOC_ARGS, 30 | CALCULATE_READING_TIME=CALCULATE_READING_TIME, 31 | ) 32 | 33 | pandoc_reader = PandocReader(settings) 34 | source_path = os.path.join(TEST_CONTENT_PATH, "reading_time_content.md") 35 | _, metadata = pandoc_reader.read(source_path) 36 | 37 | self.assertEqual("1 minute", str(metadata["reading_time"])) 38 | 39 | def test_user_defined_wpm_reading_time(self): 40 | """Check if a user defined 100 words per minute gives us 2 minutes.""" 41 | settings = get_settings( 42 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 43 | PANDOC_ARGS=PANDOC_ARGS, 44 | CALCULATE_READING_TIME=CALCULATE_READING_TIME, 45 | READING_SPEED=100, 46 | ) 47 | 48 | pandoc_reader = 
PandocReader(settings) 49 | source_path = os.path.join(TEST_CONTENT_PATH, "reading_time_content.md") 50 | _, metadata = pandoc_reader.read(source_path) 51 | 52 | self.assertEqual("2 minutes", str(metadata["reading_time"])) 53 | 54 | def test_invalid_user_defined_wpm(self): 55 | """Check if an exception is raised if words per minute is not a number.""" 56 | settings = get_settings( 57 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 58 | PANDOC_ARGS=PANDOC_ARGS, 59 | CALCULATE_READING_TIME=CALCULATE_READING_TIME, 60 | READING_SPEED="my words per minute", 61 | ) 62 | 63 | pandoc_reader = PandocReader(settings) 64 | source_path = os.path.join(TEST_CONTENT_PATH, "reading_time_content.md") 65 | 66 | with self.assertRaises(ValueError) as context_manager: 67 | pandoc_reader.read(source_path) 68 | 69 | message = str(context_manager.exception) 70 | self.assertEqual("READING_SPEED setting must be a number.", message) 71 | 72 | def test_summary(self): 73 | """Check if summary output is valid.""" 74 | settings = get_settings( 75 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 76 | PANDOC_ARGS=PANDOC_ARGS, 77 | FORMATTED_FIELDS=FORMATTED_FIELDS, 78 | ) 79 | 80 | pandoc_reader = PandocReader(settings) 81 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_citation.md") 82 | _, metadata = pandoc_reader.read(source_path) 83 | 84 | self.assertEqual( 85 | ( 86 | "But this foundational principle of science has now been" 87 | " called into question by" 88 | ' ' 89 | "String Theory." 90 | ), 91 | str(metadata["summary"]), 92 | ) 93 | 94 | 95 | if __name__ == "__main__": 96 | unittest.main() 97 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pelican-pandoc-reader" 3 | version = "3.0.0" 4 | description = "Pelican plugin for converting Pandoc's Markdown variant to HTML." 
5 | authors = [{name = "Nandakumar Chandrasekhar", email = "navanitachora@gmail.com"}, {name = "Justin Mayer", email = "entroP@gmail.com"}] 6 | license = {text = "AGPL-3.0"} 7 | readme = "README.md" 8 | keywords = ["pelican", "plugin", "markdown", "pandoc"] 9 | classifiers = [ 10 | "Development Status :: 5 - Production/Stable", 11 | "Environment :: Console", 12 | "Framework :: Pelican", 13 | "Framework :: Pelican :: Plugins", 14 | "Intended Audience :: End Users/Desktop", 15 | "License :: OSI Approved :: GNU Affero General Public License v3", 16 | "Operating System :: OS Independent", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | "Programming Language :: Python :: 3.13", 23 | "Topic :: Internet :: WWW/HTTP", 24 | "Topic :: Software Development :: Libraries :: Python Modules", 25 | ] 26 | requires-python = "~=3.9" 27 | dependencies = [ 28 | "pelican>=4.5", 29 | "beautifulsoup4>=4.9.3", 30 | "docutils>=0.21.2", 31 | "markdown>=3.6", 32 | "pyyaml>=6.0", 33 | "ruamel.yaml>=0.17.32", 34 | ] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/pelican-plugins/pandoc-reader" 38 | "Issue Tracker" = "https://github.com/pelican-plugins/pandoc-reader/issues" 39 | "Changelog" = "https://github.com/pelican-plugins/pandoc-reader/blob/main/CHANGELOG.md" 40 | "Funding" = "https://donate.getpelican.com/" 41 | 42 | [dependency-groups] 43 | lint = [ 44 | "invoke>=2.2", 45 | "ruff>=0.12.0,<1.0.0", 46 | ] 47 | test = [ 48 | "invoke>=2.2", 49 | "markdown>=3.6", 50 | "pytest>=7.0", 51 | "pytest-cov>=4.0", 52 | "pytest-sugar>=1.0", 53 | ] 54 | 55 | [tool.pdm.build] 56 | source-includes = [ 57 | "CHANGELOG.md", 58 | "CONTRIBUTING.md", 59 | ] 60 | includes = ["pelican/"] 61 | excludes = ["**/.DS_Store", "**/test_data/**", "tasks.py"] 62 | 63 | [tool.autopub] 64 | project-name = "Pandoc Reader" 
65 | git-username = "botpub" 66 | git-email = "52496925+botpub@users.noreply.github.com" 67 | append-github-contributor = true 68 | 69 | [tool.ruff.lint] 70 | select = [ 71 | "B", # flake8-bugbear 72 | "BLE", # flake8-blind-except 73 | "C4", # flake8-comprehensions 74 | "D", # pydocstyle 75 | "E", # pycodestyle 76 | "F", # pyflakes 77 | "I", # isort 78 | "ICN", # flake8-import-conventions 79 | "ISC", # flake8-implicit-str-concat 80 | "PGH", # pygrep-hooks 81 | "PL", # pylint 82 | "RET", # flake8-return 83 | "RUF", # ruff-specific rules 84 | "SIM", # flake8-simplify 85 | "T10", # flake8-debugger 86 | "T20", # flake8-print 87 | "TID", # flake8-tidy-imports 88 | "TRY", # tryceratops 89 | "UP", # pyupgrade 90 | "W", # pycodestyle 91 | "YTT", # flake8-2020 92 | ] 93 | 94 | ignore = [ 95 | "D100", # missing docstring in public module 96 | "D101", # missing docstring in public class 97 | "D104", # missing docstring in public package 98 | "D203", # blank line before class docstring 99 | "D213", # multi-line docstring summary should start at the second line 100 | "ISC001", # disabled so `ruff format` works without warning 101 | "PLW2901", # for loop variable overwritten by assignment target 102 | "RUF005", # use iterable unpacking instead of concatenation 103 | "SIM102", # nested `if` statements 104 | "TRY002", # create your own exception class 105 | "TRY003", # long messages outside the exception class 106 | ] 107 | 108 | allowed-confusables = ["’"] 109 | 110 | [tool.ruff.lint.isort] 111 | combine-as-imports = true 112 | force-sort-within-sections = true 113 | known-first-party = ["pelican"] 114 | 115 | [build-system] 116 | requires = ["pdm-backend"] 117 | build-backend = "pdm.backend" 118 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from inspect import cleandoc 2 | import logging 3 | import os 4 | from pathlib import Path 5 | from shutil 
import which 6 | 7 | from invoke import task 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | PKG_NAME = "pandoc_reader" 12 | PKG_PATH = Path(f"pelican/plugins/{PKG_NAME}") 13 | 14 | ACTIVE_VENV = os.environ.get("VIRTUAL_ENV", None) 15 | VENV_HOME = Path(os.environ.get("WORKON_HOME", "~/.local/share/virtualenvs")) 16 | VENV_PATH = Path(ACTIVE_VENV) if ACTIVE_VENV else (VENV_HOME.expanduser() / PKG_NAME) 17 | VENV = str(VENV_PATH.expanduser()) 18 | BIN_DIR = "bin" if os.name != "nt" else "Scripts" 19 | VENV_BIN = Path(VENV) / Path(BIN_DIR) 20 | 21 | TOOLS = ("cruft", "pdm", "pre-commit") 22 | PDM = which("pdm") if which("pdm") else (VENV_BIN / "pdm") 23 | CMD_PREFIX = f"{VENV_BIN}/" if ACTIVE_VENV else f"{PDM} run " 24 | CRUFT = which("cruft") if which("cruft") else f"{CMD_PREFIX}cruft" 25 | PRECOMMIT = which("pre-commit") if which("pre-commit") else f"{CMD_PREFIX}pre-commit" 26 | PTY = os.name != "nt" 27 | 28 | 29 | @task 30 | def tests(c, deprecations=False): 31 | """Run the test suite, optionally with `--deprecations`.""" 32 | deprecations_flag = "" if deprecations else "-W ignore::DeprecationWarning" 33 | c.run(f"{CMD_PREFIX}pytest {deprecations_flag}", pty=PTY) 34 | 35 | 36 | @task 37 | def format(c, check=False, diff=False): 38 | """Run Ruff's auto-formatter, optionally with `--check` or `--diff`.""" 39 | check_flag, diff_flag = "", "" 40 | if check: 41 | check_flag = "--check" 42 | if diff: 43 | diff_flag = "--diff" 44 | c.run( 45 | f"{CMD_PREFIX}ruff format {check_flag} {diff_flag} {PKG_PATH} tasks.py", pty=PTY 46 | ) 47 | 48 | 49 | @task 50 | def ruff(c, concise=False, fix=False, diff=False): 51 | """Run Ruff to ensure code meets project standards.""" 52 | concise_flag, fix_flag, diff_flag = "", "", "" 53 | if concise: 54 | concise_flag = "--output-format=concise" 55 | if fix: 56 | fix_flag = "--fix" 57 | if diff: 58 | diff_flag = "--diff" 59 | c.run(f"{CMD_PREFIX}ruff check {concise_flag} {diff_flag} {fix_flag} .", pty=PTY) 60 | 61 | 62 | @task 63 | 
def lint(c, concise=False, fix=False, diff=False): 64 | """Check code style via linting tools.""" 65 | ruff(c, concise=concise, fix=fix, diff=diff) 66 | format(c, check=(not fix), diff=diff) 67 | 68 | 69 | @task 70 | def tools(c): 71 | """Install development tools in the virtual environment if not already on PATH.""" 72 | for tool in TOOLS: 73 | if not which(tool): 74 | logger.info(f"** Installing {tool} **") 75 | c.run(f"{CMD_PREFIX}pip install {tool}") 76 | 77 | 78 | @task 79 | def precommit(c): 80 | """Install pre-commit hooks to .git/hooks/pre-commit.""" 81 | logger.info("** Installing pre-commit hooks **") 82 | c.run(f"{PRECOMMIT} install") 83 | 84 | 85 | @task 86 | def update(c, check=False): 87 | """Apply upstream plugin template changes to this project.""" 88 | if check: 89 | logger.info("** Checking for upstream template changes **") 90 | c.run(f"{CRUFT} check", pty=PTY) 91 | else: 92 | logger.info("** Updating project from upstream template **") 93 | c.run(f"{CRUFT} update", pty=PTY) 94 | 95 | 96 | @task 97 | def setup(c): 98 | """Set up the development environment.""" 99 | if which("pdm") or ACTIVE_VENV: 100 | tools(c) 101 | c.run(f"{CMD_PREFIX}python -m pip install --upgrade pip", pty=PTY) 102 | c.run(f"{PDM} update --dev", pty=PTY) 103 | precommit(c) 104 | logger.info("\nDevelopment environment should now be set up and ready!\n") 105 | else: 106 | error_message = """ 107 | PDM is not installed, and there is no active virtual environment available. 108 | You can either manually create and activate a virtual environment, or you can 109 | install PDM via: 110 | 111 | curl -sSL https://raw.githubusercontent.com/pdm-project/pdm/main/install-pdm.py | python3 - 112 | 113 | Once you have taken one of the above two steps, run `invoke setup` again. 
114 | """ # noqa: E501 115 | raise SystemExit(cleandoc(error_message)) 116 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_invalid_metadata.py: -------------------------------------------------------------------------------- 1 | """Test using invalid metadata with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.tests.support import get_settings 8 | 9 | DIR_PATH = os.path.dirname(__file__) 10 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 11 | 12 | # These settings will be set in pelicanconf.py by plugin users. 13 | # Appending --wrap=none so that the rendered HTML5 does not contain newlines (\n), 14 | # which would cause the tests to fail. 15 | # See https://pandoc.org/MANUAL.html#general-writer-options 16 | PANDOC_ARGS = ["--mathjax", "--wrap=none"] 17 | PANDOC_EXTENSIONS = ["+smart"] 18 | 19 | 20 | class TestInvalidMetadata(unittest.TestCase): 21 | """Invalid metadata test cases.""" 22 | 23 | def test_empty_file(self): 24 | """Check if an exception is raised if the file is empty.""" 25 | settings = get_settings( 26 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 27 | ) 28 | 29 | pandoc_reader = PandocReader(settings) 30 | source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") 31 | 32 | # If the file is empty retrieval of metadata should fail 33 | with self.assertRaises(Exception) as context_manager: 34 | pandoc_reader.read(source_path) 35 | 36 | message = str(context_manager.exception) 37 | self.assertEqual("Could not find metadata. 
File is empty.", message) 38 | 39 | def test_non_empty_file_no_metadata(self): 40 | """Check if an exception is raised if the file has no metadata.""" 41 | settings = get_settings( 42 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 43 | ) 44 | 45 | pandoc_reader = PandocReader(settings) 46 | source_path = os.path.join(TEST_CONTENT_PATH, "no_metadata.md") 47 | 48 | # If the file is not empty but has no metadata it should fail 49 | with self.assertRaises(Exception) as context_manager: 50 | pandoc_reader.read(source_path) 51 | 52 | message = str(context_manager.exception) 53 | self.assertEqual("Could not find metadata header '---'.", message) 54 | 55 | def test_metadata_start_with_leading_spaces(self): 56 | """Check that a metadata block start with leading spaces throws an exception.""" 57 | settings = get_settings( 58 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 59 | ) 60 | 61 | pandoc_reader = PandocReader(settings) 62 | source_path = os.path.join( 63 | TEST_CONTENT_PATH, "metadata_start_with_leading_spaces.md" 64 | ) 65 | 66 | # Metadata starting --- should not have leading spaces 67 | with self.assertRaises(Exception) as context_manager: 68 | pandoc_reader.read(source_path) 69 | 70 | message = str(context_manager.exception) 71 | self.assertEqual("Could not find metadata header '---'.", message) 72 | 73 | def test_metadata_block_end_with_leading_spaces(self): 74 | """Check that a metadata block end with leading spaces throws an exception.""" 75 | settings = get_settings( 76 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 77 | ) 78 | 79 | pandoc_reader = PandocReader(settings) 80 | source_path = os.path.join( 81 | TEST_CONTENT_PATH, "metadata_end_with_leading_spaces.md" 82 | ) 83 | 84 | # Metadata end --- or ... 
should not have leading spaces 85 | with self.assertRaises(Exception) as context_manager: 86 | pandoc_reader.read(source_path) 87 | 88 | message = str(context_manager.exception) 89 | self.assertEqual("Could not find end of metadata block.", message) 90 | 91 | def test_no_metadata_block_end(self): 92 | """Check that an exception is thrown if the metadata block does not end.""" 93 | settings = get_settings( 94 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 95 | ) 96 | 97 | pandoc_reader = PandocReader(settings) 98 | source_path = os.path.join(TEST_CONTENT_PATH, "no_metadata_end.md") 99 | 100 | # Metadata blocks should end with '---' or '...'; if not, it should fail 101 | with self.assertRaises(Exception) as context_manager: 102 | pandoc_reader.read(source_path) 103 | 104 | message = str(context_manager.exception) 105 | self.assertEqual("Could not find end of metadata block.", message) 106 | 107 | def test_invalid_metadata_block_end(self): 108 | """Check that an exception is thrown if the metadata block end is wrong.""" 109 | settings = get_settings( 110 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 111 | ) 112 | 113 | pandoc_reader = PandocReader(settings) 114 | source_path = os.path.join(TEST_CONTENT_PATH, "wrong_metadata_end.md") 115 | 116 | # Metadata blocks should end with '---' or '...' 
if not it should fail 117 | with self.assertRaises(Exception) as context_manager: 118 | pandoc_reader.read(source_path) 119 | 120 | message = str(context_manager.exception) 121 | self.assertEqual("Could not find end of metadata block.", message) 122 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_valid_args.py: -------------------------------------------------------------------------------- 1 | """Test using valid arguments and extensions with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.plugins.pandoc_reader.test.html.expected_html import ( 8 | HTML_MATHJAX, 9 | HTML_RAW_CONVERSION, 10 | HTML_TOC, 11 | HTML_VALID_TEXT, 12 | HTML_WITH_HEADINGS, 13 | ) 14 | from pelican.tests.support import get_settings 15 | 16 | DIR_PATH = os.path.dirname(__file__) 17 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 18 | 19 | # These settings will be set in pelicanconf.py by plugin users. 20 | # Appending --wrap=None so that rendered HTML5 does not have new lines (\n) 21 | # which causes tests to fail. 
22 | # See https://pandoc.org/MANUAL.html#general-writer-options 23 | PANDOC_ARGS = ["--mathjax", "--wrap=none"] 24 | PANDOC_EXTENSIONS = ["+smart"] 25 | 26 | 27 | class TestValidCasesWithArguments(unittest.TestCase): 28 | """Valid test cases using Pandoc arguments and extensions.""" 29 | 30 | def test_valid_file(self): 31 | """Check if we get the expected output for valid input.""" 32 | settings = get_settings( 33 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 34 | ) 35 | 36 | pandoc_reader = PandocReader(settings) 37 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 38 | output, metadata = pandoc_reader.read(source_path) 39 | 40 | self.assertEqual(HTML_VALID_TEXT, output) 41 | self.assertEqual("Valid Content", str(metadata["title"])) 42 | self.assertEqual("My Author", str(metadata["author"])) 43 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 44 | 45 | def test_mathjax_content(self): 46 | """Check if mathematics is rendered correctly.""" 47 | settings = get_settings( 48 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 49 | ) 50 | 51 | pandoc_reader = PandocReader(settings) 52 | source_path = os.path.join(TEST_CONTENT_PATH, "mathjax_content.md") 53 | output, metadata = pandoc_reader.read(source_path) 54 | 55 | self.assertEqual(HTML_MATHJAX, output) 56 | self.assertEqual("MathJax Content", str(metadata["title"])) 57 | self.assertEqual("My Author", str(metadata["author"])) 58 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 59 | 60 | def test_encoded_to_raw_conversion(self): 61 | """Check if raw paths are left untouched in output returned.""" 62 | settings = get_settings( 63 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS 64 | ) 65 | 66 | pandoc_reader = PandocReader(settings) 67 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_raw_paths.md") 68 | output, metadata = pandoc_reader.read(source_path) 69 | 70 | # Setting this so that assert is able to 
execute the difference 71 | self.maxDiff = None # pylint: disable=invalid-name 72 | 73 | self.assertEqual(HTML_RAW_CONVERSION, output) 74 | self.assertEqual( 75 | "Valid Content with Fictitious Raw Paths", str(metadata["title"]) 76 | ) 77 | self.assertEqual("My Author", str(metadata["author"])) 78 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 79 | 80 | def test_valid_content_with_toc_1(self): 81 | """Check if expected output is returned with --toc argument.""" 82 | settings = get_settings( 83 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 84 | PANDOC_ARGS=[*PANDOC_ARGS, "--toc"], 85 | ) 86 | 87 | pandoc_reader = PandocReader(settings) 88 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_toc.md") 89 | output, metadata = pandoc_reader.read(source_path) 90 | 91 | # Setting this so that assert is able to execute the difference 92 | self.maxDiff = None # pylint: disable=invalid-name 93 | 94 | self.assertEqual(HTML_WITH_HEADINGS, output) 95 | self.assertEqual("Valid Content with Table of Contents", str(metadata["title"])) 96 | self.assertEqual("My Author", str(metadata["author"])) 97 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 98 | self.assertEqual(HTML_TOC, str(metadata["toc"])) 99 | 100 | def test_valid_content_with_toc_2(self): 101 | """Check if expected output is returned with --table-of-contents argument.""" 102 | settings = get_settings( 103 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 104 | PANDOC_ARGS=[*PANDOC_ARGS, "--table-of-contents"], 105 | ) 106 | 107 | pandoc_reader = PandocReader(settings) 108 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_toc.md") 109 | output, metadata = pandoc_reader.read(source_path) 110 | 111 | # Setting this so that assert is able to execute the difference 112 | self.maxDiff = None # pylint: disable=invalid-name 113 | 114 | self.assertEqual(HTML_WITH_HEADINGS, output) 115 | self.assertEqual("Valid Content with Table of Contents", str(metadata["title"])) 116 | 
self.assertEqual("My Author", str(metadata["author"])) 117 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 118 | self.assertEqual(HTML_TOC, str(metadata["toc"])) 119 | 120 | 121 | if __name__ == "__main__": 122 | unittest.main() 123 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_valid_arg_citations.py: -------------------------------------------------------------------------------- 1 | """Test using valid arguments and citations with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.plugins.pandoc_reader.test.html.expected_html import ( 8 | HTML_CITATION_TOC, 9 | HTML_WITH_CITATIONS, 10 | ) 11 | from pelican.tests.support import get_settings 12 | 13 | DIR_PATH = os.path.dirname(__file__) 14 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 15 | TEST_DEFAULTS_FILES_PATH = os.path.abspath(os.path.join(DIR_PATH, "defaults_files")) 16 | 17 | # These settings will be set in pelicanconf.py by plugin users. 18 | # Appending --wrap=none so that the rendered HTML5 does not contain newlines (\n), 19 | # which would cause the tests to fail. 
20 | # See https://pandoc.org/MANUAL.html#general-writer-options 21 | PANDOC_ARGS = ["--mathjax", "--wrap=none"] 22 | PANDOC_EXTENSIONS = ["+smart"] 23 | 24 | 25 | class TestValidCaseWithArgumentsAndCitations(unittest.TestCase): 26 | """Test cases with valid arguments and citations.""" 27 | 28 | def test_citations_1(self): 29 | """Check if expected output and citations is returned with -C argument.""" 30 | settings = get_settings( 31 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 32 | PANDOC_ARGS=PANDOC_ARGS 33 | + [ 34 | "-C", 35 | "--csl=https://www.zotero.org/styles/ieee-with-url", 36 | "--metadata=link-citations:false", 37 | "--metadata=reference-section-title:References", 38 | ], 39 | ) 40 | 41 | pandoc_reader = PandocReader(settings) 42 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_citation.md") 43 | output, metadata = pandoc_reader.read(source_path) 44 | 45 | # Setting this so that assert is able to execute the difference 46 | self.maxDiff = None # pylint: disable=invalid-name 47 | 48 | self.assertEqual(HTML_WITH_CITATIONS, output) 49 | self.assertEqual("Valid Content With Citation", str(metadata["title"])) 50 | self.assertEqual("My Author", str(metadata["author"])) 51 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 52 | 53 | def test_citations_2(self): 54 | """Check if expected output and citations is returned with --citeproc.""" 55 | settings = get_settings( 56 | PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, 57 | PANDOC_ARGS=PANDOC_ARGS 58 | + [ 59 | "--citeproc", 60 | "--csl=https://www.zotero.org/styles/ieee-with-url", 61 | "--metadata=link-citations:false", 62 | "--metadata=reference-section-title:References", 63 | ], 64 | ) 65 | 66 | pandoc_reader = PandocReader(settings) 67 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_citation.md") 68 | output, metadata = pandoc_reader.read(source_path) 69 | 70 | # Setting this so that assert is able to execute the difference 71 | self.maxDiff = None # pylint: 
disable=invalid-name 72 | 73 | self.assertEqual(HTML_WITH_CITATIONS, output) 74 | self.assertEqual("Valid Content With Citation", str(metadata["title"])) 75 | self.assertEqual("My Author", str(metadata["author"])) 76 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 77 | 78 | def test_citations_and_toc_with_valid_defaults(self): 79 | """Check if expected output, citations and table of contents is returned.""" 80 | pandoc_defaults_files = [ 81 | os.path.join( 82 | TEST_DEFAULTS_FILES_PATH, 83 | "valid_defaults_with_toc_and_citations.yaml", 84 | ) 85 | ] 86 | 87 | settings = get_settings( 88 | PANDOC_DEFAULTS_FILES=pandoc_defaults_files, 89 | ) 90 | pandoc_reader = PandocReader(settings) 91 | 92 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_citation.md") 93 | output, metadata = pandoc_reader.read(source_path) 94 | self.maxDiff = None # pylint: disable=invalid-name 95 | 96 | self.assertEqual(HTML_WITH_CITATIONS, output) 97 | self.assertEqual("Valid Content With Citation", str(metadata["title"])) 98 | self.assertEqual("My Author", str(metadata["author"])) 99 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 100 | self.assertEqual(HTML_CITATION_TOC, str(metadata["toc"])) 101 | 102 | def test_citations_and_with_citeproc_filter(self): 103 | """Check if expected output, citations are returned using citeproc filter.""" 104 | pandoc_defaults_files = [ 105 | os.path.join( 106 | TEST_DEFAULTS_FILES_PATH, 107 | "valid_defaults_with_citeproc_filter.yaml", 108 | ) 109 | ] 110 | 111 | settings = get_settings( 112 | PANDOC_DEFAULTS_FILES=pandoc_defaults_files, 113 | ) 114 | pandoc_reader = PandocReader(settings) 115 | 116 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content_with_citation.md") 117 | output, metadata = pandoc_reader.read(source_path) 118 | self.maxDiff = None # pylint: disable=invalid-name 119 | 120 | self.assertEqual(HTML_WITH_CITATIONS, output) 121 | self.assertEqual("Valid Content With Citation", 
str(metadata["title"])) 122 | self.assertEqual("My Author", str(metadata["author"])) 123 | self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) 124 | 125 | 126 | if __name__ == "__main__": 127 | unittest.main() 128 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_invalid_defaults_files.py: -------------------------------------------------------------------------------- 1 | """Test using invalid defaults files with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.tests.support import get_settings 8 | 9 | DIR_PATH = os.path.dirname(__file__) 10 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 11 | TEST_DEFAULTS_FILES_PATH = os.path.abspath(os.path.join(DIR_PATH, "defaults_files")) 12 | 13 | 14 | class TestInvalidCasesWithDefaultsFiles(unittest.TestCase): 15 | """Invalid test cases using invalid defaults files.""" 16 | 17 | def test_invalid_standalone(self): 18 | """Check if an exception is raised if standalone is true.""" 19 | pandoc_defaults_files = [ 20 | os.path.join(TEST_DEFAULTS_FILES_PATH, "standalone_true.yaml") 21 | ] 22 | 23 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 24 | 25 | pandoc_reader = PandocReader(settings) 26 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 27 | 28 | with self.assertRaises(ValueError) as context_manager: 29 | pandoc_reader.read(source_path) 30 | 31 | message = str(context_manager.exception) 32 | self.assertEqual("The default standalone should be set to false.", message) 33 | 34 | def test_invalid_self_contained(self): 35 | """Check if an exception is raised if self-contained is true.""" 36 | pandoc_defaults_files = [ 37 | os.path.join(TEST_DEFAULTS_FILES_PATH, "self_contained_true.yaml") 38 | ] 39 | 40 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 41 | 42 | 
pandoc_reader = PandocReader(settings) 43 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 44 | 45 | with self.assertRaises(ValueError) as context_manager: 46 | pandoc_reader.read(source_path) 47 | 48 | message = str(context_manager.exception) 49 | self.assertEqual("The default self-contained should be set to false.", message) 50 | 51 | def test_no_input_format(self): 52 | """Check if an exception is raised if no input format is specified.""" 53 | pandoc_defaults_files = [ 54 | os.path.join(TEST_DEFAULTS_FILES_PATH, "no_input_format.yaml") 55 | ] 56 | 57 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 58 | 59 | pandoc_reader = PandocReader(settings) 60 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 61 | 62 | with self.assertRaises(ValueError) as context_manager: 63 | pandoc_reader.read(source_path) 64 | 65 | message = str(context_manager.exception) 66 | self.assertEqual("No input format specified.", message) 67 | 68 | def test_invalid_reader_input_format(self): 69 | """Check if an exception is raised if reader input format is invalid.""" 70 | pandoc_defaults_files = [ 71 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_reader_input_format.yaml") 72 | ] 73 | 74 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 75 | 76 | pandoc_reader = PandocReader(settings) 77 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 78 | 79 | with self.assertRaises(ValueError) as context_manager: 80 | pandoc_reader.read(source_path) 81 | 82 | message = str(context_manager.exception) 83 | self.assertEqual("Input type has to be a Markdown variant.", message) 84 | 85 | def test_invalid_from_input_format(self): 86 | """Check if an exception is raised if from input format is invalid.""" 87 | pandoc_defaults_files = [ 88 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_from_input_format.yaml") 89 | ] 90 | 91 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 92 | 93 | 
pandoc_reader = PandocReader(settings) 94 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 95 | 96 | with self.assertRaises(ValueError) as context_manager: 97 | pandoc_reader.read(source_path) 98 | 99 | message = str(context_manager.exception) 100 | self.assertEqual("Input type has to be a Markdown variant.", message) 101 | 102 | def test_from_reader_both_given(self): 103 | """Check if an exception is raised if from and reader are both given.""" 104 | pandoc_defaults_files = [ 105 | os.path.join(TEST_DEFAULTS_FILES_PATH, "from_reader_both_given.yaml") 106 | ] 107 | 108 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 109 | 110 | pandoc_reader = PandocReader(settings) 111 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 112 | 113 | with self.assertRaises(ValueError) as context_manager: 114 | pandoc_reader.read(source_path) 115 | 116 | message = str(context_manager.exception) 117 | self.assertEqual( 118 | ( 119 | "Specifying both from and reader is not supported." 120 | " Please specify just one." 121 | ), 122 | message, 123 | ) 124 | 125 | def test_to_writer_both_given(self): 126 | """Check if an exception is raised if to and writer are both given.""" 127 | pandoc_defaults_files = [ 128 | os.path.join(TEST_DEFAULTS_FILES_PATH, "to_writer_both_given.yaml") 129 | ] 130 | 131 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 132 | 133 | pandoc_reader = PandocReader(settings) 134 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 135 | 136 | with self.assertRaises(ValueError) as context_manager: 137 | pandoc_reader.read(source_path) 138 | 139 | message = str(context_manager.exception) 140 | self.assertEqual( 141 | ( 142 | "Specifying both to and writer is not supported." 143 | " Please specify just one." 
144 | ), 145 | message, 146 | ) 147 | 148 | def test_no_output_format(self): 149 | """Check if an exception is raised if no output format is specified.""" 150 | pandoc_defaults_files = [ 151 | os.path.join(TEST_DEFAULTS_FILES_PATH, "no_output_format.yaml") 152 | ] 153 | 154 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 155 | 156 | pandoc_reader = PandocReader(settings) 157 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 158 | 159 | with self.assertRaises(ValueError) as context_manager: 160 | pandoc_reader.read(source_path) 161 | 162 | message = str(context_manager.exception) 163 | self.assertEqual("Output format type must be either html or html5.", message) 164 | 165 | def test_invalid_writer_output_format(self): 166 | """Check if an exception is raised if writer output format is invalid.""" 167 | pandoc_defaults_files = [ 168 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_writer_output_format.yaml") 169 | ] 170 | 171 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 172 | 173 | pandoc_reader = PandocReader(settings) 174 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 175 | 176 | with self.assertRaises(ValueError) as context_manager: 177 | pandoc_reader.read(source_path) 178 | 179 | message = str(context_manager.exception) 180 | self.assertEqual("Output format type must be either html or html5.", message) 181 | 182 | def test_invalid_to_output_format(self): 183 | """Check if an exception is raised if to output format is invalid.""" 184 | pandoc_defaults_files = [ 185 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_to_output_format.yaml") 186 | ] 187 | 188 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 189 | 190 | pandoc_reader = PandocReader(settings) 191 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 192 | 193 | with self.assertRaises(ValueError) as context_manager: 194 | pandoc_reader.read(source_path) 195 | 196 | message = 
str(context_manager.exception) 197 | self.assertEqual("Output format type must be either html or html5.", message) 198 | 199 | 200 | if __name__ == "__main__": 201 | unittest.main() 202 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/html/expected_html.py: -------------------------------------------------------------------------------- 1 | """Expected HTML output for various types of text.""" 2 | 3 | HTML_WITH_CITATIONS = "\n".join( 4 | [ 5 | '

String Theory

', 6 | ( 7 | "

But this foundational principle of science has now " 8 | "been called into question by " 9 | '' 10 | "String Theory, " 11 | "which is a relative newcomer to theoretical physics, but one that has " 12 | "captured the common imagination, judging by the popular explanations " 13 | "that abound on the Web " 14 | '' 15 | "[1], [2], [3]" 16 | ". And whether string theory is or is not science, Popper " 17 | "notwithstanding, is an issue that is still up for debate " 18 | '' 20 | "[4], [5], [6], [7].

" 21 | ), 22 | '

References

', 23 | ( 24 | '
' 26 | ), 27 | '
', 28 | ( 29 | '
[1]
' 30 | "A. Mann, " 31 | "What Is String Theory? Mar. 20, 2019. " 32 | "Available: " 33 | '' 34 | "https://www.livescience.com/65033-what-is-string-theory.html. " 35 | "[Accessed: Nov. 12, 2020]
" 36 | ), 37 | "
", 38 | '
', 39 | ( 40 | '
[2]
' 41 | "C. Wood, What Is String Theory?. Reference article: " 42 | "A simplified explanation and brief history of string theory,” " 43 | 'Jul. 11, 2019. Available: https://www.space.com/17594-string-theory.html. ' 46 | "[Accessed: Nov. 12, 2020]
" 47 | ), 48 | "
", 49 | '
', 50 | ( 51 | '
[3]
' 52 | 'A. Z. Jones, The Basics of String Theory' 53 | "," 54 | '” Mar. 02, 2019. Available: https://www.thoughtco.com/' 56 | "what-is-string-theory-2699363. [Accessed: Nov. 12, 2020]
" 57 | ), 58 | "
", 59 | '
', 60 | ( 61 | '
[4]
' 62 | "E. Siegel, " 63 | "Why String Theory Is Not A Scientific Theory,” " 64 | "Dec. " 65 | '23, 2015. Available: https://www.forbes.com' 67 | "/sites/" 68 | "startswithabang/2015/12/23/why-string-theory-is-not-science/. " 69 | "[Accessed: " 70 | "Nov. 12, 2020]
" 71 | ), 72 | "
", 73 | '
', 74 | ( 75 | '
[5]
D. ' 76 | 'Castelvecchi, Feuding physicists turn to ' 77 | "philosophy " 78 | "for help. String theory is at the heart of a debate over the " 79 | "integrity of " 80 | "the scientific method itself,” Jan. 05, 2016. Available: " 81 | 'https://www.nature.com/news/feuding-physicists-turn-to-' 84 | "philosophy-" 85 | "for-help-1.19076. [Accessed: Nov. 12, 2020]
" 86 | ), 87 | "
", 88 | '
', 89 | ( 90 | '
[6]
R. ' 91 | 'A. Batista and J. Primack, Is String theory ' 92 | "falsifiable?. Can a theory that isn’t completely testable still be " 93 | "useful " 94 | 'to physics?” Available: https://metafact.io/factchecks/30-is-string-' 97 | "theory-" 98 | "falsifiable. [Accessed: Nov. 12, 2020]
" 99 | ), 100 | "
", 101 | '
', 102 | ( 103 | '
[7]
' 104 | "M. R. " 105 | 'Francis, Falsifiability and physics. ' 106 | "Can a theory that isn’t completely testable still be useful to physics?”" 107 | " " 108 | 'Apr. 23, 2019. Available: https://www.scientificamerican.com/article/' 111 | "is-string-" 112 | "theory-science/. [Accessed: Nov. 12, 2020]
" 113 | ), 114 | "
", 115 | "
", 116 | ] 117 | ) 118 | 119 | HTML_CITATION_TOC = ( 120 | '" 125 | ) 126 | 127 | HTML_VALID_TEXT = ( 128 | "

This is some valid content that should pass." 129 | " If it does not pass we" 130 | " will know something is wrong.

" 131 | ) 132 | 133 | HTML_MATHJAX = ( 134 | '

\\[\n' 135 | "e^{i\\theta} = \\cos\\theta + i \\sin\\theta.\n" 136 | "\\]

" 137 | ) 138 | 139 | HTML_RAW_CONVERSION = ( 140 | "

This is some valid content that should pass." 141 | " If it does not pass we will know something is wrong.

\n" 142 | "

Our fictitious internal files are available" 143 | ' at:

\n' 144 | "

Our fictitious static files are available" 145 | ' at:

\n' 146 | "

Our fictitious attachments are available" 147 | ' at:

' 148 | ) 149 | 150 | HTML_WITH_HEADINGS = ( 151 | "

This is some valid content that should pass." 152 | " If it does not pass we will know something is wrong.

\n" 153 | '

First Heading

\n' 154 | "

This should be the first heading in my" 155 | " table of contents.

\n" 156 | '

Second Heading

\n' 157 | "

This should be the second heading in my" 158 | " table of contents.

\n" 159 | '

First Subheading

\n' 160 | "

This is a subsection that should be shown as such" 161 | " in the table of contents.

\n" 162 | '

Second Subheading

\n' 163 | "

This is another subsection that should be shown as" 164 | " such in the table of contents.

" 165 | ) 166 | 167 | HTML_TOC = ( 168 | '" 181 | ) 182 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/test/test_invalid_multiple_defaults_files.py: -------------------------------------------------------------------------------- 1 | """Test using multiple invalid default files with the pandoc-reader plugin.""" 2 | 3 | import os 4 | import unittest 5 | 6 | from pelican.plugins.pandoc_reader import PandocReader 7 | from pelican.tests.support import get_settings 8 | 9 | DIR_PATH = os.path.dirname(__file__) 10 | TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "markdown")) 11 | TEST_DEFAULTS_FILES_PATH = os.path.abspath(os.path.join(DIR_PATH, "defaults_files")) 12 | 13 | 14 | class TestInvalidCasesWithMultipleDefaultsFiles(unittest.TestCase): 15 | def test_multiple_defaults_files_with_duplicate_keys(self): 16 | """Check if an exceptions is raised when we specify duplicate keys.""" 17 | pandoc_defaults_files = [ 18 | os.path.join( 19 | TEST_DEFAULTS_FILES_PATH, "valid_first_defaults_file_dup_keys.yaml" 20 | ), 21 | os.path.join( 22 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_dup_keys.yaml" 23 | ), 24 | ] 25 | 26 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 27 | 28 | pandoc_reader = PandocReader(settings) 29 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 30 | 31 | with self.assertRaises(ValueError) as context_manager: 32 | pandoc_reader.read(source_path) 33 | 34 | message = str(context_manager.exception) 35 | self.assertEqual("Duplicate keys defined in multiple defaults files.", message) 36 | 37 | def test_multiple_defaults_files_with_multiple_writers(self): 38 | """Check if an exception is raised when we specify multiple writers.""" 39 | pandoc_defaults_files = [ 40 | os.path.join( 41 | TEST_DEFAULTS_FILES_PATH, "valid_first_defaults_file_writer.yaml" 42 | ), 43 | os.path.join( 44 | TEST_DEFAULTS_FILES_PATH, 
"valid_second_defaults_file_writer.yaml" 45 | ), 46 | ] 47 | 48 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 49 | 50 | pandoc_reader = PandocReader(settings) 51 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 52 | 53 | with self.assertRaises(ValueError) as context_manager: 54 | pandoc_reader.read(source_path) 55 | 56 | message = str(context_manager.exception) 57 | self.assertEqual( 58 | "Specifying both to and writer is not supported. Please specify just one.", 59 | message, 60 | ) 61 | 62 | def test_multiple_defaults_files_with_standalone(self): 63 | """Check if an exception is raised if standalone is true.""" 64 | pandoc_defaults_files = [ 65 | os.path.join(TEST_DEFAULTS_FILES_PATH, "standalone_true.yaml"), 66 | os.path.join( 67 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 68 | ), 69 | ] 70 | 71 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 72 | 73 | pandoc_reader = PandocReader(settings) 74 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 75 | 76 | with self.assertRaises(ValueError) as context_manager: 77 | pandoc_reader.read(source_path) 78 | 79 | message = str(context_manager.exception) 80 | self.assertEqual("The default standalone should be set to false.", message) 81 | 82 | def test_multiple_defaults_files_with_self_contained(self): 83 | """Check if an exception is raised if self-contained is true.""" 84 | pandoc_defaults_files = [ 85 | os.path.join(TEST_DEFAULTS_FILES_PATH, "self_contained_true.yaml"), 86 | os.path.join( 87 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 88 | ), 89 | ] 90 | 91 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 92 | 93 | pandoc_reader = PandocReader(settings) 94 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 95 | 96 | with self.assertRaises(ValueError) as context_manager: 97 | pandoc_reader.read(source_path) 98 | 99 | message = 
str(context_manager.exception) 100 | self.assertEqual("The default self-contained should be set to false.", message) 101 | 102 | def test_multiple_defaults_files_no_input_format(self): 103 | """Check if an exception is raised if no input format is specified.""" 104 | pandoc_defaults_files = [ 105 | os.path.join(TEST_DEFAULTS_FILES_PATH, "no_input_format.yaml"), 106 | os.path.join( 107 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 108 | ), 109 | ] 110 | 111 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 112 | 113 | pandoc_reader = PandocReader(settings) 114 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 115 | 116 | with self.assertRaises(ValueError) as context_manager: 117 | pandoc_reader.read(source_path) 118 | 119 | message = str(context_manager.exception) 120 | self.assertEqual("No input format specified.", message) 121 | 122 | def test_multiple_defaults_files_invalid_reader_input_format(self): 123 | """Check if an exception is raised if reader input format is invalid.""" 124 | pandoc_defaults_files = [ 125 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_reader_input_format.yaml"), 126 | os.path.join( 127 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 128 | ), 129 | ] 130 | 131 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 132 | 133 | pandoc_reader = PandocReader(settings) 134 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 135 | 136 | with self.assertRaises(ValueError) as context_manager: 137 | pandoc_reader.read(source_path) 138 | 139 | message = str(context_manager.exception) 140 | self.assertEqual("Input type has to be a Markdown variant.", message) 141 | 142 | def test_multiple_defaults_files_invalid_from_input_format(self): 143 | """Check if an exception is raised if from input format is invalid.""" 144 | pandoc_defaults_files = [ 145 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_from_input_format.yaml"), 146 | 
os.path.join( 147 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 148 | ), 149 | ] 150 | 151 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 152 | 153 | pandoc_reader = PandocReader(settings) 154 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 155 | 156 | with self.assertRaises(ValueError) as context_manager: 157 | pandoc_reader.read(source_path) 158 | 159 | message = str(context_manager.exception) 160 | self.assertEqual("Input type has to be a Markdown variant.", message) 161 | 162 | def test_multiple_defaults_files_with_multiple_readers(self): 163 | """Check if an exception is raised if from and reader are both given.""" 164 | pandoc_defaults_files = [ 165 | os.path.join( 166 | TEST_DEFAULTS_FILES_PATH, "valid_first_defaults_file_reader.yaml" 167 | ), 168 | os.path.join( 169 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_reader.yaml" 170 | ), 171 | ] 172 | 173 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 174 | 175 | pandoc_reader = PandocReader(settings) 176 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 177 | 178 | with self.assertRaises(ValueError) as context_manager: 179 | pandoc_reader.read(source_path) 180 | 181 | message = str(context_manager.exception) 182 | self.assertEqual( 183 | ( 184 | "Specifying both from and reader is not supported." 185 | " Please specify just one." 
186 | ), 187 | message, 188 | ) 189 | 190 | def test_multiple_defaults_files_no_output_format(self): 191 | """Check if an exception is raised if no output format is specified.""" 192 | pandoc_defaults_files = [ 193 | os.path.join(TEST_DEFAULTS_FILES_PATH, "no_output_format.yaml"), 194 | os.path.join( 195 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 196 | ), 197 | ] 198 | 199 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 200 | 201 | pandoc_reader = PandocReader(settings) 202 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 203 | 204 | with self.assertRaises(ValueError) as context_manager: 205 | pandoc_reader.read(source_path) 206 | 207 | message = str(context_manager.exception) 208 | self.assertEqual("Output format type must be either html or html5.", message) 209 | 210 | def test_multiple_defaults_files_invalid_writer_output_format(self): 211 | """Check if an exception is raised if writer output format is invalid.""" 212 | pandoc_defaults_files = [ 213 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_writer_output_format.yaml"), 214 | os.path.join( 215 | TEST_DEFAULTS_FILES_PATH, "valid_second_defaults_file_wrap_none.yaml" 216 | ), 217 | ] 218 | 219 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 220 | 221 | pandoc_reader = PandocReader(settings) 222 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 223 | 224 | with self.assertRaises(ValueError) as context_manager: 225 | pandoc_reader.read(source_path) 226 | 227 | message = str(context_manager.exception) 228 | self.assertEqual("Output format type must be either html or html5.", message) 229 | 230 | def test_multiple_defaults_invalid_to_output_format(self): 231 | """Check if an exception is raised if to output format is invalid.""" 232 | pandoc_defaults_files = [ 233 | os.path.join(TEST_DEFAULTS_FILES_PATH, "invalid_to_output_format.yaml"), 234 | os.path.join( 235 | TEST_DEFAULTS_FILES_PATH, 
"valid_second_defaults_file_wrap_none.yaml" 236 | ), 237 | ] 238 | 239 | settings = get_settings(PANDOC_DEFAULTS_FILES=pandoc_defaults_files) 240 | 241 | pandoc_reader = PandocReader(settings) 242 | source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") 243 | 244 | with self.assertRaises(ValueError) as context_manager: 245 | pandoc_reader.read(source_path) 246 | 247 | message = str(context_manager.exception) 248 | self.assertEqual("Output format type must be either html or html5.", message) 249 | 250 | 251 | if __name__ == "__main__": 252 | unittest.main() 253 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Pandoc Reader: A Plugin for Pelican 2 | =================================== 3 | 4 | [![Build Status](https://img.shields.io/github/actions/workflow/status/pelican-plugins/pandoc-reader/main.yml?branch=main)](https://github.com/pelican-plugins/pandoc-reader/actions) 5 | [![PyPI Version](https://img.shields.io/pypi/v/pelican-pandoc-reader)](https://pypi.org/project/pelican-pandoc-reader/) 6 | [![Downloads](https://img.shields.io/pypi/dm/pelican-pandoc-reader)](https://pypi.org/project/pelican-pandoc-reader/) 7 | ![License](https://img.shields.io/pypi/l/pelican-pandoc-reader?color=blue) 8 | 9 | Pandoc Reader is a [Pelican][] plugin that converts documents written in [Pandoc’s variant of Markdown][] into HTML. 10 | 11 | Requirements 12 | ------------ 13 | 14 | This plugin requires: 15 | 16 | * Python 3.9 or higher; and 17 | * Pandoc 2.11 or higher [[Pandoc installation instructions](https://pandoc.org/installing.html)]. 18 | 19 | By default, the plugin looks for a `pandoc` executable on your `PATH`. If you wish, [you may specify an alternative location for your `pandoc` executable](#customizing-the-path-for-the-pandoc-executable). 
20 | 21 | Installation 22 | ------------ 23 | 24 | This plugin can be installed via: 25 | 26 | ```bash 27 | python -m pip install pelican-pandoc-reader 28 | ``` 29 | 30 | As long as you have not explicitly added a `PLUGINS` setting to your Pelican settings file, then the newly-installed plugin should be automatically detected and enabled. Otherwise, you must add `pandoc_reader` to your existing `PLUGINS` list. For more information, please see the [How to Use Plugins](https://docs.getpelican.com/en/latest/plugins.html#how-to-use-plugins) documentation. 31 | 32 | Configuration 33 | ------------- 34 | 35 | This plugin converts [Pandoc’s variant of Markdown][] into HTML. Conversion from other Markdown variants is supported but requires the use of a [Pandoc defaults file](https://pandoc.org/MANUAL.html#defaults-files). 36 | 37 | Converting to output formats other than HTML is not supported. 38 | 39 | ### Specifying File Metadata 40 | 41 | The plugin expects all Markdown files to start with a YAML-formatted content header, as shown below. 42 | 43 | ```yaml 44 | --- 45 | title: "" 46 | author: "" 47 | date: "" 48 | --- 49 | ``` 50 | 51 | … or … 52 | 53 | ```yaml 54 | --- 55 | title: "" 56 | author: "" 57 | date: "" 58 | ... 59 | ``` 60 | 61 | > ⚠️ **Note:** The YAML-formatted header shown above is syntax specific to Pandoc for specifying content metadata. This is different from Pelican’s front-matter format. If you ever decide to stop using this plugin and switch to Pelican’s default Markdown handling, you may need to switch your front-matter metadata to [Python-Markdown’s Meta-Data format](https://python-markdown.github.io/extensions/meta_data/). 62 | 63 | If you have files that use Pelican's front matter format, there is a script written by [Joseph Reagle](https://github.com/reagle) available that [converts Pelican's front matter to Pandoc's YAML header format](https://gist.github.com/reagle/5bc44ba9e2f1b961d1aaca9179fb403b).
64 | 65 | For more information on Pandoc's YAML metadata block or Pelican's default metadata format please visit the links below: 66 | 67 | * [Pandoc’s YAML metadata blocks](https://pandoc.org/MANUAL.html#metadata-blocks) 68 | * [Pelican’s default metadata format](https://docs.getpelican.com/en/stable/content.html#file-metadata) 69 | 70 | ### Specifying Pandoc Options 71 | 72 | The plugin supports two **mutually exclusive** methods for passing options to Pandoc. 73 | 74 | #### Method One: Via Pelican Settings 75 | 76 | The first method involves configuring two settings in your Pelican settings file (e.g., `pelicanconf.py`): 77 | 78 | * `PANDOC_ARGS` 79 | * `PANDOC_EXTENSIONS` 80 | 81 | In the `PANDOC_ARGS` setting, you may specify any arguments supported by Pandoc, as shown below: 82 | 83 | ```python 84 | PANDOC_ARGS = [ 85 | "--mathjax", 86 | "--citeproc", 87 | ] 88 | ``` 89 | 90 | In the `PANDOC_EXTENSIONS` setting, you may enable/disable any number of the supported [Pandoc extensions](https://pandoc.org/MANUAL.html#extensions): 91 | 92 | ```python 93 | PANDOC_EXTENSIONS = [ 94 | "+footnotes", # Enabled extension 95 | "-pipe_tables", # Disabled extension 96 | ] 97 | ``` 98 | 99 | #### Method Two: Using Pandoc Defaults Files 100 | 101 | The second method involves specifying the path(s) to one or more [Pandoc defaults files][], with all your preferences written in YAML format. 102 | 103 | These paths should be set in your Pelican settings file by using the setting `PANDOC_DEFAULTS_FILES`. The paths may be absolute or relative, but relative paths are recommended as they are more portable. 
104 | 105 | ```python 106 | PANDOC_DEFAULTS_FILES = [ 107 | "<path/to/defaults_file_one.yaml>", 108 | "<path/to/defaults_file_two.yaml>", 109 | ] 110 | ``` 111 | 112 | Here is a minimal example of content that should be available in a Pandoc defaults file: 113 | 114 | ```yaml 115 | reader: markdown 116 | writer: html5 117 | ``` 118 | 119 | Using defaults files has the added benefit of allowing you to use other Markdown variants supported by Pandoc, such as [CommonMark](https://commonmark.org/) and [GitHub-Flavored Markdown](https://docs.github.com/en/free-pro-team@latest/github/writing-on-github). 120 | 121 | Please see [Pandoc defaults files][] for a more complete example. 122 | 123 | > ⚠️ **Note:** Neither method supports the `--standalone` or `--self-contained` arguments, which will yield an error if invoked. 124 | 125 | ### Generating a Table of Contents 126 | 127 | If you want to create a table of contents (ToC) for posts or pages, you may do so by specifying the `--toc` or `--table-of-contents` argument in the `PANDOC_ARGS` setting, as shown below: 128 | 129 | ```python 130 | PANDOC_ARGS = [ 131 | "--toc", 132 | ] 133 | ``` 134 | 135 | … or … 136 | 137 | ```python 138 | PANDOC_ARGS = [ 139 | "--table-of-contents", 140 | ] 141 | ``` 142 | 143 | To add a ToC via a Pandoc defaults file, use the syntax below: 144 | 145 | ```yaml 146 | table-of-contents: true 147 | ``` 148 | 149 | The table of contents will be available for use in templates using the `{{ article.toc }}` or `{{ page.toc }}` Jinja template variables. 150 | 151 | ### Enabling Citations 152 | 153 | You may enable citations by specifying the `-C` or `--citeproc` option.
154 | 155 | Set the `PANDOC_ARGS` and `PANDOC_EXTENSIONS` in your Pelican settings file as shown below: 156 | 157 | ```python 158 | PANDOC_ARGS = [ 159 | "--citeproc", 160 | ] 161 | ``` 162 | 163 | … or … 164 | 165 | ```python 166 | PANDOC_ARGS = [ 167 | "-C", 168 | ] 169 | ``` 170 | 171 | If you are using a Pandoc defaults file, you need the following as a bare minimum to enable citations: 172 | 173 | ```yaml 174 | reader: markdown 175 | writer: html5 176 | 177 | citeproc: true 178 | ``` 179 | 180 | Without these settings, citations will not be processed by the plugin. 181 | 182 | It is not necessary to specify the `+citations` extension since it is enabled by default. However, if you were to disable citations by specifying `-citations` in `PANDOC_EXTENSIONS` or by setting `reader: markdown-citations` in your defaults file, citations will **not** work. 183 | 184 | You may write your bibliography in any format supported by Pandoc with the appropriate extensions specified. However, you **must** name the bibliography file the same as your post. 185 | 186 | For example, a post with the file name `my-post.md` should have a bibliography file called `my-post.bib`, `my-post.json`, `my-post.yaml` or `my-post.bibtex` in the same directory as your post, or in a subdirectory of the directory that your blog resides in. Failure to do so will prevent the references from being picked up. 187 | 188 | #### Known Issues with Citations 189 | 190 | If enabling citations with a specific style, you need to specify a CSL (Citation Style Language) file, available from the [Zotero Style Repository](https://www.zotero.org/styles). 
For example, if you are using `ieee-with-url` style file, it may be specified in your Pelican settings file, as shown below: 191 | 192 | ```python 193 | PANDOC_ARGS = [ 194 | "--csl=https://www.zotero.org/styles/ieee-with-url", 195 | ] 196 | ``` 197 | 198 | Or in a Pandoc defaults file: 199 | 200 | ```yaml 201 | csl: "https://www.zotero.org/styles/ieee-with-url" 202 | ``` 203 | 204 | Specifying a *remote* (that is, not local) CSL file as shown above dramatically increases the time taken to process Markdown content. To improve processing speed, it is _highly_ recommended that you use a local copy of the CSL file downloaded from Zotero. 205 | 206 | You may then reference it in your Pelican settings file as shown below: 207 | 208 | ```python 209 | PANDOC_ARGS = [ 210 | "--csl=path/to/file/ieee-with-url.csl", 211 | ] 212 | ``` 213 | 214 | Or in a Pandoc defaults file: 215 | 216 | ```yaml 217 | csl: "path/to/file/ieee-with-url.csl" 218 | ``` 219 | 220 | ### Calculating and Displaying Reading Times 221 | 222 | This plugin may be used to calculate the estimated reading time of articles and pages by setting `CALCULATE_READING_TIME` to `True` in your Pelican settings file: 223 | 224 | ```python 225 | CALCULATE_READING_TIME = True 226 | ``` 227 | 228 | You may display the estimated reading time using the `{{ article.reading_time }}` or `{{ page.reading_time }}` template variables. The unit of time will be displayed as “minute” for reading times less than or equal to one minute, or “minutes” for those greater than one minute. 229 | 230 | The reading time is calculated by dividing the number of words by the reading speed, which is the average number of words read in a minute. 
231 | 232 | The default value for reading speed is set to 200 words per minute, but may be customized by setting `READING_SPEED` to the desired words per minute value in your Pelican settings file: 233 | 234 | ```python 235 | READING_SPEED = <words-per-minute> 236 | ``` 237 | 238 | The number of words in a document is calculated using the [wordcount Lua Filter](https://github.com/pandoc/lua-filters/tree/master/wordcount). 239 | 240 | ### Customizing the Path for the `pandoc` Executable 241 | 242 | If your `pandoc` executable does not reside on your `PATH`, set the `PANDOC_EXECUTABLE_PATH` in your Pelican settings file to the absolute path of where your `pandoc` resides as shown below: 243 | 244 | ```python 245 | PANDOC_EXECUTABLE_PATH = "/path/to/my/pandoc" 246 | ``` 247 | 248 | This setting is useful in cases where the `pandoc` executable from your hosting provider is not recent enough, and you need to install a version of Pandoc compatible with this plugin in a non-standard location. 249 | 250 | Contributing 251 | ------------ 252 | 253 | Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. 254 | 255 | To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. 256 | 257 | Special thanks to [Justin Mayer](https://justinmayer.com), [Erwin Janssen](https://github.com/ErwinJanssen), [Joseph Reagle](https://github.com/reagle) and [Deniz Turgut](https://github.com/avaris) for their improvements and feedback on this plugin. 258 | 259 | [existing issues]: https://github.com/pelican-plugins/pandoc-reader/issues 260 | [Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html 261 | 262 | License 263 | ------- 264 | 265 | This project is licensed under the AGPL-3.0 license.
266 | 267 | [Pelican]: https://getpelican.com 268 | [Pandoc’s variant of Markdown]: https://pandoc.org/MANUAL.html#pandocs-markdown 269 | [Pandoc defaults files]: https://pandoc.org/MANUAL.html#default-files 270 | -------------------------------------------------------------------------------- /pelican/plugins/pandoc_reader/pandoc_reader.py: -------------------------------------------------------------------------------- 1 | """Reader that processes Pandoc Markdown and returns HTML5.""" 2 | 3 | import json 4 | import math 5 | import os 6 | import shutil 7 | import subprocess 8 | 9 | import bs4 10 | from ruamel.yaml import YAML, constructor 11 | 12 | from pelican import signals 13 | from pelican.readers import BaseReader 14 | from pelican.utils import pelican_open 15 | 16 | DEFAULT_READING_SPEED = 200 # Words per minute 17 | DEFAULT_PANDOC_EXECUTABLE = "pandoc" 18 | DIR_PATH = os.path.dirname(__file__) 19 | ENCODED_LINKS_TO_RAW_LINKS_MAP = { 20 | "%7Bstatic%7D": "{static}", 21 | "%7Battach%7D": "{attach}", 22 | "%7Bfilename%7D": "{filename}", 23 | } 24 | FILE_EXTENSIONS = ["md", "mkd", "mkdn", "mdwn", "mdown", "markdown", "Rmd"] 25 | FILTERS_PATH = os.path.abspath(os.path.join(DIR_PATH, "filters")) 26 | PANDOC_READER_HTML_TEMPLATE = "pandoc-reader-default.html" 27 | PANDOC_SUPPORTED_MAJOR_VERSION = 2 28 | PANDOC_SUPPORTED_MINOR_VERSION = 11 29 | 30 | TEMPLATES_PATH = os.path.abspath(os.path.join(DIR_PATH, "templates")) 31 | UNSUPPORTED_ARGUMENTS = ("--standalone", "--self-contained") 32 | VALID_BIB_EXTENSIONS = ["json", "yaml", "bibtex", "bib"] 33 | 34 | # Markdown variants supported in defaults files 35 | # Update as Pandoc adds or removes support for formats 36 | VALID_INPUT_FORMATS = ( 37 | "commonmark", 38 | "commonmark_x", 39 | "gfm", 40 | "markdown", 41 | "markdown_mmd", 42 | "markdown_phpextra", 43 | "markdown_strict", 44 | ) 45 | VALID_OUTPUT_FORMATS = ("html", "html5") 46 | 47 | 48 | class PandocReader(BaseReader): 49 | """Convert files written in Pandoc 
def _create_html(self, source_path, content, pandoc_executable):
    """Convert Markdown *content* into HTML5 and build its Pelican metadata.

    Reads PANDOC_DEFAULTS_FILES, PANDOC_ARGS, PANDOC_EXTENSIONS and
    CALCULATE_READING_TIME from the Pelican settings, validates them, runs
    Pandoc and post-processes the result.

    Returns:
        A ``(html, metadata)`` tuple.
    """
    settings = self.settings
    defaults_files = settings.get("PANDOC_DEFAULTS_FILES", [])
    arguments = settings.get("PANDOC_ARGS", [])
    extensions = settings.get("PANDOC_EXTENSIONS", [])

    # A list of extensions is collapsed into one suffix string
    if isinstance(extensions, list):
        extensions = "".join(extensions)

    # The source must carry a YAML metadata block; this raises otherwise
    self._check_yaml_metadata_block(content)

    # Validate the options and learn whether a ToC and citations were requested
    wants_toc, wants_citations = self._validate_fields(
        defaults_files, arguments, extensions
    )

    command = self._construct_pandoc_command(
        pandoc_executable, defaults_files, arguments, extensions
    )

    # Pass every bibliography found for this source when citations are on
    if wants_citations:
        command.extend(
            f"--bibliography={bib_path}" for bib_path in self._find_bibs(source_path)
        )

    raw_output = self._run_pandoc(command, content)
    html, toc, pandoc_metadata = self._extract_contents(raw_output, wants_toc)

    # Turn %7Bstatic%7D, %7Battach%7D and %7Bfilename%7D back into
    # {static}, {attach} and {filename} so Pelican can resolve the links
    for encoded_link, raw_link in ENCODED_LINKS_TO_RAW_LINKS_MAP.items():
        html = html.replace(encoded_link, raw_link)

    metadata = self._process_metadata(pandoc_metadata)

    if wants_toc:
        metadata["toc"] = self.process_metadata("toc", toc)

    if settings.get("CALCULATE_READING_TIME", []):
        reading_time = self._calculate_reading_time(pandoc_executable, source_path)
        metadata["reading_time"] = self.process_metadata("reading_time", reading_time)

    return html, metadata
def _check_defaults(self, defaults_files):
    """Validate the given Pandoc defaults files and report requested features.

    All files are read, merged into one YAML document and checked for
    unsupported settings and for valid input and output formats.

    Args:
        defaults_files: Paths of the Pandoc defaults files to validate.

    Returns:
        A ``(citations, table_of_contents)`` tuple of booleans.

    Raises:
        ValueError: If the same key is defined more than once across the
            files, or if one of the setting/format checks rejects the
            merged defaults.
    """
    # Read each file whole and join with a newline, so that a file without
    # a trailing newline cannot fuse its last line with the next file's first.
    file_contents = []
    for defaults_file in defaults_files:
        with open(defaults_file, encoding="utf-8") as file_handle:
            file_contents.append(file_handle.read())
    defaults_data = "\n".join(file_contents)

    # Convert YAML data to a Python dictionary
    try:
        defaults = YAML().load(defaults_data)
    except constructor.DuplicateKeyError as duplicate_key_error:
        raise ValueError(
            "Duplicate keys defined in multiple defaults files."
        ) from duplicate_key_error

    # An empty (or comment-only) document loads as None; treat it as an
    # empty mapping so the checks below report a clear ValueError instead
    # of failing with AttributeError.
    if defaults is None:
        defaults = {}

    self._check_if_unsupported_settings(defaults)
    reader = self._check_input_format(defaults)
    self._check_output_format(defaults)

    # citeproc may be enabled with "citeproc: true" or listed under "filters".
    # "or []" keeps an explicit "filters: null" from breaking the membership test.
    citeproc_specified = bool(defaults.get("citeproc", "")) or "citeproc" in (
        defaults.get("filters") or []
    )

    # The +citations extension is enabled by default in Pandoc 2.11, so
    # citations are on unless the reader disables it with -citations.
    citations = citeproc_specified and "-citations" not in reader

    table_of_contents = bool(defaults.get("table-of-contents", ""))

    return citations, table_of_contents
235 | ) from words_per_minute_nan 236 | 237 | return reading_time 238 | 239 | def _process_metadata(self, pandoc_metadata): 240 | """Process Pandoc metadata and add it to Pelican.""" 241 | # Cycle through the metadata and process them 242 | metadata = {} 243 | for key, value in pandoc_metadata.items(): 244 | key = key.lower() 245 | if value and isinstance(value, str): 246 | value = value.strip().strip('"') 247 | 248 | # Process the metadata 249 | metadata[key] = self.process_metadata(key, value) 250 | return metadata 251 | 252 | @staticmethod 253 | def _check_pandoc_version(pandoc_executable): 254 | """Check that the specified version of Pandoc is 2.11 or higher.""" 255 | output = subprocess.run( 256 | [pandoc_executable, "--version"], 257 | capture_output=True, 258 | encoding="utf-8", 259 | check=True, 260 | ) 261 | 262 | # Returns a string of the form pandoc 263 | pandoc_version = output.stdout.split("\n")[0] 264 | 265 | # Get the major and minor version from the above version string 266 | major_version = pandoc_version.split()[1].split(".")[0] 267 | minor_version = pandoc_version.split()[1].split(".")[1] 268 | 269 | # Pandoc major version less than 2 are not supported 270 | if int(major_version) < PANDOC_SUPPORTED_MAJOR_VERSION: 271 | raise Exception("Pandoc version must be 2.11 or higher.") 272 | 273 | # Pandoc major version 2 minor version less than 11 are not supported 274 | if ( 275 | int(major_version) == PANDOC_SUPPORTED_MAJOR_VERSION 276 | and int(minor_version) < PANDOC_SUPPORTED_MINOR_VERSION 277 | ): 278 | raise Exception("Pandoc version must be 2.11 or higher.") 279 | 280 | @staticmethod 281 | def _check_yaml_metadata_block(content): 282 | """Check if the source content has a YAML metadata block.""" 283 | # Check that the given content is not empty 284 | if not content: 285 | raise Exception("Could not find metadata. 
# NOTE(review): the @staticmethod definitions below are presumably members of
# the PandocReader class (its header is not visible from here); add_reader()
# and register() are module-level functions.


@staticmethod
def _construct_pandoc_command(
    pandoc_executable, defaults_files, arguments, extensions
):
    """Construct the Pandoc command line for a piece of content.

    The command always starts with the executable, ``--standalone`` and the
    reader's HTML template.  Without defaults files the input format is
    ``markdown`` followed by ``extensions``, the output format is ``html5``
    and ``arguments`` are appended as given.  With defaults files, one
    ``--defaults=<file>`` option is added per file and ``arguments`` and
    ``extensions`` are not used.

    Returns:
        The command as a list of strings.
    """
    pandoc_cmd = [
        pandoc_executable,
        "--standalone",
        f"--template={os.path.join(TEMPLATES_PATH, PANDOC_READER_HTML_TEMPLATE)}",
    ]
    if defaults_files:
        pandoc_cmd.extend(
            f"--defaults={defaults_file}" for defaults_file in defaults_files
        )
    else:
        pandoc_cmd.extend(["--from", "markdown" + extensions, "--to", "html5"])
        pandoc_cmd.extend(arguments)
    return pandoc_cmd


@staticmethod
def _run_pandoc(pandoc_cmd, content):
    """Execute the given Pandoc command and return its standard output.

    ``content`` is passed to the process on standard input and the output is
    decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: if the process exits with a non-zero
            status (``check=True``).
    """
    output = subprocess.run(
        pandoc_cmd,
        input=content,
        capture_output=True,
        encoding="utf-8",
        check=True,
    )
    return output.stdout


@staticmethod
def _extract_contents(html_output, table_of_contents):
    """Extract body HTML, table of contents and metadata from Pandoc output.

    The first line of ``html_output`` is parsed as a JSON document holding
    the metadata; everything after it is parsed as HTML.

    Returns:
        A ``(html_output, toc, pandoc_metadata)`` tuple.  ``toc`` is the
        ``<nav id="TOC">`` element rendered as a string with the id replaced
        by ``class="toc"``, or an empty string when no table of contents was
        requested or none is present in the output.
    """
    # The metadata JSON occupies the first line; the HTML follows it.
    pandoc_json_metadata, _, html_output = html_output.partition("\n")
    pandoc_metadata = json.loads(pandoc_json_metadata)

    soup = bs4.BeautifulSoup(html_output, "html.parser")
    body = soup.body

    toc = ""
    if table_of_contents and body is not None:
        toc_tag = body.find("nav", id="TOC")
        if toc_tag is not None:
            # Render the table of contents first, then drop it from the
            # document so it does not appear twice.
            toc = str(toc_tag).replace('id="TOC"', 'class="toc"')
            toc_tag.decompose()

    # Remove the body tag around the HTML output, keeping its children.
    if body is not None:
        body.unwrap()

    html_output = str(soup).strip()
    return html_output, toc, pandoc_metadata


@staticmethod
def _check_if_citations(arguments, extensions):
    """Check if citations are specified.

    Citations are considered enabled when ``--citeproc`` or ``-C`` is among
    ``arguments`` and ``-citations`` does not occur in ``extensions``.
    """
    if not arguments:
        return False
    citeproc_requested = "--citeproc" in arguments or "-C" in arguments
    # The +citations extension is enabled by default in Pandoc 2.11, so an
    # empty extension list must not switch citations off; only an explicit
    # "-citations" does.
    return citeproc_requested and "-citations" not in (extensions or "")


@staticmethod
def _check_if_toc(arguments):
    """Check if a table of contents should be generated."""
    return bool(arguments) and (
        "--toc" in arguments or "--table-of-contents" in arguments
    )


@staticmethod
def _find_bibs(source_path):
    """Find bibliographies recursively for the source path given.

    Walks the directory containing ``source_path`` (and its subdirectories)
    looking for files that share the source file's base name and carry one
    of the extensions in ``VALID_BIB_EXTENSIONS``.

    Returns:
        A list with the paths of the bibliography files found.
    """
    filename = os.path.splitext(os.path.basename(source_path))[0]
    directory_path = os.path.dirname(os.path.abspath(source_path))
    # The candidate names do not depend on the directory being visited.
    bib_names = [
        ".".join([filename, extension]) for extension in VALID_BIB_EXTENSIONS
    ]

    bib_files = []
    for root, _, files in os.walk(directory_path):
        for bib_name in bib_names:
            if bib_name in files:
                bib_files.append(os.path.join(root, bib_name))
    return bib_files


@staticmethod
def _check_arguments(arguments):
    """Check to see that only supported arguments have been passed.

    Raises:
        ValueError: if an argument is listed in ``UNSUPPORTED_ARGUMENTS``.
    """
    for arg in arguments:
        if arg in UNSUPPORTED_ARGUMENTS:
            raise ValueError(f"Argument {arg} is not supported.")


@staticmethod
def _check_if_unsupported_settings(defaults):
    """Check if unsupported settings are specified in the defaults.

    Raises:
        ValueError: if ``defaults`` holds a truthy value for the setting
            that corresponds to an entry of ``UNSUPPORTED_ARGUMENTS``.
    """
    for arg in UNSUPPORTED_ARGUMENTS:
        # Drop the first two characters of the argument (presumably its
        # leading "--") to obtain the key used in a defaults file.
        setting = arg[2:]
        if defaults.get(setting, ""):
            raise ValueError(f"The default {setting} should be set to false.")


@staticmethod
def _check_input_format(defaults):
    """Check if the input format given is a Markdown variant.

    The format is read from the ``reader`` or the ``from`` key of
    ``defaults``; exactly one of them has to be set.

    Returns:
        The input format string, including any extensions.

    Raises:
        ValueError: if no format is given, if both keys are given, or if the
            base format is not in ``VALID_INPUT_FORMATS``.
    """
    reader_input = defaults.get("reader", "")
    from_input = defaults.get("from", "")

    # Case where no input format is specified
    if not reader_input and not from_input:
        raise ValueError("No input format specified.")

    # Case where both reader and from are specified which is not supported
    if reader_input and from_input:
        raise ValueError(
            "Specifying both from and reader is not supported."
            " Please specify just one."
        )

    # Exactly one of the two is set at this point.
    reader = reader_input or from_input

    # Keep only the text before the first "+" or "-", i.e. the format name
    # without enabled/disabled extensions.
    reader_prefix = reader.replace("+", "-").split("-")[0]

    if reader_prefix not in VALID_INPUT_FORMATS:
        raise ValueError("Input type has to be a Markdown variant.")
    return reader


@staticmethod
def _check_output_format(defaults):
    """Check that the output format is one of ``VALID_OUTPUT_FORMATS``.

    The format is read from the ``writer`` or the ``to`` key of
    ``defaults``.

    Raises:
        ValueError: if both keys are given, or if neither holds a value from
            ``VALID_OUTPUT_FORMATS``.
    """
    writer_output = defaults.get("writer", "")
    to_output = defaults.get("to", "")

    # Case where both writer and to are specified which is not supported
    if writer_output and to_output:
        raise ValueError(
            "Specifying both to and writer is not supported."
            " Please specify just one."
        )

    # Case where neither writer nor to holds a valid output format
    if (
        writer_output not in VALID_OUTPUT_FORMATS
        and to_output not in VALID_OUTPUT_FORMATS
    ):
        output_formats = " or ".join(VALID_OUTPUT_FORMATS)
        raise ValueError(f"Output format type must be either {output_formats}.")


def add_reader(readers):
    """Add the PandocReader as the reader for all Pandoc Markdown files."""
    for ext in PandocReader.file_extensions:
        readers.reader_classes[ext] = PandocReader


def register():
    """Register the PandocReader."""
    signals.readers_init.connect(add_reader)