├── .gitattributes ├── examples ├── plots.pdf ├── pptest.pdf ├── brochure.png ├── example.pdf ├── showtest.pdf ├── matrixinverse.pdf ├── example.tex ├── pptest.tex ├── showtest.tex ├── plots.tex └── matrixinverse.tex ├── tests ├── test1 │ ├── plot2.tex │ ├── plot1.tex │ ├── testgold1 │ │ ├── plot1.pyptex │ │ ├── plot2.pyptex │ │ ├── test.png │ │ ├── test.pyplog │ │ └── test.pyptex │ ├── testgold2 │ │ ├── plot1.pyptex │ │ ├── plot2.pyptex │ │ ├── test.png │ │ ├── test.pyplog │ │ └── test.pyptex │ ├── test1.tex │ └── test2.tex ├── test2 │ ├── test1.tex │ ├── test2.tex │ ├── test3.tex │ ├── test4.tex │ ├── testgold1 │ │ ├── test.pyptex │ │ ├── test.png │ │ └── test.pyplog │ ├── testgold2 │ │ ├── test.pyptex │ │ ├── test.png │ │ └── test.pyplog │ ├── testgold3 │ │ ├── test.pyptex │ │ ├── test.png │ │ └── test.pyplog │ └── testgold4 │ │ ├── test.pyptex │ │ ├── test.png │ │ └── test.pyplog ├── test3 │ ├── testgold1 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── testgold2 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── testgold3 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── testgold4 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── test1.tex │ ├── test2.tex │ ├── test3.tex │ └── test4.tex ├── test4 │ ├── testgold1 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── testgold2 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── testgold3 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── testgold4 │ │ ├── test.png │ │ ├── test.pyptex │ │ └── test.pyplog │ ├── test1.tex │ ├── test2.tex │ ├── test3.tex │ └── test4.tex ├── runtests └── runtest ├── pyptex ├── __main__.py └── __init__.py ├── scripts ├── pdflatex ├── makechangelog └── make-pypi-release ├── .github ├── pull_request_template.md ├── CODE_OF_CONDUCT.md └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── hooks ├── pre-commit └── commit-msg ├── CONTRIBUTING.md ├── .gitignore ├── pyproject.toml ├── LICENSE ├── 
setup.py ├── Makefile ├── README.md └── CHANGELOG.md /.gitattributes: -------------------------------------------------------------------------------- 1 | * linguist-vendored 2 | *.py linguist-vendored=false 3 | -------------------------------------------------------------------------------- /examples/plots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/examples/plots.pdf -------------------------------------------------------------------------------- /examples/pptest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/examples/pptest.pdf -------------------------------------------------------------------------------- /examples/brochure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/examples/brochure.png -------------------------------------------------------------------------------- /examples/example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/examples/example.pdf -------------------------------------------------------------------------------- /examples/showtest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/examples/showtest.pdf -------------------------------------------------------------------------------- /tests/test1/plot2.tex: -------------------------------------------------------------------------------- 1 | @{{{ 2 | from sympy import * 3 | plotting.plot3d(S('x*y')) 4 | }}} 5 | -------------------------------------------------------------------------------- /tests/test2/test1.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 
\begin{document} 3 | @{3} 4 | \end{document} -------------------------------------------------------------------------------- /tests/test2/test2.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{3} 4 | \end{document} -------------------------------------------------------------------------------- /tests/test2/test3.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{4} 4 | \end{document} -------------------------------------------------------------------------------- /tests/test2/test4.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{4} 4 | \end{document} -------------------------------------------------------------------------------- /tests/test1/plot1.tex: -------------------------------------------------------------------------------- 1 | @{{{ 2 | from sympy import * 3 | plot(S('sin(x)+cos(pi*x)')) 4 | }}} 5 | -------------------------------------------------------------------------------- /examples/matrixinverse.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/examples/matrixinverse.pdf -------------------------------------------------------------------------------- /tests/test1/testgold1/plot1.pyptex: -------------------------------------------------------------------------------- 1 | \includegraphics[width=\textwidth]{plot1-generated/fig1.eps} 2 | -------------------------------------------------------------------------------- /tests/test1/testgold1/plot2.pyptex: -------------------------------------------------------------------------------- 1 | \includegraphics[width=\textwidth]{plot2-generated/fig1.eps} 2 | -------------------------------------------------------------------------------- 
/tests/test1/testgold2/plot1.pyptex: -------------------------------------------------------------------------------- 1 | \includegraphics[width=\textwidth]{plot1-generated/fig1.eps} 2 | -------------------------------------------------------------------------------- /tests/test1/testgold2/plot2.pyptex: -------------------------------------------------------------------------------- 1 | \includegraphics[width=\textwidth]{plot2-generated/fig1.eps} 2 | -------------------------------------------------------------------------------- /tests/test2/testgold1/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 3 4 | \end{document} -------------------------------------------------------------------------------- /tests/test2/testgold2/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 3 4 | \end{document} -------------------------------------------------------------------------------- /tests/test2/testgold3/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 4 | \end{document} -------------------------------------------------------------------------------- /tests/test2/testgold4/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 4 | \end{document} -------------------------------------------------------------------------------- /tests/test1/testgold1/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test1/testgold1/test.png -------------------------------------------------------------------------------- /tests/test1/testgold2/test.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test1/testgold2/test.png -------------------------------------------------------------------------------- /tests/test2/testgold1/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test2/testgold1/test.png -------------------------------------------------------------------------------- /tests/test2/testgold2/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test2/testgold2/test.png -------------------------------------------------------------------------------- /tests/test2/testgold3/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test2/testgold3/test.png -------------------------------------------------------------------------------- /tests/test2/testgold4/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test2/testgold4/test.png -------------------------------------------------------------------------------- /tests/test3/testgold1/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test3/testgold1/test.png -------------------------------------------------------------------------------- /tests/test3/testgold2/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test3/testgold2/test.png -------------------------------------------------------------------------------- /tests/test3/testgold3/test.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test3/testgold3/test.png -------------------------------------------------------------------------------- /tests/test3/testgold4/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test3/testgold4/test.png -------------------------------------------------------------------------------- /tests/test4/testgold1/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test4/testgold1/test.png -------------------------------------------------------------------------------- /tests/test4/testgold1/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/testgold2/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test4/testgold2/test.png -------------------------------------------------------------------------------- /tests/test4/testgold2/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/testgold3/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test4/testgold3/test.png -------------------------------------------------------------------------------- /tests/test4/testgold3/test.pyptex: 
-------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/testgold4/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sloisel/pyptex/HEAD/tests/test4/testgold4/test.png -------------------------------------------------------------------------------- /tests/test4/testgold4/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/testgold1/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | file.txt 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/testgold2/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | file.txt 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/testgold3/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | file.txt 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/testgold4/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | 4 | file.txt 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/test1.tex: 
-------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsargv-2 foo")} 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/test2.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsargv-2 foo")} 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/test3.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsargv-2 foo")} 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test4/test4.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsargv-2 foo")} 4 | hi 5 | \end{document} 6 | -------------------------------------------------------------------------------- /examples/example.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | @{from sympy import *} 3 | \begin{document} 4 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/test1.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsaction-2 touch file.txt")} 4 | @{pyp.dep('file.txt')} 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/test2.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 
| @{print("runtestsaction-2 touch file.txt")} 4 | @{pyp.dep('file.txt')} 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/test3.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsaction-2 touch file.txt")} 4 | @{pyp.dep('file.txt')} 5 | \end{document} 6 | -------------------------------------------------------------------------------- /tests/test3/test4.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{print("runtestsaction-2 touch file.txt")} 4 | @{pyp.dep('file.txt')} 5 | \end{document} 6 | -------------------------------------------------------------------------------- /pyptex/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from .__init__ import pyptexmain 3 | if __name__ == "__main__": 4 | # execute only if run as a script 5 | pyptexmain(sys.argv) 6 | -------------------------------------------------------------------------------- /examples/pptest.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \begin{document} 3 | @{{{ 4 | x = 17 5 | pyp.print(pyp.pp(r"""hello 6 | $$@x$$ 7 | $$@{x+3}$$ 8 | """)) 9 | }}} 10 | \end{document} 11 | -------------------------------------------------------------------------------- /scripts/pdflatex: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PATH=$OPATH 3 | fn=${3%.*} 4 | pyp=$fn.pyptex 5 | gld=$fn.pyptexgold 6 | pdf=$fn.pdf 7 | if cmp -s $pyp $gld && [ -f $pdf ]; then 8 | echo "$pdf is cached" 9 | touch $pdf 10 | else 11 | (cmp -s $pyp $gld && echo "Reusing $gld") || (cp $pyp $gld && echo "Updating $gld") 12 | pdflatex $1 $2 $gld 13 | fi 14 | 15 | 
-------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Issue #3 (if applicable) 2 | 3 | Changes in this pull request: 4 | 1. Some 5 | 2. Things 6 | 7 | * I used the pyptex githooks for this pull request 8 | * I did not use the pyptex githooks for this pull request 9 | * `make` completes without error messages for this commit. 10 | * `make` fails with error messages for this commit. 11 | -------------------------------------------------------------------------------- /hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | exec 1>&2 3 | myerr(){ 4 | echo -e "\033[31m\033[1mpre-commit: ERROR: $@\033[0m" 5 | } 6 | if make -q; then 7 | true 8 | else 9 | myerr 'Outdated Makefile targets found. run "make".' 10 | exit 1 11 | fi 12 | if git diff --quiet; then 13 | true 14 | else 15 | myerr 'Unstaged changes found. run "git add".' 16 | exit 1 17 | fi 18 | exit 0 -------------------------------------------------------------------------------- /tests/test2/testgold2/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /tests/test2/testgold4/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 
3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /tests/test4/testgold2/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /tests/test4/testgold4/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /tests/test1/testgold2/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 175 lines of Python. 3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)', 'test.bib': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! 
-f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /tests/test3/testgold2/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 2 lines of Python. 3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)', 'file.txt': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /tests/test3/testgold4/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 2 lines of Python. 3 | Using cached Python outputs 4 | Saving to file: test.pyptex 5 | Dependencies are: 6 | {'.../__init__.py': '(datetime)', 'file.txt': '(datetime)'} 7 | Saving cache file test.pickle 8 | Running Latex command: 9 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Rules for contributing: 2 | 3 | 1. Make sure you're using my githooks. You can "make reinstall-hooks" to 4 | install them. These hooks will ensure all your commits pass unit tests, 5 | and confirm this by adding a "validation line" to your commit messages. 6 | 2. Rebase to master if necessary. 7 | 3. Make your changes, commit them, send me a pull request. 
8 | 9 | Cheers, 10 | 11 | S 12 | -------------------------------------------------------------------------------- /tests/test2/testgold1/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | disable_cache=True 4 | Cache is invalidated. 5 | Executing Python code: 6 | 3 7 | Python result: 8 | [3] 9 | Saving to file: test.pyptex 10 | Dependencies are: 11 | {'.../__init__.py': '(datetime)'} 12 | Saving cache file test.pickle 13 | Running Latex command: 14 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pdf 2 | *.eps 3 | *-generated 4 | *.pickle 5 | *.pyptex 6 | *.aux 7 | *.bbl 8 | *.bib 9 | *.blg 10 | *.log 11 | *.pyplog 12 | *.pyptexgold 13 | *.synctex.gz 14 | *.txt 15 | *.ppm 16 | *.ppm.gz 17 | *.png 18 | *.fls 19 | *.fdb_latexmk 20 | test.tex 21 | testgen* 22 | *.egg-info 23 | __pycache__ 24 | *~ 25 | *.orig 26 | doc 27 | build 28 | dist 29 | scripts/pyptex 30 | .DS_Store 31 | .mark 32 | !tests/test?/testgold?/* 33 | !examples/*.pdf 34 | !examples/brochure.png 35 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This file is supposed to explain the community guidelines 2 | for pyptex. However, at present there is no pyptex community. 3 | Anyone using pyptex is most probably academic. So the code of 4 | conduct shall be, 5 | 6 | "Be collegial." 7 | 8 | I'm told I should spell out repercussions for breaking this 9 | "code" but I can only think of obvious things that go without 10 | saying, like banning, blocking or reporting people. 
11 | 12 | Thanks, 13 | 14 | S 15 | -------------------------------------------------------------------------------- /examples/showtest.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \usepackage{graphicx} 4 | \usepackage{amsmath} 5 | 6 | @{{{ 7 | import sympy 8 | import matplotlib.pyplot as plt 9 | }}} 10 | 11 | \begin{document} 12 | @{{{ 13 | sympy.plot(sympy.S('sin(x)')) 14 | }}} 15 | 16 | @{{{ 17 | plt.plot([1,2,3],[2,1,4]) 18 | plt.show() 19 | }}} 20 | 21 | @{{{ 22 | 23 | fig = plt.figure() 24 | fig.gca().plot([1,2,3],[3,3,1]) 25 | fig.show() 26 | pyp.print("\n\n") 27 | fig.show() 28 | }}} 29 | \end{document} 30 | -------------------------------------------------------------------------------- /tests/test2/testgold3/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | Fragment # 0 4 | Cached version: 5 | 3 6 | Live version: 7 | 4 8 | Cache is invalidated. 9 | Executing Python code: 10 | 4 11 | Python result: 12 | [4] 13 | Saving to file: test.pyptex 14 | Dependencies are: 15 | {'.../__init__.py': '(datetime)'} 16 | Saving cache file test.pickle 17 | Running Latex command: 18 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 19 | -------------------------------------------------------------------------------- /hooks/commit-msg: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | exec 1>&2 3 | myerr(){ 4 | echo -e "\033[31m\033[1mcommit-msg: ERROR: $@\033[0m" 5 | } 6 | mymsg(){ 7 | echo commit-msg: $@ 8 | } 9 | if make -q; then 10 | true 11 | else 12 | myerr 'Outdated Makefile targets found. run "make".' 13 | exit 1 14 | fi 15 | if git diff --quiet; then 16 | true 17 | else 18 | myerr 'Unstaged changes found. run "git add".' 
19 | exit 1 20 | fi 21 | msg="commit-msg approval for `whoami` on `date '+%Y.%m.%d-%H:%M:%S'`" 22 | echo $msg >>$1 23 | exit 0 -------------------------------------------------------------------------------- /tests/test4/testgold1/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | disable_cache=True 4 | Cache is invalidated. 5 | Executing Python code: 6 | print("runtestsargv-2 foo") 7 | runtestsargv-2 foo 8 | Python result: 9 | [None] 10 | Saving to file: test.pyptex 11 | Dependencies are: 12 | {'.../__init__.py': '(datetime)'} 13 | Saving cache file test.pickle 14 | Running Latex command: 15 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 16 | -------------------------------------------------------------------------------- /tests/test4/testgold3/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 1 lines of Python. 3 | argv differs ['foo'] [] 4 | Cache is invalidated. 5 | Executing Python code: 6 | print("runtestsargv-2 foo") 7 | runtestsargv-2 foo 8 | Python result: 9 | [None] 10 | Saving to file: test.pyptex 11 | Dependencies are: 12 | {'.../__init__.py': '(datetime)'} 13 | Saving cache file test.pickle 14 | Running Latex command: 15 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! 
-f test.bib || bibtex test.aux) 16 | -------------------------------------------------------------------------------- /scripts/makechangelog: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | verprev="master" 3 | vername=`grep 'version=' setup.py | sed -n "s/.*version='\\([0-9.]*\\)'.*,/v\\1/p"` 4 | for ver in $(git tag --sort=-v:refname) 5 | do 6 | echo 7 | echo "# $vername" 8 | echo 9 | git --no-pager log "$ver..$verprev" --pretty="format:* %s SUPERXXXMAGICXXXCODE (commit %h by %an)" -- . ':!CHANGELOG.md' | sed 's/ commit-msg approval for.*SUPERXXXMAGICXXXCODE//' | sed 's/SUPERXXXMAGICXXXCODE//' 10 | echo 11 | verprev="$ver" 12 | vername="$ver" 13 | done 14 | -------------------------------------------------------------------------------- /examples/plots.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \usepackage[utf8]{inputenc} 4 | \usepackage[english]{babel} 5 | \usepackage{graphicx} 6 | \usepackage{verbatim} 7 | 8 | @{{{ 9 | from sympy import * 10 | pyp.includegraphics=r"\includegraphics[width=0.9\textwidth]{%s}" 11 | }}} 12 | 13 | \begin{document} 14 | Here's a 2d sympy plot: 15 | 16 | @{plot(S('sin(x)+cos(pi*x)'))} 17 | 18 | Here's a 3d sympy plot: 19 | 20 | \begin{verbatim} 21 | @@{plotting.plot3d(S('x*y'))} 22 | \end{verbatim} 23 | 24 | @{plotting.plot3d(S('x*y'))} 25 | 26 | \end{document} 27 | -------------------------------------------------------------------------------- /tests/test3/testgold1/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 2 lines of Python. 3 | disable_cache=True 4 | Cache is invalidated. 
5 | Executing Python code: 6 | print("runtestsaction-2 touch file.txt") 7 | runtestsaction-2 touch file.txt 8 | Python result: 9 | [None] 10 | Executing Python code: 11 | pyp.dep('file.txt') 12 | Python result: 13 | ['file.txt'] 14 | Saving to file: test.pyptex 15 | Dependencies are: 16 | {'.../__init__.py': '(datetime)', 'file.txt': '(datetime)'} 17 | Saving cache file test.pickle 18 | Running Latex command: 19 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "pyptex" 7 | version = "1.5.6" 8 | license = {file = "LICENSE"} 9 | 10 | description = "pyptex" 11 | readme = "README.md" 12 | requires-python = ">=3.9" 13 | classifiers = [ 14 | "Programming Language :: Python :: 3", 15 | "Operating System :: OS Independent", 16 | 'Topic :: Software Development', 17 | 'Topic :: Scientific/Engineering', 18 | 'Typing :: Typed', 19 | 'Operating System :: Microsoft :: Windows', 20 | 'Operating System :: POSIX', 21 | 'Operating System :: Unix', 22 | 'Operating System :: MacOS', 23 | ] 24 | 25 | [project.urls] 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /tests/test3/testgold3/test.pyplog: -------------------------------------------------------------------------------- 1 | test.tex: pyptex compilation begins 2 | Found 2 lines of Python. 3 | Dependency mismatch file.txt 4 | Cached version: 5 | (datetime) 6 | Live version: 7 | (datetime) 8 | Cache is invalidated. 9 | Executing Python code: 10 | print("runtestsaction-2 touch file.txt") 11 | runtestsaction-2 touch file.txt 12 | Python result: 13 | [None] 14 | Executing Python code: 15 | pyp.dep('file.txt') 16 | Python result: 17 | ['file.txt'] 18 | Saving to file: test.pyptex 19 | Dependencies are: 20 | {'.../__init__.py': '(datetime)', 'file.txt': '(datetime)'} 21 | Saving cache file test.pickle 22 | Running Latex command: 23 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 24 | -------------------------------------------------------------------------------- /scripts/make-pypi-release: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | myerr(){ 4 | echo -e "\033[31m\033[1mpre-commit: ERROR: $@\033[0m" 5 | exit 1 6 | } 7 | scripts/makechangelog >CHANGELOG.txt 8 | if diff -q CHANGELOG.txt CHANGELOG.md >/dev/null 2>&1; then 9 | rm CHANGELOG.txt 10 | else 11 | rm CHANGELOG.txt 12 | myerr "CHANGELOG.md is not up to date. Do 'make CHANGELOG.md'." 13 | fi 14 | 15 | if make -q; then 16 | true 17 | else 18 | myerr 'Outdated Makefile targets found. run "make".' 19 | fi 20 | if git diff --quiet; then 21 | true 22 | else 23 | myerr 'Unstaged changes found. run "git add".' 
24 | fi 25 | vername=`grep 'version=' setup.py | sed -n "s/.*version='\\([0-9.]*\\)'.*,/v\\1/p"` 26 | if [ $(git tag -l "$vername") ]; then 27 | myerr "Tag $vername already exists. Either update the version number in setup.py, or delete the tag." 28 | fi 29 | git tag -a $vername -m 'Version tagged by make-pypi-release script.' && git push origin $vername 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Sébastien Loisel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/runtests: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | sp=`cd ../scripts && pwd` 4 | if [ "x$1" = "x--generate" ]; then 5 | rm -rf */testgold* 6 | shift 7 | fi 8 | # Start from a fresh top-level log; everything below appends to it. 9 | rm -f runtests.log 10 | 11 | echo "runtests: launching unit tests (parallel execution)" > runtests.log 12 | dirs=`ls -F | grep /` 13 | for d in $dirs; do 14 | cmd="./runtest $d" 15 | echo "runtests: launching $cmd" >> runtests.log # append: a plain '>' here erased the header and all earlier launch lines 16 | $cmd & 17 | done 18 | wait 19 | exec > >(tee -ia runtests.log) 2>&1 20 | echo "runtests: parallel execution complete, collating log files." 21 | cat */runtests.log 22 | echo ======================================================== 23 | echo runtests: Summary 24 | GREEN=`echo -e '\033[32m'` 25 | NORMAL=`echo -e '\033[0m'` 26 | RED=`echo -e '\033[31m'` 27 | cat */runtests.log | grep -E 'runtests:.*(FAIL|pass)' 28 | fails=`cat */runtests.log | grep -E 'runtests:.*FAIL' | wc -l` 29 | passes=`cat */runtests.log | grep -E 'runtests:.*pass' | wc -l` 30 | total=`cat */runtests.log | grep -E 'runtests:.*(FAIL|pass)' | wc -l` 31 | echo "runtests: Statistics" 32 | echo "runtests: total number of tests : $total" 33 | echo "runtests: total number of tests passed: $passes" 34 | echo "runtests: total number of tests failed: $fails" 35 | if [ "$fails" -eq "0" ]; then 36 | touch runtests.success.log 37 | fi 38 | exit $fails 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | #from distutils.command.build_scripts import build_scripts 3 | from pathlib import Path 4 | import sys,os 5 | from contextlib import suppress 6 | 7 | 8 | this_directory = Path(__file__).parent.absolute() 9 | long_description = Path(this_directory, 
'README.md').read_text(encoding='utf-8') 10 | 11 | def ensureFile(scriptname,thescript): 12 | writeScript = True 13 | with suppress(Exception): 14 | with open(scriptname,"rt") as file: 15 | if(file.read()==thescript): 16 | writeScript = False 17 | if(writeScript): 18 | with open(scriptname,"wt") as file: 19 | file.write(thescript) 20 | 21 | ensureFile("scripts/pyptex",r"""#!/bin/sh 22 | {} -u -m pyptex $@ 23 | """.format(sys.executable) 24 | ) 25 | os.system("chmod a+rx scripts/pyptex") 26 | 27 | 28 | setup( 29 | name='pyptex', 30 | description='Python Preprocessor for (La)TeX', 31 | version='1.5.6', 32 | packages=find_packages(), 33 | install_requires=['sympy>=1.5', 'numpy>=1.18', 'setuptools', 'pdoc3>=0.7','matplotlib','streamcapture>=1.2'], 34 | python_requires='>=3.8', 35 | author='Sébastien Loisel', 36 | author_email='sloisel@gmail.com', 37 | zip_safe=False, 38 | url='https://github.com/sloisel/pyptex', 39 | project_urls={ 40 | 'Documentation': 'https://htmlpreview.github.io/?https://github.com/sloisel/pyptex/blob/master/pyptex.html', 41 | 'Source': 'https://github.com/sloisel/pyptex', 42 | }, 43 | license='MIT', 44 | scripts=['scripts/pyptex'], 45 | long_description=long_description, 46 | long_description_content_type='text/markdown', 47 | ) 48 | -------------------------------------------------------------------------------- /examples/matrixinverse.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \usepackage[utf8]{inputenc} 4 | \usepackage[english]{babel} 5 | \usepackage{amsmath} 6 | 7 | \author{Sébastien Loisel} 8 | \title{Example application: automatically generated matrix inverse problem} 9 | \date{} 10 | 11 | \begin{document} 12 | \maketitle 13 | 14 | To compute the matrix inverse, we can perform ``complete Gaussian elimination'' on the augmented matrix $[A|I]$, where $I$ is an identity. 
15 | @{{{ 16 | from sympy import * 17 | pp = pyp.pp 18 | 19 | def GEinv(A): 20 | n = A.shape[0] 21 | I = eye(n) 22 | Ak = Matrix(BlockMatrix([[A,I]]).as_explicit()) 23 | sol = pp("$$@Ak$$\n") 24 | for j in range(n-1): 25 | for k in range(j+1,n): 26 | a = Ak[k,j]/Ak[j,j] 27 | kk = k+1 28 | jj = j+1 29 | sol = sol+pp(r"""$R_{@kk} := R_{@kk} - (@a) R_{@jj}$ """) 30 | Ak[k,:] = Ak[k,:] - a*Ak[j,:] 31 | sol = sol + pp(r"""$$@Ak$$""") 32 | for j in range(1,n): 33 | for k in range(j): 34 | a = Ak[k,j]/Ak[j,j] 35 | kk = k+1 36 | jj = j+1 37 | sol = sol+pp(r"""$R_{@kk} := R_{@kk} - (@a) R_{@jj}$ """) 38 | Ak[k,:] = Ak[k,:] - a*Ak[j,:] 39 | sol = sol + pp(r"""$$@Ak$$""") 40 | for j in range(n): 41 | a = Ak[j,j] 42 | jj = j+1 43 | Ak[j,:] = Ak[j,:]/a; 44 | sol = sol+pp(r"""$R_{@jj} := R_{@jj}/@a$ """) 45 | Ainv = A.inv() 46 | sol = sol+pp(" gives the final answer:\n$$A^{-1} = @Ainv.$$\n") 47 | 48 | return pp(r"""{\bf Problem:} Using the augmented matrix approach, 49 | compute $A^{-1}$, where $$A = @A.$$ 50 | {\bf Solution:} 51 | @sol""") 52 | }}} 53 | 54 | @{GEinv(Matrix([[3,-6,0],[1,4,1],[3,-3,3]]))} 55 | 56 | \end{document} 57 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: test doc hooks dist examples 2 | 3 | .PHONY: all clean test doc hooks delete-hooks reinstall-hooks dist examples pypi CHANGELOG.md 4 | 5 | pyptex.html: pyptex/__init__.py 6 | pdoc --html . 
7 | cp -f html/pyptex/index.html pyptex.html 8 | rm -rf html 9 | git add pyptex.html 10 | 11 | clean: 12 | rm -rf html pyptex.egg-info tests/runtests.log dist build examples/*.pdf examples/*.pyptex examples/*.pickle scripts/pyptex 13 | 14 | tests/runtests.success.log: Makefile pyptex/*.py scripts/pyptex tests/runtest tests/runtests tests/test?/test?.tex 15 | cd tests && ./runtests 16 | 17 | scripts/pyptex: setup.py Makefile 18 | python3 setup.py --help > /dev/null 19 | touch scripts/pyptex 20 | 21 | test: tests/runtests.success.log 22 | 23 | doc: pyptex.html 24 | 25 | exsrc := $(wildcard examples/*.tex) 26 | exdst := $(patsubst examples/%.tex,examples/%.pdf,$(exsrc)) 27 | export PYTHONPATH := $(shell pwd) 28 | examples/%.pdf: examples/%.tex pyptex/*.py scripts/pyptex scripts/pdflatex Makefile 29 | export OPATH=$(PATH); PATH=$(PYTHONPATH)/scripts:$(PATH); cd examples; rm -f *.aux *.log *.pyplog *.pyptex *.synctex.gz; ../scripts/pyptex `echo $< | sed 's/examples\///'` 30 | examples: ${exdst} 31 | 32 | hooksrc := $(wildcard hooks/*) 33 | hookdst := $(patsubst hooks/%,.git/hooks/%,$(hooksrc)) 34 | 35 | .git/hooks/%: hooks/% 36 | @(echo "Installing hooks") 37 | @if [ -f $@ ]; then echo "Fatal Error: hook $@ already exists.\nTo recover 'make reinstall-hooks' will\ndelete and reinstall all the following hooks:\n" ${hookdst}; false; fi 38 | cp $< $@ 39 | 40 | CHANGELOG.md: 41 | scripts/makechangelog >CHANGELOG.md 42 | 43 | hooks: ${hookdst} 44 | 45 | delete-hooks: 46 | rm ${hookdst} 47 | 48 | reinstall-hooks: delete-hooks hooks 49 | 50 | dist/.mark: setup.py pyptex/__init__.py 51 | rm -rf dist 52 | python3 setup.py sdist 53 | touch dist/.mark 54 | 55 | dist: dist/.mark 56 | 57 | pypi: all 58 | scripts/make-pypi-release && twine upload dist/* 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PypTeX: the Python Preprocessor for TeX 2 | 3 | 
### Author: Sébastien Loisel 4 | 5 | PypTeX is the Python Preprocessor for LaTeX. It allows one to embed Python 6 | code fragments in a LaTeX template file. 7 | 8 | An example plot with PypTeX 9 | 10 | # Installation 11 | 12 | `pip install pyptex` 13 | 14 | 1. You will also need a LaTeX installation, and the default LaTeX processor is `pdflatex`. 15 | 2. You need a Python 3 installation (3.9 or newer). 16 | 17 | # Hello, world 18 | 19 | Put the following in `example.tex`: 20 | 21 | \documentclass{article} 22 | @{from sympy import *} 23 | \begin{document} 24 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 25 | \end{document} 26 | 27 | The command `pyptex example.tex` will generate `example.pdf` and an intermediary 28 | pure-LaTeX file `example.pyptex`. The resulting PDF can be found 29 | [here](https://github.com/sloisel/pyptex/blob/master/examples/example.pdf). 30 | 31 | * The `pyptex` executable is a small shell script that runs the Python interpreter that was used to install PypTeX. If this is causing you problems, try `python -u -m pyptex example.tex` instead. 32 | 33 | # Slightly bigger examples 34 | 35 | * 2d and 3d plotting [tex](https://github.com/sloisel/pyptex/blob/master/examples/plots.tex) 36 | | 37 | [pdf](https://github.com/sloisel/pyptex/blob/master/examples/plots.pdf) 38 | * Matrix inverse exercise [tex](https://github.com/sloisel/pyptex/blob/master/examples/matrixinverse.tex) 39 | | 40 | [pdf](https://github.com/sloisel/pyptex/blob/master/examples/matrixinverse.pdf) 41 | * The F19NB handout for numerical linear algebra at Heriot-Watt University is generated with PypTeX. 
[pdf](https://www.macs.hw.ac.uk/~sl398/notes.pdf) 42 | 43 | # TeXShop 44 | 45 | If you want to use TeXShop on Mac, put the following into `~/Library/TeXShop/Engines/pyptex.engine` and restart TeXShop: 46 | ``` 47 | #!/bin/bash 48 | pyptex "$1" 49 | ``` 50 | 51 | # Documentation 52 | 53 | Detailed documentation can be found [here](https://htmlpreview.github.io/?https://github.com/sloisel/pyptex/blob/master/pyptex.html). 54 | -------------------------------------------------------------------------------- /tests/runtest: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | clean() { 3 | rm -rf *.aux *.png *.bbl *.bib *.blg *.dvi *.cache *.log *.pdf *.pickle *.pyplog *.pyptex *.synctex.gz *-eps-converted-to.pdf *.eps *-generated *~ testgen* 4 | } 5 | failtest() { 6 | echo "runtests: $1: FAIL" 7 | } 8 | passtest() { 9 | echo "runtests: $1: pass" 10 | } 11 | argv="" 12 | cd $1 13 | clean 14 | exec > >(tee -i runtests.log) 2>&1 15 | trap "failtest FATAL.$1" EXIT 16 | sp=`cd ../.. 
&& pwd` 17 | export PYTHONPATH=$sp 18 | pyptex="$sp/scripts/pyptex --pdb=no" 19 | cmdk='x' 20 | argk='x' 21 | for fn in test?.tex 22 | do 23 | k=`echo $fn | sed 's/test\(.\).tex/\1/g'` 24 | name=$1$k 25 | echo runtests: $name 26 | if [ -f $fn ]; then 27 | echo "runtests: installing file variant $fn" 28 | cp -f $fn test.tex 29 | fi 30 | mkdir testgen$k 31 | thecmd="$pyptex test.tex $argv" 32 | echo "runtests: $thecmd" 33 | $thecmd 34 | cp -r *.pyptex test-generated testgen$k 35 | if [ -f test.pdf ]; then 36 | rm -f test.png* 37 | pdftoppm -png test.pdf test -f 1 -singlefile 38 | cp test.png testgen$k 39 | fi 40 | sed -i '' '/^This is [a-zA-Z]*, Version/,$d' test.pyplog 41 | sed 's/\/.*\/\(__init__\.py\)/\.\.\.\/\1/g;' test.pyplog | sed 's/\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]@[0-9:\.]*\)/(datetime)/g' | grep -v 'Output written on test.pdf' > testgen$k/test.pyplog 42 | if [ -d testgold$k ]; then 43 | echo "runtests: Gold image already exists" 44 | else 45 | echo "runtests: Generating gold image" 46 | cp -r testgen$k testgold$k 47 | fi 48 | # diffcmd="diff -r testgen$k testgold$k" 49 | diffcmd="diff testgen$k/test.pyptex testgold$k/test.pyptex && diff testgen$k/test.pyplog testgold$k/test.pyplog && magick compare -metric AE testgen$k/test.png testgold$k/test.png testgen$k/difference.png" 50 | echo $diffcmd 51 | # if $diffcmd; then 52 | if diff testgen$k/test.pyptex testgold$k/test.pyptex && diff testgen$k/test.pyplog testgold$k/test.pyplog && magick compare -metric AE testgen$k/test.png testgold$k/test.png testgen$k/difference.png >/dev/null 2>&1; then 53 | passtest $name 54 | else 55 | failtest $name 56 | fi 57 | args=`grep -e "^runtestsargv-[0-9]* .*" ] 63 | \includegraphics[width=\textwidth]{plot1-generated/fig1.eps} 64 | Saving to file: plot1.pyptex 65 | Dependencies are: 66 | {'.../__init__.py': '(datetime)', 'plot1-generated/fig1.eps': '(datetime)'} 67 | Saving cache file plot1.pickle 68 | plot1.tex: pyptex compilation ends 69 | Python result: 70 | 
['\\input{plot1.pyptex}'] 71 | Executing Python code: 72 | pyp.input('plot2.tex') 73 | plot2.tex: pyptex compilation begins 74 | Found 4 lines of Python. 75 | disable_cache=True 76 | Cache is invalidated. 77 | Executing Python code: 78 | 79 | from sympy import * 80 | plotting.plot3d(S('x*y')) 81 | 82 | \includegraphics[width=\textwidth]{plot2-generated/fig1.eps} 83 | The PostScript backend does not support transparency; partially transparent artists will be rendered opaque. 84 | Python result: 85 | [
] 86 | \includegraphics[width=\textwidth]{plot2-generated/fig1.eps} 87 | The PostScript backend does not support transparency; partially transparent artists will be rendered opaque. 88 | Saving to file: plot2.pyptex 89 | Dependencies are: 90 | {'.../__init__.py': '(datetime)', 'plot2-generated/fig1.eps': '(datetime)'} 91 | Saving cache file plot2.pickle 92 | plot2.tex: pyptex compilation ends 93 | Python result: 94 | ['\\input{plot2.pyptex}'] 95 | Executing Python code: 96 | pyp.latex 97 | Python result: 98 | ['pdflatex -file-line-error --synctex=1'] 99 | Executing Python code: 100 | 101 | pyp.bib(r""" 102 | @article{knuth1989errors, 103 | title={The errors of {TeX}}, 104 | author={Knuth, Donald E}, 105 | journal={Software: Practice and Experience}, 106 | volume={19}, 107 | number={7}, 108 | pages={607--685}, 109 | year={1989}, 110 | publisher={Wiley Online Library} 111 | } 112 | @book{knuth1984texbook, 113 | title={The {TEXbook}}, 114 | author={Knuth, Donald Ervin and Bibby, Duane}, 115 | volume={3}, 116 | year={1984}, 117 | publisher={Addison-Wesley Reading} 118 | } 119 | @book{lamport1994latex, 120 | title={{LATEX}: a document preparation system: user's guide and reference manual}, 121 | author={Lamport, Leslie}, 122 | year={1994}, 123 | publisher={Addison-wesley} 124 | } 125 | @article{greenwade1993comprehensive, 126 | title={The comprehensive {TEX} archive network (ctan)}, 127 | author={Greenwade, George D}, 128 | journal={TUGBoat}, 129 | volume={14}, 130 | number={3}, 131 | pages={342--351}, 132 | year={1993}, 133 | publisher={Addison-Wesley} 134 | } 135 | @article{seindal1997gnu, 136 | title={{GNU} m4, version 1.4}, 137 | author={Seindal, Ren{\'e}}, 138 | journal={Free Software Foundation}, 139 | volume={59}, 140 | year={1997} 141 | } 142 | @article{virtanen2020scipy, 143 | title={{SciPy} 1.0: fundamental algorithms for scientific computing in {P}ython}, 144 | author={Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E and Haberland, Matt and Reddy, 
Tyler and Cournapeau, David and Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and others}, 145 | journal={Nature Methods}, 146 | pages={1--12}, 147 | year={2020}, 148 | publisher={Nature Publishing Group} 149 | } 150 | @article{meurer2017sympy, 151 | title={{SymPy}: symbolic computing in {Python}}, 152 | author={Meurer, Aaron and Smith, Christopher P and Paprocki, Mateusz and {\v{C}}ert{\'\i}k, Ond{\v{r}}ej and Kirpichev, Sergey B and Rocklin, Matthew and Kumar, AMiT and Ivanov, Sergiu and Moore, Jason K and Singh, Sartaj and others}, 153 | journal={{PeerJ} Computer Science}, 154 | volume={3}, 155 | pages={e103}, 156 | year={2017}, 157 | publisher={PeerJ Inc.} 158 | } 159 | @article{poore2015pythontex, 160 | title={{PythonTeX: reproducible documents with LaTeX, Python, and more}}, 161 | author={Poore, Geoffrey M}, 162 | journal={Computational Science \& Discovery}, 163 | volume={8}, 164 | number={1}, 165 | pages={014010}, 166 | year={2015}, 167 | publisher={IOP Publishing} 168 | } 169 | @article{fine2005tex, 170 | title={{TEX} forever!}, 171 | author={Fine, Jonathan}, 172 | journal={Proceedings EuroTEX}, 173 | pages={140--149}, 174 | year={2005} 175 | } 176 | @misc{Loisel2020, 177 | author = {S{\'e}bastien Loisel}, 178 | title = {Numerical Analysis {B} (lecture notes)}, 179 | year = {2020}, 180 | publisher={Heriot-Watt University} 181 | } 182 | @book{kernighan1977m4, 183 | title={The {M4} macro processor}, 184 | author={Kernighan, Brian W and Ritchie, Dennis M}, 185 | year={1977}, 186 | publisher={Bell Laboratories Murray Hill, NJ} 187 | } 188 | @article{cimpanu2018twelve, 189 | title={Twelve Malicious {P}ython Libraries Found and Removed from {PyPI}}, 190 | author={Cimpanu, Catalin}, 191 | year={2018}, 192 | journal={ZDNet}, 193 | month={October}, 194 | } 195 | @article{pakin2004perltex, 196 | title={PerlTEX: Defining LATEX macros using Perl}, 197 | author={Pakin, Scott}, 198 | journal={TUGboat}, 199 | volume={25}, 200 | 
number={2}, 201 | pages={150--159}, 202 | year={2004} 203 | } 204 | @misc{ehmsen, 205 | title={Python.sty}, 206 | author={Martin R. Ehmsen}, 207 | publisher={CTAN}, 208 | year={2012}, 209 | } 210 | @misc{drake2009sagetex, 211 | title={The {SageTEX} package}, 212 | author={Drake, Dan and others}, 213 | publisher={CTAN}, 214 | year={2009} 215 | } 216 | @book{molteno2014sympytex, 217 | title={SympyTeX: Embedding Symbolic Computation Into {LaTeX} Documents}, 218 | author={Molteno, Timothy Christopher Anthony}, 219 | year={2014}, 220 | publisher={Electronics Group, University of Otago} 221 | } 222 | @article{hagen2005luatex, 223 | title={{LuaTEX}: Howling to the moon}, 224 | author={Hagen, Hans}, 225 | journal={The Communications of the TEX Users Group}, 226 | pages={152}, 227 | year={2005} 228 | } 229 | @article{hepple1998, 230 | title={Writing {HTML} with m4}, 231 | author={Bob Hepple}, 232 | journal={Linux journal}, 233 | year={1998}, 234 | month={March}, 235 | } 236 | @article{ronacher2008jinja2, 237 | title={Jinja2 Documentation}, 238 | author={Ronacher, Armin}, 239 | journal={Welcome to Jinja2—Jinja2 Documentation (2.8-dev)}, 240 | year={2008} 241 | } 242 | @book{alchin2013pro, 243 | title={Pro Django}, 244 | author={Alchin, Marty and Kaplan-Moss, Jacob and Vilches, George}, 245 | year={2013}, 246 | publisher={Springer} 247 | } 248 | """) 249 | 250 | Python result: 251 | ['test'] 252 | Saving to file: test.pyptex 253 | Dependencies are: 254 | {'.../__init__.py': '(datetime)', 'test.bib': '(datetime)'} 255 | Saving cache file test.pickle 256 | Running Latex command: 257 | pdflatex -file-line-error --synctex=1 test.pyptex && (test ! -f test.bib || bibtex test.aux) 258 | -------------------------------------------------------------------------------- /tests/test1/testgold1/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | 4 | 5 | 6 | 7 | 8 | % This doesn't actually work. 
9 | \pdfinfoomitdate1 10 | \pdfsuppressptexinfo-1 11 | \pdftrailerid{} 12 | 13 | \usepackage[utf8]{inputenc} 14 | \usepackage[english]{babel} 15 | \usepackage{amsmath} 16 | \usepackage{verbatim} 17 | \usepackage{relsize} 18 | \usepackage{graphicx} 19 | 20 | \usepackage{hologo} 21 | 22 | 23 | 24 | \newcommand{\Longupdownarrow}{\Big\Updownarrow} 25 | \usepackage{listings} 26 | \lstset{ 27 | basicstyle=\footnotesize\ttfamily} 28 | 29 | \usepackage{natbib} 30 | 31 | \newcommand{\pyptex}{{P\kern-0.08em\raisebox{-0.5ex}{Y\kern-0.1emP}\kern-0.28em\TeX{}}} 32 | 33 | \title{\pyptex{}: The Python Preprocessor for \LaTeX{}.} 34 | \date{wololo} 35 | \author{Sébastien Loisel} 36 | 37 | 38 | \begin{document} 39 | \maketitle 40 | 41 | \begin{abstract} 42 | \pyptex{} is a Python template Preprocessor for \LaTeX{}. From a source template document \texttt{a.tex} containing specially marked Python code fragments, one or more pure \LaTeX{} files \texttt{a.pyptex} are generated and compiled into final \texttt{a.pdf} documents. Our new approach differs from previous approaches that embedded Python into the \LaTeX{} engine and allowed bidirectional communication between Python and \LaTeX{}. The embedded Python approach exposes users to security risks, but it also has a deadlock problem and is very hard to debug. By contrast, \pyptex{} avoids the security risks of embedded Python, completely eliminates the deadlock problem, and is easy to debug. We have used \pyptex{} to produce a 71-page handout for linear algebra students containing automatically generated problem sets. In combination with SymPy, SciPy and other Python packages, \pyptex{} can be used to create beautiful scientific documents. 43 | \end{abstract} 44 | 45 | \section{Introduction} 46 | 47 | \pyptex{} is a macro preprocessor (or template engine) for \LaTeX{} \citep{lamport1994latex} that allows one to use the rich Python ecosystem to automate parts of the redaction of sophisticated scientific documents. 
48 | In combination with SciPy \citep{virtanen2020scipy} and SymPy \citep{meurer2017sympy}, \pyptex{} is a powerful tool for generating documents ranging from research papers and books to calculus problem sets for students. 49 | 50 | One installs \pyptex{} by typing \texttt{pip install pyptex} at the command prompt. This assumes that one already has working Python and \LaTeX{} installations; by default, \pyptex{} assumes that \texttt{pdflatex} is available at the command line, although alternate \LaTeX{} implementations can also be used. Then, using one's favorite text editor, one places the following text in a file \texttt{example.tex}: 51 | \begin{lstlisting} 52 | \documentclass{article} 53 | @{from sympy import *} 54 | \begin{document} 55 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 56 | \end{document} 57 | \end{lstlisting} 58 | One compiles \texttt{example.tex} using the \pyptex{} command line \texttt{pyptex example.tex}. This generates the final output \texttt{example.pdf}, which contains this: 59 | $$\int x^3\,dx = \frac{x^{4}}{4}+C$$ 60 | As we can see, SymPy was used to compute an indefinite integral symbolically, and the result was typeset with standard \LaTeX{}. 61 | 62 | \pyptex{} also produces the intermediate file \texttt{example.pyptex}, which is a pure-\LaTeX{} file that is useful either for debugging purposes, or for sending a final manuscript to a journal. It contains the following: 63 | \begin{lstlisting} 64 | \documentclass{article} 65 | 66 | \begin{document} 67 | $$\int x^3\,dx = \frac{x^{4}}{4}+C$$ 68 | \end{document} 69 | \end{lstlisting} 70 | The SymPy Python code in the template file \texttt{example.tex} was executed and replaced with its own output in standard \LaTeX{} notation. 71 | The file \texttt{example.pdf} was automatically generated by \pyptex{} from the \texttt{example.pyptex} file (internally, \pyptex{} invoked the command \texttt{pdflatex example.pyptex}). 
72 | 73 | \begin{figure} 74 | $$\texttt{a.tex} \implies \text{\pyptex{}} \implies \texttt{a.pyptex} \implies \texttt{pdflatex}\implies \texttt{a.pdf}$$ 75 | \caption{Dataflow of a the \pyptex{} Python template Preprocessor. \label{f:pre}} 76 | \end{figure} 77 | \begin{figure} 78 | $$\texttt{a.tex} \implies 79 | \left\{ 80 | \begin{array}{c} 81 | \texttt{pdflatex} \\ 82 | \Longupdownarrow \\ 83 | \text{Python} 84 | \end{array} 85 | \right\} 86 | \implies \texttt{a.pdf} 87 | $$ 88 | \caption{Dataflow when Python is embedded in the \texttt{pdflatex} engine. \label{f:embed}} 89 | \end{figure} 90 | 91 | In \pyptex{}, debugging and error handling are first-class citizens. If any errors occur during Python interpretation, \pyptex{} automatically launches the Python debugger Pdb in postmortem mode, just like standard \LaTeX{} enters \texttt{errorstopmode} on error. In Pdb, all standard Python debugging tools work as normal, and source code from the originating \texttt{.tex} file is visible in its context (e.g. with the \texttt{list} command). If an error occurs during \LaTeX{} compilation (e.g. during \texttt{pdflatex example.pyptex}), the usual error prompt is provided by \LaTeX{}. 92 | 93 | \pyptex{} is a {\em preprocessor} (or {\em template engine}) for \LaTeX{}, see Figure \ref{f:pre}. 94 | Python fragments are extracted from the (template) source \texttt{a.tex} file by a regular expression pattern matcher, executed by a Python interpreter, and substituted with the appropriate outputs. The collated file is written to disk as \texttt{a.pyptex}, which is a pure \LaTeX{} file. Because this procedure is done by regular expressions, the \LaTeX{} compiler is not invoked and not involved in parsing or processing \texttt{a.tex}. Once \texttt{a.pyptex} has been produced, then \texttt{a.pdf} is obtained, e.g. via \texttt{pdflatex a.pyptex}. 95 | 96 | As far as we know, \pyptex{} is the first time that Python is formally used as a preprocessor or templating engine for \LaTeX{}. 
However, Python has previously been {\em embedded} into \LaTeX{} engines, see Figure \ref{f:embed}; we mention \citet{fine2005tex}, \citet{ehmsen}, \citet{poore2015pythontex}. For SymPy there is \citet{molteno2014sympytex} and for Sage there is \citet{drake2009sagetex}. Other scripting languages have also been embedded into \LaTeX{}, for example Perl \citep{pakin2004perltex} and Lua \citep{hagen2005luatex}. When Python is embedded into \LaTeX{}, all the Python fragments are extracted from a source \texttt{a.tex} file by the \TeX{} macro engine itself. After executing these Python fragments, the relevant outputs are substituted into the macro stream being ``digested'' by the \TeX{} engine \citep{knuth1984texbook}. Although templating engines for \LaTeX{} seem rare, they are commonly used for other document formats, see e.g. \citet{hepple1998}, or \citet{alchin2013pro}. Usually, templating engines focus on generating multiple documents from a single template, whereas \pyptex{} focuses on generating a single document from a template. For that reason, we prefer the terminology ``preprocessor''. That being said, \pyptex{} can indeed be used in a more traditional templating mode where multiple documents are generated from a single template \texttt{a.tex}. 97 | 98 | With the help of \pyptex{}, we have produced a 71-page handout \citep{Loisel2020} for the course F19NB at Heriot-Watt University on numerical linear algebra. Previous to \pyptex{}, in 2018, we had used an ad-hoc and highly complex build system for \LaTeX{} code generation. While we distinguish this ad-hoc approach from our new template preprocessor approach, we mention that the ad-hoc generation of \LaTeX{} from pure Python programs is widespread. In 2019, we switched from an ad-hoc approach to PythonTex and it was a significant improvement, but we found three important drawbacks to the embedded Python approach, which motivated us to create \pyptex{}. 
99 | 100 | First, there is the (somewhat theoretical) security risk of allowing third-party \LaTeX{} packages to execute arbitrary Python codes unbeknownst to the user. Second, there is the issue of ``deadlock'', which is when the source and cache files enter a state where compiling \texttt{a.tex} into \texttt{a.pdf} becomes impossible because of \TeX{} errors. This occurs when the \TeX{} macro processor is simultaneously extracting new Python fragments from \texttt{a.tex} while inserting outdated cached substitutions from a previous version of \texttt{a.tex}. To break deadlock, it is usually necessary to manually delete all cache files, but this is not always sufficient, and sometimes modifying \texttt{a.tex} is necessary. Finally, the third problem is the difficulty of debugging Python fragments and generated \LaTeX{} fragments. 101 | 102 | We have found that our new Python template preprocessor \pyptex{} avoids all three problems. First, it does not allow third-party \LaTeX{} packages to execute arbitrary Python codes because there is no bidirectional communication between \LaTeX{} and Python. Second, deadlock is impossible by design because the \LaTeX{} engine is not involved at all in the extraction, execution and substitution of Python fragments from \texttt{a.tex}. Finally, debugging with Pdb is treated as a first-class citizen and works as normal, and the collated \LaTeX{} document is made available in the human-preferred format of a pure-\LaTeX{} source file \texttt{a.pyptex}, as opposed to a ``partially digested'' \TeX{} macro stream. We have confirmed these significant improvements using our F19NB handout, which consists of 2,592 lines and 105KB of \LaTeX{} source in file \texttt{notes.tex}, including 1,256 lines of Python code. 103 | 104 | Our paper is organized as follows. In Section 2, we give a brief tour of \pyptex{} features. 
In Section 3, we discuss the F19NB case study, a 71 page PDF document containing automatically generated problem sets for linear algebra students. We end with some conclusions in Section 4. 105 | 106 | \section{A brief tour of \pyptex{} features} 107 | 108 | The \texttt{pyptex a.tex} command executes Python fragments in \texttt{a.tex} delimited by either \verb|@{...}| or \verb|@{{{...}}}|; the latter notation is necessary if the Python fragment contains curly braces. If a literal \verb|@{| is needed, it can be escaped by using a double \verb|@@{|; apart from this, it is not necessary to escape \verb|@| symbols. \pyptex{} also honors \LaTeX{}-style comments so that \verb|%@{error}| does not execute the Python expression \verb|error|. 109 | 110 | \pyptex{} allows one to execute arbitrary Python programs and import arbitrary Python packages, e.g. \verb|@{from sympy import *}|. One can use SymPy to create simple plots, save them to a file and include them via the standard \LaTeX{} graphicx package. For example: 111 | \lstinputlisting{plot1.tex} 112 | As with all modern template preprocessors, \pyptex{} enables modular development by assembling a project from multiple component files. For example, the preceding plotting snippet was saved in the \texttt{plot1.tex}, which will later be included in our master \texttt{a.tex} via the command 113 | \verb|@{pyp.input('plot1.tex')}|. The files \texttt{a.tex} and \texttt{plot1.tex} are analogous to Python modules in that they execute their own separate namespaces. As such, it is necessary to re-import SymPy into the namespace of \texttt{plot1.tex}, even though it already has been imported into the namespace of the master file \texttt{a.tex}. 
114 | 115 | The pattern of plotting a figure, saving it to a file, and then including its filename into the generated \texttt{a.pyptex} file is very common, so \pyptex{} includes a convenience function \texttt{pyp.savefig()} that automatically generates distinct numbered filenames under the \texttt{a-generated/*} directory; in the present case, we create the file \texttt{a-generated/fig1.eps} with the following fragment: 116 | \lstinputlisting{plot2.tex} 117 | These plotting commands have also been stored in a separate file \texttt{plot2.tex}. 118 | Note that the Python fragment in \texttt{plot2.tex} consists of two Python statements. The Python programming language distinguishes {\em expressions} that return or produce a value, and typically consist of a single line; and {\em statements} that may run multiple lines and produce no value. The multiple lines in the \texttt{plot2.tex} Python code fragments means that they are statements that produce no value. In order to print the filename \texttt{a-generated/fig1.eps} into the curly braces of the \verb|\includegraphics{...}|, it is therefore required to use the \verb|pyp.print(...)| command. 119 | 120 | Embedding the files \texttt{plot1.tex} and \texttt{plot2.tex} via the \texttt{pyp.input(...)} mechanism described above, results in the following output:\\ 121 | \noindent \input{plot1.pyptex} 122 | \input{plot2.pyptex} 123 | 124 | Standard out and standard error print out to the console as usual, and are additionally logged in the \texttt{a.pyplog} file. If one wishes to print directly to the \texttt{a.pyptex} file, one should use the \texttt{pyp.print()} function. 125 | 126 | For performance reasons, \pyptex{} caches the outputs of Python fragments in a binary file \texttt{a.pickle}. If one edits purely textual portions of \texttt{a.tex} while leaving the Python fragments untouched, the cached outputs are used and the production of \texttt{a.pyptex} is nearly instantaneous. 
If the Python fragments in \texttt{a.tex} are not identical to the cached fragments, or if the command-line arguments to \texttt{pyptex} have changed, or if the \texttt{pyptex} package version has changed, the cache is automatically invalidated. Furthermore, the command \verb|pyp.dep(filename)| can be used to add further dependencies that will invalidate the cache when the file \verb|filename| changes. Any auxiliary \texttt{.tex} files (and their dependencies) incorporated via \verb|pyp.input(...)| are automatically added to the list of dependencies via \verb|pyp.dep()|, but if external data files are \texttt{open()}ed and used to generate, e.g. \LaTeX{} tables, these files should manually be added to the dependencies via \verb|pyp.dep()|. The helper function \verb|pyp.open(...)| automatically calls \verb|pyp.dep(...)| before calling the builtin \verb|open(...)| function. 127 | 128 | One can control how the \texttt{a.pyptex} file is compiled into \texttt{a.pdf} by specifying which \LaTeX{} engine to use. To completely disable all \LaTeX{} processing, do \verb|@{pyp.latexcommand=False}| anywhere in \texttt{a.tex}. The default \texttt{latexcommand} is 129 | \begin{lstlisting} 130 | {latex} {pyptexfilename} && 131 | (test ! -f {bibfilename} || bibtex {auxfilename}) 132 | \end{lstlisting} 133 | The usual Python substitutions are made from the \texttt{pyp} object, e.g. \verb|pyp.latex| defaults to \verb|"pdflatex -file-line-error --synctex=1"| but this can be overridden from anywhere in \texttt{a.tex}. 134 | 135 | \pyptex{} can be used programmatically in the Python interpreter with \texttt{import pyptex}. The module exports a \texttt{pyptex.pyptex} object; doing \\ \texttt{pyp = pyptex.pyptex('a.tex')} causes the file \texttt{a.tex} to be read in, the Python fragments are executed, and the file \texttt{a.pyptex} is written out. The compiled text \texttt{a.pyptex} is available as the string \texttt{pyp.compiled}. 
The function \\\texttt{pyptex.pyptexmain()} implements the standard operations of the shell command \texttt{pyptex}. Further documentation is available by doing e.g. \texttt{help(pyptex)}. 136 | 137 | On Mac, \pyptex{} can be used with TeXShop by creating the file \\ \verb|~/Library/TeXShop/Engines/pyptex.engine| with the following contents: 138 | \begin{lstlisting} 139 | #!/bin/bash 140 | pyptex $1 141 | \end{lstlisting} 142 | Upon restarting TeXShop, this adds the \texttt{pyptex} build option to the list of build engines. 143 | %\lstinputlisting{~/Library/TeXShop/Engines/pyptex.engine} 144 | 145 | \section{Case study: the F19NB handout} 146 | 147 | At Heriot-Watt University, F19NB is a course on numerical linear algebra for undergraduate students. Topics include Gaussian elimination, classical iterations (Jacobi and SOR), GMRES and the QR iteration. All of the theory, proofs, examples and exercises are collected in a 71-page handout. Many of the numerical exercises and their solutions are automatically generated. For some problems (e.g. eigenvalue problems, where the characteristic polynomial must be obtained), it is helpful to do a little bit of symbolic calculations using SymPy. Other exercises and examples are more numerical and use numpy and matplotlib for displaying solutions. 148 | 149 | As of this writing, there are 2,592 lines and 105KB\footnote{As usual, some \LaTeX{} lines can be very long or very short, so the size of the file in KB may be more indicative of the size of the project} in the source file \texttt{notes.tex}, of which 1,256 lines are Python scripts. However, up to 2018, the \texttt{notes.tex} file did not contain any Python code. The Python codes were stored in separate \texttt{.py} files under a \texttt{problib} directory. These problem sets were spread over 24 Python source files and 1,268 lines of Python code. 
The \texttt{problib} codes were executed in sequence and all the outputs collected in an automatically generated 1,211 line (78KB) \texttt{problems.tex} file. The file \texttt{problems.tex} is a pure \LaTeX{} file that contains problem questions and solutions, encapsulated in \verb|\newcommand| statements, and is then included in the main \texttt{notes.tex} file via the \verb|\input{problems.tex}| mechanism. Various problems and their solutions can then be instantiated in the \texttt{notes.tex} file by invoking the corresponding \LaTeX{} commands. 150 | 151 | The sheer number of files and building steps caused an explosion in complexity more typical of software engineering than mathematical typesetting. Identifying which Python code generated any particular problem set was becoming increasingly challenging. On occasions, it was found that \texttt{problems.tex} was inconsistent with the \texttt{problib} Python scripts, either because the \texttt{problib} Python scripts had not been rerun, or sometimes because a Python script had accidentally been deleted. Fixing these issues was taking an increasing amount of the author's time. 152 | 153 | \subsection{PythonTex and \texttt{notes.tex}} 154 | 155 | In 2019, we adopted PythonTex and it was a major success. This allowed us to immediately reduce the number of external Python scripts and build steps, because many of the simpler tasks could be done directly from \texttt{notes.tex}. By the end of 2019, all Python scripts were moved to \texttt{notes.tex}, and \texttt{problib} was deleted. 156 | 157 | Despite these major improvements, new ``software engineering-style'' problems arose. In the abstract, allowing third-party \LaTeX{} packages to execute arbitrary Python programs seems like a security problem. As of this writing, there are 7,093 potentially malicious \LaTeX{} packages on CTAN. 
That being said, there are 211,855 packages currently on the Python archive PyPI, and malicious Python packages are regularly identified, e.g. \citet{cimpanu2018twelve}. Ultimately, it was the deadlock and debugging problems that motivated us to create \pyptex{}. 158 | 159 | We now list a few examples of deadlock situations, inspired from deadlocks that happened during the development of \texttt{notes.tex}. First, if some Python fragment in \texttt{notes.tex} generates the image \texttt{dog.png}, then the \LaTeX{} command \verb|\includegraphics{dog.png}| will fail on first execution because PythonTex has not yet executed the Python code that generates \texttt{dog.png}. Because the first execution of \verb|pdflatex| has failed, the Python code to generate \verb|dog.png| is never executed. Repeatedly invoking \verb|pdflatex| or \verb|pythontex|, or deleting cache and aux files, does not break the deadlock. One must modify \texttt{notes.tex} so that it does not cause an error when the file \texttt{dog.png} does not exist. Furthermore, if the filename for the \verb|includegraphics| is generated programmatically, e.g. \\ \verb|\includegraphics{\py{"dog.png"}}|, then when there is no cache, \texttt{pdflatex} is likely to fail because \verb|\py{"dog.png"}| will macro-expand to the empty string, causing an error. One can ``prime'' the cache by temporarily deleting the \verb|\includegraphics|. After reinserting the \verb|\includegraphics|, \texttt{notes.tex} will compile successfully, but deleting the cache or \texttt{dog.png} will cause a compilation failure/deadlock again. 160 | We also found that PythonTex often tried to substitute mismatched cache outputs to various input Python fragments. 161 | We are not entirely certain how PythonTex keeps track of matching inputs and outputs in the cache, but it seemed to us that adding or deleting a Python code fragment \verb|\py{...}| almost always resulted in deadlock and having to delete all cache files. 
Of course, deleting all cache files itself sometimes caused deadlock elsewhere. 162 | 163 | Practically speaking, we found that almost any significant and even minor edit to \texttt{notes.tex} resulted in deadlock and had to be accompanied with a manual deletion of all temporary files, and further tweaks to \texttt{notes.tex} that are required by the deletion of the cache files. 164 | 165 | We found it difficult to debug Python code fragments in \texttt{notes.tex} because PythonTex actually executes Python fragments stored in some temporary files that PythonTex aggressively deletes, not straight from \texttt{notes.tex}. Furthermore, the Python interpreter often declared incorrectly that syntax errors were located in PythonTex glue code that is inserted between extracted Python fragments. Although PythonTex attempts to reconcile line numbers between \texttt{notes.tex} and the various temporary files, we found that this simply led to more confusion and difficulties in debugging. 166 | 167 | The generated \LaTeX{} is merged into the \TeX{} token stream from various temporary files, so there is no collated file similar to the \texttt{notes.pyptex} of \pyptex{}. It seemed to us that \TeX{} was struggling to provide informative error messages, and it was difficult to view generated \LaTeX{} fragments in context. We resorted to manually copy-and-pasting generated \LaTeX{} fragments from various temporary files into \texttt{notes.tex}. 168 | 169 | Altogether, debugging was challenging. Issues had to be identified and isolated by searching through multiple temporary files, and then locating the corresponding source in \texttt{notes.tex}. 
170 | 171 | \subsection{The invention of \pyptex{}.} 172 | 173 | If the motivation of upgrading from ad-hoc scripting to PythonTex was to mitigate the burgeoning software engineering challenges in our old build process, then the motivation for moving to \pyptex{} was to address the perceived drawbacks of PythonTex: deadlock issues, and ease of debugging. 174 | 175 | Our starting point was that \TeX{} was not a good tool for the end-to-end process of extracting Python fragments from a template file, replacing them with their outputs and compiling the whole thing down into a PDF. When \TeX{} is performing the extraction task, if it is also simultaneously performing insertions from cache files, deadlock seems almost inevitable. \TeX{} must clearly be used to generate the PDF so it seemed natural to cut \TeX{} out of the beginning of the process, when the Python fragments are extracted from the \texttt{notes.tex} file. From this insight, we concluded we had to abandon the Python Embedding approach, in favor of the Python template Preprocessor approach. 176 | 177 | We have successfully used \pyptex{} for the 2020 version of the F19NB handout 178 | and found that \pyptex{} has the following advantages over PythonTex: 179 | \begin{enumerate} 180 | \item {\bf No deadlock.} \pyptex{} is always able to either generate \texttt{notes.pyptex} from \texttt{notes.tex}, regardless of any cache contents, or report genuine Python errors in the source file \texttt{notes.tex}. 181 | \item {\bf Easy to debug Python.} When \pyptex{} does report genuine Python errors in \texttt{notes.tex}, the error messages are clear and Python debugging is done directly in the source \texttt{notes.tex} file using standard Python tools such as Pdb. There is no auxiliary file for collecting Python code fragments, and no confusion between various line numberings. There is no glue code inserted between Python fragments, and hence Python errors are always correctly reported inside user codes. 
182 | \item {\bf Easy to debug generated \LaTeX{}.} Debugging the generated \LaTeX{} code \texttt{notes.pyptex} is much easier, as it consists entirely of high-level, human-readable \LaTeX{} code, as opposed to a partially digested \TeX{} token stream that is difficult to debug for all but the most advanced \TeX{}nicians. 183 | All \LaTeX{} error messages correspond exactly to the contents of \texttt{notes.pyptex} and its line numbers. 184 | \item {\bf Security.} Third-party \LaTeX{} packages cannot execute arbitrary Python code on the user's machine. 185 | \end{enumerate} 186 | One can regard the \texttt{notes.pyptex} as an intermediate file that the user will occasionally have to inspect in order to debug generated \LaTeX{} fragments, and the experience of PythonTex is that such temporary user-facing files must be carefully thought through lest they become a source of problems. By only generating one such user-facing intermediate file, our experience with \texttt{notes.pyptex} is that we have avoided most of the problems we had tracking information across multiple temporary files in PythonTex. In addition, unlike PythonTex, our intermediate file \texttt{notes.pyptex} is designed to be used first by humans, and secondarily by the \LaTeX{} compiler. This is because \texttt{notes.pyptex} consists of high-level, human-style \LaTeX{} source commands, and not some low-level macro-expansion of the high-level \LaTeX{} source. 187 | 188 | In addition to the user-facing file \texttt{notes.pyptex}, \pyptex{} also produces a plain-text log file \texttt{notes.pyplog}, and a cache file \texttt{notes.pickle} that is in binary format and opaque to users. In our experience, we never had to manually inspect the contents of \texttt{notes.pickle} and this file never caused deadlock or debugging problems in \pyptex{}. Furthermore, \texttt{notes.pickle} can also be deleted without breaking the build. 
In extremis, \texttt{notes.pickle} can theoretically be loaded and inspected using Python's pickle module, but we have never needed to do this. 189 | 190 | Theoretically, an outdated \texttt{notes.pickle} cache could cause a miscompilation of \texttt{notes.pyptex} and produce an incorrect \texttt{notes.pdf}, or even a \LaTeX{} compilation error. The previously-described dependency-tracking features of \pyptex{} are almost always able to detect that the cache should be invalidated, but a sufficiently determined user could defeat these safety measures. For example, if a user is reading data from a file without declaring it as a dependency, or if a user is falsifying ``last modification'' timestamps on files, 191 | or generating random numbers, or some other form of nondeterminism is taking place, then cache invalidation may fail. A concerned user can completely disable the \texttt{notes.pickle} cache file by issuing the Python command \verb|pyp.disable_cache=True| in \texttt{notes.tex}, trading efficiency for safety. 192 | 193 | \section{Conclusions and outlook} 194 | 195 | We have introduced \pyptex{}, the Python template Preprocessor for \LaTeX{}. Unlike other projects that have embedded Python into \LaTeX{}, \pyptex{} uses regular expressions to extract Python fragments and substitute their outputs. Our preprocessor approach is superior to the embedded approach because it avoids the deadlock and debugging issues that plague embedded Python approaches, and it also eliminates the attack vector of arbitrary code execution by malicious third-party \LaTeX{} packages. \pyptex{} has been used to generate problem sets for the F19NB class at Heriot-Watt University. This large 71-page document features 1,256 lines of Python in a 105KB source file \texttt{notes.tex}. 196 | 197 | \pyptex{} centers around the Python programming language, but one could extend on our idea by adding more languages to the preprocessor, e.g. 
the string \verb|@julia{...}| could invoke a Julia interpreter. This poses a challenging issue of how to tightly integrate any such ``guest language'', and particularly its native debugging tools, so that the user experience is best possible. The tight integration of Python's Pdb debugger, and the deliberate cache invalidation algorithm, required careful use of Python's extensive introspection and detailed code generation facilities. This is an avenue for future research. 198 | 199 | \bibliographystyle{plainnat} 200 | 201 | \bibliography{test} 202 | \end{document} 203 | -------------------------------------------------------------------------------- /tests/test1/testgold2/test.pyptex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | 4 | 5 | 6 | 7 | 8 | % This doesn't actually work. 9 | \pdfinfoomitdate1 10 | \pdfsuppressptexinfo-1 11 | \pdftrailerid{} 12 | 13 | \usepackage[utf8]{inputenc} 14 | \usepackage[english]{babel} 15 | \usepackage{amsmath} 16 | \usepackage{verbatim} 17 | \usepackage{relsize} 18 | \usepackage{graphicx} 19 | 20 | \usepackage{hologo} 21 | 22 | 23 | 24 | \newcommand{\Longupdownarrow}{\Big\Updownarrow} 25 | \usepackage{listings} 26 | \lstset{ 27 | basicstyle=\footnotesize\ttfamily} 28 | 29 | \usepackage{natbib} 30 | 31 | \newcommand{\pyptex}{{P\kern-0.08em\raisebox{-0.5ex}{Y\kern-0.1emP}\kern-0.28em\TeX{}}} 32 | 33 | \title{\pyptex{}: The Python Preprocessor for \LaTeX{}.} 34 | \date{wololo} 35 | \author{Sébastien Loisel} 36 | 37 | 38 | \begin{document} 39 | \maketitle 40 | 41 | \begin{abstract} 42 | \pyptex{} is a Python template Preprocessor for \LaTeX{}. From a source template document \texttt{a.tex} containing specially marked Python code fragments, one or more pure \LaTeX{} files \texttt{a.pyptex} are generated and compiled into final \texttt{a.pdf} documents. 
Our new approach differs from previous approaches that embedded Python into the \LaTeX{} engine and allowed bidirectional communication between Python and \LaTeX{}. The embedded Python approach exposes users to security risks, but it also has a deadlock problem and is very hard to debug. By contrast, \pyptex{} avoids the security risks of embedded Python, completely eliminates the deadlock problem, and is easy to debug. We have used \pyptex{} to produce a 71-page handout for linear algebra students containing automatically generated problem sets. In combination with SymPy, SciPy and other Python packages, \pyptex{} can be used to create beautiful scientific documents. 43 | \end{abstract} 44 | 45 | \section{Introduction} 46 | 47 | \pyptex{} is a macro preprocessor (or template engine) for \LaTeX{} \citep{lamport1994latex} that allows one to use the rich Python ecosystem to automate parts of the redaction of sophisticated scientific documents. 48 | In combination with SciPy \citep{virtanen2020scipy} and SymPy \citep{meurer2017sympy}, \pyptex{} is a powerful tool for generating documents ranging from research papers and books to calculus problem sets for students. 49 | 50 | One installs \pyptex{} by typing \texttt{pip install pyptex} at the command prompt. This assumes that one already has working Python and \LaTeX{} installations; by default, \pyptex{} assumes that \texttt{pdflatex} is available at the command line, although alternate \LaTeX{} implementations can also be used. Then, using one's favorite text editor, one places the following text in a file \texttt{example.tex}: 51 | \begin{lstlisting} 52 | \documentclass{article} 53 | @{from sympy import *} 54 | \begin{document} 55 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 56 | \end{document} 57 | \end{lstlisting} 58 | One compiles \texttt{example.tex} using the \pyptex{} command line \texttt{pyptex example.tex}. 
This generates the final output \texttt{example.pdf}, which contains this: 59 | $$\int x^3\,dx = \frac{x^{4}}{4}+C$$ 60 | As we can see, SymPy was used to compute an indefinite integral symbolically, and the result was typeset with standard \LaTeX{}. 61 | 62 | \pyptex{} also produces the intermediate file \texttt{example.pyptex}, which is a pure-\LaTeX{} file that is useful either for debugging purposes, or for sending a final manuscript to a journal. It contains the following: 63 | \begin{lstlisting} 64 | \documentclass{article} 65 | 66 | \begin{document} 67 | $$\int x^3\,dx = \frac{x^{4}}{4}+C$$ 68 | \end{document} 69 | \end{lstlisting} 70 | The SymPy Python code in the template file \texttt{example.tex} was executed and replaced with its own output in standard \LaTeX{} notation. 71 | The file \texttt{example.pdf} was automatically generated by \pyptex{} from the \texttt{example.pyptex} file (internally, \pyptex{} invoked the command \texttt{pdflatex example.pyptex}). 72 | 73 | \begin{figure} 74 | $$\texttt{a.tex} \implies \text{\pyptex{}} \implies \texttt{a.pyptex} \implies \texttt{pdflatex}\implies \texttt{a.pdf}$$ 75 | \caption{Dataflow of the \pyptex{} Python template Preprocessor. \label{f:pre}} 76 | \end{figure} 77 | \begin{figure} 78 | $$\texttt{a.tex} \implies 79 | \left\{ 80 | \begin{array}{c} 81 | \texttt{pdflatex} \\ 82 | \Longupdownarrow \\ 83 | \text{Python} 84 | \end{array} 85 | \right\} 86 | \implies \texttt{a.pdf} 87 | $$ 88 | \caption{Dataflow when Python is embedded in the \texttt{pdflatex} engine. \label{f:embed}} 89 | \end{figure} 90 | 91 | In \pyptex{}, debugging and error handling are first-class citizens. If any errors occur during Python interpretation, \pyptex{} automatically launches the Python debugger Pdb in postmortem mode, just like standard \LaTeX{} enters \texttt{errorstopmode} on error. 
In Pdb, all standard Python debugging tools work as normal, and source code from the originating \texttt{.tex} file is visible in its context (e.g. with the \texttt{list} command). If an error occurs during \LaTeX{} compilation (e.g. during \texttt{pdflatex example.pyptex}), the usual error prompt is provided by \LaTeX{}. 92 | 93 | \pyptex{} is a {\em preprocessor} (or {\em template engine}) for \LaTeX{}, see Figure \ref{f:pre}. 94 | Python fragments are extracted from the (template) source \texttt{a.tex} file by a regular expression pattern matcher, executed by a Python interpreter, and substituted with the appropriate outputs. The collated file is written to disk as \texttt{a.pyptex}, which is a pure \LaTeX{} file. Because this procedure is done by regular expressions, the \LaTeX{} compiler is not invoked and not involved in parsing or processing \texttt{a.tex}. Once \texttt{a.pyptex} has been produced, then \texttt{a.pdf} is obtained, e.g. via \texttt{pdflatex a.pyptex}. 95 | 96 | As far as we know, \pyptex{} is the first time that Python is formally used as a preprocessor or templating engine for \LaTeX{}. However, Python has previously been {\em embedded} into \LaTeX{} engines, see Figure \ref{f:embed}; we mention \citet{fine2005tex}, \citet{ehmsen}, \citet{poore2015pythontex}. For SymPy there is \citet{molteno2014sympytex} and for Sage there is \citet{drake2009sagetex}. Other scripting languages have also been embedded into \LaTeX{}, for example Perl \citep{pakin2004perltex} and Lua \citep{hagen2005luatex}. When Python is embedded into \LaTeX{}, all the Python fragments are extracted from a source \texttt{a.tex} file by the \TeX{} macro engine itself. After executing these Python fragments, the relevant outputs are substituted into the macro stream being ``digested'' by the \TeX{} engine \citep{knuth1984texbook}. Although templating engines for \LaTeX{} seem rare, they are commonly used for other document formats, see e.g. 
\citet{hepple1998}, or \citet{alchin2013pro}. Usually, templating engines focus on generating multiple documents from a single template, whereas \pyptex{} focuses on generating a single document from a template. For that reason, we prefer the terminology ``preprocessor''. That being said, \pyptex{} can indeed be used in a more traditional templating mode where multiple documents are generated from a single template \texttt{a.tex}. 97 | 98 | With the help of \pyptex{}, we have produced a 71-page handout \citep{Loisel2020} for the course F19NB at Heriot-Watt University on numerical linear algebra. Previous to \pyptex{}, in 2018, we had used an ad-hoc and highly complex build system for \LaTeX{} code generation. While we distinguish this ad-hoc approach from our new template preprocessor approach, we mention that the ad-hoc generation of \LaTeX{} from pure Python programs is widespread. In 2019, we switched from an ad-hoc approach to PythonTex and it was a significant improvement, but we found three important drawbacks to the embedded Python approach, which motivated us to create \pyptex{}. 99 | 100 | First, there is the (somewhat theoretical) security risk of allowing third-party \LaTeX{} packages to execute arbitrary Python codes unbeknownst to the user. Second, there is the issue of ``deadlock'', which is when the source and cache files enter a state where compiling \texttt{a.tex} into \texttt{a.pdf} becomes impossible because of \TeX{} errors. This occurs when the \TeX{} macro processor is simultaneously extracting new Python fragments from \texttt{a.tex} while inserting outdated cached substitutions from a previous version of \texttt{a.tex}. To break deadlock, it is usually necessary to manually delete all cache files, but this is not always sufficient, and sometimes modifying \texttt{a.tex} is necessary. Finally, the third problem is the difficulty of debugging Python fragments and generated \LaTeX{} fragments. 
101 | 102 | We have found that our new Python template preprocessor \pyptex{} avoids all three problems. First, it does not allow third-party \LaTeX{} packages to execute arbitrary Python codes because there is no bidirectional communication between \LaTeX{} and Python. Second, deadlock is impossible by design because the \LaTeX{} engine is not involved at all in the extraction, execution and substitution of Python fragments from \texttt{a.tex}. Finally, debugging with Pdb is treated as a first-class citizen and works as normal, and the collated \LaTeX{} document is made available in the human-preferred format of a pure-\LaTeX{} source file \texttt{a.pyptex}, as opposed to a ``partially digested'' \TeX{} macro stream. We have confirmed these significant improvements using our F19NB handout, which consists of 2,592 lines and 105KB of \LaTeX{} source in file \texttt{notes.tex}, including 1,256 lines of Python code. 103 | 104 | Our paper is organized as follows. In Section 2, we give a brief tour of \pyptex{} features. In Section 3, we discuss the F19NB case study, a 71 page PDF document containing automatically generated problem sets for linear algebra students. We end with some conclusions in Section 4. 105 | 106 | \section{A brief tour of \pyptex{} features} 107 | 108 | The \texttt{pyptex a.tex} command executes Python fragments in \texttt{a.tex} delimited by either \verb|@{...}| or \verb|@{{{...}}}|; the latter notation is necessary if the Python fragment contains curly braces. If a literal \verb|@{| is needed, it can be escaped by using a double \verb|@@{|; apart from this, it is not necessary to escape \verb|@| symbols. \pyptex{} also honors \LaTeX{}-style comments so that \verb|%@{error}| does not execute the Python expression \verb|error|. 109 | 110 | \pyptex{} allows one to execute arbitrary Python programs and import arbitrary Python packages, e.g. \verb|@{from sympy import *}|. 
One can use SymPy to create simple plots, save them to a file and include them via the standard \LaTeX{} graphicx package. For example: 111 | \lstinputlisting{plot1.tex} 112 | As with all modern template preprocessors, \pyptex{} enables modular development by assembling a project from multiple component files. For example, the preceding plotting snippet was saved in the \texttt{plot1.tex}, which will later be included in our master \texttt{a.tex} via the command 113 | \verb|@{pyp.input('plot1.tex')}|. The files \texttt{a.tex} and \texttt{plot1.tex} are analogous to Python modules in that they execute their own separate namespaces. As such, it is necessary to re-import SymPy into the namespace of \texttt{plot1.tex}, even though it already has been imported into the namespace of the master file \texttt{a.tex}. 114 | 115 | The pattern of plotting a figure, saving it to a file, and then including its filename into the generated \texttt{a.pyptex} file is very common, so \pyptex{} includes a convenience function \texttt{pyp.savefig()} that automatically generates distinct numbered filenames under the \texttt{a-generated/*} directory; in the present case, we create the file \texttt{a-generated/fig1.eps} with the following fragment: 116 | \lstinputlisting{plot2.tex} 117 | These plotting commands have also been stored in a separate file \texttt{plot2.tex}. 118 | Note that the Python fragment in \texttt{plot2.tex} consists of two Python statements. The Python programming language distinguishes {\em expressions} that return or produce a value, and typically consist of a single line; and {\em statements} that may run multiple lines and produce no value. The multiple lines in the \texttt{plot2.tex} Python code fragments means that they are statements that produce no value. In order to print the filename \texttt{a-generated/fig1.eps} into the curly braces of the \verb|\includegraphics{...}|, it is therefore required to use the \verb|pyp.print(...)| command. 
119 | 120 | Embedding the files \texttt{plot1.tex} and \texttt{plot2.tex} via the \texttt{pyp.input(...)} mechanism described above, results in the following output:\\ 121 | \noindent \input{plot1.pyptex} 122 | \input{plot2.pyptex} 123 | 124 | Standard out and standard error print out to the console as usual, and are additionally logged in the \texttt{a.pyplog} file. If one wishes to print directly to the \texttt{a.pyptex} file, one should use the \texttt{pyp.print()} function. 125 | 126 | For performance reasons, \pyptex{} caches the outputs of Python fragments in a binary file \texttt{a.pickle}. If one edits purely textual portions of \texttt{a.tex} while leaving the Python fragments untouched, the cached outputs are used and the production of \texttt{a.pyptex} is nearly instantaneous. If the Python fragments in \texttt{a.tex} are not identical to the cached fragments, or if the command-line arguments to \texttt{pyptex} have changed, or if the \texttt{pyptex} package version has changed, the cache is automatically invalidated. Furthermore, the command \verb|pyp.dep(filename)| can be used to add further dependencies that will invalidate the cache when the file \verb|filename| changes. Any auxiliary \texttt{.tex} files (and their dependencies) incorporated via \verb|pyp.input(...)| are automatically added to the list of dependencies via \verb|pyp.dep()|, but if external data files are \texttt{open()}ed and used to generate, e.g. \LaTeX{} tables, these files should manually be added to the dependencies via \verb|pyp.dep()|. The helper function \verb|pyp.open(...)| automatically calls \verb|pyp.dep(...)| before calling the builtin \verb|open(...)| function. 127 | 128 | One can control how the \texttt{a.pyptex} file is compiled into \texttt{a.pdf} by specifying which \LaTeX{} engine to use. To completely disable all \LaTeX{} processing, do \verb|@{pyp.latexcommand=False}| anywhere in \texttt{a.tex}. 
The default \texttt{latexcommand} is 129 | \begin{lstlisting} 130 | {latex} {pyptexfilename} && 131 | (test ! -f {bibfilename} || bibtex {auxfilename}) 132 | \end{lstlisting} 133 | The usual Python substitutions are made from the \texttt{pyp} object, e.g. \verb|pyp.latex| defaults to \verb|"pdflatex -file-line-error --synctex=1"| but this can be overridden from anywhere in \texttt{a.tex}. 134 | 135 | \pyptex{} can be used programmatically in the Python interpreter with \texttt{import pyptex}. The module exports a \texttt{pyptex.pyptex} object; doing \\ \texttt{pyp = pyptex.pyptex('a.tex')} causes the file \texttt{a.tex} to be read in, the Python fragments are executed, and the file \texttt{a.pyptex} is written out. The compiled text \texttt{a.pyptex} is available as the string \texttt{pyp.compiled}. The function \\\texttt{pyptex.pyptexmain()} implements the standard operations of the shell command \texttt{pyptex}. Further documentation is available by doing e.g. \texttt{help(pyptex)}. 136 | 137 | On Mac, \pyptex{} can be used with TeXShop by creating the file \\ \verb|~/Library/TeXShop/Engines/pyptex.engine| with the following contents: 138 | \begin{lstlisting} 139 | #!/bin/bash 140 | pyptex $1 141 | \end{lstlisting} 142 | Upon restarting TeXShop, this adds the \texttt{pyptex} build option to the list of build engines. 143 | %\lstinputlisting{~/Library/TeXShop/Engines/pyptex.engine} 144 | 145 | \section{Case study: the F19NB handout} 146 | 147 | At Heriot-Watt University, F19NB is a course on numerical linear algebra for undergraduate students. Topics include Gaussian elimination, classical iterations (Jacobi and SOR), GMRES and the QR iteration. All of the theory, proofs, examples and exercises are collected in a 71-page handout. Many of the numerical exercises and their solutions are automatically generated. For some problems (e.g. 
eigenvalue problems, where the characteristic polynomial must be obtained), it is helpful to do a little bit of symbolic calculations using SymPy. Other exercises and examples are more numerical and use numpy and matplotlib for displaying solutions. 148 | 149 | As of this writing, there are 2,592 lines and 105KB\footnote{As usual, some \LaTeX{} lines can be very long or very short, so the size of the file in KB may be more indicative of the size of the project} in the source file \texttt{notes.tex}, of which 1,256 lines are Python scripts. However, up to 2018, the \texttt{notes.tex} file did not contain any Python code. The Python codes were stored in separate \texttt{.py} files under a \texttt{problib} directory. These problem sets were spread over 24 Python source files and 1,268 lines of Python code. The \texttt{problib} codes were executed in sequence and all the outputs collected in an automatically generated 1,211 line (78KB) \texttt{problems.tex} file. The file \texttt{problems.tex} is a pure \LaTeX{} file that contains problem questions and solutions, encapsulated in \verb|\newcommand| statements, and is then included in the main \texttt{notes.tex} file via the \verb|\input{problems.tex}| mechanism. Various problems and their solutions can then be instantiated in the \texttt{notes.tex} file by invoking the corresponding \LaTeX{} commands. 150 | 151 | The sheer number of files and building steps caused an explosion in complexity more typical of software engineering than mathematical typesetting. Identifying which Python code generated any particular problem set was becoming increasingly challenging. On occasions, it was found that \texttt{problems.tex} was inconsistent with the \texttt{problib} Python scripts, either because the \texttt{problib} Python scripts had not been rerun, or sometimes because a Python script had accidentally been deleted. Fixing these issues was taking an increasing amount of the author's time. 
152 | 153 | \subsection{PythonTex and \texttt{notes.tex}} 154 | 155 | In 2019, we adopted PythonTex and it was a major success. This allowed us to immediately reduce the number of external Python scripts and build steps, because many of the simpler tasks could be done directly from \texttt{notes.tex}. By the end of 2019, all Python scripts were moved to \texttt{notes.tex}, and \texttt{problib} was deleted. 156 | 157 | Despite these major improvements, new ``software engineering-style'' problems arose. In the abstract, allowing third-party \LaTeX{} packages to execute arbitrary Python programs seems like a security problem. As of this writing, there are 7,093 potentially malicious \LaTeX{} packages on CTAN. That being said, there are 211,855 packages currently on the Python archive PyPI, and malicious Python packages are regularly identified, e.g. \citet{cimpanu2018twelve}. Ultimately, it was the deadlock and debugging problems that motivated us to create \pyptex{}. 158 | 159 | We now list a few examples of deadlock situations, inspired by deadlocks that happened during the development of \texttt{notes.tex}. First, if some Python fragment in \texttt{notes.tex} generates the image \texttt{dog.png}, then the \LaTeX{} command \verb|\includegraphics{dog.png}| will fail on first execution because PythonTex has not yet executed the Python code that generates \texttt{dog.png}. Because the first execution of \verb|pdflatex| has failed, the Python code to generate \verb|dog.png| is never executed. Repeatedly invoking \verb|pdflatex| or \verb|pythontex|, or deleting cache and aux files, does not break the deadlock. One must modify \texttt{notes.tex} so that it does not cause an error when the file \texttt{dog.png} does not exist. Furthermore, if the filename for the \verb|includegraphics| is generated programmatically, e.g.
\\ \verb|\includegraphics{\py{"dog.png"}}|, then when there is no cache, \texttt{pdflatex} is likely to fail because \verb|\py{"dog.png"}| will macro-expand to the empty string, causing an error. One can ``prime'' the cache by temporarily deleting the \verb|\includegraphics|. After reinserting the \verb|\includegraphics|, \texttt{notes.tex} will compile successfully, but deleting the cache or \texttt{dog.png} will cause a compilation failure/deadlock again. 160 | We also found that PythonTex often tried to substitute mismatched cache outputs to various input Python fragments. 161 | We are not entirely certain how PythonTex keeps track of matching inputs and outputs in the cache, but it seemed to us that adding or deleting a Python code fragment \verb|\py{...}| almost always resulted in deadlock and having to delete all cache files. Of course, deleting all cache files itself sometimes caused deadlock elsewhere. 162 | 163 | Practically speaking, we found that almost any significant and even minor edit to \texttt{notes.tex} resulted in deadlock and had to be accompanied with a manual deletion of all temporary files, and further tweaks to \texttt{notes.tex} that are required by the deletion of the cache files. 164 | 165 | We found it difficult to debug Python code fragments in \texttt{notes.tex} because PythonTex actually executes Python fragments stored in some temporary files that PythonTex aggressively deletes, not straight from \texttt{notes.tex}. Furthermore, the Python interpreter often declared incorrectly that syntax errors were located in PythonTex glue code that is inserted between extracted Python fragments. Although PythonTex attempts to reconcile line numbers between \texttt{notes.tex} and the various temporary files, we found that this simply led to more confusion and difficulties in debugging. 
166 | 167 | The generated \LaTeX{} is merged into the \TeX{} token stream from various temporary files, so there is no collated file similar to the \texttt{notes.pyptex} of \pyptex{}. It seemed to us that \TeX{} was struggling to provide informative error messages, and it was difficult to view generated \LaTeX{} fragments in context. We resorted to manually copy-and-pasting generated \LaTeX{} fragments from various temporary files into \texttt{notes.tex}. 168 | 169 | Altogether, debugging was challenging. Issues had to be identified and isolated by searching through multiple temporary files, and then locating the corresponding source in \texttt{notes.tex}. 170 | 171 | \subsection{The invention of \pyptex{}.} 172 | 173 | If the motivation of upgrading from ad-hoc scripting to PythonTex was to mitigate the burgeoning software engineering challenges in our old build process, then the motivation for moving to \pyptex{} was to address the perceived drawbacks of PythonTex: deadlock issues, and ease of debugging. 174 | 175 | Our starting point was that \TeX{} was not a good tool for the end-to-end process of extracting Python fragments from a template file, replacing them with their outputs and compiling the whole thing down into a PDF. When \TeX{} is performing the extraction task, if it is also simultaneously performing insertions from cache files, deadlock seems almost inevitable. \TeX{} must clearly be used to generate the PDF so it seemed natural to cut \TeX{} out of the beginning of the process, when the Python fragments are extracted from the \texttt{notes.tex} file. From this insight, we concluded we had to abandon the Python Embedding approach, in favor of the Python template Preprocessor approach. 
176 | 177 | We have successfully used \pyptex{} for the 2020 version of the F19NB handout 178 | and found that \pyptex{} has the following advantages over PythonTex: 179 | \begin{enumerate} 180 | \item {\bf No deadlock.} \pyptex{} is always able to either generate \texttt{notes.pyptex} from \texttt{notes.tex}, regardless of any cache contents, or report genuine Python errors in the source file \texttt{notes.tex}. 181 | \item {\bf Easy to debug Python.} When \pyptex{} does report genuine Python errors in \texttt{notes.tex}, the error messages are clear and Python debugging is done directly in the source \texttt{notes.tex} file using standard Python tools such as Pdb. There is no auxiliary file for collecting Python code fragments, and no confusion between various line numberings. There is no glue code inserted between Python fragments, and hence Python errors are always correctly reported inside user codes. 182 | \item {\bf Easy to debug generated \LaTeX{}.} Debugging the generated \LaTeX{} code \texttt{notes.pyptex} is much easier, as it consists entirely of high-level, human-readable \LaTeX{} code, as opposed to a partially digested \TeX{} token stream that is difficult to debug for all but the most advanced \TeX{}nicians. 183 | All \LaTeX{} error messages correspond exactly to the contents of \texttt{notes.pyptex} and its line numbers. 184 | \item {\bf Security.} Third-party \LaTeX{} packages cannot execute arbitrary Python code on the user's machine. 185 | \end{enumerate} 186 | One can regard the \texttt{notes.pyptex} as an intermediate file that the user will occasionally have to inspect in order to debug generated \LaTeX{} fragments, and the experience of PythonTex is that such temporary user-facing files must be carefully thought through lest they become a source of problems. 
By only generating one such user-facing intermediate file, our experience with \texttt{notes.pyptex} is that we have avoided most of the problems we had tracking information across multiple temporary files in PythonTex. In addition, unlike PythonTex, our intermediate file \texttt{notes.pyptex} is designed to be used first by humans, and secondarily by the \LaTeX{} compiler. This is because \texttt{notes.pyptex} consists of high-level, human-style \LaTeX{} source commands, and not some low-level macro-expansion of the high-level \LaTeX{} source. 187 | 188 | In addition to the user-facing file \texttt{notes.pyptex}, \pyptex{} also produces a plain-text log file \texttt{notes.pyplog}, and a cache file \texttt{notes.pickle} that is in binary format and opaque to users. In our experience, we never had to manually inspect the contents of \texttt{notes.pickle} and this file never caused deadlock or debugging problems in \pyptex{}. Furthermore, \texttt{notes.pickle} can also be deleted without breaking the build. In extremis, \texttt{notes.pickle} can theoretically be loaded and inspected using Python's pickle module, but we have never needed to do this. 189 | 190 | Theoretically, an outdated \texttt{notes.pickle} cache could cause a miscompilation of \texttt{notes.pyptex} and produce an incorrect \texttt{notes.pdf}, or even a \LaTeX{} compilation error. The previously-described dependency-tracking features of \pyptex{} are almost always able to detect that the cache should be invalidated, but a sufficiently determined user could defeat these safety measures. For example, if a user is reading data from a file without declaring it as a dependency, or if a user is falsifying ``last modification'' timestamps on files, 191 | or generating random numbers, or some other form of nondeterminism is taking place, then cache invalidation may fail.
A concerned user can completely disable the \texttt{notes.pickle} cache file by issuing the Python command \verb|pyp.disable_cache=True| in \texttt{notes.tex}, trading efficiency for safety. 192 | 193 | \section{Conclusions and outlook} 194 | 195 | We have introduced \pyptex{}, the Python template Preprocessor for \LaTeX{}. Unlike other projects that have embedded Python into \LaTeX{}, \pyptex{} uses regular expressions to extract Python fragments and substitute their outputs. Our preprocessor approach is superior to the embedded approach because it avoids the deadlock and debugging issues that plague embedded Python approaches, and it also eliminates the attack vector of arbitrary code execution by malicious third-party \LaTeX{} packages. \pyptex{} has been used to generate problem sets for the F19NB class at Heriot-Watt University. This large 71-page document features 1,256 lines of Python in a 105KB source file \texttt{notes.tex}. 196 | 197 | \pyptex{} centers around the Python programming language, but one could extend our idea by adding more languages to the preprocessor, e.g. the string \verb|@julia{...}| could invoke a Julia interpreter. This poses a challenging issue of how to tightly integrate any such ``guest language'', and particularly its native debugging tools, so that the user experience is the best possible. The tight integration of Python's Pdb debugger, and the deliberate cache invalidation algorithm, required careful use of Python's extensive introspection and detailed code generation facilities. This is an avenue for future research.
198 | 199 | \bibliographystyle{plainnat} 200 | 201 | \bibliography{test} 202 | \end{document} 203 | -------------------------------------------------------------------------------- /tests/test1/test1.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | 4 | @{{{ 5 | aaa = 1 6 | pyp.freeze() 7 | bbb = 2 8 | print(aaa,bbb) 9 | pyp.clear() 10 | print(aaa) 11 | try: 12 | print(bbb) 13 | except: 14 | print("As expected, bbb is now undefined.") 15 | }}} 16 | @{{{ 17 | try: 18 | print(bbb) 19 | except: 20 | print("As expected again, bbb is still undefined.") 21 | }}} 22 | 23 | 24 | % This doesn't actually work. 25 | \pdfinfoomitdate1 26 | \pdfsuppressptexinfo-1 27 | \pdftrailerid{} 28 | 29 | \usepackage[utf8]{inputenc} 30 | \usepackage[english]{babel} 31 | \usepackage{amsmath} 32 | \usepackage{verbatim} 33 | \usepackage{relsize} 34 | \usepackage{graphicx} 35 | 36 | \usepackage{hologo} 37 | 38 | @{from sympy import *} 39 | 40 | \newcommand{\Longupdownarrow}{\Big\Updownarrow} 41 | \usepackage{listings} 42 | \lstset{ 43 | basicstyle=\footnotesize\ttfamily} 44 | 45 | \usepackage{natbib} 46 | 47 | \newcommand{\pyptex}{{P\kern-0.08em\raisebox{-0.5ex}{Y\kern-0.1emP}\kern-0.28em\TeX{}}} 48 | 49 | \title{\pyptex{}: The Python Preprocessor for \LaTeX{}.} 50 | \date{wololo} 51 | \author{Sébastien Loisel} 52 | 53 | 54 | \begin{document} 55 | \maketitle 56 | 57 | \begin{abstract} 58 | \pyptex{} is a Python template Preprocessor for \LaTeX{}. From a source template document \texttt{a.tex} containing specially marked Python code fragments, one or more pure \LaTeX{} files \texttt{a.pyptex} are generated and compiled into final \texttt{a.pdf} documents. Our new approach differs from previous approaches that embedded Python into the \LaTeX{} engine and allowed bidirectional communication between Python and \LaTeX{}. 
The embedded Python approach exposes users to security risks, but it also has a deadlock problem and is very hard to debug. By contrast, \pyptex{} avoids the security risks of embedded Python, completely eliminates the deadlock problem, and is easy to debug. We have used \pyptex{} to produce a 71-page handout for linear algebra students containing automatically generated problem sets. In combination with SymPy, SciPy and other Python packages, \pyptex{} can be used to create beautiful scientific documents. 59 | \end{abstract} 60 | 61 | \section{Introduction} 62 | 63 | \pyptex{} is a macro preprocessor (or template engine) for \LaTeX{} \citep{lamport1994latex} that allows one to use the rich Python ecosystem to automate parts of the redaction of sophisticated scientific documents. 64 | In combination with SciPy \citep{virtanen2020scipy} and SymPy \citep{meurer2017sympy}, \pyptex{} is a powerful tool for generating documents ranging from research papers and books to calculus problem sets for students. 65 | 66 | One installs \pyptex{} by typing \texttt{pip install pyptex} at the command prompt. This assumes that one already has working Python and \LaTeX{} installations; by default, \pyptex{} assumes that \texttt{pdflatex} is available at the command line, although alternate \LaTeX{} implementations can also be used. Then, using one's favorite text editor, one places the following text in a file \texttt{example.tex}: 67 | \begin{lstlisting} 68 | \documentclass{article} 69 | @@{from sympy import *} 70 | \begin{document} 71 | $$\int x^3\,dx = @@{S('integrate(x^3,x)')}+C$$ 72 | \end{document} 73 | \end{lstlisting} 74 | One compiles \texttt{example.tex} using the \pyptex{} command line \texttt{pyptex example.tex}. This generates the final output \texttt{example.pdf}, which contains this: 75 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 76 | As we can see, SymPy was used to compute an indefinite integral symbolically, and the result was typeset with standard \LaTeX{}. 
77 | 78 | \pyptex{} also produces the intermediate file \texttt{example.pyptex}, which is a pure-\LaTeX{} file that is useful either for debugging purposes, or for sending a final manuscript to a journal. It contains the following: 79 | \begin{lstlisting} 80 | \documentclass{article} 81 | @{from sympy import *} 82 | \begin{document} 83 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 84 | \end{document} 85 | \end{lstlisting} 86 | The SymPy Python code in the template file \texttt{example.tex} was executed and replaced with its own output in standard \LaTeX{} notation. 87 | The file \texttt{example.pdf} was automatically generated by \pyptex{} from the \texttt{example.pyptex} file (internally, \pyptex{} invoked the command \texttt{pdflatex example.pyptex}). 88 | 89 | \begin{figure} 90 | $$\texttt{a.tex} \implies \text{\pyptex{}} \implies \texttt{a.pyptex} \implies \texttt{pdflatex}\implies \texttt{a.pdf}$$ 91 | \caption{Dataflow of the \pyptex{} Python template Preprocessor. \label{f:pre}} 92 | \end{figure} 93 | \begin{figure} 94 | $$\texttt{a.tex} \implies 95 | \left\{ 96 | \begin{array}{c} 97 | \texttt{pdflatex} \\ 98 | \Longupdownarrow \\ 99 | \text{Python} 100 | \end{array} 101 | \right\} 102 | \implies \texttt{a.pdf} 103 | $$ 104 | \caption{Dataflow when Python is embedded in the \texttt{pdflatex} engine. \label{f:embed}} 105 | \end{figure} 106 | 107 | In \pyptex{}, debugging and error handling are first-class citizens. If any errors occur during Python interpretation, \pyptex{} automatically launches the Python debugger Pdb in postmortem mode, just like standard \LaTeX{} enters \texttt{errorstopmode} on error. In Pdb, all standard Python debugging tools work as normal, and source code from the originating \texttt{.tex} file is visible in its context (e.g. with the \texttt{list} command). If an error occurs during \LaTeX{} compilation (e.g. during \texttt{pdflatex example.pyptex}), the usual error prompt is provided by \LaTeX{}.
108 | 109 | \pyptex{} is a {\em preprocessor} (or {\em template engine}) for \LaTeX{}, see Figure \ref{f:pre}. 110 | Python fragments are extracted from the (template) source \texttt{a.tex} file by a regular expression pattern matcher, executed by a Python interpreter, and substituted with the appropriate outputs. The collated file is written to disk as \texttt{a.pyptex}, which is a pure \LaTeX{} file. Because this procedure is done by regular expressions, the \LaTeX{} compiler is not invoked and not involved in parsing or processing \texttt{a.tex}. Once \texttt{a.pyptex} has been produced, then \texttt{a.pdf} is obtained, e.g. via \texttt{pdflatex a.pyptex}. 111 | 112 | As far as we know, \pyptex{} is the first time that Python is formally used as a preprocessor or templating engine for \LaTeX{}. However, Python has previously been {\em embedded} into \LaTeX{} engines, see Figure \ref{f:embed}; we mention \citet{fine2005tex}, \citet{ehmsen}, \citet{poore2015pythontex}. For SymPy there is \citet{molteno2014sympytex} and for Sage there is \citet{drake2009sagetex}. Other scripting languages have also been embedded into \LaTeX{}, for example Perl \citep{pakin2004perltex} and Lua \citep{hagen2005luatex}. When Python is embedded into \LaTeX{}, all the Python fragments are extracted from a source \texttt{a.tex} file by the \TeX{} macro engine itself. After executing these Python fragments, the relevant outputs are substituted into the macro stream being ``digested'' by the \TeX{} engine \citep{knuth1984texbook}. Although templating engines for \LaTeX{} seem rare, they are commonly used for other document formats, see e.g. \citet{hepple1998}, or \citet{alchin2013pro}. Usually, templating engines focus on generating multiple documents from a single template, whereas \pyptex{} focuses on generating a single document from a template. For that reason, we prefer the terminology ``preprocessor''. 
That being said, \pyptex{} can indeed be used in a more traditional templating mode where multiple documents are generated from a single template \texttt{a.tex}. 113 | 114 | With the help of \pyptex{}, we have produced a 71-page handout \citep{Loisel2020} for the course F19NB at Heriot-Watt University on numerical linear algebra. Previous to \pyptex{}, in 2018, we had used an ad-hoc and highly complex build system for \LaTeX{} code generation. While we distinguish this ad-hoc approach from our new template preprocessor approach, we mention that the ad-hoc generation of \LaTeX{} from pure Python programs is widespread. In 2019, we switched from an ad-hoc approach to PythonTex and it was a significant improvement, but we found three important drawbacks to the embedded Python approach, which motivated us to create \pyptex{}. 115 | 116 | First, there is the (somewhat theoretical) security risk of allowing third-party \LaTeX{} packages to execute arbitrary Python codes unbeknownst to the user. Second, there is the issue of ``deadlock'', which is when the source and cache files enter a state where compiling \texttt{a.tex} into \texttt{a.pdf} becomes impossible because of \TeX{} errors. This occurs when the \TeX{} macro processor is simultaneously extracting new Python fragments from \texttt{a.tex} while inserting outdated cached substitutions from a previous version of \texttt{a.tex}. To break deadlock, it is usually necessary to manually delete all cache files, but this is not always sufficient, and sometimes modifying \texttt{a.tex} is necessary. Finally, the third problem is the difficulty of debugging Python fragments and generated \LaTeX{} fragments. 117 | 118 | We have found that our new Python template preprocessor \pyptex{} avoids all three problems. First, it does not allow third-party \LaTeX{} packages to execute arbitrary Python codes because there is no bidirectional communication between \LaTeX{} and Python. 
Second, deadlock is impossible by design because the \LaTeX{} engine is not involved at all in the extraction, execution and substitution of Python fragments from \texttt{a.tex}. Finally, debugging with Pdb is treated as a first-class citizen and works as normal, and the collated \LaTeX{} document is made available in the human-preferred format of a pure-\LaTeX{} source file \texttt{a.pyptex}, as opposed to a ``partially digested'' \TeX{} macro stream. We have confirmed these significant improvements using our F19NB handout, which consists of 2,592 lines and 105KB of \LaTeX{} source in file \texttt{notes.tex}, including 1,256 lines of Python code. 119 | 120 | Our paper is organized as follows. In Section 2, we give a brief tour of \pyptex{} features. In Section 3, we discuss the F19NB case study, a 71 page PDF document containing automatically generated problem sets for linear algebra students. We end with some conclusions in Section 4. 121 | 122 | \section{A brief tour of \pyptex{} features} 123 | 124 | The \texttt{pyptex a.tex} command executes Python fragments in \texttt{a.tex} delimited by either \verb|@@{...}| or \verb|@@{{{...}}}|; the latter notation is necessary if the Python fragment contains curly braces. If a literal \verb|@@{| is needed, it can be escaped by using a double \verb|@@@{|; apart from this, it is not necessary to escape \verb|@| symbols. \pyptex{} also honors \LaTeX{}-style comments so that \verb|%@{error}| does not execute the Python expression \verb|error|. 125 | 126 | \pyptex{} allows one to execute arbitrary Python programs and import arbitrary Python packages, e.g. \verb|@@{from sympy import *}|. One can use SymPy to create simple plots, save them to a file and include them via the standard \LaTeX{} graphicx package. For example: 127 | \lstinputlisting{plot1.tex} 128 | As with all modern template preprocessors, \pyptex{} enables modular development by assembling a project from multiple component files. 
For example, the preceding plotting snippet was saved in the file \texttt{plot1.tex}, which will later be included in our master \texttt{a.tex} via the command 129 | \verb|@@{pyp.input('plot1.tex')}|. The files \texttt{a.tex} and \texttt{plot1.tex} are analogous to Python modules in that they execute in their own separate namespaces. As such, it is necessary to re-import SymPy into the namespace of \texttt{plot1.tex}, even though it already has been imported into the namespace of the master file \texttt{a.tex}. 130 | 131 | The pattern of plotting a figure, saving it to a file, and then including its filename into the generated \texttt{a.pyptex} file is very common, so \pyptex{} includes a convenience function \texttt{pyp.savefig()} that automatically generates distinct numbered filenames under the \texttt{a-generated/*} directory; in the present case, we create the file \texttt{a-generated/fig1.eps} with the following fragment: 132 | \lstinputlisting{plot2.tex} 133 | These plotting commands have also been stored in a separate file \texttt{plot2.tex}. 134 | Note that the Python fragment in \texttt{plot2.tex} consists of two Python statements. The Python programming language distinguishes {\em expressions} that return or produce a value, and typically consist of a single line; and {\em statements} that may run multiple lines and produce no value. The multiple lines in the \texttt{plot2.tex} Python code fragments mean that they are statements that produce no value. In order to print the filename \texttt{a-generated/fig1.eps} into the curly braces of the \verb|\includegraphics{...}|, it is therefore required to use the \verb|pyp.print(...)| command.
135 | 136 | Embedding the files \texttt{plot1.tex} and \texttt{plot2.tex} via the \texttt{pyp.input(...)} mechanism described above, results in the following output:\\ 137 | \noindent @{pyp.input('plot1.tex')} 138 | @{pyp.input('plot2.tex')} 139 | 140 | Standard out and standard error print out to the console as usual, and are additionally logged in the \texttt{a.pyplog} file. If one wishes to print directly to the \texttt{a.pyptex} file, one should use the \texttt{pyp.print()} function. 141 | 142 | For performance reasons, \pyptex{} caches the outputs of Python fragments in a binary file \texttt{a.pickle}. If one edits purely textual portions of \texttt{a.tex} while leaving the Python fragments untouched, the cached outputs are used and the production of \texttt{a.pyptex} is nearly instantaneous. If the Python fragments in \texttt{a.tex} are not identical to the cached fragments, or if the command-line arguments to \texttt{pyptex} have changed, or if the \texttt{pyptex} package version has changed, the cache is automatically invalidated. Furthermore, the command \verb|pyp.dep(filename)| can be used to add further dependencies that will invalidate the cache when the file \verb|filename| changes. Any auxiliary \texttt{.tex} files (and their dependencies) incorporated via \verb|pyp.input(...)| are automatically added to the list of dependencies via \verb|pyp.dep()|, but if external data files are \texttt{open()}ed and used to generate, e.g. \LaTeX{} tables, these files should manually be added to the dependencies via \verb|pyp.dep()|. The helper function \verb|pyp.open(...)| automatically calls \verb|pyp.dep(...)| before calling the builtin \verb|open(...)| function. 143 | 144 | One can control how the \texttt{a.pyptex} file is compiled into \texttt{a.pdf} by specifying which \LaTeX{} engine to use. To completely disable all \LaTeX{} processing, do \verb|@@{pyp.latexcommand=False}| anywhere in \texttt{a.tex}. 
The default \texttt{latexcommand} is 145 | \begin{lstlisting} 146 | {latex} {pyptexfilename} && 147 | (test ! -f {bibfilename} || bibtex {auxfilename}) 148 | \end{lstlisting} 149 | The usual Python substitutions are made from the \texttt{pyp} object, e.g. \verb|pyp.latex| defaults to \verb|"@{pyp.latex}"| but this can be overridden from anywhere in \texttt{a.tex}. 150 | 151 | \pyptex{} can be used programmatically in the Python interpreter with \texttt{import pyptex}. The module exports a \texttt{pyptex.pyptex} object; doing \\ \texttt{pyp = pyptex.pyptex('a.tex')} causes the file \texttt{a.tex} to be read in, the Python fragments are executed, and the file \texttt{a.pyptex} is written out. The compiled text \texttt{a.pyptex} is available as the string \texttt{pyp.compiled}. The function \\\texttt{pyptex.pyptexmain()} implements the standard operations of the shell command \texttt{pyptex}. Further documentation is available by doing e.g. \texttt{help(pyptex)}. 152 | 153 | On Mac, \pyptex{} can be used with TeXShop by creating the file \\ \verb|~/Library/TeXShop/Engines/pyptex.engine| with the following contents: 154 | \begin{lstlisting} 155 | #!/bin/bash 156 | pyptex $1 157 | \end{lstlisting} 158 | Upon restarting TeXShop, this adds the \texttt{pyptex} build option to the list of build engines. 159 | %\lstinputlisting{~/Library/TeXShop/Engines/pyptex.engine} 160 | 161 | \section{Case study: the F19NB handout} 162 | 163 | At Heriot-Watt University, F19NB is a course on numerical linear algebra for undergraduate students. Topics include Gaussian elimination, classical iterations (Jacobi and SOR), GMRES and the QR iteration. All of the theory, proofs, examples and exercises are collected in a 71-page handout. Many of the numerical exercises and their solutions are automatically generated. For some problems (e.g. eigenvalue problems, where the characteristic polynomial must be obtained), it is helpful to do a little bit of symbolic calculations using SymPy. 
Other exercises and examples are more numerical and use numpy and matplotlib for displaying solutions. 164 | 165 | As of this writing, there are 2,592 lines and 105KB\footnote{As usual, some \LaTeX{} lines can be very long or very short, so the size of the file in KB may be more indicative of the size of the project} in the source file \texttt{notes.tex}, of which 1,256 lines are Python scripts. However, up to 2018, the \texttt{notes.tex} file did not contain any Python code. The Python codes were stored in separate \texttt{.py} files under a \texttt{problib} directory. These problem sets were spread over 24 Python source files and 1,268 lines of Python code. The \texttt{problib} codes were executed in sequence and all the outputs collected in an automatically generated 1,211 line (78KB) \texttt{problems.tex} file. The file \texttt{problems.tex} is a pure \LaTeX{} file that contains problem questions and solutions, encapsulated in \verb|\newcommand| statements, and is then included in the main \texttt{notes.tex} file via the \verb|\input{problems.tex}| mechanism. Various problems and their solutions can then be instantiated in the \texttt{notes.tex} file by invoking the corresponding \LaTeX{} commands. 166 | 167 | The sheer number of files and building steps caused an explosion in complexity more typical of software engineering than mathematical typesetting. Identifying which Python code generated any particular problem set was becoming increasingly challenging. On occasions, it was found that \texttt{problems.tex} was inconsistent with the \texttt{problib} Python scripts, either because the \texttt{problib} Python scripts had not been rerun, or sometimes because a Python script had accidentally been deleted. Fixing these issues was taking an increasing amount of the author's time. 168 | 169 | \subsection{PythonTex and \texttt{notes.tex}} 170 | 171 | In 2019, we adopted PythonTex and it was a major success. 
This allowed us to immediately reduce the number of external Python scripts and build steps, because many of the simpler tasks could be done directly from \texttt{notes.tex}. By the end of 2019, all Python scripts were moved to \texttt{notes.tex}, and \texttt{problib} was deleted. 172 | 173 | Despite these major improvements, new ``software engineering-style'' problems arose. In the abstract, allowing third-party \LaTeX{} packages to execute arbitrary Python programs seems like a security problem. As of this writing, there are 7,093 potentially malicious \LaTeX{} packages on CTAN. That being said, there are 211,855 packages currently on the Python archive PyPI, and malicious Python packages are regularly identified, e.g. \citet{cimpanu2018twelve}. Ultimately, it was the deadlock and debugging problems that motivated us to create \pyptex{}. 174 | 175 | We now list a few examples of deadlock situations, inspired by deadlocks that happened during the development of \texttt{notes.tex}. First, if some Python fragment in \texttt{notes.tex} generates the image \texttt{dog.png}, then the \LaTeX{} command \verb|\includegraphics{dog.png}| will fail on first execution because PythonTex has not yet executed the Python code that generates \texttt{dog.png}. Because the first execution of \verb|pdflatex| has failed, the Python code to generate \verb|dog.png| is never executed. Repeatedly invoking \verb|pdflatex| or \verb|pythontex|, or deleting cache and aux files, does not break the deadlock. One must modify \texttt{notes.tex} so that it does not cause an error when the file \texttt{dog.png} does not exist. Furthermore, if the filename for the \verb|includegraphics| is generated programmatically, e.g. \\ \verb|\includegraphics{\py{"dog.png"}}|, then when there is no cache, \texttt{pdflatex} is likely to fail because \verb|\py{"dog.png"}| will macro-expand to the empty string, causing an error. One can ``prime'' the cache by temporarily deleting the \verb|\includegraphics|. 
After reinserting the \verb|\includegraphics|, \texttt{notes.tex} will compile successfully, but deleting the cache or \texttt{dog.png} will cause a compilation failure/deadlock again. 176 | We also found that PythonTex often tried to substitute mismatched cache outputs to various input Python fragments. 177 | We are not entirely certain how PythonTex keeps track of matching inputs and outputs in the cache, but it seemed to us that adding or deleting a Python code fragment \verb|\py{...}| almost always resulted in deadlock and having to delete all cache files. Of course, deleting all cache files itself sometimes caused deadlock elsewhere. 178 | 179 | Practically speaking, we found that almost any significant and even minor edit to \texttt{notes.tex} resulted in deadlock and had to be accompanied with a manual deletion of all temporary files, and further tweaks to \texttt{notes.tex} that are required by the deletion of the cache files. 180 | 181 | We found it difficult to debug Python code fragments in \texttt{notes.tex} because PythonTex actually executes Python fragments stored in some temporary files that PythonTex aggressively deletes, not straight from \texttt{notes.tex}. Furthermore, the Python interpreter often declared incorrectly that syntax errors were located in PythonTex glue code that is inserted between extracted Python fragments. Although PythonTex attempts to reconcile line numbers between \texttt{notes.tex} and the various temporary files, we found that this simply led to more confusion and difficulties in debugging. 182 | 183 | The generated \LaTeX{} is merged into the \TeX{} token stream from various temporary files, so there is no collated file similar to the \texttt{notes.pyptex} of \pyptex{}. It seemed to us that \TeX{} was struggling to provide informative error messages, and it was difficult to view generated \LaTeX{} fragments in context. 
We resorted to manually copy-and-pasting generated \LaTeX{} fragments from various temporary files into \texttt{notes.tex}. 184 | 185 | Altogether, debugging was challenging. Issues had to be identified and isolated by searching through multiple temporary files, and then locating the corresponding source in \texttt{notes.tex}. 186 | 187 | \subsection{The invention of \pyptex{}.} 188 | 189 | If the motivation of upgrading from ad-hoc scripting to PythonTex was to mitigate the burgeoning software engineering challenges in our old build process, then the motivation for moving to \pyptex{} was to address the perceived drawbacks of PythonTex: deadlock issues, and ease of debugging. 190 | 191 | Our starting point was that \TeX{} was not a good tool for the end-to-end process of extracting Python fragments from a template file, replacing them with their outputs and compiling the whole thing down into a PDF. When \TeX{} is performing the extraction task, if it is also simultaneously performing insertions from cache files, deadlock seems almost inevitable. \TeX{} must clearly be used to generate the PDF so it seemed natural to cut \TeX{} out of the beginning of the process, when the Python fragments are extracted from the \texttt{notes.tex} file. From this insight, we concluded we had to abandon the Python Embedding approach, in favor of the Python template Preprocessor approach. 192 | 193 | We have successfully used \pyptex{} for the 2020 version of the F19NB handout 194 | and found that \pyptex{} has the following advantages over PythonTex: 195 | \begin{enumerate} 196 | \item {\bf No deadlock.} \pyptex{} is always able to either generate \texttt{notes.pyptex} from \texttt{notes.tex}, regardless of any cache contents, or report genuine Python errors in the source file \texttt{notes.tex}. 
197 | \item {\bf Easy to debug Python.} When \pyptex{} does report genuine Python errors in \texttt{notes.tex}, the error messages are clear and Python debugging is done directly in the source \texttt{notes.tex} file using standard Python tools such as Pdb. There is no auxiliary file for collecting Python code fragments, and no confusion between various line numberings. There is no glue code inserted between Python fragments, and hence Python errors are always correctly reported inside user codes. 198 | \item {\bf Easy to debug generated \LaTeX{}.} Debugging the generated \LaTeX{} code \texttt{notes.pyptex} is much easier, as it consists entirely of high-level, human-readable \LaTeX{} code, as opposed to a partially digested \TeX{} token stream that is difficult to debug for all but the most advanced \TeX{}nicians. 199 | All \LaTeX{} error messages correspond exactly to the contents of \texttt{notes.pyptex} and its line numbers. 200 | \item {\bf Security.} Third-party \LaTeX{} packages cannot execute arbitrary Python code on the user's machine. 201 | \end{enumerate} 202 | One can regard the \texttt{notes.pyptex} as an intermediate file that the user will occasionally have to inspect in order to debug generated \LaTeX{} fragments, and the experience of PythonTex is that such temporary user-facing files must be carefully thought through lest they become a source of problems. By only generating one such user-facing intermediate file, our experience with \texttt{notes.pyptex} is that we have avoided most of the problems we had tracking information across multiple temporary files in PythonTex. In addition, unlike PythonTex, our intermediate file \texttt{notes.pyptex} is designed to be used first by humans, and secondarily by the \LaTeX{} compiler. This is because \texttt{notes.pyptex} consists of high-level, human-style \LaTeX{} source commands, and not some low-level macro-expansion of the high-level \LaTeX{} source. 
203 | 204 | In addition to the user-facing file \texttt{notes.pyptex}, \pyptex{} also produces a plain-text log file \texttt{notes.pyplog}, and a cache file \texttt{notes.pickle} that is in binary format and opaque to users. In our experience, we never had to manually inspect the contents of \texttt{notes.pickle} and this file never caused deadlock or debugging problems in \pyptex{}. Furthermore, \texttt{notes.pickle} can also be deleted without breaking the build. In extremis, \texttt{notes.pickle} can theoretically be loaded and inspected using Python's pickle module, but we have never needed to do this. 205 | 206 | Theoretically, an outdated \texttt{notes.pickle} cache could cause a miscompilation of \texttt{notes.pyptex} and produce an incorrect \texttt{notes.pdf}, or even a \LaTeX{} compilation error. The previously-described dependency-tracking features of \pyptex{} are almost always able to detect that the cache should be invalidated, but a sufficiently determined user could defeat these safety measures. For example, if a user is reading data from a file without declaring it as a dependency, or if a user is falsifying ``last modification'' timestamps on files, 207 | or generating random numbers, or some other form of nondeterminism is taking place, then cache invalidation may fail. A concerned user can completely disable the \texttt{notes.pickle} cache file by issuing the Python command \verb|pyp.disable_cache=True| in \texttt{notes.tex}, trading efficiency for safety. 208 | 209 | \section{Conclusions and outlook} 210 | 211 | We have introduced \pyptex{}, the Python template Preprocessor for \LaTeX{}. Unlike other projects that have embedded Python into \LaTeX{}, \pyptex{} uses regular expressions to extract Python fragments and substitute their outputs. 
Our preprocessor approach is superior to the embedded approach because it avoids the deadlock and debugging issues that plague embedded Python approaches, and it also eliminates the attack vector of arbitrary code execution by malicious third-party \LaTeX{} packages. \pyptex{} has been used to generate problem sets for the F19NB class at Heriot-Watt University. This large 71-page document features 1,256 lines of Python in a 105KB source file \texttt{notes.tex}. 212 | 213 | \pyptex{} centers around the Python programming language, but one could extend on our idea by adding more languages to the preprocessor, e.g. the string \verb|@julia{...}| could invoke a Julia interpreter. This poses a challenging issue of how to tightly integrate any such ``guest language'', and particularly its native debugging tools, so that the user experience is best possible. The tight integration of Python's Pdb debugger, and the deliberate cache invalidation algorithm, required careful use of Python's extensive introspection and detailed code generation facilities. This is an avenue for future research. 
214 | 215 | \bibliographystyle{plainnat} 216 | 217 | \bibliography{@{{{ 218 | pyp.bib(r""" 219 | @article{knuth1989errors, 220 | title={The errors of {TeX}}, 221 | author={Knuth, Donald E}, 222 | journal={Software: Practice and Experience}, 223 | volume={19}, 224 | number={7}, 225 | pages={607--685}, 226 | year={1989}, 227 | publisher={Wiley Online Library} 228 | } 229 | @book{knuth1984texbook, 230 | title={The {TEXbook}}, 231 | author={Knuth, Donald Ervin and Bibby, Duane}, 232 | volume={3}, 233 | year={1984}, 234 | publisher={Addison-Wesley Reading} 235 | } 236 | @book{lamport1994latex, 237 | title={{LATEX}: a document preparation system: user's guide and reference manual}, 238 | author={Lamport, Leslie}, 239 | year={1994}, 240 | publisher={Addison-wesley} 241 | } 242 | @article{greenwade1993comprehensive, 243 | title={The comprehensive {TEX} archive network (ctan)}, 244 | author={Greenwade, George D}, 245 | journal={TUGBoat}, 246 | volume={14}, 247 | number={3}, 248 | pages={342--351}, 249 | year={1993}, 250 | publisher={Addison-Wesley} 251 | } 252 | @article{seindal1997gnu, 253 | title={{GNU} m4, version 1.4}, 254 | author={Seindal, Ren{\'e}}, 255 | journal={Free Software Foundation}, 256 | volume={59}, 257 | year={1997} 258 | } 259 | @article{virtanen2020scipy, 260 | title={{SciPy} 1.0: fundamental algorithms for scientific computing in {P}ython}, 261 | author={Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E and Haberland, Matt and Reddy, Tyler and Cournapeau, David and Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and others}, 262 | journal={Nature Methods}, 263 | pages={1--12}, 264 | year={2020}, 265 | publisher={Nature Publishing Group} 266 | } 267 | @article{meurer2017sympy, 268 | title={{SymPy}: symbolic computing in {Python}}, 269 | author={Meurer, Aaron and Smith, Christopher P and Paprocki, Mateusz and {\v{C}}ert{\'\i}k, Ond{\v{r}}ej and Kirpichev, Sergey B and Rocklin, Matthew and Kumar, AMiT and Ivanov, 
Sergiu and Moore, Jason K and Singh, Sartaj and others}, 270 | journal={{PeerJ} Computer Science}, 271 | volume={3}, 272 | pages={e103}, 273 | year={2017}, 274 | publisher={PeerJ Inc.} 275 | } 276 | @article{poore2015pythontex, 277 | title={{PythonTeX: reproducible documents with LaTeX, Python, and more}}, 278 | author={Poore, Geoffrey M}, 279 | journal={Computational Science \& Discovery}, 280 | volume={8}, 281 | number={1}, 282 | pages={014010}, 283 | year={2015}, 284 | publisher={IOP Publishing} 285 | } 286 | @article{fine2005tex, 287 | title={{TEX} forever!}, 288 | author={Fine, Jonathan}, 289 | journal={Proceedings EuroTEX}, 290 | pages={140--149}, 291 | year={2005} 292 | } 293 | @misc{Loisel2020, 294 | author = {S{\'e}bastien Loisel}, 295 | title = {Numerical Analysis {B} (lecture notes)}, 296 | year = {2020}, 297 | publisher={Heriot-Watt University} 298 | } 299 | @book{kernighan1977m4, 300 | title={The {M4} macro processor}, 301 | author={Kernighan, Brian W and Ritchie, Dennis M}, 302 | year={1977}, 303 | publisher={Bell Laboratories Murray Hill, NJ} 304 | } 305 | @article{cimpanu2018twelve, 306 | title={Twelve Malicious {P}ython Libraries Found and Removed from {PyPI}}, 307 | author={Cimpanu, Catalin}, 308 | year={2018}, 309 | journal={ZDNet}, 310 | month={October}, 311 | } 312 | @article{pakin2004perltex, 313 | title={PerlTEX: Defining LATEX macros using Perl}, 314 | author={Pakin, Scott}, 315 | journal={TUGboat}, 316 | volume={25}, 317 | number={2}, 318 | pages={150--159}, 319 | year={2004} 320 | } 321 | @misc{ehmsen, 322 | title={Python.sty}, 323 | author={Martin R. 
Ehmsen}, 324 | publisher={CTAN}, 325 | year={2012}, 326 | } 327 | @misc{drake2009sagetex, 328 | title={The {SageTEX} package}, 329 | author={Drake, Dan and others}, 330 | publisher={CTAN}, 331 | year={2009} 332 | } 333 | @book{molteno2014sympytex, 334 | title={SympyTeX: Embedding Symbolic Computation Into {LaTeX} Documents}, 335 | author={Molteno, Timothy Christopher Anthony}, 336 | year={2014}, 337 | publisher={Electronics Group, University of Otago} 338 | } 339 | @article{hagen2005luatex, 340 | title={{LuaTEX}: Howling to the moon}, 341 | author={Hagen, Hans}, 342 | journal={The Communications of the TEX Users Group}, 343 | pages={152}, 344 | year={2005} 345 | } 346 | @article{hepple1998, 347 | title={Writing {HTML} with m4}, 348 | author={Bob Hepple}, 349 | journal={Linux journal}, 350 | year={1998}, 351 | month={March}, 352 | } 353 | @article{ronacher2008jinja2, 354 | title={Jinja2 Documentation}, 355 | author={Ronacher, Armin}, 356 | journal={Welcome to Jinja2—Jinja2 Documentation (2.8-dev)}, 357 | year={2008} 358 | } 359 | @book{alchin2013pro, 360 | title={Pro Django}, 361 | author={Alchin, Marty and Kaplan-Moss, Jacob and Vilches, George}, 362 | year={2013}, 363 | publisher={Springer} 364 | } 365 | """) 366 | }}}} 367 | \end{document} 368 | -------------------------------------------------------------------------------- /tests/test1/test2.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | 4 | @{{{ 5 | aaa = 1 6 | pyp.freeze() 7 | bbb = 2 8 | print(aaa,bbb) 9 | pyp.clear() 10 | print(aaa) 11 | try: 12 | print(bbb) 13 | except: 14 | print("As expected, bbb is now undefined.") 15 | }}} 16 | @{{{ 17 | try: 18 | print(bbb) 19 | except: 20 | print("As expected again, bbb is still undefined.") 21 | }}} 22 | 23 | 24 | % This doesn't actually work. 
25 | \pdfinfoomitdate1 26 | \pdfsuppressptexinfo-1 27 | \pdftrailerid{} 28 | 29 | \usepackage[utf8]{inputenc} 30 | \usepackage[english]{babel} 31 | \usepackage{amsmath} 32 | \usepackage{verbatim} 33 | \usepackage{relsize} 34 | \usepackage{graphicx} 35 | 36 | \usepackage{hologo} 37 | 38 | @{from sympy import *} 39 | 40 | \newcommand{\Longupdownarrow}{\Big\Updownarrow} 41 | \usepackage{listings} 42 | \lstset{ 43 | basicstyle=\footnotesize\ttfamily} 44 | 45 | \usepackage{natbib} 46 | 47 | \newcommand{\pyptex}{{P\kern-0.08em\raisebox{-0.5ex}{Y\kern-0.1emP}\kern-0.28em\TeX{}}} 48 | 49 | \title{\pyptex{}: The Python Preprocessor for \LaTeX{}.} 50 | \date{wololo} 51 | \author{Sébastien Loisel} 52 | 53 | 54 | \begin{document} 55 | \maketitle 56 | 57 | \begin{abstract} 58 | \pyptex{} is a Python template Preprocessor for \LaTeX{}. From a source template document \texttt{a.tex} containing specially marked Python code fragments, one or more pure \LaTeX{} files \texttt{a.pyptex} are generated and compiled into final \texttt{a.pdf} documents. Our new approach differs from previous approaches that embedded Python into the \LaTeX{} engine and allowed bidirectional communication between Python and \LaTeX{}. The embedded Python approach exposes users to security risks, but it also has a deadlock problem and is very hard to debug. By contrast, \pyptex{} avoids the security risks of embedded Python, completely eliminates the deadlock problem, and is easy to debug. We have used \pyptex{} to produce a 71-page handout for linear algebra students containing automatically generated problem sets. In combination with SymPy, SciPy and other Python packages, \pyptex{} can be used to create beautiful scientific documents. 
59 | \end{abstract} 60 | 61 | \section{Introduction} 62 | 63 | \pyptex{} is a macro preprocessor (or template engine) for \LaTeX{} \citep{lamport1994latex} that allows one to use the rich Python ecosystem to automate parts of the redaction of sophisticated scientific documents. 64 | In combination with SciPy \citep{virtanen2020scipy} and SymPy \citep{meurer2017sympy}, \pyptex{} is a powerful tool for generating documents ranging from research papers and books to calculus problem sets for students. 65 | 66 | One installs \pyptex{} by typing \texttt{pip install pyptex} at the command prompt. This assumes that one already has working Python and \LaTeX{} installations; by default, \pyptex{} assumes that \texttt{pdflatex} is available at the command line, although alternate \LaTeX{} implementations can also be used. Then, using one's favorite text editor, one places the following text in a file \texttt{example.tex}: 67 | \begin{lstlisting} 68 | \documentclass{article} 69 | @@{from sympy import *} 70 | \begin{document} 71 | $$\int x^3\,dx = @@{S('integrate(x^3,x)')}+C$$ 72 | \end{document} 73 | \end{lstlisting} 74 | One compiles \texttt{example.tex} using the \pyptex{} command line \texttt{pyptex example.tex}. This generates the final output \texttt{example.pdf}, which contains this: 75 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 76 | As we can see, SymPy was used to compute an indefinite integral symbolically, and the result was typeset with standard \LaTeX{}. 77 | 78 | \pyptex{} also produces the intermediate file \texttt{example.pyptex}, which is a pure-\LaTeX{} file that is useful either for debugging purposes, or for sending a final manuscript to a journal. 
It contains the following: 79 | \begin{lstlisting} 80 | \documentclass{article} 81 | @{from sympy import *} 82 | \begin{document} 83 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 84 | \end{document} 85 | \end{lstlisting} 86 | The SymPy Python code in the template file \texttt{example.tex} was executed and replaced with its own output in standard \LaTeX{} notation. 87 | The file \texttt{example.pdf} was automatically generated by \pyptex{} from the \texttt{example.pyptex} file (internally, \pyptex{} invoked the command \texttt{pdflatex example.pyptex}). 88 | 89 | \begin{figure} 90 | $$\texttt{a.tex} \implies \text{\pyptex{}} \implies \texttt{a.pyptex} \implies \texttt{pdflatex}\implies \texttt{a.pdf}$$ 91 | \caption{Dataflow of the \pyptex{} Python template Preprocessor. \label{f:pre}} 92 | \end{figure} 93 | \begin{figure} 94 | $$\texttt{a.tex} \implies 95 | \left\{ 96 | \begin{array}{c} 97 | \texttt{pdflatex} \\ 98 | \Longupdownarrow \\ 99 | \text{Python} 100 | \end{array} 101 | \right\} 102 | \implies \texttt{a.pdf} 103 | $$ 104 | \caption{Dataflow when Python is embedded in the \texttt{pdflatex} engine. \label{f:embed}} 105 | \end{figure} 106 | 107 | In \pyptex{}, debugging and error handling are first-class citizens. If any errors occur during Python interpretation, \pyptex{} automatically launches the Python debugger Pdb in postmortem mode, just like standard \LaTeX{} enters \texttt{errorstopmode} on error. In Pdb, all standard Python debugging tools work as normal, and source code from the originating \texttt{.tex} file is visible in its context (e.g. with the \texttt{list} command). If an error occurs during \LaTeX{} compilation (e.g. during \texttt{pdflatex example.pyptex}), the usual error prompt is provided by \LaTeX{}. 108 | 109 | \pyptex{} is a {\em preprocessor} (or {\em template engine}) for \LaTeX{}, see Figure \ref{f:pre}. 
110 | Python fragments are extracted from the (template) source \texttt{a.tex} file by a regular expression pattern matcher, executed by a Python interpreter, and substituted with the appropriate outputs. The collated file is written to disk as \texttt{a.pyptex}, which is a pure \LaTeX{} file. Because this procedure is done by regular expressions, the \LaTeX{} compiler is not invoked and not involved in parsing or processing \texttt{a.tex}. Once \texttt{a.pyptex} has been produced, then \texttt{a.pdf} is obtained, e.g. via \texttt{pdflatex a.pyptex}. 111 | 112 | As far as we know, \pyptex{} is the first time that Python is formally used as a preprocessor or templating engine for \LaTeX{}. However, Python has previously been {\em embedded} into \LaTeX{} engines, see Figure \ref{f:embed}; we mention \citet{fine2005tex}, \citet{ehmsen}, \citet{poore2015pythontex}. For SymPy there is \citet{molteno2014sympytex} and for Sage there is \citet{drake2009sagetex}. Other scripting languages have also been embedded into \LaTeX{}, for example Perl \citep{pakin2004perltex} and Lua \citep{hagen2005luatex}. When Python is embedded into \LaTeX{}, all the Python fragments are extracted from a source \texttt{a.tex} file by the \TeX{} macro engine itself. After executing these Python fragments, the relevant outputs are substituted into the macro stream being ``digested'' by the \TeX{} engine \citep{knuth1984texbook}. Although templating engines for \LaTeX{} seem rare, they are commonly used for other document formats, see e.g. \citet{hepple1998}, or \citet{alchin2013pro}. Usually, templating engines focus on generating multiple documents from a single template, whereas \pyptex{} focuses on generating a single document from a template. For that reason, we prefer the terminology ``preprocessor''. That being said, \pyptex{} can indeed be used in a more traditional templating mode where multiple documents are generated from a single template \texttt{a.tex}. 
113 | 114 | With the help of \pyptex{}, we have produced a 71-page handout \citep{Loisel2020} for the course F19NB at Heriot-Watt University on numerical linear algebra. Previous to \pyptex{}, in 2018, we had used an ad-hoc and highly complex build system for \LaTeX{} code generation. While we distinguish this ad-hoc approach from our new template preprocessor approach, we mention that the ad-hoc generation of \LaTeX{} from pure Python programs is widespread. In 2019, we switched from an ad-hoc approach to PythonTex and it was a significant improvement, but we found three important drawbacks to the embedded Python approach, which motivated us to create \pyptex{}. 115 | 116 | First, there is the (somewhat theoretical) security risk of allowing third-party \LaTeX{} packages to execute arbitrary Python codes unbeknownst to the user. Second, there is the issue of ``deadlock'', which is when the source and cache files enter a state where compiling \texttt{a.tex} into \texttt{a.pdf} becomes impossible because of \TeX{} errors. This occurs when the \TeX{} macro processor is simultaneously extracting new Python fragments from \texttt{a.tex} while inserting outdated cached substitutions from a previous version of \texttt{a.tex}. To break deadlock, it is usually necessary to manually delete all cache files, but this is not always sufficient, and sometimes modifying \texttt{a.tex} is necessary. Finally, the third problem is the difficulty of debugging Python fragments and generated \LaTeX{} fragments. 117 | 118 | We have found that our new Python template preprocessor \pyptex{} avoids all three problems. First, it does not allow third-party \LaTeX{} packages to execute arbitrary Python codes because there is no bidirectional communication between \LaTeX{} and Python. Second, deadlock is impossible by design because the \LaTeX{} engine is not involved at all in the extraction, execution and substitution of Python fragments from \texttt{a.tex}. 
Finally, debugging with Pdb is treated as a first-class citizen and works as normal, and the collated \LaTeX{} document is made available in the human-preferred format of a pure-\LaTeX{} source file \texttt{a.pyptex}, as opposed to a ``partially digested'' \TeX{} macro stream. We have confirmed these significant improvements using our F19NB handout, which consists of 2,592 lines and 105KB of \LaTeX{} source in file \texttt{notes.tex}, including 1,256 lines of Python code. 119 | 120 | Our paper is organized as follows. In Section 2, we give a brief tour of \pyptex{} features. In Section 3, we discuss the F19NB case study, a 71 page PDF document containing automatically generated problem sets for linear algebra students. We end with some conclusions in Section 4. 121 | 122 | \section{A brief tour of \pyptex{} features} 123 | 124 | The \texttt{pyptex a.tex} command executes Python fragments in \texttt{a.tex} delimited by either \verb|@@{...}| or \verb|@@{{{...}}}|; the latter notation is necessary if the Python fragment contains curly braces. If a literal \verb|@@{| is needed, it can be escaped by using a double \verb|@@@{|; apart from this, it is not necessary to escape \verb|@| symbols. \pyptex{} also honors \LaTeX{}-style comments so that \verb|%@{error}| does not execute the Python expression \verb|error|. 125 | 126 | \pyptex{} allows one to execute arbitrary Python programs and import arbitrary Python packages, e.g. \verb|@@{from sympy import *}|. One can use SymPy to create simple plots, save them to a file and include them via the standard \LaTeX{} graphicx package. For example: 127 | \lstinputlisting{plot1.tex} 128 | As with all modern template preprocessors, \pyptex{} enables modular development by assembling a project from multiple component files. For example, the preceding plotting snippet was saved in the \texttt{plot1.tex}, which will later be included in our master \texttt{a.tex} via the command 129 | \verb|@@{pyp.input('plot1.tex')}|. 
The files \texttt{a.tex} and \texttt{plot1.tex} are analogous to Python modules in that they execute in their own separate namespaces. As such, it is necessary to re-import SymPy into the namespace of \texttt{plot1.tex}, even though it already has been imported into the namespace of the master file \texttt{a.tex}. 130 | 131 | The pattern of plotting a figure, saving it to a file, and then including its filename into the generated \texttt{a.pyptex} file is very common, so \pyptex{} includes a convenience function \texttt{pyp.savefig()} that automatically generates distinct numbered filenames under the \texttt{a-generated/*} directory; in the present case, we create the file \texttt{a-generated/fig1.eps} with the following fragment: 132 | \lstinputlisting{plot2.tex} 133 | These plotting commands have also been stored in a separate file \texttt{plot2.tex}. 134 | Note that the Python fragment in \texttt{plot2.tex} consists of two Python statements. The Python programming language distinguishes {\em expressions} that return or produce a value, and typically consist of a single line; and {\em statements} that may run multiple lines and produce no value. The multiple lines in the \texttt{plot2.tex} Python code fragments mean that they are statements that produce no value. In order to print the filename \texttt{a-generated/fig1.eps} into the curly braces of the \verb|\includegraphics{...}|, it is therefore required to use the \verb|pyp.print(...)| command. 135 | 136 | Embedding the files \texttt{plot1.tex} and \texttt{plot2.tex} via the \texttt{pyp.input(...)} mechanism described above, results in the following output:\\ 137 | \noindent @{pyp.input('plot1.tex')} 138 | @{pyp.input('plot2.tex')} 139 | 140 | Standard out and standard error print out to the console as usual, and are additionally logged in the \texttt{a.pyplog} file. If one wishes to print directly to the \texttt{a.pyptex} file, one should use the \texttt{pyp.print()} function. 
141 | 142 | For performance reasons, \pyptex{} caches the outputs of Python fragments in a binary file \texttt{a.pickle}. If one edits purely textual portions of \texttt{a.tex} while leaving the Python fragments untouched, the cached outputs are used and the production of \texttt{a.pyptex} is nearly instantaneous. If the Python fragments in \texttt{a.tex} are not identical to the cached fragments, or if the command-line arguments to \texttt{pyptex} have changed, or if the \texttt{pyptex} package version has changed, the cache is automatically invalidated. Furthermore, the command \verb|pyp.dep(filename)| can be used to add further dependencies that will invalidate the cache when the file \verb|filename| changes. Any auxiliary \texttt{.tex} files (and their dependencies) incorporated via \verb|pyp.input(...)| are automatically added to the list of dependencies via \verb|pyp.dep()|, but if external data files are \texttt{open()}ed and used to generate, e.g. \LaTeX{} tables, these files should manually be added to the dependencies via \verb|pyp.dep()|. The helper function \verb|pyp.open(...)| automatically calls \verb|pyp.dep(...)| before calling the builtin \verb|open(...)| function. 143 | 144 | One can control how the \texttt{a.pyptex} file is compiled into \texttt{a.pdf} by specifying which \LaTeX{} engine to use. To completely disable all \LaTeX{} processing, do \verb|@@{pyp.latexcommand=False}| anywhere in \texttt{a.tex}. The default \texttt{latexcommand} is 145 | \begin{lstlisting} 146 | {latex} {pyptexfilename} && 147 | (test ! -f {bibfilename} || bibtex {auxfilename}) 148 | \end{lstlisting} 149 | The usual Python substitutions are made from the \texttt{pyp} object, e.g. \verb|pyp.latex| defaults to \verb|"@{pyp.latex}"| but this can be overridden from anywhere in \texttt{a.tex}. 150 | 151 | \pyptex{} can be used programmatically in the Python interpreter with \texttt{import pyptex}. 
The module exports a \texttt{pyptex.pyptex} object; doing \\ \texttt{pyp = pyptex.pyptex('a.tex')} causes the file \texttt{a.tex} to be read in, the Python fragments are executed, and the file \texttt{a.pyptex} is written out. The compiled text \texttt{a.pyptex} is available as the string \texttt{pyp.compiled}. The function \\\texttt{pyptex.pyptexmain()} implements the standard operations of the shell command \texttt{pyptex}. Further documentation is available by doing e.g. \texttt{help(pyptex)}. 152 | 153 | On Mac, \pyptex{} can be used with TeXShop by creating the file \\ \verb|~/Library/TeXShop/Engines/pyptex.engine| with the following contents: 154 | \begin{lstlisting} 155 | #!/bin/bash 156 | pyptex $1 157 | \end{lstlisting} 158 | Upon restarting TeXShop, this adds the \texttt{pyptex} build option to the list of build engines. 159 | %\lstinputlisting{~/Library/TeXShop/Engines/pyptex.engine} 160 | 161 | \section{Case study: the F19NB handout} 162 | 163 | At Heriot-Watt University, F19NB is a course on numerical linear algebra for undergraduate students. Topics include Gaussian elimination, classical iterations (Jacobi and SOR), GMRES and the QR iteration. All of the theory, proofs, examples and exercises are collected in a 71-page handout. Many of the numerical exercises and their solutions are automatically generated. For some problems (e.g. eigenvalue problems, where the characteristic polynomial must be obtained), it is helpful to do a little bit of symbolic calculations using SymPy. Other exercises and examples are more numerical and use numpy and matplotlib for displaying solutions. 164 | 165 | As of this writing, there are 2,592 lines and 105KB\footnote{As usual, some \LaTeX{} lines can be very long or very short, so the size of the file in KB may be more indicative of the size of the project} in the source file \texttt{notes.tex}, of which 1,256 lines are Python scripts. However, up to 2018, the \texttt{notes.tex} file did not contain any Python code. 
The Python codes were stored in separate \texttt{.py} files under a \texttt{problib} directory. These problem sets were spread over 24 Python source files and 1,268 lines of Python code. The \texttt{problib} codes were executed in sequence and all the outputs collected in an automatically generated 1,211 line (78KB) \texttt{problems.tex} file. The file \texttt{problems.tex} is a pure \LaTeX{} file that contains problem questions and solutions, encapsulated in \verb|\newcommand| statements, and is then included in the main \texttt{notes.tex} file via the \verb|\input{problems.tex}| mechanism. Various problems and their solutions can then be instantiated in the \texttt{notes.tex} file by invoking the corresponding \LaTeX{} commands. 166 | 167 | The sheer number of files and building steps caused an explosion in complexity more typical of software engineering than mathematical typesetting. Identifying which Python code generated any particular problem set was becoming increasingly challenging. On occasions, it was found that \texttt{problems.tex} was inconsistent with the \texttt{problib} Python scripts, either because the \texttt{problib} Python scripts had not been rerun, or sometimes because a Python script had accidentally been deleted. Fixing these issues was taking an increasing amount of the author's time. 168 | 169 | \subsection{PythonTex and \texttt{notes.tex}} 170 | 171 | In 2019, we adopted PythonTex and it was a major success. This allowed us to immediately reduce the number of external Python scripts and build steps, because many of the simpler tasks could be done directly from \texttt{notes.tex}. By the end of 2019, all Python scripts were moved to \texttt{notes.tex}, and \texttt{problib} was deleted. 172 | 173 | Despite these major improvements, new ``software engineering-style'' problems arose. In the abstract, allowing third-party \LaTeX{} packages to execute arbitrary Python programs seems like a security problem. 
As of this writing, there are 7,093 potentially malicious \LaTeX{} packages on CTAN. That being said, there are 211,855 packages currently on the Python archive PyPI, and malicious Python packages are regularly identified, e.g. \citet{cimpanu2018twelve}. Ultimately, it was the deadlock and debugging problems that motivated us to create \pyptex{}. 174 | 175 | We now list a few examples of deadlock situations, inspired by deadlocks that happened during the development of \texttt{notes.tex}. First, if some Python fragment in \texttt{notes.tex} generates the image \texttt{dog.png}, then the \LaTeX{} command \verb|\includegraphics{dog.png}| will fail on first execution because PythonTex has not yet executed the Python code that generates \texttt{dog.png}. Because the first execution of \verb|pdflatex| has failed, the Python code to generate \verb|dog.png| is never extracted, and hence never executed. Repeatedly invoking \verb|pdflatex| or \verb|pythontex|, or deleting cache and aux files, does not break the deadlock. One must modify \texttt{notes.tex} so that it does not cause an error when the file \texttt{dog.png} does not exist. Furthermore, if the filename for the \verb|includegraphics| is generated programmatically, e.g. \\ \verb|\includegraphics{\py{"dog.png"}}|, then when there is no cache, \texttt{pdflatex} is likely to fail because \verb|\py{"dog.png"}| will macro-expand to the empty string, causing an error. One can ``prime'' the cache by temporarily deleting the \verb|\includegraphics|. After reinserting the \verb|\includegraphics|, \texttt{notes.tex} will compile successfully, but deleting the cache or \texttt{dog.png} will cause a compilation failure/deadlock again. 176 | We also found that PythonTex often tried to substitute mismatched cache outputs to various input Python fragments. 
177 | We are not entirely certain how PythonTex keeps track of matching inputs and outputs in the cache, but it seemed to us that adding or deleting a Python code fragment \verb|\py{...}| almost always resulted in deadlock and having to delete all cache files. Of course, deleting all cache files itself sometimes caused deadlock elsewhere. 178 | 179 | Practically speaking, we found that almost any significant and even minor edit to \texttt{notes.tex} resulted in deadlock and had to be accompanied with a manual deletion of all temporary files, and further tweaks to \texttt{notes.tex} that are required by the deletion of the cache files. 180 | 181 | We found it difficult to debug Python code fragments in \texttt{notes.tex} because PythonTex actually executes Python fragments stored in some temporary files that PythonTex aggressively deletes, not straight from \texttt{notes.tex}. Furthermore, the Python interpreter often declared incorrectly that syntax errors were located in PythonTex glue code that is inserted between extracted Python fragments. Although PythonTex attempts to reconcile line numbers between \texttt{notes.tex} and the various temporary files, we found that this simply led to more confusion and difficulties in debugging. 182 | 183 | The generated \LaTeX{} is merged into the \TeX{} token stream from various temporary files, so there is no collated file similar to the \texttt{notes.pyptex} of \pyptex{}. It seemed to us that \TeX{} was struggling to provide informative error messages, and it was difficult to view generated \LaTeX{} fragments in context. We resorted to manually copy-and-pasting generated \LaTeX{} fragments from various temporary files into \texttt{notes.tex}. 184 | 185 | Altogether, debugging was challenging. Issues had to be identified and isolated by searching through multiple temporary files, and then locating the corresponding source in \texttt{notes.tex}. 
186 | 187 | \subsection{The invention of \pyptex{}.} 188 | 189 | If the motivation of upgrading from ad-hoc scripting to PythonTex was to mitigate the burgeoning software engineering challenges in our old build process, then the motivation for moving to \pyptex{} was to address the perceived drawbacks of PythonTex: deadlock issues, and ease of debugging. 190 | 191 | Our starting point was that \TeX{} was not a good tool for the end-to-end process of extracting Python fragments from a template file, replacing them with their outputs and compiling the whole thing down into a PDF. When \TeX{} is performing the extraction task, if it is also simultaneously performing insertions from cache files, deadlock seems almost inevitable. \TeX{} must clearly be used to generate the PDF so it seemed natural to cut \TeX{} out of the beginning of the process, when the Python fragments are extracted from the \texttt{notes.tex} file. From this insight, we concluded we had to abandon the Python Embedding approach, in favor of the Python template Preprocessor approach. 192 | 193 | We have successfully used \pyptex{} for the 2020 version of the F19NB handout 194 | and found that \pyptex{} has the following advantages over PythonTex: 195 | \begin{enumerate} 196 | \item {\bf No deadlock.} \pyptex{} is always able to either generate \texttt{notes.pyptex} from \texttt{notes.tex}, regardless of any cache contents, or report genuine Python errors in the source file \texttt{notes.tex}. 197 | \item {\bf Easy to debug Python.} When \pyptex{} does report genuine Python errors in \texttt{notes.tex}, the error messages are clear and Python debugging is done directly in the source \texttt{notes.tex} file using standard Python tools such as Pdb. There is no auxiliary file for collecting Python code fragments, and no confusion between various line numberings. There is no glue code inserted between Python fragments, and hence Python errors are always correctly reported inside user codes. 
198 | \item {\bf Easy to debug generated \LaTeX{}.} Debugging the generated \LaTeX{} code \texttt{notes.pyptex} is much easier, as it consists entirely of high-level, human-readable \LaTeX{} code, as opposed to a partially digested \TeX{} token stream that is difficult to debug for all but the most advanced \TeX{}nicians. 199 | All \LaTeX{} error messages correspond exactly to the contents of \texttt{notes.pyptex} and its line numbers. 200 | \item {\bf Security.} Third-party \LaTeX{} packages cannot execute arbitrary Python code on the user's machine. 201 | \end{enumerate} 202 | One can regard the \texttt{notes.pyptex} as an intermediate file that the user will occasionally have to inspect in order to debug generated \LaTeX{} fragments, and the experience of PythonTex is that such temporary user-facing files must be carefully thought through lest they become a source of problems. By only generating one such user-facing intermediate file, our experience with \texttt{notes.pyptex} is that we have avoided most of the problems we had tracking information across multiple temporary files in PythonTex. In addition, unlike PythonTex, our intermediate file \texttt{notes.pyptex} is designed to be used first by humans, and secondarily by the \LaTeX{} compiler. This is because \texttt{notes.pyptex} consists of high-level, human-style \LaTeX{} source commands, and not some low-level macro-expansion of the high-level \LaTeX{} source. 203 | 204 | In addition to the user-facing file \texttt{notes.pyptex}, \pyptex{} also produces a plain-text log file \texttt{notes.pyplog}, and a cache file \texttt{notes.pickle} that is in binary format and opaque to users. In our experience, we never had to manually inspect the contents of \texttt{notes.pickle} and this file never caused deadlock or debugging problems in \pyptex{}. Furthermore, \texttt{notes.pickle} can also be deleted without breaking the build. 
In extremis, \texttt{notes.pickle} can theoretically be loaded and inspected using Python's pickle module, but we have never needed to do this. 205 | 206 | Theoretically, an outdated \texttt{notes.pickle} cache could cause a miscompilation of \texttt{notes.pyptex} and produce an incorrect \texttt{notes.pdf}, or even a \LaTeX{} compilation error. The previously-described dependency-tracking features of \pyptex{} are almost always able to detect that the cache should be invalidated, but a sufficiently determined user could defeat these safety measures. For example, if a user is reading data from a file without declaring it as a dependency, or if a user is falsifying ``last modification'' timestamps on files, 207 | or generating random numbers, or some other form of nondeterminism is taking place, then cache invalidation may fail. A concerned user can completely disable the \texttt{notes.pickle} cache file by issuing the Python command \verb|pyp.disable_cache=True| in \texttt{notes.tex}, trading efficiency for safety. 208 | 209 | \section{Conclusions and outlook} 210 | 211 | We have introduced \pyptex{}, the Python template Preprocessor for \LaTeX{}. Unlike other projects that have embedded Python into \LaTeX{}, \pyptex{} uses regular expressions to extract Python fragments and substitute their outputs. Our preprocessor approach is superior to the embedded approach because it avoids the deadlock and debugging issues that plague embedded Python approaches, and it also eliminates the attack vector of arbitrary code execution by malicious third-party \LaTeX{} packages. \pyptex{} has been used to generate problem sets for the F19NB class at Heriot-Watt University. This large 71-page document features 1,256 lines of Python in a 105KB source file \texttt{notes.tex}. 212 | 213 | \pyptex{} centers around the Python programming language, but one could extend on our idea by adding more languages to the preprocessor, e.g. 
the string \verb|@julia{...}| could invoke a Julia interpreter. This poses a challenging issue of how to tightly integrate any such ``guest language'', and particularly its native debugging tools, so that the user experience is best possible. The tight integration of Python's Pdb debugger, and the deliberate cache invalidation algorithm, required careful use of Python's extensive introspection and detailed code generation facilities. This is an avenue for future research. 214 | 215 | \bibliographystyle{plainnat} 216 | 217 | \bibliography{@{{{ 218 | pyp.bib(r""" 219 | @article{knuth1989errors, 220 | title={The errors of {TeX}}, 221 | author={Knuth, Donald E}, 222 | journal={Software: Practice and Experience}, 223 | volume={19}, 224 | number={7}, 225 | pages={607--685}, 226 | year={1989}, 227 | publisher={Wiley Online Library} 228 | } 229 | @book{knuth1984texbook, 230 | title={The {TEXbook}}, 231 | author={Knuth, Donald Ervin and Bibby, Duane}, 232 | volume={3}, 233 | year={1984}, 234 | publisher={Addison-Wesley Reading} 235 | } 236 | @book{lamport1994latex, 237 | title={{LATEX}: a document preparation system: user's guide and reference manual}, 238 | author={Lamport, Leslie}, 239 | year={1994}, 240 | publisher={Addison-wesley} 241 | } 242 | @article{greenwade1993comprehensive, 243 | title={The comprehensive {TEX} archive network (ctan)}, 244 | author={Greenwade, George D}, 245 | journal={TUGBoat}, 246 | volume={14}, 247 | number={3}, 248 | pages={342--351}, 249 | year={1993}, 250 | publisher={Addison-Wesley} 251 | } 252 | @article{seindal1997gnu, 253 | title={{GNU} m4, version 1.4}, 254 | author={Seindal, Ren{\'e}}, 255 | journal={Free Software Foundation}, 256 | volume={59}, 257 | year={1997} 258 | } 259 | @article{virtanen2020scipy, 260 | title={{SciPy} 1.0: fundamental algorithms for scientific computing in {P}ython}, 261 | author={Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E and Haberland, Matt and Reddy, Tyler and Cournapeau, David and Burovski, 
Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and others}, 262 | journal={Nature Methods}, 263 | pages={1--12}, 264 | year={2020}, 265 | publisher={Nature Publishing Group} 266 | } 267 | @article{meurer2017sympy, 268 | title={{SymPy}: symbolic computing in {Python}}, 269 | author={Meurer, Aaron and Smith, Christopher P and Paprocki, Mateusz and {\v{C}}ert{\'\i}k, Ond{\v{r}}ej and Kirpichev, Sergey B and Rocklin, Matthew and Kumar, AMiT and Ivanov, Sergiu and Moore, Jason K and Singh, Sartaj and others}, 270 | journal={{PeerJ} Computer Science}, 271 | volume={3}, 272 | pages={e103}, 273 | year={2017}, 274 | publisher={PeerJ Inc.} 275 | } 276 | @article{poore2015pythontex, 277 | title={{PythonTeX: reproducible documents with LaTeX, Python, and more}}, 278 | author={Poore, Geoffrey M}, 279 | journal={Computational Science \& Discovery}, 280 | volume={8}, 281 | number={1}, 282 | pages={014010}, 283 | year={2015}, 284 | publisher={IOP Publishing} 285 | } 286 | @article{fine2005tex, 287 | title={{TEX} forever!}, 288 | author={Fine, Jonathan}, 289 | journal={Proceedings EuroTEX}, 290 | pages={140--149}, 291 | year={2005} 292 | } 293 | @misc{Loisel2020, 294 | author = {S{\'e}bastien Loisel}, 295 | title = {Numerical Analysis {B} (lecture notes)}, 296 | year = {2020}, 297 | publisher={Heriot-Watt University} 298 | } 299 | @book{kernighan1977m4, 300 | title={The {M4} macro processor}, 301 | author={Kernighan, Brian W and Ritchie, Dennis M}, 302 | year={1977}, 303 | publisher={Bell Laboratories Murray Hill, NJ} 304 | } 305 | @article{cimpanu2018twelve, 306 | title={Twelve Malicious {P}ython Libraries Found and Removed from {PyPI}}, 307 | author={Cimpanu, Catalin}, 308 | year={2018}, 309 | journal={ZDNet}, 310 | month={October}, 311 | } 312 | @article{pakin2004perltex, 313 | title={PerlTEX: Defining LATEX macros using Perl}, 314 | author={Pakin, Scott}, 315 | journal={TUGboat}, 316 | volume={25}, 317 | number={2}, 318 | pages={150--159}, 319 | 
year={2004} 320 | } 321 | @misc{ehmsen, 322 | title={Python.sty}, 323 | author={Martin R. Ehmsen}, 324 | publisher={CTAN}, 325 | year={2012}, 326 | } 327 | @misc{drake2009sagetex, 328 | title={The {SageTEX} package}, 329 | author={Drake, Dan and others}, 330 | publisher={CTAN}, 331 | year={2009} 332 | } 333 | @book{molteno2014sympytex, 334 | title={SympyTeX: Embedding Symbolic Computation Into {LaTeX} Documents}, 335 | author={Molteno, Timothy Christopher Anthony}, 336 | year={2014}, 337 | publisher={Electronics Group, University of Otago} 338 | } 339 | @article{hagen2005luatex, 340 | title={{LuaTEX}: Howling to the moon}, 341 | author={Hagen, Hans}, 342 | journal={The Communications of the TEX Users Group}, 343 | pages={152}, 344 | year={2005} 345 | } 346 | @article{hepple1998, 347 | title={Writing {HTML} with m4}, 348 | author={Bob Hepple}, 349 | journal={Linux journal}, 350 | year={1998}, 351 | month={March}, 352 | } 353 | @article{ronacher2008jinja2, 354 | title={Jinja2 Documentation}, 355 | author={Ronacher, Armin}, 356 | journal={Welcome to Jinja2—Jinja2 Documentation (2.8-dev)}, 357 | year={2008} 358 | } 359 | @book{alchin2013pro, 360 | title={Pro Django}, 361 | author={Alchin, Marty and Kaplan-Moss, Jacob and Vilches, George}, 362 | year={2013}, 363 | publisher={Springer} 364 | } 365 | """) 366 | }}}} 367 | \end{document} 368 | -------------------------------------------------------------------------------- /pyptex/__init__.py: -------------------------------------------------------------------------------- 1 | r""" 2 | ## PypTeX: the Python Preprocessor for TeX 3 | 4 | ### Author: Sébastien Loisel 5 | 6 | PypTeX is the Python Preprocessor for LaTeX. It allows one to embed Python 7 | code fragments in a LaTeX template file. 8 | 9 | # Installation 10 | 11 | `pip install pyptex` 12 | 13 | 1. You will also need a LaTeX installation, and the default LaTeX processor is `pdflatex`. 14 | 2. You need a Python 3 installation. 
15 | 16 | An example plot with PypTeX 17 | 18 | # Introduction 19 | 20 | Assume `example.tex` contains the following text: 21 | 22 | \documentclass{article} 23 | @{from sympy import *} 24 | \begin{document} 25 | $$\int x^3\,dx = @{S('integrate(x^3,x)')}+C$$ 26 | \end{document} 27 | 28 | The command `pyptex example.tex` will generate `example.pdf`, 29 | as well as the intermediary file `example.pyptex`. PypTeX works by extracting Python 30 | fragments in `example.tex` indicated by either `@{...}` or `@{{{...}}}` and substituting the 31 | corresponding outputs to produce `example.pyptex`, which is then compiled with 32 | `pdflatex example.pyptex`, although one can use any desired LaTeX processor in lieu of 33 | `pdflatex`. The intermediary file `example.pyptex` is pure LaTeX. 34 | 35 | When processing Python fragments, the global scope contains an object `pyp` that is a 36 | (weakref proxy for a) `pyptex.pyptex` object that makes available several helper functions 37 | and useful data. For example, `pyp.print("hello, world")` inserts the string `hello, world` 38 | into the generated `example.pyptex` file. 39 | 40 | * The `pyptex` executable tries to locate the Python 3 executable using `/usr/bin/env python3`. 41 | If this is causing you problems, try `python -u -m pyptex example.tex` instead. 42 | 43 | # Slightly bigger examples 44 | 45 | * 2d and 3d plotting [tex](examples/plots.tex) 46 | | 47 | [pdf](examples/plots.pdf) 48 | * Matrix inverse exercise [tex](examples/matrixinverse.tex) 49 | | 50 | [pdf](examples/matrixinverse.pdf) 51 | * The F19NB handout for numerical linear algebra at Heriot-Watt university is generated with PypTeX. [pdf](https://www.macs.hw.ac.uk/~sl398/notes.pdf) 52 | 53 | # Plotting with `sympy` and `matplotlib` 54 | 55 | PypTeX implements its own `matplotlib` backend, a thin wrapper around the built-in postscript backend. 
56 | The PypTeX backend takes care of generating `.eps` files and importing them into your document via 57 | `\includegraphics`. In that scenario, you must do `\usepackage{graphicx}` in your LaTeX preamble. 58 | The precise "includegraphics" command can be set, e.g. by 59 | `pyp.includegraphics=r"\includegraphics[width=0.9\textwidth]{%s}"`. 60 | 61 | To create a plot with `sympy`, one can do: 62 | ```python 63 | sympy.plot(sympy.S('sin(x)+cos(pi*x)')) 64 | ``` 65 | At the end of each Python fragment `@{...}`, PypTeX saves each generated figure to a 66 | `x.eps` file, and these figures are then inserted via `includegraphics` into the generated 67 | `.tex` file. Once a figure has been auto-showed in this manner, it will not be 68 | auto-showed again. The auto-show behavior can be disabled by setting `pyp.autoshow = False`. 69 | Figures can also be displayed manually via `pyp.pp('@{myfig}')`. 70 | 71 | ```python 72 | plt.plot([1,2,3],[2,1,4]) 73 | ``` 74 | 75 | # Template preprocessing vs embedding 76 | 77 | PypTeX is a template preprocessor for LaTeX based on the Python language. When Python 78 | is embedded into LaTeX, Python code fragments are identified by LaTeX commands that use 79 | standard TeX notation, such as `\py{...}`. The code extraction is performed by TeX, then 80 | the code fragments are executed by Python, finally TeX is run again to merge the 81 | Python-generated LaTeX fragments back into the master file. 82 | 83 | By contrast, PypTeX is a preprocessor that extracts Python code fragments indicated by 84 | `@{...}` using regular expressions. Once the relevant Python outputs are collected, they 85 | are also inserted by regular expressions. LaTeX is only invoked once, on the final output. 86 | 87 | There may be specialized cases where Python embeddings are preferred, but we found 88 | that template preprocessing is superior to embedding. 
There are many reasons (that 89 | will be described elsewhere in detail) but we briefly mention the following reasons: 90 | 1. Embeddings can result in deadlock. If we have `\includegraphics{dog.png}`, but 91 | `dog.png` is generated by a Python fragment, the first run of LaTeX will fail because 92 | `dog.png` does not yet exist. Since LaTeX failed, it did not extract the Python fragments 93 | and we cannot run the Python code that would generate `dog.png` unless we temporarily 94 | delete the `\includegraphics{dog.png}` from `a.tex`. In our experience, deadlock 95 | occurs almost every time we edit our large `.tex` files. 96 | 2. Embedding makes debugging difficult. By contrast, PypTeX treats Python's debugger Pdb 97 | as a first-class citizen and everything should work as normal. Please let us know if some 98 | debugging task somehow fails for you. 99 | 3. Performance. Substituting using regular expressions is faster than running the 100 | LaTeX processor. 101 | 102 | # Pretty-printing template strings from Python with `pp` 103 | 104 | The function ```pyp.pp(X)``` pretty-prints the template string `X` with substitutions 105 | from the local scope of the caller. This is useful for medium length LaTeX fragments 106 | containing a few Python substitutions: 107 | ```python 108 | from sympy import * 109 | p = S('x^2-2*x+3') 110 | dpdx = p.diff(S('x')) 111 | pyp.print(pyp.pp('The minimum of $y=@p$ is at $x=@{solve(dpdx)[0]}$.')) 112 | ``` 113 | 114 | # Caching 115 | 116 | When compiling `a.tex`, PypTeX creates a cache file `a.pickle`. This file is 117 | automatically invalidated if the Python fragments in `a.tex` change, or if some 118 | other dependencies have changed. Dependencies can be declared from inside `a.tex` via 119 | `pyp.dep(...)`. Caching can be completely disabled with `pyp.disable_cache=True`, 120 | and users can delete `a.pickle` as necessary. 121 | 122 | # Scopes 123 | 124 | For each template file `a.tex`, `b.tex`, ... 
a private global scope is created for 125 | executing Python fragments. This means that Python fragments in `a.tex` cannot use 126 | functions or variables defined in `b.tex`, although shared functions could be 127 | implemented in a shared `c.py` Python module that is `import`ed into 128 | `a.tex` and `b.tex`. 129 | 130 | In particular, when one does `pyp.input('b.tex')` from `a.tex`, the code in `b.tex` cannot 131 | use functions and data generated in `a.tex`. This means that `b.tex` is effectively 132 | a "compilation unit" whose semantics are essentially independent of `a.tex`. 133 | 134 | For any given `a.tex` file, its private global scope is initialized with the 135 | standard Python builtins and with a single `pyp` object, which is a `weakref.proxy` 136 | to the `pyptex('a.tex')` instance. We use a `weakref.proxy` because the global 137 | scope of `a.tex` is a `dict` stored in the (private) variable `pyp.__globals__`. The 138 | use of `weakref.proxy` avoids creating a circular data structure that would otherwise 139 | stymie the Python garbage collector. For most purposes, this global `pyp` variable 140 | acts exactly like a concrete `pyptex` instance. 
141 | 142 | # TeXShop 143 | 144 | If you want to use TeXShop on Mac, put the following into `~/Library/TeXShop/Engines/pyptex.engine` and restart TeXShop: 145 | ``` 146 | #!/bin/bash 147 | pyptex $1 148 | ``` 149 | """ 150 | 151 | from contextlib import suppress 152 | import datetime 153 | import glob 154 | import inspect 155 | import os 156 | import pickle 157 | import re 158 | import string 159 | import subprocess 160 | import shlex 161 | import sys 162 | import time 163 | import traceback 164 | import weakref 165 | import streamcapture 166 | import numpy 167 | import sympy 168 | import types 169 | import matplotlib 170 | import matplotlib.pyplot 171 | import matplotlib.artist 172 | from pathlib import Path 173 | from matplotlib.backend_bases import Gcf, FigureManagerBase 174 | from matplotlib.backends.backend_ps import FigureCanvasPS 175 | 176 | __pdoc__ = { 177 | 'pyptex.compile': False, 178 | 'pyptex.generateddir': False, 179 | 'pyptex.process': False, 180 | 'pyptex.resolvedeps': False, 181 | 'pyptex.run': False, 182 | 'FigureManager': False, 183 | 'FigureManager.show': False, 184 | } 185 | 186 | __pdoc__['pyptexNameSpace'] = False 187 | class pyptexNameSpace: 188 | def __init__(self,d): 189 | self.__dict__.update(d) 190 | def __str__(self): 191 | return fr'\input{{{self.pyp.pyptexfilename}}}' 192 | def __repr__(self): 193 | return repr(str(self)) 194 | def __eq__(self, other): 195 | if isinstance(self, pyptexNameSpace) and isinstance(other, pyptexNameSpace): 196 | return self.__dict__ == other.__dict__ 197 | return NotImplemented 198 | 199 | ###################################################################### 200 | # The stuff below makes pyptex into a matplotlib backend 201 | FigureCanvas = FigureCanvasPS 202 | 203 | class FigureManager(FigureManagerBase): 204 | def show(self, **kwargs): 205 | pass 206 | 207 | __pdoc__['show'] = False 208 | def show(*args, **kwargs): 209 | pass 210 | 211 | ppparser = 
re.compile(r"(@@)|@([a-zA-Z_][a-zA-Z0-9_]*)|@{([^{}}]*)}",re.DOTALL) 212 | pypparser = re.compile(r'((?" 216 | 217 | __pdoc__['format_my_nanos'] = False 218 | # Credit: abarnet on StackOverflow 219 | def format_my_nanos(nanos: int): 220 | """Convert nanoseconds to a human-readable format""" 221 | dt = datetime.datetime.fromtimestamp(nanos / 1e9) 222 | return '{}.{:09.0f}'.format(dt.strftime('%Y-%m-%d@%H:%M:%S'), nanos % 1e9) 223 | 224 | 225 | __pdoc__['dictdiff'] = False 226 | def dictdiff(A, B): 227 | A = set(A.items()) 228 | B = set(B.items()) 229 | D = A ^ B 230 | if len(D) == 0: 231 | return None 232 | return next(iter(D)) 233 | 234 | __pdoc__["filter_exception"] = False 235 | def filter_exception(e): 236 | global __stringtag__ 237 | tb = e.__traceback__ 238 | if tb is None: 239 | return e 240 | me = tb.tb_frame.f_code.co_filename 241 | while tb.tb_next is not None: 242 | code0 = tb.tb_frame.f_code 243 | code1 = tb.tb_next.tb_frame.f_code 244 | if code1.co_filename == me or code1.co_filename == __stringtag__: 245 | tb.tb_next = tb.tb_next.tb_next 246 | else: 247 | tb = tb.tb_next 248 | return e.with_traceback(e.__traceback__.tb_next) 249 | 250 | __pdoc__["__format_exception__"] = False 251 | def __format_exception__(e): # This is a workaround for broken things in Python 3.9 252 | return '\n'.join(traceback.TracebackException( 253 | type(e), e, e.__traceback__,limit=None,compact=True).format()) 254 | 255 | __pdoc__['exec_and_catch'] = False 256 | def exec_and_catch(cmd,glob,loc,filename,linecount,modes=[eval,exec]): 257 | for k in range(len(modes)): 258 | mode = modes[k] 259 | modename = 'exec' if mode==exec else 'eval' 260 | if k= 0: 470 | return '@' 471 | for k in [2,3]: 472 | if m.start(k) >= 0: 473 | return self.mylatex(eval(compile(m.group(k),__stringtag__,mode='eval'), 474 | foo.f_globals, foo.f_locals)) 475 | raise Exception("Tragic regular expression committed seppuku") 476 | 477 | return ppparser.sub(do_work, Z) 478 | 479 | def run(self, S, k): 480 | 
"""An internal function for executing Python code.""" 481 | print(f'Executing Python code:\n{S}') 482 | glob_ = self.__globals__ 483 | doeval = False 484 | self.__accum__ = [] 485 | (ret,mode) = exec_and_catch( 486 | cmd=S,glob=glob_,loc=None, 487 | filename=self.texfilename,linecount=k 488 | ) 489 | if mode==eval: 490 | self.__accum__.append(ret) 491 | if self.autoshow: 492 | self.showall() 493 | print(f'Python result:\n{self.__accum__!s}') 494 | return self.__accum__ 495 | 496 | def print(self, *argv): 497 | """If `pyp` is an object of type `pyptex`, `pyp.print(X)` causes `X` to be converted 498 | to its latex representation and substituted into the `a.pyptex` output file. 499 | The conversion is given by `sympy.latex(X)`, except that `None` is converted 500 | to the empty string. 501 | 502 | Many values can be printed at once with the notation `pyp.print(X, Y, ...)`.""" 503 | for k in range(len(argv)): 504 | if isinstance(argv[k],matplotlib.pyplot.Figure): 505 | self.mylatex(argv[k]) 506 | self.__accum__.extend(argv) 507 | 508 | def cite(self,b): 509 | r"""If `pyp` is an object of type `pyptex`, then `pyp.cite(X)` adds the relevant 510 | entry to the bibTeX file and returns the entry name. 
Example usage: 511 | 512 | `\cite{@{{{pyp.cite(r"@article{seb97,title=Some title etc...}")}}}}` 513 | """ 514 | self.bibs.append(b) 515 | return bibentryname.match(b).group(1).strip() 516 | 517 | def process(self, S, runner, record_substitutions): 518 | """An internal helper function for parsing the input file.""" 519 | ln = numpy.cumsum(numpy.array(numpy.array(list(S), dtype='U1') == '\n', int)) 520 | ln = numpy.insert(ln, 0, 0) 521 | 522 | def do_work(m): 523 | if m.start(1) >= 0: 524 | return m.group(0) 525 | if m.start(2) >= 0: 526 | return '@' 527 | for k in [6,9]: 528 | if m.start(k) >= 0: 529 | z = m.group(k) 530 | z0 = m.start(k) 531 | z1 = m.end(k) 532 | o = m.group(k-1) or '' 533 | break 534 | self.lc += ln[z1] - ln[z0] + 1 535 | ret = runner(z, ln[z0], o) 536 | if record_substitutions: 537 | self.__substarts__.append(ln[m.start(0)]) 538 | self.__subends__.append(ln[m.end(0)]) 539 | return ret 540 | 541 | return pypparser.sub(do_work, S) 542 | 543 | __pdoc__['mylatex'] = False 544 | def mylatex(self, X): 545 | if X is None: 546 | return '' 547 | if isinstance(X, str): 548 | return X 549 | if isinstance(X,pyptexNameSpace): 550 | return str(X) 551 | if isinstance(X,matplotlib.pyplot.Figure): 552 | self.__setupfig__(X) 553 | print(X.__IG__) 554 | X.canvas.print_figure(X.__FIGNAME__) 555 | X.drawn = True 556 | return X.__IG__ 557 | if isinstance(X,self.__sympy_plot__): 558 | return "" 559 | if isinstance(X,matplotlib.artist.Artist): 560 | return "" 561 | if isinstance(X,list) and isinstance(X[0],matplotlib.artist.Artist): 562 | return "" 563 | return sympy.latex(X) 564 | 565 | def compile(self): 566 | """An internal function for compiling the input file.""" 567 | with open(self.texfilename, 'rt') as file: 568 | text = file.read() 569 | try: 570 | with open(self.cachefilename, 'rb') as file: 571 | cache = pickle.load(file) 572 | except Exception: 573 | cache = {} 574 | defaults = { 575 | 'fragments': [], 576 | 'outputs': [], 577 | 'deps': {}, 578 | 'argv': [], 
579 | 'disable_cache': True, 580 | } 581 | for k, v in defaults.items(): 582 | if k not in cache: 583 | cache[k] = v 584 | self.fragments = [] 585 | 586 | def scanner(C, k, o): 587 | self.fragments.append(C) 588 | assert o in ['','verbatim'],"Invalid option: "+o 589 | return '' 590 | 591 | self.process(text, runner=scanner, record_substitutions=True) 592 | print(f'Found {self.lc!s} lines of Python.') 593 | saveddeps = self.deps 594 | self.deps = {} 595 | for k in cache['deps']: 596 | self.dep(k) 597 | self.resolvedeps() 598 | cached = True 599 | if cache['disable_cache']: 600 | print('disable_cache=True') 601 | cached = False 602 | elif cache['argv'] != self.argv: 603 | print('argv differs', self.argv, cache['argv']) 604 | cached = False 605 | elif cache['fragments'] != self.fragments: 606 | F1 = dict(enumerate(cache['fragments'])) 607 | F2 = dict(enumerate(self.fragments)) 608 | k = dictdiff(F1, F2)[0] 609 | print('Fragment #', k, 610 | '\nCached version:\n', F1[k] if k in F1 else None, 611 | '\nLive version:\n', F2[k] if k in F2 else None) 612 | cached = False 613 | elif self.deps != cache['deps']: 614 | F1 = cache['deps'] 615 | F2 = self.deps 616 | k = dictdiff(F1, F2)[0] 617 | print('Dependency mismatch', k, 618 | '\nCached version:\n', F1[k] if k in F1 else None, 619 | '\nLive version:\n', F2[k] if k in F2 else None) 620 | cached = False 621 | if cached: 622 | print('Using cached Python outputs') 623 | for k, v in cache.items(): 624 | self.__dict__[k] = v 625 | self.subcount = -1 626 | 627 | def subber(C, k, o): 628 | self.subcount += 1 629 | if(o==''): 630 | return self.outputs[self.subcount] 631 | if(o=='verbatim'): 632 | return C 633 | 634 | self.compiled = self.process(text, runner=subber, record_substitutions=False) 635 | else: 636 | print('Cache is invalidated.') 637 | self.deps = saveddeps 638 | self.outputs = [] 639 | 640 | def appender(C, k, o): 641 | result = self.run(C, k) 642 | self.outputs.append(''.join(map(self.mylatex, result))) 643 | 
if(o==''): 644 | return self.outputs[-1] 645 | if(o=='verbatim'): 646 | return C 647 | 648 | self.compiled = self.process(text, runner=appender, record_substitutions=False) 649 | sys.stdout.flush() 650 | if self.pyptexfilename: 651 | print(f'Saving to file: {self.pyptexfilename}') 652 | with open(self.pyptexfilename, 'wt') as file: 653 | file.write(self.compiled) 654 | self.resolvedeps() 655 | print(f'Dependencies are:\n{self.deps!s}') 656 | numlines = len(text.split('\n')) 657 | linemaps = [] 658 | prevline = 0 659 | for k in range(len(self.outputs)): 660 | linemaps.append(list(range(prevline+1,self.__substarts__[k]+1))) 661 | count = len(self.outputs[k].split('\n')) 662 | linemaps.append([self.__substarts__[k]]*(count-1)) 663 | prevline = self.__subends__[k] 664 | linemaps.append(list(range(prevline+1,numlines+1))) 665 | self.linemap = [str(x) for sublist in linemaps for x in sublist] 666 | print('Saving cache file', self.cachefilename) 667 | with open(self.cachefilename, 'wb') as file: 668 | cache = {} 669 | for k, v in self.__dict__.items(): 670 | if k[0:2] == '__' and k[-2:] == '__': 671 | pass 672 | elif callable(v): 673 | pass 674 | else: 675 | cache[k] = v 676 | pickle.dump(cache, file) 677 | if self.latexcommand: 678 | cmd = self.latexcommand.format(**self.__dict__) 679 | print(f'Running Latex command:\n{cmd}') 680 | self.exitcode = subprocess.Popen(shlex.split(cmd),close_fds=True).wait() 681 | 682 | def bib(self, bib=""): 683 | """A helper function for creating a `.bib` file. If `pyp=pyptex('a.tex')`, 684 | then `pyp.bib('''@book{knuth1984texbook, title={The {TEXbook}}, 685 | author={Knuth, Donald Ervin and Bibby, Duane}}''')` creates a file 686 | `a.bib` with the given text. This is just a convenience function 687 | that makes it easier to incorporate the bibtex file straight into the 688 | `a.tex` source. In `a.tex`, the typical way of using it is: 689 | `\\bibliography{@{{{pyp.bib("...")}}}}`. 
690 | """ 691 | self.bibs.append(bib) 692 | with self.open(self.bibfilename, 'wt') as file: 693 | file.write("\n".join(self.bibs)) 694 | return self.filename 695 | 696 | def dep(self, filename): 697 | """If `pyp=pyptex('a.tex')`, then `pyp.dep(filename)` declares that the Python code 698 | in `a.tex` depends on the file designated by `filename`. When the object 699 | `pyptex('a.tex')` is constructed, the file `a.pickle` will be loaded (if it exists). 700 | `a.pickle` is a cache of the results of the Python calculations in `a.tex`. 701 | If the cache is deemed valid, the `pyptex` constructor does not rerun all 702 | the Python fragments in `a.tex` but instead uses the previously cached outputs. 703 | 704 | The cache is invalidated under the following scenarios: 705 | 1. The new Python fragments in `a.tex` are not identical to the cached fragments. 706 | 2. The "last modification" timestamp on dependencies is not the same as in the cache. 707 | 3. `pyp.disable_cache==True`. 708 | 709 | The list of dependencies defaults to only the `pyptex` executable. Additional 710 | dependencies can be manually declared via `pyp.dep(filename)`. 711 | 712 | For convenience, `pyp.dep(filename)` returns filename. 713 | """ 714 | self.deps[filename] = '' 715 | return filename 716 | 717 | def resolvedeps(self): 718 | """An internal function that actually computes the datestamps of dependencies.""" 719 | for k in self.deps: 720 | try: 721 | ds = format_my_nanos(os.stat(k).st_mtime_ns) 722 | except Exception: 723 | ds = '' 724 | self.deps[k] = ds 725 | 726 | def input(self, filename, argv=False): 727 | r"""If `pyp = pyptex('a.tex')` then 728 | `pyp.input('b.tex')` 729 | returns the string `\input{"b.pyptex"}`. The common way of using this is to 730 | put `@{pyp.input('b.tex')}` somewhere in `a.tex`. 731 | The function `pyp.input('b.tex')` internally calls the constructor 732 | `pyptex('b.tex')` so that `b.pyptex` is compiled from `b.tex`. 
733 | 734 | Note that the two files `a.tex` and `b.tex` are "semantically isolated". All 735 | calculations, variables and functions defined in `a.tex` live in a global scope 736 | that is private to `a.tex`, much like each Python module has a private global 737 | scope. In a similar fashion, `b.tex` has its own private global scope. 738 | The global `pyp` objects in `a.tex` and `b.tex` are also different instances 739 | of the `pyptex` class. This is similar to the notion of "compilation units" in 740 | the C programming language. 741 | 742 | From `a.tex`, one can retrieve global variables of `b.tex` as follows. If 743 | `foo = pyp.input('b.tex')`, and if `b.tex` defines a global variable `x`, 744 | then it can be retrieved by `foo.x`. The `foo` variable is an instance of a 745 | `pyptexNameSpace` that contains the global scope of `b.tex`. This type has a 746 | custom string representation, so that `str(foo)` or `@{foo}` is 747 | `'\input{b.pyptex}'`. 748 | 749 | If one wishes to pass some parameters from `a.tex` to `b.tex`, one may use 750 | the notation `pyp.input('b.tex', argv)`, which will initialize the global 751 | `pyp` object of `b.tex` so that it contains the field `pyp.argv=argv`. 752 | """ 753 | ret = pyptex(filename, argv or self.argv, False) 754 | ret2 = pyptexNameSpace(ret.__globals__) 755 | return ret2 756 | 757 | def open(self, filename, *argv, **kwargs): 758 | """If pyp = pyptex('a.tex') then pyp.open(filename, ...) is a wrapper for 759 | the builtin function open(filename, ...) that further adds filename to 760 | the list of dependencies via pyp.dep(filename). 
761 | """ 762 | self.dep(filename) 763 | return open(filename, *argv, **kwargs) 764 | 765 | 766 | class MyWriter(streamcapture.Writer): 767 | def __init__(self,stream): 768 | super(MyWriter, self).__init__(stream) 769 | self.last = b"" 770 | self.matcher = re.compile(b'([^:]*):([0-9]+): ') 771 | self.caches = {} 772 | def write_from(self,data,cap): 773 | foo = data.split(b"\n") 774 | n = len(foo) 775 | for k in range(n): 776 | bar = b"" if k>0 else self.last 777 | baz = self.matcher.match(bar+foo[k]) 778 | if baz: 779 | pyptexfile = baz.group(1).decode() 780 | basename = stripext.sub(lambda m: m.group(1),pyptexfile) 781 | picklefile = basename+'.pickle' 782 | if picklefile not in self.caches: 783 | try: 784 | with open(picklefile, 'rb') as file: 785 | self.caches[picklefile] = pickle.load(file) 786 | except Exception: 787 | self.caches[picklefile] = None 788 | cache = self.caches[picklefile] 789 | if cache is None: 790 | continue 791 | texfile = cache['texfilename'] 792 | pyptexlinenumber = int(baz.group(2).decode()) 793 | texlinenumber = cache['linemap'][pyptexlinenumber-1] 794 | foo[k] += (f"\n{texfile}:{texlinenumber}: PypTeX source file").encode() 795 | data = b"\n".join(foo) 796 | if n<2: 797 | self.last = b"" 798 | self.last += foo[n-1] 799 | self._write(data) 800 | os.write(cap.dup_fd,data) 801 | 802 | 803 | def pyptexmain(argv: list = None): 804 | """This function parses an input file a.tex to produce a.pyptex and a.pdf, by 805 | doing pyp = pyptex('a.tex', ...) object. The filename a.tex must be in argv[1]; 806 | if argv is not provided, it is taken from sys.argv. 807 | The default pyp.latexcommand invokes pdflatex and, if a.bib is present, also bibtex. 808 | If an exception occurs, pdb is automatically invoked in postmortem mode. 809 | If "--pdb=no" is in argv, it is removed from argv and automatic pdb postmortem is disabled. 810 | If "--pdb=yes" is in argv, automatic pdb postmortem is enabled. This is the default. 
811 | """ 812 | argv = argv or sys.argv 813 | dopdb = True 814 | with suppress(Exception): 815 | argv.remove('--pdb=no') 816 | dopdb = False 817 | with suppress(Exception): 818 | argv.remove('--pdb=yes') 819 | dopdb = True 820 | if len(argv) < 2: 821 | print('Usage: pyptex ...') 822 | sys.exit(1) 823 | writer = MyWriter(open(f'{os.path.splitext(argv[1])[0]}.pyplog','wb')) 824 | # writer = streamcapture.Writer(open(f'{os.path.splitext(argv[1])[0]}.pyplog','wb'),2) 825 | with streamcapture.StreamCapture(sys.stdout,writer,echo=False), streamcapture.StreamCapture(sys.stderr,writer,echo=False): 826 | try: 827 | pyp = pyptex(argv[1], argv[2:], 828 | latexcommand=r'{latex} {pyptexfilename} && (test ! -f {bibfilename} || bibtex {auxfilename})', 829 | ) 830 | except Exception as e: 831 | import pdb 832 | e = filter_exception(e) 833 | print(__format_exception__(e)) 834 | # print('\n'.join(traceback.TracebackException(exc_type=type(foo), exc_value=foo, exc_traceback=foo.__traceback__).format())) 835 | if e.__traceback__ is not None and dopdb: 836 | print('A Python error has occurred. Launching the debugger pdb.\n' 837 | "Type 'help' for a list of commands, and 'quit' when done.") 838 | pdb.post_mortem(e.__traceback__) 839 | sys.exit(1) 840 | return pyp.exitcode 841 | --------------------------------------------------------------------------------