├── main.pdf
├── main.tex
└── latexrun


/main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattjj/ode-diff-notes/HEAD/main.pdf


--------------------------------------------------------------------------------
/main.tex:
--------------------------------------------------------------------------------
  1 | \documentclass{article}
  2 | \usepackage{amsmath}
  3 | \usepackage{amssymb}
  4 | 
  5 | \begin{document}
  6 | \section{Jacobian-vector products}
  7 | Consider the parameterized ODE initial value problem
  8 | \begin{equation}
  9 |     \dot y = f(t, y, a), \qquad y(0, a) = y_0(a),
 10 | \end{equation}
 11 | by which we mean
 12 | \begin{equation}
 13 |     \partial_0 y(t, a)[1] = f(t, y(t, a), a), \qquad y(0, a) = y_0(a),
 14 | \end{equation}
 15 | for all $t$ and $a$ in some domains.
 16 | We want to understand how the solution to the ODE changes (e.g.\ at particular
 17 | values of $t$) for small perturbations of $a$. That is, we want to be able to
 18 | compute the Jacobian-vector product
 19 | \begin{equation}
 20 |     (a, v) \mapsto \partial_1 y(t, a)[v]
 21 | \end{equation}
 22 | at any particular values of $t$ and $a$, where $v$ can be interpreted as a
 23 | small perturbation to the value of $a$.
 24 | 
 25 | Since the ODE holds true for all values of $a$ (or at least those close to a
 26 | particular $a_0$ in which we are interested), we can view both sides as
 27 | functions of $a$, and assuming differentiability we can differentiate both
 28 | sides with respect to $a$ to find a new equation that must be satisfied:
 29 | \begin{equation}
 30 |     \partial_1 (\, (t, a) \mapsto \partial_0 y(t, a)[1] \,) = \partial_2 f(t, y(t, a), a) + \partial_1 f(t, y(t, a), a) \circ \partial_1 y(t, a).
 31 | \end{equation}
 32 | Applying both sides to a particular perturbation vector $v$ and using the fact
 33 | that partial derivatives commute, we have
 34 | \begin{equation}
 35 |     \partial_0 (\, (t, a) \mapsto \partial_1 y(t, a)[v] \,)[1] = \partial_2 f(t, y(t, a), a)[v] + \partial_1 f(t, y(t, a), a) [ \, \partial_1 y(t, a)[v] \,].
 36 |     \notag
 37 | \end{equation}
 38 | We can identify $z(t, a) \triangleq \partial_1 y(t, a)[v]$ as a new state
 39 | vector to write a joint ODE system
 40 | \begin{gather}
 41 |     \begin{bmatrix}
 42 |         \dot y \\
 43 |         \dot z
 44 |     \end{bmatrix}
 45 |     =
 46 |     \begin{bmatrix}
 47 |         f(t, y, a) \\
 48 |         g(t, y, z, a)
 49 |     \end{bmatrix},
 50 |     \qquad
 51 |     \begin{bmatrix}
 52 |         y(0, a) \\
 53 |         z(0, a)
 54 |     \end{bmatrix}
 55 |     =
 56 |     \begin{bmatrix}
 57 |         y_0(a) \\
 58 |         \partial y_0(a)[v]
 59 |     \end{bmatrix},
 60 |     \\
 61 |     g(t, y, z, a) = \partial_1 f(t, y, a)[z] + \partial_2 f(t, y, a)[v].
 62 | \end{gather}
 63 | Notice that the dynamics on the $z$ component are linear/affine in $z$ (and $v$!).
 64 | 
 65 | \section{Transposing linear ODEs}
 66 | Consider the parameterized linear ODE IVP
 67 | \begin{equation}
 68 |     \dot z(t) = A(t) z(t) + B(t) v, \qquad z(0) = C v,
 69 |     \label{eq:linear_ode}
 70 | \end{equation}
 71 | as a function of $v$.
 72 | The implicit mapping $\mathcal{T}_1: v \mapsto z$ is linear, and so for any
 73 | linear functional on solution functions $\mathcal{T}_2 : z \mapsto \mathbb{R}$
 74 | there is a linear functional on perturbations $v$ defined by $\mathcal{T}_2 \circ
 75 | \mathcal{T}_1 : v \mapsto \mathbb{R}$. Given a representer for such a linear
 76 | functional $\mathcal{T}_2$, we wish to find an explicit representer vector for
 77 | $\mathcal{T}_2 \circ \mathcal{T}_1$.
 78 | 
 79 | % Concretely, consider linear functionals of the form
 80 | % \begin{equation}
 81 | %     \mathcal{T}_2[z] = w_T^\mathsf{T} z(T) + \int_0^T w(t)^\mathsf{T} z(t) \, \mathrm{d}t.
 82 | %     \label{eq:functional}
 83 | % \end{equation}
 84 | % We wish to find a function $\lambda(t)$ such that
 85 | % \begin{equation}
 86 | %     w_T^\mathsf{T} z(T) + \int_0^T w(t)^\mathsf{T} z(t) \, \mathrm{d}t
 87 | %     =
 88 | %     \lambda(0)^\mathsf{T} C v + \int_0^T \lambda(t)^\mathsf{T} B(t)[v] \, \mathrm{d}t,
 89 | % \end{equation}
 90 | % for all $v$, when $z$ is a solution to~\eqref{eq:linear_ode}.
 91 | 
 92 | Consider first the special case when $B \equiv 0$, so that we have the ODE
 93 | \begin{equation}
 94 |     \dot z(t) = A(t) z(t), \qquad z(0) = C v.
 95 |     \label{eq:linear_ode_simple}
 96 | \end{equation}
 97 | Moreover consider the special case of a weighted evaluation functional
 98 | \begin{equation}
 99 |     \mathcal{T}_2[z] = w^\mathsf{T} z(T).
100 |     \label{eq:evaluation_functional}
101 | \end{equation}
102 | We wish to find a representer vector $\lambda$ such that
103 | \begin{equation}
104 | \lambda^\mathsf{T} z(0) = w^\mathsf{T} z(T).
105 | \end{equation}
106 | Since the particular time $t=0$ is arbitrary, a more general problem would be
107 | to find a function $\lambda(t)$ such that
108 | \begin{equation}
109 |     \lambda(t)^\mathsf{T} z(t) = w^\mathsf{T} z(T)
110 |     \label{eq:representer_simple}
111 | \end{equation}
112 | for all times $t$.
113 | That is, we can fix $\lambda(T) = w$ and ensure that the value of
114 | $\lambda(t)^\mathsf{T} z(t)$ does not change with time:
115 | \begin{align}
116 |     0 &= \partial (\, t \mapsto \lambda(t)^\mathsf{T} z(t) \,)
117 |     = \dot \lambda(t)^\mathsf{T} z(t) + \lambda(t)^\mathsf{T} \dot z(t)
118 |     \\
119 |     &= \dot \lambda(t)^\mathsf{T} z(t) + \lambda(t)^\mathsf{T} A(t) z(t),
120 | \end{align}
121 | where on the last line we have used the ODE~\eqref{eq:linear_ode_simple}.
122 | To satisfy this equation for all $t$ and arbitrary solutions $z(t)$, we can
123 | choose
124 | \begin{equation}
125 |     \dot \lambda(t) = - A(t)^\mathsf{T} \lambda(t), \qquad \lambda(T) = w.
126 |     \label{eq:adjoint_ode_simple}
127 | \end{equation}
128 | This gives us a means of computing a representer for the linear functional
129 | $\mathcal{T}_2 \circ \mathcal{T}_1$ by solving the ODE
130 | IVP~\eqref{eq:adjoint_ode_simple}, integrating backward in time from $t=T$ to
131 | $t=0$ to compute $\lambda(0)$; since $z(0) = C v$, the representer is $C^\mathsf{T} \lambda(0)$.
132 | Notice that by linearity we can handle a functional that is a linear
133 | combination of such weighted evaluation functionals, say at times $0 < T_1 <
134 | T_2$, by pulling back the functional at time $t=T_2$ to a representer at time
135 | $t=T_1$ and summing before pulling back the sum to $t=0$.
136 | 
137 | We can follow a similar argument when $B \not \equiv 0$. For a $z$
138 | solving~\eqref{eq:linear_ode} and a linear functional of the
139 | form~\eqref{eq:evaluation_functional}, we can seek a function $\lambda(t)$
140 | that represents the linear functional by satisfying
141 | \begin{equation}
142 |     w^\mathsf{T} z(T) = \lambda(t)^\mathsf{T} z(t) - \int_T^t \lambda(\tau)^\mathsf{T} B(\tau) v \, \mathrm{d} \tau,
143 |     \label{eq:representer}
144 | \end{equation}
145 | for all times $t$. In particular, at time $t=0$ we would have
146 | \begin{equation}
147 |     w^\mathsf{T} z(T) = \lambda(0)^\mathsf{T} z(0) - \int_T^0 \lambda(\tau)^\mathsf{T} B(\tau) v \, \mathrm{d} \tau,
148 | \end{equation}
149 | which gives us a representer of $\mathcal{T}_2 \circ \mathcal{T}_1$, namely as
150 | \begin{equation}
151 |     (\mathcal{T}_2 \circ \mathcal{T}_1)[v] = u^\mathsf{T} v
152 |     \quad \text{where} \quad
153 |     u = C^\mathsf{T} \lambda(0) - \int_T^0 B(\tau)^\mathsf{T} \lambda(\tau) \, \mathrm{d} \tau.
154 |     \label{eq:representer2}
155 | \end{equation}
156 | We can find an ODE which $\lambda(t)$ must satisfy by
157 | differentiating both sides of~\eqref{eq:representer} with respect to time:
158 | \begin{align}
159 |     0 &= \lambda(t)^\mathsf{T} \dot z(t) + \dot \lambda(t)^\mathsf{T} z(t) - \lambda(t)^\mathsf{T} B(t) v
160 |     \\
161 |     &=
162 |     \lambda(t)^\mathsf{T} \left( A(t) z(t) + B(t) v \right) + \dot \lambda(t)^\mathsf{T} z(t) - \lambda(t)^\mathsf{T} B(t) v
163 |     \\
164 |     &= \lambda(t)^\mathsf{T} A(t) z(t) + \dot \lambda(t)^\mathsf{T} z(t),
165 | \end{align}
166 | and so as before $\lambda(t)$ must satisfy the linear ODE IVP
167 | \begin{equation}
168 |     \dot \lambda(t) = -A(t)^\mathsf{T} \lambda(t), \qquad \lambda(T) = w.
169 | \end{equation}
170 | To compute the integral in~\eqref{eq:representer2}, we can augment the ODE
171 | system to
172 | \begin{equation}
173 |     \begin{bmatrix}
174 |         \dot \lambda \\
175 |         \dot \omega
176 |     \end{bmatrix}
177 |     =
178 |     \begin{bmatrix}
179 |         - A(t)^\mathsf{T} \lambda(t)
180 |         \\
181 |         B(t)^\mathsf{T} \lambda(t)
182 |     \end{bmatrix},
183 |     \qquad
184 |     \begin{bmatrix}
185 |         \lambda(T) \\
186 |         \omega(T)
187 |     \end{bmatrix}
188 |     =
189 |     \begin{bmatrix}
190 |         w \\
191 |         0
192 |     \end{bmatrix}.
193 | \end{equation}
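194 | Integrating this system backward in time from $t=T$ to $t=0$ gives $\omega(0) = \int_T^0 B(\tau)^\mathsf{T} \lambda(\tau) \, \mathrm{d} \tau$, which is exactly the integral in~\eqref{eq:representer2}.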
195 | \end{document}
196 | 


--------------------------------------------------------------------------------
/latexrun:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python3
   2 | 
   3 | # Copyright (c) 2013, 2014 Austin Clements
   4 | 
   5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
   6 | # of this software and associated documentation files (the "Software"), to deal
   7 | # in the Software without restriction, including without limitation the rights
   8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 | # copies of the Software, and to permit persons to whom the Software is
  10 | # furnished to do so, subject to the following conditions:
  11 | 
  12 | # The above copyright notice and this permission notice shall be included in
  13 | # all copies or substantial portions of the Software.
  14 | 
  15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 | # THE SOFTWARE.
  22 | 
  23 | import sys
  24 | import os
  25 | import errno
  26 | import argparse
  27 | import shlex
  28 | import json
  29 | import subprocess
  30 | import re
  31 | import collections
  32 | import hashlib
  33 | import shutil
  34 | import curses
  35 | import filecmp
  36 | import io
  37 | import traceback
  38 | import time
  39 | 
  40 | try:
  41 |     import fcntl
  42 | except ImportError:
  43 |     # Non-UNIX platform
  44 |     fcntl = None
  45 | 
  46 | def debug(string, *args):
  47 |     if debug.enabled:
  48 |         print(string.format(*args), file=sys.stderr)
  49 | debug.enabled = False
  50 | 
  51 | def debug_exc():
  52 |     if debug.enabled:
  53 |         traceback.print_exc()
  54 | 
  55 | def main():
  56 |     # Parse command-line
  57 |     arg_parser = argparse.ArgumentParser(
  58 |         description='''A 21st century LaTeX wrapper,
  59 |         %(prog)s runs latex (and bibtex) the right number of times so you
  60 |         don't have to,
  61 |         strips the log spew to make errors visible,
  62 |         and plays well with standard build tools.''')
  63 |     arg_parser.add_argument(
  64 |         '-o', metavar='FILE', dest='output', default=None,
  65 |         help='Output file name (default: derived from input file)')
  66 |     arg_parser.add_argument(
  67 |         '--latex-cmd', metavar='CMD', default='pdflatex',
  68 |         help='Latex command (default: %(default)s)')
  69 |     arg_parser.add_argument(
  70 |         '--latex-args', metavar='ARGS', type=arg_parser_shlex,
  71 |         help='Additional command-line arguments for latex.'
  72 |         ' This will be parsed and split using POSIX shell rules.')
  73 |     arg_parser.add_argument(
  74 |         '--bibtex-cmd', metavar='CMD', default='bibtex',
  75 |         help='Bibtex command (default: %(default)s)')
  76 |     arg_parser.add_argument(
  77 |         '--bibtex-args', metavar='ARGS', type=arg_parser_shlex,
  78 |         help='Additional command-line arguments for bibtex')
  79 |     arg_parser.add_argument(
  80 |         '--max-iterations', metavar='N', type=int, default=10,
  81 |         help='Max number of times to run latex before giving up'
  82 |         ' (default: %(default)s)')
  83 |     arg_parser.add_argument(
  84 |         '-W', metavar='(no-)CLASS',
  85 |         action=ArgParserWarnAction, dest='nowarns', default=set(['underfull']),
  86 |         help='Enable/disable warning from CLASS, which can be any package name, '
  87 |         'LaTeX warning class (e.g., font), bad box type '
  88 |         '(underfull, overfull, loose, tight), or "all"')
  89 |     arg_parser.add_argument(
  90 |         '-O', metavar='DIR', dest='obj_dir', default='latex.out',
  91 |         help='Directory for intermediate files and control database '
  92 |         '(default: %(default)s)')
  93 |     arg_parser.add_argument(
  94 |         '--color', choices=('auto', 'always', 'never'), default='auto',
  95 |         help='When to colorize messages')
  96 |     arg_parser.add_argument(
  97 |         '--verbose-cmds', action='store_true', default=False,
  98 |         help='Print commands as they are executed')
  99 |     arg_parser.add_argument(
 100 |         '--debug', action='store_true',
 101 |         help='Enable detailed debug output')
 102 |     actions = arg_parser.add_argument_group('actions')
 103 |     actions.add_argument(
 104 |         '--clean-all', action='store_true', help='Delete output files')
 105 |     actions.add_argument(
 106 |         'file', nargs='?', help='.tex file to compile')
 107 |     args = arg_parser.parse_args()
 108 |     if not any([args.clean_all, args.file]):
 109 |         arg_parser.error('at least one action is required')
 110 |     args.latex_args = args.latex_args or []
 111 |     args.bibtex_args = args.bibtex_args or []
 112 | 
 113 |     verbose_cmd.enabled = args.verbose_cmds
 114 |     debug.enabled = args.debug
 115 | 
 116 |     # A note about encodings: POSIX encoding is a mess; TeX encoding
 117 |     # is a disaster.  Our goal is to make things no worse, so we want
 118 |     # byte-accurate round-tripping of TeX messages.  Since TeX
 119 |     # messages are *basically* text, we use strings and
 120 |     # surrogateescape'ing for both input and output.  I'm not fond of
 121 |     # setting surrogateescape globally, but it's far easier than
 122 |     # dealing with every place we pass TeX output through.
 123 |     # Conveniently, JSON can round-trip surrogateescape'd strings, so
 124 |     # our control database doesn't need special handling.
 125 |     sys.stdout = io.TextIOWrapper(
 126 |         sys.stdout.buffer, encoding=sys.stdout.encoding,
 127 |         errors='surrogateescape', line_buffering=sys.stdout.line_buffering)
 128 |     sys.stderr = io.TextIOWrapper(
 129 |         sys.stderr.buffer, encoding=sys.stderr.encoding,
 130 |         errors='surrogateescape', line_buffering=sys.stderr.line_buffering)
 131 | 
 132 |     Message.setup_color(args.color)
 133 | 
 134 |     # Open control database.
 135 |     dbpath = os.path.join(args.obj_dir, '.latexrun.db')
 136 |     if not os.path.exists(dbpath) and os.path.exists('.latexrun.db'):
 137 |         # The control database used to live in the source directory.
 138 |         # Support this for backwards compatibility.
 139 |         dbpath = '.latexrun.db'
 140 |     try:
 141 |         db = DB(dbpath)
 142 |     except (ValueError, OSError) as e:
 143 |         print('error opening {}: {}'.format(e.filename if hasattr(e, 'filename')
 144 |                                             else dbpath, e),
 145 |               file=sys.stderr)
 146 |         debug_exc()
 147 |         sys.exit(1)
 148 | 
 149 |     # Clean
 150 |     if args.clean_all:
 151 |         try:
 152 |             db.do_clean(args.obj_dir)
 153 |         except OSError as e:
 154 |             print(e, file=sys.stderr)
 155 |             debug_exc()
 156 |             sys.exit(1)
 157 | 
 158 |     # Build
 159 |     if not args.file:
 160 |         return
 161 |     task_commit = None
 162 |     try:
 163 |         task_latex = LaTeX(db, args.file, args.latex_cmd, args.latex_args,
 164 |                            args.obj_dir, args.nowarns)
 165 |         task_commit = LaTeXCommit(db, task_latex, args.output)
 166 |         task_bibtex = BibTeX(db, task_latex, args.bibtex_cmd, args.bibtex_args,
 167 |                              args.nowarns, args.obj_dir)
 168 |         tasks = [task_latex, task_commit, task_bibtex]
 169 |         stable = run_tasks(tasks, args.max_iterations)
 170 | 
 171 |         # Print final task output and gather exit status
 172 |         status = 0
 173 |         for task in tasks:
 174 |             status = max(task.report(), status)
 175 | 
 176 |         if not stable:
 177 |             print('error: files are still changing after {} iterations; giving up'
 178 |                   .format(args.max_iterations), file=sys.stderr)
 179 |             status = max(status, 1)
 180 |     except TaskError as e:
 181 |         print(str(e), file=sys.stderr)
 182 |         debug_exc()
 183 |         status = 1
 184 | 
 185 |     # Report final status, if interesting
 186 |     fstatus = 'There were errors' if task_commit is None else task_commit.status
 187 |     if fstatus:
 188 |         output = args.output
 189 |         if output is None:
 190 |             if task_latex.get_outname() is not None:
 191 |                 output = os.path.basename(task_latex.get_outname())
 192 |             else:
 193 |                 output = 'output'
 194 |         if Message._color:
 195 |             terminfo.send('bold', ('setaf', 1))
 196 |         print('{}; {} not updated'.format(fstatus, output))
 197 |         if Message._color:
 198 |             terminfo.send('sgr0')
 199 |     sys.exit(status)
 200 | 
 201 | def arg_parser_shlex(string):
 202 |     """Argument parser for shell token lists."""
 203 |     try:
 204 |         return shlex.split(string)
 205 |     except ValueError as e:
 206 |         raise argparse.ArgumentTypeError(str(e)) from None
 207 | 
 208 | class ArgParserWarnAction(argparse.Action):
 209 |     def __call__(self, parser, namespace, value, option_string=None):
 210 |         nowarn = getattr(namespace, self.dest)
 211 |         if value == 'all':
 212 |             nowarn.clear()
 213 |         elif value.startswith('no-'):
 214 |             nowarn.add(value[3:])
 215 |         else:
 216 |             nowarn.discard(value)
 217 |         setattr(namespace, self.dest, nowarn)
 218 | 
 219 | def verbose_cmd(args, cwd=None, env=None):
 220 |     if verbose_cmd.enabled:
 221 |         cmd = ' '.join(map(shlex.quote, args))
 222 |         if cwd is not None:
 223 |             cmd = '(cd {} && {})'.format(shlex.quote(cwd), cmd)
 224 |         if env is not None:
 225 |             for k, v in env.items():
 226 |                 if os.environ.get(k) != v:
 227 |                     cmd = '{}={} {}'.format(k, shlex.quote(v), cmd)
 228 |         print(cmd, file=sys.stderr)
 229 | verbose_cmd.enabled = False
 230 | 
 231 | def mkdir_p(path):
 232 |     try:
 233 |         os.makedirs(path)
 234 |     except OSError as exc:
 235 |         if exc.errno == errno.EEXIST and os.path.isdir(path):
 236 |             pass
 237 |         else: raise
 238 | 
 239 | class DB:
 240 |     """A latexrun control database."""
 241 | 
 242 |     _VERSION = 'latexrun-db-v2'
 243 | 
 244 |     def __init__(self, filename):
 245 |         self.__filename = filename
 246 | 
 247 |         # Make sure database directory exists
 248 |         if os.path.dirname(self.__filename):
 249 |             os.makedirs(os.path.dirname(self.__filename), exist_ok=True)
 250 | 
 251 |         # Lock the database if possible. We don't release this lock
 252 |         # until the process exits.
 253 |         lockpath = self.__filename + '.lock'
 254 |         if fcntl is not None:
 255 |             lockfd = os.open(lockpath, os.O_CREAT|os.O_WRONLY|os.O_CLOEXEC, 0o666)
 256 |             # Note that this is actually an fcntl lock, not a lockf
 257 |             # lock. Don't be fooled.
 258 |             fcntl.lockf(lockfd, fcntl.LOCK_EX, 1)
 259 | 
 260 |         try:
 261 |             fp = open(filename, 'r')
 262 |         except FileNotFoundError:
 263 |             debug('creating new database')
 264 |             self.__val = {'version': DB._VERSION}
 265 |         else:
 266 |             debug('loading database')
 267 |             self.__val = json.load(fp)
 268 |             if 'version' not in self.__val:
 269 |                 raise ValueError('file exists, but does not appear to be a latexrun database'.format(filename))
 270 |             if self.__val['version'] != DB._VERSION:
 271 |                 raise ValueError('unknown database version {!r}'
 272 |                                  .format(self.__val['version']))
 273 | 
 274 |     def commit(self):
 275 |         debug('committing database')
 276 |         # Atomically commit database
 277 |         tmp_filename = self.__filename + '.tmp'
 278 |         with open(tmp_filename, 'w') as fp:
 279 |             json.dump(self.__val, fp, indent=2, separators=(',', ': '))
 280 |             fp.flush()
 281 |             os.fsync(fp.fileno())
 282 |         os.rename(tmp_filename, self.__filename)
 283 | 
 284 |     def get_summary(self, task_id):
 285 |         """Return the recorded summary for the given task or None."""
 286 |         return self.__val.get('tasks', {}).get(task_id)
 287 | 
 288 |     def set_summary(self, task_id, summary):
 289 |         """Set the summary for the given task."""
 290 |         self.__val.setdefault('tasks', {})[task_id] = summary
 291 | 
 292 |     def add_clean(self, filename):
 293 |         """Add an output file to be cleaned.
 294 | 
 295 |         Unlike the output files recorded in the task summaries,
 296 |         cleanable files strictly accumulate until a clean is
 297 |         performed.
 298 |         """
 299 |         self.__val.setdefault('clean', {})[filename] = hash_cache.get(filename)
 300 | 
 301 |     def do_clean(self, obj_dir=None):
 302 |         """Remove output files and delete database.
 303 | 
 304 |         If obj_dir is not None and it is empty after all files are
 305 |         removed, it will also be removed.
 306 |         """
 307 | 
 308 |         for f, want_hash in self.__val.get('clean', {}).items():
 309 |             have_hash = hash_cache.get(f)
 310 |             if have_hash is not None:
 311 |                 if want_hash == have_hash:
 312 |                     debug('unlinking {}', f)
 313 |                     hash_cache.invalidate(f)
 314 |                     os.unlink(f)
 315 |                 else:
 316 |                     print('warning: {} has changed; not removing'.format(f),
 317 |                           file=sys.stderr)
 318 |         self.__val = {'version': DB._VERSION}
 319 |         try:
 320 |             os.unlink(self.__filename)
 321 |         except FileNotFoundError:
 322 |             pass
 323 |         if obj_dir is not None:
 324 |             try:
 325 |                 os.rmdir(obj_dir)
 326 |             except OSError:
 327 |                 pass
 328 | 
 329 | class HashCache:
 330 |     """Cache of file hashes.
 331 | 
 332 |     As latexrun reaches fixed-point, it hashes the same files over and
 333 |     over, many of which never change.  Since hashing is somewhat
 334 |     expensive, we keep a simple cache of these hashes.
 335 |     """
 336 | 
 337 |     def __init__(self):
 338 |         self.__cache = {}  # (st_dev, st_ino) -> SHA-256 hex digest, or '*' if clobbered
 339 | 
 340 |     def get(self, filename):
 341 |         """Return filename's SHA-256 hex digest, '*' if clobbered, or None if missing/a directory."""
 342 |         try:
 343 |             with open(filename, 'rb') as fp:
 344 |                 st = os.fstat(fp.fileno())
 345 |                 key = (st.st_dev, st.st_ino)
 346 |                 if key in self.__cache:
 347 |                     return self.__cache[key]  # cached entry is returned without re-reading the file
 348 | 
 349 |                 debug('hashing {}', filename)
 350 |                 h = hashlib.sha256()
 351 |                 while True:
 352 |                     block = fp.read(256*1024)  # hash in 256 KiB chunks
 353 |                     if not len(block):
 354 |                         break
 355 |                     h.update(block)
 356 |                 self.__cache[key] = h.hexdigest()
 357 |                 return self.__cache[key]
 358 |         except (FileNotFoundError, IsADirectoryError):
 359 |             return None
 360 | 
 361 |     def clobber(self, filename):
 362 |         """If filename's hash is not known, record an invalid hash.
 363 | 
 364 |         This can be used when filename was overwritten before we were
 365 |         necessarily able to obtain its hash.  filename must exist.
 366 |         """
 367 |         st = os.stat(filename)
 368 |         key = (st.st_dev, st.st_ino)
 369 |         if key not in self.__cache:
 370 |             self.__cache[key] = '*'  # '*' can never equal a real hex digest
 371 | 
 372 |     def invalidate(self, filename):  # forget filename's cached hash so the next get() re-hashes it
 373 |         try:
 374 |             st = os.stat(filename)
 375 |         except OSError as e:
 376 |             # Pessimistically wipe the whole cache
 377 |             debug('wiping hash cache ({})', e)
 378 |             self.__cache.clear()
 379 |         else:
 380 |             key = (st.st_dev, st.st_ino)
 381 |             if key in self.__cache:
 382 |                 del self.__cache[key]
 383 | hash_cache = HashCache()
 384 | 
 385 | class _Terminfo:
 386 |     def __init__(self):
 387 |         self.__tty = os.isatty(sys.stdout.fileno())
 388 |         if self.__tty:
 389 |             curses.setupterm()
 390 |         self.__ti = {}
 391 | 
 392 |     def __ensure(self, cap):
 393 |         if cap not in self.__ti:
 394 |             if not self.__tty:
 395 |                 string = None
 396 |             else:
 397 |                 string = curses.tigetstr(cap)
 398 |                 if string is None or b'$<' in string:
 399 |                     # Don't have this capability or it has a pause
 400 |                     string = None
 401 |             self.__ti[cap] = string
 402 |         return self.__ti[cap]
 403 | 
 404 |     def has(self, *caps):
 405 |         return all(self.__ensure(cap) is not None for cap in caps)
 406 | 
 407 |     def send(self, *caps):
 408 |         # Flush TextIOWrapper to the binary IO buffer
 409 |         sys.stdout.flush()
 410 |         for cap in caps:
 411 |             # We should use curses.putp here, but it's broken in
 412 |             # Python3 because it writes directly to C's buffered
 413 |             # stdout and there's no way to flush that.
 414 |             if isinstance(cap, tuple):
 415 |                 s = curses.tparm(self.__ensure(cap[0]), *cap[1:])
 416 |             else:
 417 |                 s = self.__ensure(cap)
 418 |             sys.stdout.buffer.write(s)
 419 | terminfo = _Terminfo()
 420 | 
 421 | class Progress:
 422 |     _enabled = None
 423 | 
 424 |     def __init__(self, prefix):
 425 |         self.__prefix = prefix
 426 |         if Progress._enabled is None:
 427 |             Progress._enabled = (not debug.enabled) and \
 428 |                                 terminfo.has('cr', 'el', 'rmam', 'smam')
 429 | 
 430 |     def __enter__(self):
 431 |         self.last = ''
 432 |         self.update('')
 433 |         return self
 434 | 
 435 |     def __exit__(self, typ, value, traceback):
 436 |         if Progress._enabled:
 437 |             # Beginning of line and clear
 438 |             terminfo.send('cr', 'el')
 439 |             sys.stdout.flush()
 440 | 
 441 |     def update(self, msg):
 442 |         if not Progress._enabled:
 443 |             return
 444 |         out = '[' + self.__prefix + ']'
 445 |         if msg:
 446 |             out += ' ' + msg
 447 |         if out != self.last:
 448 |             # Beginning of line, clear line, disable wrap
 449 |             terminfo.send('cr', 'el', 'rmam')
 450 |             sys.stdout.write(out)
 451 |             # Enable wrap
 452 |             terminfo.send('smam')
 453 |             self.last = out
 454 |             sys.stdout.flush()
 455 | 
 456 | class Message(collections.namedtuple(
 457 |         'Message', 'typ filename lineno msg')):
 458 |     def emit(self):
 459 |         if self.filename:
 460 |             if self.filename.startswith('./'):
 461 |                 finfo = self.filename[2:]
 462 |             else:
 463 |                 finfo = self.filename
 464 |         else:
 465 |             finfo = '<no file>'
 466 |         if self.lineno is not None:
 467 |             finfo += ':' + str(self.lineno)
 468 |         finfo += ': '
 469 |         if self._color:
 470 |             terminfo.send('bold')
 471 |         sys.stdout.write(finfo)
 472 | 
 473 |         if self.typ != 'info':
 474 |             if self._color:
 475 |                 terminfo.send(('setaf', 5 if self.typ == 'warning' else 1))
 476 |             sys.stdout.write(self.typ + ': ')
 477 |         if self._color:
 478 |             terminfo.send('sgr0')
 479 |         sys.stdout.write(self.msg + '\n')
 480 | 
 481 |     @classmethod
 482 |     def setup_color(cls, state):
 483 |         if state == 'never':
 484 |             cls._color = False
 485 |         elif state == 'always':
 486 |             cls._color = True
 487 |         elif state == 'auto':
 488 |             cls._color = terminfo.has('setaf', 'bold', 'sgr0')
 489 |         else:
 490 |             raise ValueError('Illegal color state {:r}'.format(state))
 491 | 
 492 | 
 493 | ##################################################################
 494 | # Task framework
 495 | #
 496 | 
 497 | terminate_task_loop = False  # tasks may set this to make run_tasks() stop early
 498 | start_time = time.time()  # time.time() captured when this module-level line executes
 499 | 
 500 | def run_tasks(tasks, max_iterations):
 501 |     """Execute tasks in round-robin order until all are stable.
 502 | 
 503 |     This will also exit if terminate_task_loop is true.  Tasks may use
 504 |     this to terminate after a fatal error (even if that fatal error
 505 |     doesn't necessarily indicate stability; as long as re-running the
 506 |     task will never eliminate the fatal error).
 507 | 
 508 |     Return True if fixed-point is reached or terminate_task_loop is
 509 |     set within max_iterations iterations.
 510 |     """
 511 | 
 512 |     global terminate_task_loop
 513 |     terminate_task_loop = False
 514 | 
 515 |     nstable = 0
 516 |     for iteration in range(max_iterations):
 517 |         for task in tasks:
 518 |             if task.stable():
 519 |                 nstable += 1
 520 |                 if nstable == len(tasks):
 521 |                     debug('fixed-point reached')
 522 |                     return True
 523 |             else:
 524 |                 task.run()
 525 |                 nstable = 0
 526 |                 if terminate_task_loop:
 527 |                     debug('terminate_task_loop set')
 528 |                     return True
 529 |     debug('fixed-point not reached')
 530 |     return False
 531 | 
 532 | class TaskError(Exception):
 533 |     pass
 534 | 
 535 | class Task:
 536 |     """A deterministic computation whose inputs and outputs can be captured."""
 537 | 
 538 |     def __init__(self, db, task_id):
 539 |         self.__db = db
 540 |         self.__task_id = task_id
 541 | 
 542 |     def __debug(self, string, *args):
 543 |         if debug.enabled:
 544 |             debug('task {}: {}', self.__task_id, string.format(*args))
 545 | 
 546 |     def stable(self):
 547 |         """Return True if running this task will not affect system state.
 548 | 
 549 |         Functionally, let f be the task, and s be the system state.
 550 |         Then s' = f(s).  If it must be that s' == s (that is, f has
 551 |         reached a fixed point), then this function must return True.
 552 |         """
 553 |         last_summary = self.__db.get_summary(self.__task_id)
 554 |         if last_summary is None:
 555 |             # Task has never run, so running it will modify system
 556 |             # state
 557 |             changed = 'never run'
 558 |         else:
 559 |             # If any of the inputs have changed since the last run of
 560 |             # this task, the result may change, so re-run the task.
 561 |             # Also, it's possible something else changed an output
 562 |             # file, in which case we also want to re-run the task, so
 563 |             # check the outputs, too.
 564 |             changed = self.__summary_changed(last_summary)
 565 | 
 566 |         if changed:
 567 |             self.__debug('unstable (changed: {})', changed)
 568 |             return False
 569 |         else:
 570 |             self.__debug('stable')
 571 |             return True
 572 | 
 573 |     def __summary_changed(self, summary):
 574 |         """Test if any inputs changed from summary.
 575 | 
 576 |         Returns a string describing the changed input, or None.
 577 |         """
 578 |         for dep in summary['deps']:
 579 |             fn, args, val = dep
 580 |             method = getattr(self, '_input_' + fn, None)
 581 |             if method is None:
 582 |                 return 'unknown dependency method {}'.format(fn)
 583 |             if method == self._input_unstable or method(*args) != val:
 584 |                 return '{}{}'.format(fn, tuple(args))
 585 |         return None
 586 | 
 587 |     def _input(self, name, *args):
 588 |         """Register an input for this run.
 589 | 
 590 |         This calls self._input_<name>(*args) to get the value of this
 591 |         input.  This function should run quickly and return some
 592 |         projection of system state that affects the result of this
 593 |         computation.
 594 | 
 595 |         Both args and the return value must be JSON serializable.
 596 |         """
 597 |         method = getattr(self, '_input_' + name)
 598 |         val = method(*args)
 599 |         if [name, args, val] not in self.__deps:
 600 |             self.__deps.append([name, args, val])
 601 |         return val
 602 | 
 603 |     def run(self):
 604 |         # Before we run the task, pre-hash any files that were output
 605 |         # files in the last run.  These may be input by this run and
 606 |         # then clobbered, at which point it will be too late to get an
 607 |         # input hash.  Ideally we would only hash files that were
 608 |         # *both* input and output files, but latex doesn't tell us
 609 |         # about input files that didn't exist, so if we start from a
 610 |         # clean slate, we often require an extra run because we don't
 611 |         # know a file is input/output until after the second run.
 612 |         last_summary = self.__db.get_summary(self.__task_id)
 613 |         if last_summary is not None:
 614 |             for io_filename in last_summary['output_files']:
 615 |                 self.__debug('pre-hashing {}', io_filename)
 616 |                 hash_cache.get(io_filename)
 617 | 
 618 |         # Run the task
 619 |         self.__debug('running')
 620 |         self.__deps = []
 621 |         result = self._execute()
 622 | 
 623 |         # Clear cached output file hashes
 624 |         for filename in result.output_filenames:
 625 |             hash_cache.invalidate(filename)
 626 | 
 627 |         # If the output files change, then the computation needs to be
 628 |         # re-run, so record them as inputs
 629 |         for filename in result.output_filenames:
 630 |             self._input('file', filename)
 631 | 
 632 |         # Update task summary in database
 633 |         self.__db.set_summary(self.__task_id,
 634 |                               self.__make_summary(self.__deps, result))
 635 |         del self.__deps
 636 | 
 637 |         # Add output files to be cleaned
 638 |         for f in result.output_filenames:
 639 |             self.__db.add_clean(f)
 640 | 
 641 |         try:
 642 |             self.__db.commit()
 643 |         except OSError as e:
 644 |             raise TaskError('error committing control database {}: {}'.format(
 645 |                 getattr(e, 'filename', '<unknown path>'), e)) from e
 646 | 
 647 |     def __make_summary(self, deps, run_result):
 648 |         """Construct a new task summary."""
 649 |         return {
 650 |             'deps': deps,
 651 |             'output_files': {f: hash_cache.get(f)
 652 |                              for f in run_result.output_filenames},
 653 |             'extra': run_result.extra,
 654 |         }
 655 | 
 656 |     def _execute(self):
 657 |         """Abstract: Execute this task.
 658 | 
 659 |         Subclasses should implement this method to execute this task.
 660 |         This method must return a RunResult giving the inputs that
 661 |         were used by the task and the outputs it produced.
 662 |         """
 663 |         raise NotImplementedError('Task._execute is abstract')
 664 | 
 665 |     def _get_result_extra(self):
 666 |         """Return the 'extra' result from the previous run, or None."""
 667 |         summary = self.__db.get_summary(self.__task_id)
 668 |         if summary is None:
 669 |             return None
 670 |         return summary['extra']
 671 | 
 672 |     def report(self):
 673 |         """Report the task's results to stdout and return exit status.
 674 | 
 675 |         This may be called when the task has never executed.
 676 |         Subclasses should override this.  The default implementation
 677 |         reports nothing and returns 0.
 678 |         """
 679 |         return 0
 680 | 
 681 |     # Standard input functions
 682 | 
 683 |     def _input_env(self, var):
 684 |         return os.environ.get(var)
 685 | 
 686 |     def _input_file(self, path):
 687 |         return hash_cache.get(path)
 688 | 
 689 |     def _input_unstable(self):
 690 |         """Mark this run as unstable, regardless of other inputs."""
 691 |         return None
 692 | 
 693 |     def _input_unknown_input(self):
 694 |         """An unknown input that may change after latexrun exits.
 695 | 
 696 |         This conservatively marks some unknown input that definitely
 697 |         won't change while latexrun is running, but may change before
 698 |         the user next runs latexrun.  This allows the task to
 699 |         stabilize during this invocation, but will cause the task to
 700 |         re-run on the next invocation.
 701 |         """
 702 |         return start_time
 703 | 
 704 | class RunResult(collections.namedtuple(
 705 |         'RunResult', 'output_filenames extra')):
 706 |     """The result of a single task execution.
 707 | 
 708 |     This captures all files written by the task, and task-specific
 709 |     results that need to be persisted between runs (for example, to
 710 |     enable reporting of a task's results).
 711 |     """
 712 |     pass
 713 | 
 714 | ##################################################################
 715 | # LaTeX task
 716 | #
 717 | 
 718 | def normalize_input_path(path):
 719 |     # Resolve the directory of the input path, but leave the file
 720 |     # component alone because it affects TeX's behavior.
 721 |     head, tail = os.path.split(path)
 722 |     npath = os.path.join(os.path.realpath(head), tail)
 723 |     return os.path.relpath(path)
 724 | 
 725 | class LaTeX(Task):
 726 |     def __init__(self, db, tex_filename, cmd, cmd_args, obj_dir, nowarns):
 727 |         super().__init__(db, 'latex::' + normalize_input_path(tex_filename))
 728 |         self.__tex_filename = tex_filename
 729 |         self.__cmd = cmd
 730 |         self.__cmd_args = cmd_args
 731 |         self.__obj_dir = obj_dir
 732 |         self.__nowarns = nowarns
 733 | 
 734 |         self.__pass = 0
 735 | 
 736 |     def _input_args(self):
 737 |         # If filename starts with a character the tex command-line
 738 |         # treats specially, then tweak it so it doesn't.
 739 |         filename = self.__tex_filename
 740 |         if filename.startswith(('-', '&', '\\')):
 741 |             filename = './' + filename
 742 |         # XXX Put these at the beginning in case the provided
 743 |         # arguments are malformed.  Might want to do a best-effort
 744 |         # check for incompatible user-provided arguments (note:
 745 |         # arguments can be given with one or two dashes and those with
 746 |         # values can use an equals or a space).
 747 |         return [self.__cmd] + self.__cmd_args + \
 748 |             ['-interaction', 'nonstopmode', '-recorder',
 749 |              '-output-directory', self.__obj_dir, filename]
 750 | 
 751 |     def _execute(self):
 752 |         # Run latex
 753 |         self.__pass += 1
 754 |         args = self._input('args')
 755 |         debug('running {}', args)
 756 |         try:
 757 |             os.makedirs(self.__obj_dir, exist_ok=True)
 758 |         except OSError as e:
 759 |             raise TaskError('failed to create %s: ' % self.__obj_dir + str(e)) \
 760 |                 from e
 761 |         try:
 762 |             verbose_cmd(args)
 763 |             p = subprocess.Popen(args,
 764 |                                  stdin=subprocess.DEVNULL,
 765 |                                  stdout=subprocess.PIPE,
 766 |                                  stderr=subprocess.STDOUT)
 767 |             stdout, has_errors, missing_includes = self.__feed_terminal(p.stdout)
 768 |             status = p.wait()
 769 |         except OSError as e:
 770 |             raise TaskError('failed to execute latex task: ' + str(e)) from e
 771 | 
 772 |         # Register environment variable inputs
 773 |         for env_var in ['TEXMFOUTPUT', 'TEXINPUTS', 'TEXFORMATS', 'TEXPOOL',
 774 |                         'TFMFONTS', 'PATH']:
 775 |             self._input('env', env_var)
 776 | 
 777 |         jobname, outname = self.__parse_jobname(stdout)
 778 |         inputs, outputs = self.__parse_recorder(jobname)
 779 | 
 780 |         # LaTeX overwrites its own inputs.  Mark its output files as
 781 |         # clobbered before we hash its input files.
 782 |         for path in outputs:
 783 |             # In some abort cases (e.g., >=100 errors), LaTeX claims
 784 |             # output files that don't actually exist.
 785 |             if os.path.exists(path):
 786 |                 hash_cache.clobber(path)
 787 |         # Depend on input files.  Task.run pre-hashed outputs from the
 788 |         # previous run, so if this isn't the first run and as long as
 789 |         # the set of outputs didn't change, we'll be able to get the
 790 |         # input hashes, even if they were clobbered.
 791 |         for path in inputs:
 792 |             self._input('file', path)
 793 | 
 794 |         if missing_includes:
 795 |             # Missing \includes are tricky.  Ideally we'd depend on
 796 |             # the absence of some file, but in fact we'd have to
 797 |             # depend on the failure of a whole kpathsea lookup.
 798 |             # Rather than try to be clever, just mark this as an
 799 |             # unknown input so we'll run at least once on the next
 800 |             # invocation.
 801 |             self._input('unknown_input')
 802 | 
 803 |         if not self.__create_outdirs(stdout) and has_errors:
 804 |             # LaTeX reported unrecoverable errors (other than output
 805 |             # directory errors, which we just fixed).  We could
 806 |             # continue to stabilize the document, which may change
 807 |             # some of the other problems reported (but not the
 808 |             # unrecoverable errors), or we can just abort now and get
 809 |             # back to the user quickly with the major errors.  We opt
 810 |             # for the latter.
 811 |             global terminate_task_loop
 812 |             terminate_task_loop = True
 813 |             # This error could depend on something we failed to track.
 814 |             # It would be really confusing if we continued to report
 815 |             # the error after the user fixed it, so be conservative
 816 |             # and force a re-run next time.
 817 |             self._input('unknown_input')
 818 | 
 819 |         return RunResult(outputs,
 820 |                          {'jobname': jobname, 'outname': outname,
 821 |                           'status': status})
 822 | 
 823 |     def __feed_terminal(self, stdout):
 824 |         prefix = 'latex'
 825 |         if self.__pass > 1:
 826 |             prefix += ' ({})'.format(self.__pass)
 827 |         with Progress(prefix) as progress:
 828 |             buf = []
 829 |             filt = LaTeXFilter()
 830 |             while True:
 831 |                 # Use os.read to read only what's available on the pipe,
 832 |                 # without waiting to fill a buffer
 833 |                 data = os.read(stdout.fileno(), 4096)
 834 |                 if not data:
 835 |                     break
 836 |                 # See "A note about encoding" above
 837 |                 data = data.decode('ascii', errors='surrogateescape')
 838 |                 buf.append(data)
 839 |                 filt.feed(data)
 840 |                 file_stack = filt.get_file_stack()
 841 |                 if file_stack:
 842 |                     tos = file_stack[-1]
 843 |                     if tos.startswith('./'):
 844 |                         tos = tos[2:]
 845 |                     progress.update('>' * len(file_stack) + ' ' + tos)
 846 |                 else:
 847 |                     progress.update('')
 848 | 
 849 |             # Were there unrecoverable errors?
 850 |             has_errors = any(msg.typ == 'error' for msg in filt.get_messages())
 851 | 
 852 |             return ''.join(buf), has_errors, filt.has_missing_includes()
 853 | 
 854 |     def __parse_jobname(self, stdout):
 855 |         """Extract the job name and output name from latex's output.
 856 | 
 857 |         We get these from latex because they depend on complicated
 858 |         file name parsing rules, are affected by arguments like
 859 |         -output-directory, and may be just "texput" if things fail
 860 |         really early.  The output name may be None if there were no
 861 |         pages of output.
 862 |         """
 863 |         jobname = outname = None
 864 |         for m in re.finditer(r'^Transcript written on "?(.*)\.log"?\.$', stdout,
 865 |                              re.MULTILINE | re.DOTALL):
 866 |             jobname = m.group(1).replace('\n', '')
 867 |         if jobname is None:
 868 |             print(stdout, file=sys.stderr)
 869 |             raise TaskError('failed to extract job name from latex log')
 870 |         for m in re.finditer(r'^Output written on "?(.*\.[^ ."]+)"? \([0-9]+ page',
 871 |                              stdout, re.MULTILINE | re.DOTALL):
 872 |             outname = m.group(1).replace('\n', '')
 873 |         if outname is None and not \
 874 |            re.search(r'^No pages of output\.$|^! Emergency stop\.$'
 875 |                      r'|^!  ==> Fatal error occurred, no output PDF file produced!$',
 876 |                      stdout, re.MULTILINE):
 877 |             print(stdout, file=sys.stderr)
 878 |             raise TaskError('failed to extract output name from latex log')
 879 | 
 880 |         # LuaTeX (0.76.0) doesn't include the output directory in the
 881 |         # logged transcript or output file name.
 882 |         if os.path.basename(jobname) == jobname and \
 883 |            os.path.exists(os.path.join(self.__obj_dir, jobname + '.log')):
 884 |             jobname = os.path.join(self.__obj_dir, jobname)
 885 |             if outname is not None:
 886 |                 outname = os.path.join(self.__obj_dir, outname)
 887 | 
 888 |         return jobname, outname
 889 | 
 890 |     def __parse_recorder(self, jobname):
 891 |         """Parse file recorder output."""
 892 |         # XXX If latex fails because a file isn't found, that doesn't
 893 |         # go into the .fls file, but creating that file will affect
 894 |         # the computation, so it should be included as an input.
 895 |         # Though it's generally true that files can be added earlier
 896 |         # in search paths and will affect the output without us knowing.
 897 |         #
 898 |         # XXX This is a serious problem for bibtex, since the first
 899 |         # run won't depend on the .bbl file!  But maybe the .aux file
 900 |         # will always cause a re-run, at which point the .bbl will
 901 |         # exist?
 902 |         filename = jobname + '.fls'
 903 |         try:
 904 |             recorder = open(filename)
 905 |         except OSError as e:
 906 |             raise TaskError('failed to open file recorder output: ' + str(e)) \
 907 |                 from e
 908 |         pwd, inputs, outputs = '', set(), set()
 909 |         for linenum, line in enumerate(recorder):
 910 |             parts = line.rstrip('\n').split(' ', 1)
 911 |             if parts[0] == 'PWD':
 912 |                 pwd = parts[1]
 913 |             elif parts[0] in ('INPUT', 'OUTPUT'):
 914 |                 if parts[1].startswith('/'):
 915 |                     path = parts[1]
 916 |                 else:
 917 |                     # Try to make "nice" paths, especially for clean
 918 |                     path = os.path.relpath(os.path.join(pwd, parts[1]))
 919 |                 if parts[0] == 'INPUT':
 920 |                     inputs.add(path)
 921 |                 else:
 922 |                     outputs.add(path)
 923 |             else:
 924 |                 raise TaskError('syntax error on line {} of {}'
 925 |                                 .format(linenum, filename))
 926 |         # Ironically, latex omits the .fls file itself
 927 |         outputs.add(filename)
 928 |         return inputs, outputs
 929 | 
 930 |     def __create_outdirs(self, stdout):
 931 |         # In some cases, such as \include'ing a file from a
 932 |         # subdirectory, TeX will attempt to create files in
 933 |         # subdirectories of the output directory that don't exist.
 934 |         # Detect this, create the output directory, and re-run.
 935 |         m = re.search('^! I can\'t write on file `(.*)\'\\.$', stdout, re.M)
 936 |         if m and m.group(1).find('/') > 0 and '../' not in m.group(1):
 937 |             debug('considering creating output sub-directory for {}'.
 938 |                   format(m.group(1)))
 939 |             subdir = os.path.dirname(m.group(1))
 940 |             newdir = os.path.join(self.__obj_dir, subdir)
 941 |             if os.path.isdir(subdir) and not os.path.isdir(newdir):
 942 |                 debug('creating output subdirectory {}'.format(newdir))
 943 |                 try:
 944 |                     mkdir_p(newdir)
 945 |                 except OSError as e:
 946 |                     raise TaskError('failed to create output subdirectory: ' +
 947 |                                     str(e)) from e
 948 |                 self._input('unstable')
 949 |                 return True
 950 | 
 951 |     def report(self):
 952 |         extra = self._get_result_extra()
 953 |         if extra is None:
 954 |             return 0
 955 | 
 956 |         # Parse the log
 957 |         logfile = open(extra['jobname'] + '.log', 'rt', errors='surrogateescape')
 958 |         for msg in self.__clean_messages(
 959 |                 LaTeXFilter(self.__nowarns).feed(
 960 |                     logfile.read(), True).get_messages()):
 961 |             msg.emit()
 962 | 
 963 |         # Return LaTeX's exit status
 964 |         return extra['status']
 965 | 
 966 |     def __clean_messages(self, msgs):
 967 |         """Make some standard log messages more user-friendly."""
 968 |         have_undefined_reference = False
 969 |         for msg in msgs:
 970 |             if msg.msg == '==> Fatal error occurred, no output PDF file produced!':
 971 |                 msg = msg._replace(typ='info',
 972 |                                    msg='Fatal error (no output file produced)')
 973 |             if msg.msg.startswith('[LaTeX] '):
 974 |                 # Strip unnecessary package name
 975 |                 msg = msg._replace(msg=msg.msg.split(' ', 1)[1])
 976 |             if re.match(r'Reference .* undefined', msg.msg):
 977 |                 have_undefined_reference = True
 978 |             if have_undefined_reference and \
 979 |                re.match(r'There were undefined references', msg.msg):
 980 |                 # LaTeX prints this at the end so the user knows it's
 981 |                 # worthwhile looking back at the log.  Since latexrun
 982 |                 # makes the earlier messages obvious, this is
 983 |                 # redundant.
 984 |                 continue
 985 |             yield msg
 986 | 
 987 |     def get_tex_filename(self):
 988 |         return self.__tex_filename
 989 | 
 990 |     def get_jobname(self):
 991 |         extra = self._get_result_extra()
 992 |         if extra is None:
 993 |             return None
 994 |         return extra['jobname']
 995 | 
 996 |     def get_outname(self):
 997 |         extra = self._get_result_extra()
 998 |         if extra is None:
 999 |             return None
1000 |         return extra['outname']
1001 | 
1002 |     def get_status(self):
1003 |         extra = self._get_result_extra()
1004 |         if extra is None:
1005 |             return None
1006 |         return extra['status']
1007 | 
class LaTeXCommit(Task):
    """Task that copies a LaTeX task's output file to its final location.

    The status attribute is None after a successful commit (or when
    nothing needed committing) and a short description otherwise.
    """

    def __init__(self, db, latex_task, output_path):
        task_id = 'latex_commit::' + \
            normalize_input_path(latex_task.get_tex_filename())
        super().__init__(db, task_id)
        self.__latex_task = latex_task
        self.__output_path = output_path
        self.status = 'There were errors'

    def _input_latex(self):
        # This task depends on the LaTeX task's exit status and on
        # the name of the file it produced
        latex = self.__latex_task
        return latex.get_status(), latex.get_outname()

    def _execute(self):
        """Commit the LaTeX output if latex succeeded and produced one.

        Raises TaskError if copying or renaming the output fails.
        """
        self.status = 'There were errors'

        # Only commit when latex succeeded and actually wrote output
        latex_status, produced = self._input('latex')
        if produced is None or latex_status != 0:
            debug('not committing (status {}, outname {})',
                  latex_status, produced)
            if produced is None:
                self.status = 'No pages of output'
            return RunResult([], None)

        target = self.__output_path or os.path.basename(produced)
        if os.path.abspath(target) == os.path.abspath(produced):
            debug('skipping commit (outname is commit name)')
            self.status = None
            return RunResult([], None)

        try:
            unchanged = os.path.exists(target) and \
                filecmp.cmp(produced, target)
            if not unchanged:
                debug('commiting {} to {}', produced, target)
                # Copy next to the output first, then rename the
                # copy into place
                staging = produced + '~'
                shutil.copy(produced, staging)
                os.rename(staging, target)
            else:
                debug('skipping commit ({} and {} are identical)',
                      produced, target)
                # To avoid confusion, touch the output file
                open(produced, 'r+b').close()
        except OSError as e:
            raise TaskError('error committing latex output: {}'.format(e)) from e
        self._input('file', produced)
        self.status = None
        return RunResult([target], None)
1051 | 
1052 | class LaTeXFilter:
1053 |     TRACE = False               # Set to enable detailed parse tracing
1054 | 
1055 |     def __init__(self, nowarns=[]):
1056 |         self.__data = ''
1057 |         self.__restart_pos = 0
1058 |         self.__restart_file_stack = []
1059 |         self.__restart_messages_len = 0
1060 |         self.__messages = []
1061 |         self.__first_file = None
1062 |         self.__fatal_error = False
1063 |         self.__missing_includes = False
1064 |         self.__pageno = 1
1065 |         self.__restart_pageno = 1
1066 | 
1067 |         self.__suppress = {cls: 0 for cls in nowarns}
1068 | 
1069 |     def feed(self, data, eof=False):
1070 |         """Feed LaTeX log data to the parser.
1071 | 
1072 |         The log data can be from LaTeX's standard output, or from the
1073 |         log file.  If there will be no more data, set eof to True.
1074 |         """
1075 | 
1076 |         self.__data += data
1077 |         self.__data_complete = eof
1078 | 
1079 |         # Reset to last known-good restart point
1080 |         self.__pos = self.__restart_pos
1081 |         self.__file_stack = self.__restart_file_stack.copy()
1082 |         self.__messages = self.__messages[:self.__restart_messages_len]
1083 |         self.__lstart = self.__lend = -1
1084 |         self.__pageno = self.__restart_pageno
1085 | 
1086 |         # Parse forward
1087 |         while self.__pos < len(self.__data):
1088 |             self.__noise()
1089 | 
1090 |         # Handle suppressed warnings
1091 |         if eof:
1092 |             msgs = ['%d %s warning%s' % (count, cls, "s" if count > 1 else "")
1093 |                     for cls, count in self.__suppress.items() if count]
1094 |             if msgs:
1095 |                 self.__message('info', None,
1096 |                                '%s not shown (use -Wall to show them)' %
1097 |                                ', '.join(msgs), filename=self.__first_file)
1098 | 
1099 |         if eof and len(self.__file_stack) and not self.__fatal_error:
1100 |             # Fatal errors generally cause TeX to "succumb" without
1101 |             # closing the file stack, so don't complain in that case.
1102 |             self.__message('warning', None,
1103 |                            "unbalanced `(' in log; file names may be wrong")
1104 |         return self
1105 | 
    def get_messages(self):
        """Return the list of Messages recorded by the parser so far.

        This is the parser's own list, not a copy.
        """
        return self.__messages
1109 | 
    def get_file_stack(self):
        """Return the file stack for the data that has been parsed.

        This returns a list from outermost file to innermost file.
        The list may be empty.
        """

        return self.__file_stack
1118 | 
    def has_missing_includes(self):
        """Return True if the log reported missing \\include files.

        The flag is set when the parser sees a "No file ... .tex."
        line and is never cleared afterwards.
        """
        return self.__missing_includes
1122 | 
1123 |     def __save_restart_point(self):
1124 |         """Save the current state as a known-good restart point.
1125 | 
1126 |         On the next call to feed, the parser will reset to this point.
1127 |         """
1128 |         self.__restart_pos = self.__pos
1129 |         self.__restart_file_stack = self.__file_stack.copy()
1130 |         self.__restart_messages_len = len(self.__messages)
1131 |         self.__restart_pageno = self.__pageno
1132 | 
1133 |     def __message(self, typ, lineno, msg, cls=None, filename=None):
1134 |         if cls is not None and cls in self.__suppress:
1135 |             self.__suppress[cls] += 1
1136 |             return
1137 |         filename = filename or (self.__file_stack[-1] if self.__file_stack
1138 |                                 else self.__first_file)
1139 |         self.__messages.append(Message(typ, filename, lineno, msg))
1140 | 
1141 |     def __ensure_line(self):
1142 |         """Update lstart and lend."""
1143 |         if self.__lstart <= self.__pos < self.__lend:
1144 |             return
1145 |         self.__lstart = self.__data.rfind('\n', 0, self.__pos) + 1
1146 |         self.__lend = self.__data.find('\n', self.__pos) + 1
1147 |         if self.__lend == 0:
1148 |             self.__lend = len(self.__data)
1149 | 
    @property
    def __col(self):
        """The 0-based column number of __pos.

        The line bounds are refreshed first, so the column is
        measured from the start of the line containing __pos.
        """
        self.__ensure_line()
        return self.__pos - self.__lstart
1155 | 
    @property
    def __avail(self):
        """True if there is unparsed data at or after __pos."""
        return self.__pos < len(self.__data)
1159 | 
    def __lookingat(self, needle):
        """Return True if the data at __pos starts with needle."""
        return self.__data.startswith(needle, self.__pos)
1162 | 
    def __lookingatre(self, regexp, flags=0):
        """Match regexp against the data starting at __pos.

        Returns the match object, or None if there is no match at
        that position.
        """
        return re.compile(regexp, flags=flags).match(self.__data, self.__pos)
1165 | 
    def __skip_line(self):
        """Move __pos past the end of the current line."""
        self.__ensure_line()
        self.__pos = self.__lend
1169 | 
1170 |     def __consume_line(self, unwrap=False):
1171 |         self.__ensure_line()
1172 |         data = self.__data[self.__pos:self.__lend]
1173 |         self.__pos = self.__lend
1174 |         if unwrap:
1175 |             # TeX helpfully wraps all terminal output at 79 columns
1176 |             # (max_print_line).  If requested, unwrap it.  There's
1177 |             # simply no way to do this perfectly, since there could be
1178 |             # a line that happens to be 79 columns.
1179 |             #
1180 |             # We check for >=80 because a bug in LuaTeX causes it to
1181 |             # wrap at 80 columns instead of 79 (LuaTeX #900).
1182 |             while self.__lend - self.__lstart >= 80:
1183 |                 if self.TRACE: print('<{}> wrapping'.format(self.__pos))
1184 |                 self.__ensure_line()
1185 |                 data = data[:-1] + self.__data[self.__pos:self.__lend]
1186 |                 self.__pos = self.__lend
1187 |         return data
1188 | 
1189 |     # Parser productions
1190 | 
1191 |     def __noise(self):
1192 |         # Most of TeX's output is line noise that combines error
1193 |         # messages, warnings, file names, user errors and warnings,
1194 |         # and echos of token lists and other input.  This attempts to
1195 |         # tease these apart, paying particular attention to all of the
1196 |         # places where TeX echos input so that parens in the input do
1197 |         # not confuse the file name scanner.  There are three
1198 |         # functions in TeX that echo input: show_token_list (used by
1199 |         # runaway and show_context, which is used by print_err),
1200 |         # short_display (used by overfull/etc h/vbox), and show_print
1201 |         # (used in issue_message and the same places as
1202 |         # show_token_list).
1203 |         lookingat, lookingatre = self.__lookingat, self.__lookingatre
1204 |         if self.__col == 0:
1205 |             # The following messages are always preceded by a newline
1206 |             if lookingat('! '):
1207 |                 return self.__errmessage()
1208 |             if lookingat('!pdfTeX error: '):
1209 |                 return self.__pdftex_fail()
1210 |             if lookingat('Runaway '):
1211 |                 return self.__runaway()
1212 |             if lookingatre(r'(Overfull|Underfull|Loose|Tight) \\[hv]box \('):
1213 |                 return self.__bad_box()
1214 |             if lookingatre('(Package |Class |LaTeX |pdfTeX )?(\w+ )?warning: ', re.I):
1215 |                 return self.__generic_warning()
1216 |             if lookingatre('No file .*\\.tex\\.$', re.M):
1217 |                 # This happens with \includes of missing files.  For
1218 |                 # whatever reason, LaTeX doesn't consider this even
1219 |                 # worth a warning, but I do!
1220 |                 self.__message('warning', None,
1221 |                                self.__simplify_message(
1222 |                                    self.__consume_line(unwrap=True).strip()))
1223 |                 self.__missing_includes = True
1224 |                 return
1225 |             # Other things that are common and irrelevant
1226 |             if lookingatre(r'(Package|Class|LaTeX) (\w+ )?info: ', re.I):
1227 |                 return self.__generic_info()
1228 |             if lookingatre(r'(Document Class|File|Package): '):
1229 |                 # Output from "\ProvidesX"
1230 |                 return self.__consume_line(unwrap=True)
1231 |             if lookingatre(r'\\\w+=\\[a-z]+\d+\n'):
1232 |                 # Output from "\new{count,dimen,skip,...}"
1233 |                 return self.__consume_line(unwrap=True)
1234 | 
1235 |         # print(self.__data[self.__lstart:self.__lend].rstrip())
1236 |         # self.__pos = self.__lend
1237 |         # return
1238 | 
1239 |         # Now that we've substantially reduced the spew and hopefully
1240 |         # eliminated all input echoing, we're left with the file name
1241 |         # stack, page outs, and random other messages from both TeX
1242 |         # and various packages.  We'll assume at this point that all
1243 |         # parentheses belong to the file name stack or, if they're in
1244 |         # random other messages, they're at least balanced and nothing
1245 |         # interesting happens between them.  For page outs, ship_out
1246 |         # prints a space if not at the beginning of a line, then a
1247 |         # "[", then the page number being shipped out (this is
1248 |         # usually, but not always, followed by "]").
1249 |         m = re.compile(r'[(){}\n]|(?<=[\n ])\[\d+', re.M).\
1250 |             search(self.__data, self.__pos)
1251 |         if m is None:
1252 |             self.__pos = len(self.__data)
1253 |             return
1254 |         self.__pos = m.start() + 1
1255 |         ch = self.__data[m.start()]
1256 |         if ch == '\n':
1257 |             # Save this as a known-good restart point for incremental
1258 |             # parsing, since we definitely didn't match any of the
1259 |             # known message types above.
1260 |             self.__save_restart_point()
1261 |         elif ch == '[':
1262 |             # This is printed at the end of a page, so we're beginning
1263 |             # page n+1.
1264 |             self.__pageno = int(self.__lookingatre(r'\d+').group(0)) + 1
1265 |         elif ((self.__data.startswith('`', m.start() - 1) or
1266 |                self.__data.startswith('`\\', m.start() - 2)) and
1267 |                self.__data.startswith('\'', m.start() + 1)):
1268 |             # (, ), {, and } sometimes appear in TeX's error
1269 |             # descriptions, but they're always in `'s (and sometimes
1270 |             # backslashed)
1271 |             return
1272 |         elif ch == '(':
1273 |             # XXX Check that the stack doesn't drop to empty and then re-grow
1274 |             first = self.__first_file is None and self.__col == 1
1275 |             filename = self.__filename()
1276 |             self.__file_stack.append(filename)
1277 |             if first:
1278 |                 self.__first_file = filename
1279 |             if self.TRACE:
1280 |                 print('<{}>{}enter {}'.format(
1281 |                     m.start(), ' '*len(self.__file_stack), filename))
1282 |         elif ch == ')':
1283 |             if len(self.__file_stack):
1284 |                 if self.TRACE:
1285 |                     print('<{}>{}exit {}'.format(
1286 |                         m.start(), ' '*len(self.__file_stack),
1287 |                         self.__file_stack[-1]))
1288 |                 self.__file_stack.pop()
1289 |             else:
1290 |                 self.__message('warning', None,
1291 |                                "extra `)' in log; file names may be wrong ")
1292 |         elif ch == '{':
1293 |             # TeX uses this for various things we want to ignore, like
1294 |             # file names and print_mark.  Consume up to the '}'
1295 |             epos = self.__data.find('}', self.__pos)
1296 |             if epos != -1:
1297 |                 self.__pos = epos + 1
1298 |             else:
1299 |                 self.__message('warning', None,
1300 |                                "unbalanced `{' in log; file names may be wrong")
1301 |         elif ch == '}':
1302 |             self.__message('warning', None,
1303 |                            "extra `}' in log; file names may be wrong")
1304 | 
1305 |     def __filename(self):
1306 |         initcol = self.__col
1307 |         first = True
1308 |         name = ''
1309 |         # File names may wrap, but if they do, TeX will always print a
1310 |         # newline before the open paren
1311 |         while first or (initcol == 1 and self.__lookingat('\n')
1312 |                         and self.__col >= 79):
1313 |             if not first:
1314 |                 self.__pos += 1
1315 |             m = self.__lookingatre(r'[^(){} \n]*')
1316 |             name += m.group()
1317 |             self.__pos = m.end()
1318 |             first = False
1319 |         return name
1320 | 
1321 |     def __simplify_message(self, msg):
1322 |         msg = re.sub(r'^(?:Package |Class |LaTeX |pdfTeX )?([^ ]+) (?:Error|Warning): ',
1323 |                      r'[\1] ', msg, flags=re.I)
1324 |         msg = re.sub(r'\.$', '', msg)
1325 |         msg = re.sub(r'has occurred (while \\output is active)', r'\1', msg)
1326 |         return msg
1327 | 
1328 |     def __errmessage(self):
1329 |         # Procedure print_err (including \errmessage, itself used by
1330 |         # LaTeX's \GenericError and all of its callers), as well as
1331 |         # fatal_error.  Prints "\n!  " followed by error text
1332 |         # ("Emergency stop" in the case of fatal_error).  print_err is
1333 |         # always followed by a call to error, which prints a period,
1334 |         # and a newline...
1335 |         msg = self.__consume_line(unwrap=True)[1:].strip()
1336 |         is_fatal_error = (msg == 'Emergency stop.')
1337 |         msg = self.__simplify_message(msg)
1338 |         # ... and then calls show_context, which prints the input
1339 |         # stack as pairs of lines giving the context.  These context
1340 |         # lines are truncated so they never wrap.  Each pair of lines
1341 |         # will start with either "<something> " if the context is a
1342 |         # token list, "<*> " for terminal input (or command line),
1343 |         # "<read ...>" for stream reads, something like "\macroname
1344 |         # #1->" for macros (though everything after \macroname is
1345 |         # subject to being elided as "..."), or "l.[0-9]+ " if it's a
1346 |         # file.  This is followed by the errant input with a line
1347 |         # break where the error occurred.
1348 |         lineno = None
1349 |         found_context = False
1350 |         stack = []
1351 |         while self.__avail:
1352 |             m1 = self.__lookingatre(r'<([a-z ]+|\*|read [^ >]*)> |\\.*(->|...)')
1353 |             m2 = self.__lookingatre('l\.[0-9]+ ')
1354 |             if m1:
1355 |                 found_context = True
1356 |                 pre = self.__consume_line().rstrip('\n')
1357 |                 stack.append(pre)
1358 |             elif m2:
1359 |                 found_context = True
1360 |                 pre = self.__consume_line().rstrip('\n')
1361 |                 info, rest = pre.split(' ', 1)
1362 |                 lineno = int(info[2:])
1363 |                 stack.append(rest)
1364 |             elif found_context:
1365 |                 # Done with context
1366 |                 break
1367 |             if found_context:
1368 |                 # Consume the second context line
1369 |                 post = self.__consume_line().rstrip('\n')
1370 |                 # Clean up goofy trailing ^^M TeX sometimes includes
1371 |                 post = re.sub(r'\^\^M$', '', post)
1372 |                 if post[:len(pre)].isspace() and not post.isspace():
1373 |                     stack.append(len(stack[-1]))
1374 |                     stack[-2] += post[len(pre):]
1375 |             else:
1376 |                 # If we haven't found the context, skip the line.
1377 |                 self.__skip_line()
1378 |         stack_msg = ''
1379 |         for i, trace in enumerate(stack):
1380 |             stack_msg += ('\n         ' + (' ' * trace) + '^'
1381 |                           if isinstance(trace, int) else
1382 |                           '\n      at ' + trace.rstrip() if i == 0 else
1383 |                           '\n    from ' + trace.rstrip())
1384 | 
1385 |         if is_fatal_error:
1386 |             # fatal_error always prints one additional line of message
1387 |             info = self.__consume_line().strip()
1388 |             if info.startswith('*** '):
1389 |                 info = info[4:]
1390 |             msg += ': '  + info.lstrip('(').rstrip(')')
1391 | 
1392 |         self.__message('error', lineno, msg + stack_msg)
1393 |         self.__fatal_error = True
1394 | 
1395 |     def __pdftex_fail(self):
1396 |         # Procedure pdftex_fail.  Prints "\n!pdfTeX error: ", the
1397 |         # message, and a newline.  Unlike print_err, there's never
1398 |         # context.
1399 |         msg = self.__consume_line(unwrap=True)[1:].strip()
1400 |         msg = self.__simplify_message(msg)
1401 |         self.__message('error', None, msg)
1402 | 
1403 |     def __runaway(self):
1404 |         # Procedure runaway.  Prints "\nRunaway ...\n" possibly
1405 |         # followed by token list (user text).  Always followed by a
1406 |         # call to print_err, so skip lines until we see the print_err.
1407 |         self.__skip_line()      # Skip "Runaway ...\n"
1408 |         if not self.__lookingat('! ') and self.__avail:
1409 |             # Skip token list, which is limited to one line
1410 |             self.__skip_line()
1411 | 
1412 |     def __bad_box(self):
1413 |         # Function hpack and vpack.  hpack prints a warning, a
1414 |         # newline, then a short_display of the offending text.
1415 |         # Unfortunately, there's nothing indicating the end of the
1416 |         # offending text, but it should be on one (possible wrapped)
1417 |         # line.  vpack prints a warning and then, *unless output is
1418 |         # active*, a newline.  The missing newline is probably a bug,
1419 |         # but it sure makes our lives harder.
1420 |         origpos = self.__pos
1421 |         msg = self.__consume_line()
1422 |         m = re.search(r' in (?:paragraph|alignment) at lines ([0-9]+)--([0-9]+)', msg) or \
1423 |             re.search(r' detected at line ([0-9]+)', msg)
1424 |         if m:
1425 |             # Sometimes TeX prints crazy line ranges like "at lines
1426 |             # 8500--250".  The lower number seems roughly sane, so use
1427 |             # that.  I'm not sure what causes this, but it may be
1428 |             # related to shipout routines messing up line registers.
1429 |             lineno = min(int(m.group(1)), int(m.groups()[-1]))
1430 |             msg = msg[:m.start()]
1431 |         else:
1432 |             m = re.search(r' while \\output is active', msg)
1433 |             if m:
1434 |                 lineno = None
1435 |                 msg = msg[:m.end()]
1436 |             else:
1437 |                 self.__message('warning', None,
1438 |                                'malformed bad box message in log')
1439 |                 return
1440 |         # Back up to the end of the known message text
1441 |         self.__pos = origpos + m.end()
1442 |         if self.__lookingat('\n'):
1443 |             # We have a newline, so consume it and look for the
1444 |             # offending text.
1445 |             self.__pos += 1
1446 |             # If there is offending text, it will start with a font
1447 |             # name, which will start with a \.
1448 |             if 'hbox' in msg and self.__lookingat('\\'):
1449 |                 self.__consume_line(unwrap=True)
1450 |         msg = self.__simplify_message(msg) + ' (page {})'.format(self.__pageno)
1451 |         cls = msg.split(None, 1)[0].lower()
1452 |         self.__message('warning', lineno, msg, cls=cls)
1453 | 
1454 |     def __generic_warning(self):
1455 |         # Warnings produced by LaTeX's \GenericWarning (which is
1456 |         # called by \{Package,Class}Warning and \@latex@warning),
1457 |         # warnings produced by pdftex_warn, and other random warnings.
1458 |         msg, cls = self.__generic_info()
1459 |         # Most warnings include an input line emitted by \on@line
1460 |         m = re.search(' on input line ([0-9]+)', msg)
1461 |         if m:
1462 |             lineno = int(m.group(1))
1463 |             msg = msg[:m.start()]
1464 |         else:
1465 |             lineno = None
1466 |         msg = self.__simplify_message(msg)
1467 |         self.__message('warning', lineno, msg, cls=cls)
1468 | 
1469 |     def __generic_info(self):
1470 |         # Messages produced by LaTeX's \Generic{Error,Warning,Info}
1471 |         # and things that look like them
1472 |         msg = self.__consume_line(unwrap=True).strip()
1473 |         # Package and class messages are continued with lines
1474 |         # containing '(package name)            '
1475 |         pkg_name = msg.split(' ', 2)[1]
1476 |         prefix = '(' + pkg_name + ')            '
1477 |         while self.__lookingat(prefix):
1478 |             # Collect extra lines.  It's important that we keep these
1479 |             # because they may contain context information like line
1480 |             # numbers.
1481 |             extra = self.__consume_line(unwrap=True)
1482 |             msg += ' ' + extra[len(prefix):].strip()
1483 |         return msg, pkg_name.lower()
1484 | 
1485 | ##################################################################
1486 | # BibTeX task
1487 | #
1488 | 
class BibTeX(Task):
    """Task that runs BibTeX (or biber) for a LaTeX task.

    The command is treated as biber when its name contains "biber".
    The task's inputs are the bibliography-related lines of the .aux
    files plus every style/database file the tool reports opening; its
    outputs are the .bbl and .blg files.
    """

    def __init__(self, db, latex_task, cmd, cmd_args, nowarns, obj_dir):
        # nowarns is accepted but not used by this task.
        super().__init__(db, 'bibtex::' + normalize_input_path(
            latex_task.get_tex_filename()))
        self.__latex_task = latex_task
        self.__cmd = cmd
        self.__cmd_args = cmd_args
        self.__obj_dir = obj_dir

    def stable(self):
        """Return True if running this task would not change anything."""
        # If bibtex doesn't have its inputs, then it's stable because
        # it has no effect on system state.
        jobname = self.__latex_task.get_jobname()
        if jobname is None:
            # We don't know where the .aux file is until latex has run
            return True
        if not os.path.exists(jobname + '.aux'):
            # Input isn't ready, so bibtex will simply fail without
            # affecting system state.  Hence, this task is trivially
            # stable.
            return True
        if not self.__find_bib_cmds(os.path.dirname(jobname), jobname + '.aux'):
            # The tex file doesn't refer to any bibliographic data, so
            # don't run bibtex.
            return True

        return super().stable()

    def __find_bib_cmds(self, basedir, auxname, stack=()):
        """Return True if auxname, or any aux file it \\@inputs, contains
        bibliography commands.

        stack holds the aux files currently being scanned; revisiting
        one of them raises TaskError.
        """
        debug('scanning for bib commands in {}'.format(auxname))
        if auxname in stack:
            raise TaskError('.aux file loop')
        stack = stack + (auxname,)

        try:
            with open(auxname, errors='surrogateescape') as aux:
                aux_data = aux.read()
        except FileNotFoundError:
            # The aux file may not exist if latex aborted
            return False
        if re.search(r'^\\bibstyle\{', aux_data, flags=re.M) or \
           re.search(r'^\\bibdata\{',  aux_data, flags=re.M):
            return True

        if re.search(r'^\\abx@aux@cite\{', aux_data, flags=re.M):
            # biber citation
            return True

        # Recurse into included aux files (see aux_input_command), in
        # case \bibliography appears in an \included file.
        for m in re.finditer(r'^\\@input\{([^}]*)\}', aux_data, flags=re.M):
            if self.__find_bib_cmds(basedir, os.path.join(basedir, m.group(1)),
                                    stack):
                return True

        return False

    def _input_args(self):
        # biber is given the bare job name; bibtex is given the .aux
        # file name.
        aux_name = os.path.basename(self.__latex_task.get_jobname())
        if not self.__is_biber():
            aux_name += '.aux'
        return [self.__cmd] + self.__cmd_args + [aux_name]

    def _input_cwd(self):
        return os.path.dirname(self.__latex_task.get_jobname())

    def _input_auxfile(self, auxname):
        # We don't consider the .aux files regular inputs.
        # Instead, we extract just the bit that BibTeX cares about
        # and depend on that.  See get_aux_command_and_process in
        # bibtex.web.
        debug('hashing filtered aux file {}', auxname)
        try:
            with open(auxname, 'rb') as aux:
                h = hashlib.sha256()
                for line in aux:
                    if line.startswith((b'\\citation{', b'\\bibdata{',
                                        b'\\bibstyle{', b'\\@input{',
                                        b'\\abx@aux@cite{')):
                        h.update(line)
                return h.hexdigest()
        except FileNotFoundError:
            debug('{} does not exist', auxname)
            return None

    def __path_join(self, first, rest):
        if rest is None:
            # Append ':' to keep the default search path
            return first + ':'
        return first + ':' + rest

    def __is_biber(self):
        return "biber" in self.__cmd

    def _execute(self):
        # This gets complicated when \include is involved.  \include
        # switches to a different aux file and records its path in the
        # main aux file.  However, BibTeX does not consider this path
        # to be relative to the location of the main aux file, so we
        # have to run BibTeX *in the output directory* for it to
        # follow these includes (there's no way to tell BibTeX other
        # locations to search).  Unfortunately, this means BibTeX will
        # no longer be able to find local bib or bst files, so we
        # tell it where to look by setting BIBINPUTS and BSTINPUTS
        # (luckily we can control this search).  We have to pass this
        # same environment down to Kpathsea when we resolve the paths
        # in BibTeX's log.
        args, cwd = self._input('args'), self._input('cwd')
        debug('running {} in {}', args, cwd)

        env = os.environ.copy()
        env['BIBINPUTS'] = self.__path_join(os.getcwd(), env.get('BIBINPUTS'))
        env['BSTINPUTS'] = self.__path_join(os.getcwd(), env.get('BSTINPUTS'))

        try:
            verbose_cmd(args, cwd, env)
            # The context manager closes the output pipe once we are
            # done reading from it.
            with subprocess.Popen(args, cwd=cwd, env=env,
                                  stdin=subprocess.DEVNULL,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT) as p:
                stdout = self.__feed_terminal(p.stdout)
                status = p.wait()
        except OSError as e:
            raise TaskError('failed to execute bibtex task: ' + str(e)) from e

        inputs, auxnames, outbase = self.__parse_inputs(stdout, cwd, env)
        if not inputs and not auxnames:
            # BibTeX failed catastrophically.
            print(stdout, file=sys.stderr)
            raise TaskError('failed to execute bibtex task')
        if outbase is None:
            # Neither an aux file nor a log file name was reported, so
            # there is no way to name the outputs.
            print(stdout, file=sys.stderr)
            raise TaskError('failed to determine bibtex output file name')

        # Register environment variable inputs
        for env_var in ['TEXMFOUTPUT', 'BSTINPUTS', 'BIBINPUTS', 'PATH']:
            self._input('env', env_var)

        # Register file inputs
        for path in auxnames:
            self._input('auxfile', path)
        for path in inputs:
            self._input('file', path)

        if self.__is_biber():
            outbase = os.path.join(cwd, outbase)
        outputs = [outbase + '.bbl', outbase + '.blg']
        return RunResult(outputs, {'outbase': outbase, 'status': status,
                                   'inputs': inputs})

    def __feed_terminal(self, stdout):
        """Read the tool's output to EOF, reporting each database file
        as progress, and return everything read as one string."""
        with Progress('bibtex') as progress:
            buf, linebuf = [], ''
            while True:
                data = os.read(stdout.fileno(), 4096)
                if not data:
                    break
                # See "A note about encoding" above
                data = data.decode('ascii', errors='surrogateescape')
                buf.append(data)
                linebuf += data
                while '\n' in linebuf:
                    line, _, linebuf = linebuf.partition('\n')
                    if line.startswith('Database file'):
                        # Only report lines that actually carry a name
                        # after ': '.
                        _, sep, dbname = line.partition(': ')
                        if sep:
                            progress.update(dbname)
        return ''.join(buf)

    def __parse_inputs(self, log, cwd, env):
        """Extract (inputs, auxnames, outbase) from the tool's output.

        outbase is None when the output names neither an aux file nor
        a log file.
        """
        # BibTeX conveniently logs every file that it opens, and its
        # log is actually sensible (see calls to a_open_in in
        # bibtex.web.)  The only trick is that these file names are
        # pre-kpathsea lookup and may be relative to the directory we
        # ran BibTeX in.
        #
        # Because BibTeX actually depends on very little in the .aux
        # file (and it's likely other things will change in the .aux
        # file), we don't count the whole .aux file as an input, but
        # instead depend only on the lines that matter to BibTeX.
        kpathsea = Kpathsea('bibtex')
        inputs = []
        auxnames = []
        outbase = None
        for line in log.splitlines():
            m = re.match('(?:The top-level auxiliary file:'
                         '|A level-[0-9]+ auxiliary file:) (.*)', line)
            if m:
                auxnames.append(os.path.join(cwd, m.group(1)))
                continue
            m = re.match('(?:(The style file:)|(Database file #[0-9]+:)) (.*)',
                         line)
            if m:
                filename = m.group(3)
                if m.group(1):
                    filename = kpathsea.find_file(filename, 'bst', cwd, env)
                elif m.group(2):
                    filename = kpathsea.find_file(filename, 'bib', cwd, env)

                # If this path is relative to the source directory,
                # clean it up for error reporting and portability of
                # the dependency DB
                if filename.startswith('/'):
                    relname = os.path.relpath(filename)
                    if '../' not in relname:
                        filename = relname

                inputs.append(filename)

            # biber output
            m = re.search("Found BibTeX data source '(.*?)'",
                         line)
            if m:
                filename = m.group(1)
                inputs.append(filename)

            m = re.search("Logfile is '(.*?)'", line)
            if m:
                outbase = m.group(1)[:-4]

        if outbase is None and auxnames:
            # Strip the ".aux" suffix of the top-level aux file.  When
            # no aux file was reported either, leave outbase as None
            # so the caller can report the failure.
            outbase = auxnames[0][:-4]

        return inputs, auxnames, outbase

    def report(self):
        """Emit the messages found in the .blg log and return the exit
        status to report (0 for success, 1 for failure)."""
        extra = self._get_result_extra()
        if extra is None:
            return 0

        # Parse and pretty-print the log
        with open(extra['outbase'] + '.blg', 'rt') as blg:
            log = blg.read()
        inputs = extra['inputs']
        for msg in BibTeXFilter(log, inputs).get_messages():
            msg.emit()

        # BibTeX exits with 1 if there are warnings, 2 if there are
        # errors, and 3 if there are fatal errors (sysdep.h).
        # Translate to a normal UNIX exit status.
        if extra['status'] >= 2:
            return 1
        return 0
1726 | 
class BibTeXFilter:
    """Parse a BibTeX (or biber) log into warning and error Messages.

    data is the text of the log; inputs is the list of database/style
    files that were read, used to look up where a cited key is
    defined.
    """

    def __init__(self, data, inputs):
        self.__inputs = inputs
        # Lazily-built map from entry key to (filename, lineno)
        self.__key_locs = None

        self.__messages = []

        # Some messages are split across two lines, so each line is
        # processed together with the one before it.
        prev_line = ''
        for line in data.splitlines():
            msg = self.__process_line(prev_line, line)
            if msg is not None:
                self.__messages.append(Message(*msg))
            prev_line = line

    def get_messages(self):
        """Return a list of warning and error Messages."""
        # BibTeX reports most errors in no particular order.  Sort by
        # file and line.
        return sorted(self.__messages,
                      key=lambda msg: (msg.filename or '', msg.lineno or 0))

    def __process_line(self, prev_line, line):
        """Return a (type, filename, lineno, message) tuple if line is a
        recognized message, or None otherwise.

        lineno is always an int or None so that messages can be sorted.
        """
        m = None
        def match(regexp):
            nonlocal m
            m = re.match(regexp, line)
            return m

        # BibTeX has many error paths, but luckily the set is closed,
        # so we can find all of them.  This first case is the
        # workhorse format.
        #
        # AUX errors: aux_err/aux_err_return/aux_err_print
        #
        # BST errors: bst_ln_num_print/bst_err/
        # bst_err_print_and_look_for_blank_line_return/
        # bst_warn_print/bst_warn/
        # skip_token/skip_token_print/
        # bst_ext_warn/bst_ext_warn_print/
        # bst_ex_warn/bst_ex_warn_print/
        # bst_mild_ex_warn/bst_mild_ex_warn_print/
        # bst_string_size_exceeded
        #
        # BIB errors: bib_ln_num_print/
        # bib_err_print/bib_err/
        # bib_warn_print/bib_warn/
        # bib_one_of_two_expected_err/macro_name_warning/
        if match('(.*?)---?line ([0-9]+) of file (.*)'):
            # Sometimes the real error is printed on the previous line
            if m.group(1) == 'while executing':
                # bst_ex_warn.  The real message is on the previous line
                text = prev_line
            else:
                text = m.group(1) or prev_line
            typ, msg = self.__canonicalize(text)
            return (typ, m.group(3), int(m.group(2)), msg)

        # overflow/print_overflow
        if match('Sorry---you\'ve exceeded BibTeX\'s (.*)'):
            return ('error', None, None, 'capacity exceeded: ' + m.group(1))
        # confusion/print_confusion
        if match('(.*)---this can\'t happen$'):
            return ('error', None, None, 'internal error: ' + m.group(1))
        # aux_end_err
        if match('I found (no .*)---while reading file (.*)'):
            return ('error', m.group(2), None, m.group(1))
        # bad_cross_reference_print/
        # nonexistent_cross_reference_error/
        # @<Complain about a nested cross reference@>
        #
        # This is split across two lines.  Match the second.
        if match('^refers to entry "'):
            typ, msg = self.__canonicalize(prev_line + ' ' + line)
            msg = re.sub('^a (bad cross reference)', '\\1', msg)
            # Try to give this key a location.  The key is the quoted
            # text after "--entry" on the first line.
            filename = lineno = None
            m2 = re.search(r'--entry "([^"]*)"', prev_line)
            if m2:
                filename, lineno = self.__find_key(m2.group(1))
            return (typ, filename, lineno, msg)
        # print_missing_entry
        if match('Warning--I didn\'t find a database entry for (".*")'):
            return ('warning', None, None,
                    'no database entry for ' + m.group(1))
        # x_warning
        if match('Warning--(.*)'):
            # Most formats give warnings about "something in <key>".
            # Try to match it up, preferring the last candidate.
            filename = lineno = None
            for m2 in reversed(list(re.finditer(r' in ([^, \t\n]+)\b', line))):
                filename, lineno = self.__find_key(m2.group(1))
                if filename:
                    break
            return ('warning', filename, lineno, m.group(1))
        # @<Clean up and leave@>
        if match('Aborted at line ([0-9]+) of file (.*)'):
            return ('info', m.group(2), int(m.group(1)), 'aborted')

        # biber type errors
        if match('^.*> WARN - (.*)$'):
            m2 = re.match("(.*) in file '(.*?)', skipping ...", m.group(1))
            if m2:
                # No line number is given; use the int 0 (not a
                # string) so the message sorts with the others.
                return ('warning', m2.group(2), 0, m2.group(1))
            return ('warning', None, None, m.group(1))

        if match('^.*> ERROR - (.*)$'):
            m2 = re.match(r"BibTeX subsystem: (.*?), line (\d+), (.*)$",
                          m.group(1))
            if m2:
                return ('error', m2.group(1), int(m2.group(2)), m2.group(3))
            return ('error', None, None, m.group(1))

        # Not a message we recognize
        return None

    def __canonicalize(self, msg):
        """Split a raw message into (type, text): strip a leading
        "Warning--" or "I"/"I'm"/"I was", and lower-case the first
        letter."""
        if msg.startswith('Warning'):
            msg = re.sub('^Warning-*', '', msg)
            typ = 'warning'
        else:
            typ = 'error'
        msg = re.sub('^I(\'m| was)? ', '', msg)
        msg = msg[:1].lower() + msg[1:]
        return typ, msg

    def __find_key(self, key):
        """Return (filename, lineno) of the first definition of key in
        the input files, or (None, None) if it isn't found."""
        if self.__key_locs is None:
            p = BibTeXKeyParser()
            self.__key_locs = {}
            for filename in self.__inputs:
                try:
                    with open(filename, 'rt',
                              errors='surrogateescape') as bib:
                        data = bib.read()
                except OSError:
                    # Key locations only decorate messages; an
                    # unreadable input must not abort log reporting.
                    continue
                for pkey, lineno in p.parse(data):
                    self.__key_locs.setdefault(pkey, (filename, lineno))
        return self.__key_locs.get(key, (None, None))
1860 | 
class BibTeXKeyParser:
    """Just enough of a BibTeX parser to find keys."""

    def parse(self, data):
        """Yield a (key, lineno) pair for each regular entry in 'data'.

        'data' is the text of a BibTeX database.  lineno is the 1-based
        line number of the key.  @comment, @preamble and @string
        commands (in any letter case) never produce a key.
        """
        IDENT_RE = '(?![0-9])([^\x00-\x20\x80-\xff \t"#%\'(),={}]+)'
        self.__pos, self.__data = 0, data
        # Find the next entry
        while self.__consume('[^@]*@[ \t\n]*'):
            # What type of entry?
            if not self.__consume(IDENT_RE + '[ \t\n]*'):
                continue
            # BibTeX matches entry types and command names without
            # regard to case, so normalize before comparing.  Without
            # this, "@STRING{foo = ...}" would report "foo" as a key.
            typ = self.__m.group(1).lower()
            if typ == 'comment':
                continue
            # Remember where the opening delimiter is so the body can
            # be skipped from there after peeking at the key.
            start = self.__pos
            if not self.__consume('([{(])[ \t\n]*'):
                continue
            # A key inside {...} also stops at '}'; inside (...) it
            # does not.
            closing, key_re = {'{' : ('}', '([^, \t\n}]*)'),
                               '(' : (')', '([^, \t\n]*)')}[self.__m.group(1)]
            if typ not in ('preamble', 'string'):
                # Regular entry; get key
                if self.__consume(key_re):
                    yield self.__m.group(1), self.__lineno()
            # Consume body of entry
            self.__pos = start
            self.__balanced(closing)

    def __consume(self, regexp):
        """Match 'regexp' at the current position.

        On success the position advances past the match.  The match
        object (or None) is stored in self.__m and returned.
        """
        self.__m = re.compile(regexp).match(self.__data, self.__pos)
        if self.__m:
            self.__pos = self.__m.end()
        return self.__m

    def __lineno(self):
        """Return the 1-based line number of the current position."""
        return self.__data.count('\n', 0, self.__pos) + 1

    def __balanced(self, closing):
        """Skip an entry body delimited by 'closing'.

        The current position must be at the opening delimiter.  Braces
        nest; 'closing' ends the body only when seen outside any nested
        braces.  Scanning also stops if the data runs out first.
        """
        # Step over the opening delimiter itself
        self.__pos += 1
        level = 0
        skip = re.compile('[{}' + closing + ']')
        while True:
            m = skip.search(self.__data, self.__pos)
            if not m:
                break
            self.__pos = m.end()
            ch = m.group(0)
            if level == 0 and ch == closing:
                break
            elif ch == '{':
                level += 1
            elif ch == '}':
                level -= 1
1913 | 
class Kpathsea:
    """Look up files by running the 'kpsewhich' command."""

    def __init__(self, program_name):
        # Passed to kpsewhich as the -progname argument on every lookup
        self.__progname = program_name

    def find_file(self, name, format, cwd=None, env=None):
        """Return the resolved path of 'name' or None.

        Runs kpsewhich with the given -format in directory 'cwd' and
        with environment 'env'.  An exit status of 1 is reported as
        None; any other non-zero status re-raises the
        CalledProcessError.  When 'cwd' is given, the result is joined
        onto it.
        """

        command = ['kpsewhich', '-progname', self.__progname,
                   '-format', format, name]
        try:
            verbose_cmd(command, cwd, env)
            output = subprocess.check_output(
                command, cwd=cwd, env=env, universal_newlines=True)
        except subprocess.CalledProcessError as err:
            if err.returncode == 1:
                return None
            raise
        resolved = output.strip()
        return resolved if cwd is None else os.path.join(cwd, resolved)
1934 | 
# Script entry point: call main() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
1937 | 


--------------------------------------------------------------------------------