├── .gitignore
├── 14882.css
├── CxxParser.hs
├── Document.hs
├── LICENSE
├── LaTeXBase.hs
├── LaTeXParser.hs
├── Load14882.hs
├── MathJax.hs
├── Pages.hs
├── README
├── RawDocument.hs
├── Render.hs
├── SectionPages.hs
├── Sentences.hs
├── Setup.hs
├── Toc.hs
├── Util.hs
├── colored.css
├── cxxdraft-htmlgen.cabal
├── expanded.css
├── fulltoc.css
├── genhtml.hs
├── icon.png
├── macros.tex
├── mathjax-batch
├── normative-only.css
├── stack.yaml
└── toc.css
/.gitignore:
--------------------------------------------------------------------------------
1 | 14882
2 | dist
3 | tags
4 | node_modules
5 | .stack-work/
6 |
--------------------------------------------------------------------------------
/14882.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: 'Noto Serif';
3 | hyphens: auto;
4 | line-height: 1.5;
5 | margin-left: 20mm;
6 | margin-right: 16mm;
7 | margin-top: 12mm;
8 | margin-bottom: 12mm;
9 | font-size: 10pt;
10 | }
11 |
12 | div {
13 | background: inherit;
14 | }
15 |
16 | div.wrapper {
17 | max-width: 20cm;
18 | margin: auto;
19 | }
20 |
21 | div.texpara {
22 | margin-top: 3pt;
23 | margin-bottom: 3pt;
24 | }
25 |
26 | table div.texpara {
27 | margin-top: 0;
28 | margin-bottom: 0;
29 | }
30 |
31 | table.enumerate div.texpara {
32 | margin-top: 3pt;
33 | margin-bottom: 3pt;
34 | }
35 |
36 | ul {
37 | list-style-type: none;
38 | padding-left: 9mm;
39 | margin-top: 0;
40 | margin-bottom: 0;
41 | }
42 |
43 | ol {
44 | margin-top: 0;
45 | margin-bottom: 0;
46 | }
47 |
48 | a { text-decoration: none; }
49 |
50 | a.hidden_link {
51 | text-decoration: none;
52 | color: inherit;
53 | }
54 |
55 | li {
56 | margin-top: 3pt;
57 | margin-bottom: 3pt;
58 | }
59 |
60 | h1 {
61 | line-height: 1;
62 | margin-top: 10pt;
63 | margin-bottom: 10pt;
64 | }
65 |
66 | h2 {
67 | line-height: 1;
68 | font-size: 14pt;
69 | margin-top: 10pt;
70 | margin-bottom: 10pt;
71 | }
72 |
73 | h2::after {
74 | content: "";
75 | clear: both;
76 | display: table;
77 | }
78 |
79 | h3 {
80 | line-height: 1;
81 | margin-top: 10pt;
82 | margin-bottom: 10pt;
83 | }
84 |
85 | h3::after {
86 | content: "";
87 | clear: both;
88 | display: table;
89 | }
90 |
91 | h4 {
92 | line-height: 1;
93 | margin-top: 10pt;
94 | margin-bottom: 10pt;
95 | }
96 |
97 | h4::after {
98 | content: "";
99 | clear: both;
100 | display: table;
101 | }
102 |
103 | ul > li:before {
104 | content: "\2014";
105 | position: absolute;
106 | margin-left: -1.5em;
107 | }
108 |
109 | .shy:before {
110 | content: "\00ad";
111 | /* This is U+00AD SOFT HYPHEN, same as ­, but we put it in :before
112 | to stop it from being included when the text is copied to the clipboard
113 | with Firefox, which is especially annoying when copying to a terminal,
114 | where the hyphen characters will show up. */
115 | }
116 |
117 | :target { background-color: #C9FBC9; }
118 | :target .codeblock { background-color: #C9FBC9; }
119 | :target ul { background-color: #C9FBC9; }
120 |
121 | .abbr_ref { float: right; }
122 |
123 | .folded_abbr_ref { float: right; }
124 | :target .folded_abbr_ref { display: none; }
125 |
126 | :target .unfolded_abbr_ref { float: right; display: inherit; }
127 | .unfolded_abbr_ref { display: none; }
128 |
129 | .secnum { display: inline-block; min-width: 35pt; }
130 | .annexnum { display: block; }
131 |
132 | div.sourceLinkParent {
133 | float: right;
134 | }
135 |
136 | a.sourceLink {
137 | position: absolute;
138 | opacity: 0;
139 | margin-left: 10pt;
140 | }
141 |
142 | a.sourceLink:hover {
143 | opacity: 1;
144 | }
145 |
146 | a.itemDeclLink {
147 | position: absolute;
148 | font-size: 75%;
149 | text-align: right;
150 | width: 5em;
151 | opacity: 0;
152 | }
153 | a.itemDeclLink:hover { opacity: 1; }
154 |
155 | div.marginalizedparent {
156 | position: relative;
157 | text-align: left;
158 | left: -18mm;
159 | }
160 |
161 | a.marginalized {
162 | width: 15mm;
163 | position: absolute;
164 | font-size: 7pt;
165 | text-align: right;
166 | }
167 |
168 | a.enumerated_item_num {
169 | display: block;
170 | margin-top: 3pt;
171 | margin-bottom: 3pt;
172 | margin-right: 6pt;
173 | }
174 |
175 | div.para {
176 | margin-bottom: 6pt;
177 | margin-top: 6pt;
178 | text-align: justify;
179 | min-height: 1.2em;
180 | }
181 |
182 | div.section { text-align: justify; }
183 | div.sentence { display: inline; }
184 |
185 | a.index {
186 | position: relative;
187 | float: right;
188 | right: -1em;
189 | display: none;
190 | }
191 |
192 | a.index:before {
193 | position: absolute;
194 | content: "⟵";
195 | background-color: #C9FBC9;
196 | }
197 |
198 | a.index:target {
199 | display: inline;
200 | }
201 |
202 | .indexitems {
203 | margin-left: 2em;
204 | text-indent: -2em;
205 | }
206 |
207 | div.itemdescr {
208 | margin-left: 12mm;
209 | }
210 |
211 | .bnf {
212 | font-family: 'Noto Sans';
213 | font-size: 10pt;
214 | font-style: italic;
215 | margin-left: 25pt;
216 | margin-right: -15mm;
217 | margin-top: 0.5em;
218 | margin-bottom: 0.5em;
219 | text-indent: -3em;
220 | padding-left: 3em;
221 | line-height: 1.5;
222 | }
223 |
224 | div.bnf span.texttt { font-family: 'Noto Sans Mono'; font-style: normal; }
225 |
226 | .rebnf {
227 | font-family: 'Noto Serif';
228 | font-style: italic;
229 | margin-top: 0.5em;
230 | margin-bottom: 0.5em;
231 | margin-left: 30pt;
232 | text-indent: -3em;
233 | padding-left: 3em;
234 | line-height: 1.5;
235 | }
236 |
237 | .simplebnf {
238 | font-family: 'Noto Serif';
239 | font-style: italic;
240 | font-size: 10pt;
241 | margin-top: 0.5em;
242 | margin-bottom: 0.5em;
243 | margin-left: 30pt;
244 | line-height: 1.5;
245 | }
246 |
247 | span.textnormal {
248 | font-style: normal;
249 | font-family: 'Noto Serif';
250 | font-size: 10pt;
251 | white-space: normal;
252 | }
253 |
254 | .bnf span.textnormal {
255 | font-style: normal;
256 | font-family: 'Noto Serif';
257 | font-size: 10pt;
258 | white-space: normal;
259 | }
260 |
261 | p {
262 | margin-top: 4pt;
263 | margin-bottom: 4pt;
264 | }
265 |
266 | span.rlap {
267 | display: inline-block;
268 | width: 0px;
269 | text-indent: 0;
270 | }
271 |
272 | span.terminal {
273 | font-family: 'Noto Sans Mono';
274 | font-style: normal;
275 | font-size: 9pt;
276 | white-space: pre-wrap;
277 | }
278 |
279 | span.noncxxterminal {
280 | font-family: 'Noto Sans Mono';
281 | font-style: normal;
282 | font-size: 9pt;
283 | }
284 |
285 | span.term { font-style: italic; }
286 | span.tcode { font-family: 'Noto Sans Mono'; font-style: normal; }
287 | span.textbf { font-weight: bold; }
288 | span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
289 | div.footnote span.textsf { font-family: 'Noto Sans'; font-size: 8pt; }
290 | .bnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
291 | .simplebnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
292 | .example span.textsf { font-family: 'Noto Sans'; font-size: 10pt; }
293 | span.textsc { font-variant: small-caps; }
294 | span.nontermdef { font-style: italic; font-family: 'Noto Sans'; font-size: 10pt; }
295 | span.emph { font-style: italic; }
296 | span.techterm { font-style: italic; }
297 | span.mathit { font-style: italic; }
298 | span.mathsf { font-family: 'Noto Sans'; }
299 | span.mathrm { font-family: 'Noto Serif'; font-style: normal; }
300 | span.textrm { font-family: 'Noto Serif'; font-size: 10pt; }
301 | span.textsl { font-style: italic; }
302 | span.mathtt { font-family: 'Noto Sans Mono'; font-style: normal; }
303 | span.mbox { font-family: 'Noto Serif'; font-style: normal; }
304 | span.ungap { display: inline-block; width: 2pt; }
305 | span.texttt { font-family: 'Noto Sans Mono'; }
306 | span.textit { font-style: italic; }
307 | div.footnote span.texttt { font-family: 'Noto Sans Mono'; }
308 | span.tcode_in_codeblock { font-family: 'Noto Sans Mono'; font-style: normal; font-size: 9pt; }
309 |
310 | span.phantom { color: white; }
311 | /* Unfortunately, this way the text is still selectable. Another
312 | option is display:none, but then we lose the nice layout.
313 | Todo: find proper solution. */
314 |
315 | span.math {
316 | font-style: normal;
317 | font-family: 'Noto Serif';
318 | font-size: 10pt;
319 | }
320 |
321 | span.mathblock {
322 | display: block;
323 | margin-left: auto;
324 | margin-right: auto;
325 | margin-top: 1.2em;
326 | margin-bottom: 1.2em;
327 | text-align: center;
328 | }
329 |
330 | span.mathalpha {
331 | font-style: italic;
332 | }
333 |
334 | span.synopsis {
335 | font-weight: bold;
336 | margin-top: 0.5em;
337 | display: block;
338 | }
339 |
340 | span.definition {
341 | font-weight: bold;
342 | display: block;
343 | }
344 |
345 | .codeblock {
346 | font-family: 'Noto Sans Mono';
347 | margin-left: 1.2em;
348 | line-height: 1.5;
349 | font-size: 9pt;
350 | white-space: pre;
351 | display: block;
352 | margin-top: 3pt;
353 | margin-bottom: 3pt;
354 | overflow: auto;
355 | margin-right: -15mm;
356 | }
357 |
358 | table .codeblock { margin-right: 0; }
359 |
360 | .outputblock {
361 | margin-left: 1.2em;
362 | line-height: 1.5;
363 | font-family: 'Noto Sans Mono';
364 | font-size: 9pt;
365 | }
366 |
367 | code {
368 | font-family: 'Noto Sans Mono';
369 | font-style: normal;
370 | }
371 |
372 | div.itemdecl {
373 | margin-top: 2ex;
374 | }
375 |
376 | code.itemdeclcode {
377 | white-space: pre;
378 | font-family: 'Noto Sans Mono';
379 | font-size: 9pt;
380 | display: block;
381 | overflow: auto;
382 | margin-right: -15mm;
383 | }
384 |
385 | .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 10pt; }
386 | .footnote .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 8pt; }
387 | .example .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; }
388 | .note .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; }
389 |
390 | span.keyword { color: #00607c; font-style: normal; }
391 | span.parenthesis { color: #af1915; }
392 | span.curlybracket { color: #af1915; }
393 | span.squarebracket { color: #af1915; }
394 | span.literal { color: #9F6807; }
395 | span.literalterminal { color: #9F6807; font-family: 'Noto Sans Mono'; font-style: normal; }
396 | span.operator { color: #570057; }
397 | span.anglebracket { color: #570057; }
398 | span.preprocessordirective { color: #6F4E37; }
399 |
400 | span.textsuperscript {
401 | vertical-align: super;
402 | font-size: smaller;
403 | line-height: 0;
404 | }
405 |
406 | .footnoteref {
407 | vertical-align: super;
408 | font-size: smaller;
409 | line-height: 0;
410 | }
411 |
412 | .footnote {
413 | font-size: 8pt;
414 | }
415 |
416 | .footnote .math {
417 | font-size: 8pt;
418 | }
419 |
420 | .footnotenum {
421 | display: inline-block;
422 | text-align: right;
423 | margin-right: 1mm;
424 | width: 4ch;
425 | }
426 |
427 | .footnoteBacklink {
428 | display: none;
429 | }
430 |
431 | :target .footnoteBacklink {
432 | display: inline-block;
433 | text-align: right;
434 | margin-right: 1mm;
435 | width: 4ch;
436 | }
437 |
438 | :target .footnotenum {
439 | display: none;
440 | }
441 |
442 | .footnoteSeparator {
443 | background: black;
444 | margin-top: 5mm;
445 | height: 1px;
446 | width: 6cm;
447 | }
448 |
449 | div.minipage {
450 | display: inline-block;
451 | margin-right: 3em;
452 | }
453 |
454 | div.numberedTable {
455 | text-align: center;
456 | margin-left: 1em;
457 | margin-right: 1em;
458 | margin-bottom: 12pt;
459 | margin-top: 8pt;
460 | }
461 |
462 | div.figure {
463 | text-align: center;
464 | margin-left: 2em;
465 | margin-right: 2em;
466 | margin-bottom: 12pt;
467 | margin-top: 3pt;
468 | }
469 |
470 | table {
471 | border: 1px solid black;
472 | border-collapse: collapse;
473 | margin-left: auto;
474 | margin-right: auto;
475 | margin-top: 7pt;
476 | text-align: left;
477 | }
478 |
479 | td, th {
480 | padding-left: 8pt;
481 | padding-right: 8pt;
482 | vertical-align: top;
483 | }
484 |
485 | td.empty {
486 | padding: 0px;
487 | padding-left: 1px;
488 | }
489 |
490 | td.left {
491 | text-align: left;
492 | }
493 |
494 | td.hidden {
495 | padding: 0;
496 | width: 0;
497 | }
498 |
499 | td.right {
500 | text-align: right;
501 | }
502 |
503 | td.center {
504 | text-align: center;
505 | }
506 |
507 | td.justify {
508 | text-align: justify;
509 | }
510 |
511 | td.border {
512 | border-left: 1px solid black;
513 | }
514 |
515 | tr.rowsep, td.cline {
516 | border-top: 1px solid black;
517 | }
518 |
519 | tr.capsep {
520 | border-top: 3px solid black;
521 | border-top-style: double;
522 | }
523 |
524 | th {
525 | border-bottom: 1px solid black;
526 | }
527 |
528 | span.centry {
529 | font-weight: bold;
530 | }
531 |
532 | div.table {
533 | display: block;
534 | margin-left: auto;
535 | margin-right: auto;
536 | text-align: center;
537 | width: 90%;
538 | }
539 |
540 | span.indented {
541 | background: inherit;
542 | display: block;
543 | margin-left: 2em;
544 | margin-bottom: 1em;
545 | margin-top: 1em;
546 | }
547 |
548 | span.uppercase {
549 | text-transform: uppercase;
550 | }
551 |
552 | span.ucode {
553 | font-variant: small-caps;
554 | text-transform: uppercase;
555 | font-size: 90%;
556 | }
557 |
558 | span.uname {
559 | font-variant: small-caps;
560 | text-transform: uppercase;
561 | font-size: 90%;
562 | }
563 |
564 | table.enumerate {
565 | border: 0;
566 | margin: 0;
567 | }
568 |
569 | table.enumerate td {
570 | padding: 0;
571 | }
572 |
573 | table.enumerate td:first-child {
574 | width: 1cm;
575 | text-align: right;
576 | }
577 |
578 | @media (prefers-color-scheme: dark) {
579 | body {
580 | background-color: #171717;
581 | color: #d0d0d0;
582 | }
583 |
584 | span.mjx-mstyle { color: #d0d0d0 !important }
585 |
586 | a:link { color: #64adff; }
587 | a:visited { color: #a36ae6; }
588 |
589 | a.hidden_link {
590 | text-decoration: none;
591 | color: inherit;
592 | }
593 |
594 | span.phantom { color: #171717; }
595 |
596 | a.index:before { color: #d0d0d0; background-color: #4b6353; }
597 |
598 | .comment { color: #35da00; }
599 | .footnote .comment { color: #35da00; }
600 | .example .comment { color: #35da00; }
601 | .note .comment { color: #35da00; }
602 |
603 | span.keyword { color: #12cabe; }
604 | span.parenthesis { color: #ff1515; }
605 | span.curlybracket { color: #ff1515; }
606 | span.squarebracket { color: #ff1515; }
607 | span.literal { color: #dfa837; }
608 | span.literalterminal { color: #dfa837; }
609 | span.operator { color: #baa6b9; }
610 | span.anglebracket { color: #baa6b9; }
611 | span.preprocessordirective { color: #b27c58; }
612 |
613 | table { border-color: #d0d0d0; }
614 | td.border { border-color: #d0d0d0; }
615 | td.border { border-left-color: #d0d0d0; }
616 | tr.rowsep, td.cline { border-top-color: #d0d0d0; }
617 | tr.capsep { border-top-color: #d0d0d0; }
618 | th { border-bottom-color: #d0d0d0; }
619 |
620 | .footnoteSeparator { background-color: #d0d0d0; }
621 |
622 | text { fill: #d0d0d0; }
623 | path { stroke: #d0d0d0; }
624 | polygon { stroke: #d0d0d0; fill: #d0d0d0; }
625 | ellipse { stroke: #d0d0d0; }
626 |
627 | :target { background-color: #4b6345; color: #ffffff; }
628 | :target .codeblock { background-color: #4b6345; }
629 | :target ul { background-color: #4b6345; }
630 | :target a:link { color: #9fcdff; }
631 | :target a:visited { color: #d39aff; }
632 | :target a.hidden_link { text-decoration: none; color: inherit; }
633 | :target span.keyword { color: #32eade; }
634 | :target span.parenthesis { color: #ff4060; font-weight: bold; }
635 | :target span.curlybracket { color: #ff4060; font-weight: bold; }
636 | :target span.squarebracket { color: #ff4060; font-weight: bold; }
637 | :target span.literal { color: #f0d060; }
638 | :target span.literalterminal { color: #f0d060; }
639 | :target span.operator { color: #dac6d9; }
640 | :target span.anglebracket { color: #dac6d9; }
641 | :target span.preprocessordirective { color: #e0968f; }
642 | :target .comment { color: #55ff00; }
643 | :target .footnote .comment { color: #55ff00; }
644 | :target .example .comment { color: #55ff00; }
645 | :target .note .comment { color: #55ff00; }
646 | }
647 |
--------------------------------------------------------------------------------
/CxxParser.hs:
--------------------------------------------------------------------------------
1 | {-# OPTIONS_GHC -fno-warn-tabs #-}
2 | {-# LANGUAGE
3 | OverloadedStrings,
4 | RecordWildCards,
5 | TupleSections,
6 | ViewPatterns,
7 | LambdaCase,
8 | TypeSynonymInstances,
9 | FlexibleInstances #-}
10 |
11 | module CxxParser (parseLiteral, parseComment, parseCppDirective) where
12 |
13 | import LaTeXBase (LaTeX, LaTeXUnit(..), ArgKind(..), concatRaws,
14 | texStripPrefix, texStripAnyPrefix, texStripInfix, texSpan, unconsRaw)
15 | import qualified Data.Text as Text
16 | import Data.Char (isAlpha, isSpace, isAlphaNum, isDigit)
17 | import Control.Arrow (first)
18 | import Prelude hiding ((.), (++))
19 | import Util ((.), (++), Text)
20 |
21 | texStripHash :: LaTeX -> Maybe LaTeX
22 | texStripHash x
23 | | Just x' <- texStripPrefix "#" x = Just x'
24 | | TeXComm "#" _ [] : x' <- x = Just x'
25 | | otherwise = Nothing
26 |
27 | cppDirectives :: [Text]
28 | cppDirectives = Text.words "include define elifndef elifdef ifndef endif ifdef pragma error undef line elif warning else if embed"
29 |
30 | spanLiteralChars :: String -> Maybe (String, String {- rest without the closing ' -})
31 | spanLiteralChars [] = Nothing
32 | spanLiteralChars ('\\' : '\'' : rest) = first ("\\'"++) . spanLiteralChars rest
33 | spanLiteralChars ('\'' : x) = Just ([], x)
34 | spanLiteralChars (c : rest) = first (c :) . spanLiteralChars rest
35 |
36 | parseLiteralChars :: LaTeX -> Maybe (LaTeX, LaTeX)
37 | parseLiteralChars [] = Nothing
38 | parseLiteralChars (TeXRaw s : rest) = case spanLiteralChars (Text.unpack s) of
39 | Nothing -> Nothing
40 | Just (_, []) -> first (TeXRaw s :) . (parseLiteralChars rest)
41 | Just (x, more) -> Just ([TeXRaw (Text.pack x)], TeXRaw (Text.pack more) : rest)
42 | parseLiteralChars (x : rest) = first (x :) . (parseLiteralChars rest)
43 |
44 | parseCharLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
45 | parseCharLiteral x
46 | | Just (pre, x') <- texStripAnyPrefix ["'", "u'", "L'", "U'", "u8'"] x
47 | , Just (before, x'') <- parseLiteralChars x'
48 | , (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x''
49 | = Just ([TeXRaw pre] ++ before ++ [TeXRaw $ "'" ++ suffix], x''')
50 | | otherwise = Nothing
51 |
52 | parseCppDirective :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
53 | parseCppDirective x
54 | | Just x'' <- texStripHash x
55 | , (spaces, x''') <- texSpan isSpace x''
56 | , Just (directive, x'''') <- texStripAnyPrefix cppDirectives x'''
57 | = Just ([TeXRaw ("#" ++ spaces ++ directive)], x'''')
58 | | otherwise = Nothing
59 |
60 | parseSingleLineComment :: LaTeX -> Maybe (LaTeX {- comment -}, LaTeX {- subsequent lines -})
61 | parseSingleLineComment x
62 | | Just x' <- texStripPrefix "//" x = Just $ case texStripInfix "\n" x' of
63 | Just (commentLine, moreLines) -> (TeXRaw "//" : commentLine, TeXRaw "\n" : moreLines)
64 | Nothing -> (x, [])
65 | | rlap@(TeXComm "rlap" _ [(FixArg, [TeXComm "textnormal" _ [(FixArg,[TeXComm "textit" _ [(FixArg,[TeXRaw "//"])]])]])]) : more <- x
66 | , Just (commentLine, moreLines) <- texStripInfix "\n" more
67 | = Just ([rlap, TeXComm "tcode" "" [(FixArg, commentLine)]], TeXRaw "\n" : moreLines)
68 | | TeXComm "comment" _ [(FixArg, c)] : x' <- x = Just (c, x')
69 | | otherwise = Nothing
70 |
71 | fromTeXRaw :: LaTeXUnit -> Text
72 | fromTeXRaw (TeXRaw x) = x
73 | fromTeXRaw x = error $ "fromTeXRaw (" ++ show x ++ ")"
74 |
75 | parseStringLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
76 | parseStringLiteral x
77 | -- raw:
78 | | Just (pre, x') <- texStripAnyPrefix ["R\"", "u8R\"", "uR\"", "UR\"", "LR\""] x
79 | , Just (delim, x'') <- texStripInfix "(" x'
80 | , Just (body, x''') <- texStripInfix (")" ++ Text.concat (map fromTeXRaw delim) ++ "\"") (concatRaws $ f x'')
81 | , (suffix, x'''') <- texSpan (\c -> isAlphaNum c || c == '_') x'''
82 | = Just ([TeXRaw pre] ++ delim ++ [TeXRaw "("] ++ body ++ [TeXRaw ")"] ++ delim ++ [TeXRaw $ "\"" ++ suffix], x'''')
83 | -- normal:
84 | | Just (pre, x') <- texStripAnyPrefix ["\"", "u\"", "U\"", "L\"", "u8\""] x
85 | , Just (body, x'') <- parseBody x'
86 | , (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x''
87 | = Just ([TeXRaw pre] ++ body ++ [TeXRaw $ "\"" ++ suffix], x''')
88 | | otherwise = Nothing
89 | where
90 | f :: LaTeX -> LaTeX
91 | f [] = []
92 | f (TeXComm "~" _ [] : more) = TeXRaw "~" : f more
93 | f (TeXBraces [] : more) = f more
94 | f (hd : t) = hd : f t
95 | parseBody :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -})
96 | parseBody [] = Nothing
97 | parseBody (TeXComm "textbackslash" _ [] : more) = parseBody $ concatRaws $ TeXRaw "\\" : more
98 | parseBody (TeXRaw (Text.unpack -> raw) : more)
99 | | '\\':'"':t <- raw = first (TeXRaw "\\\"" :) . parseBody (TeXRaw (Text.pack t) : more)
100 | | "\"" <- raw = Just ([], more)
101 | | '"':t <- raw = Just ([], TeXRaw (Text.pack t) : more)
102 | | raw == "" = parseBody more
103 | | hd:t <- raw = first (TeXRaw (Text.pack [hd]) :) . parseBody (TeXRaw (Text.pack t) : more)
104 | parseBody (TeXComm "%" ws [] : more) = first (TeXComm "%" ws [] :) . parseBody more
105 | parseBody (y : more) = first (y :) . parseBody more
106 |
107 | parseNumber :: LaTeX -> Maybe (Text, LaTeX)
108 | parseNumber x
109 | | (raw, more) <- unconsRaw x
110 | , Just (n, rest) <- (parseStart `parseSeq` (\t -> Just (parseMany parseSuffix t))) raw
111 | = Just (n, TeXRaw rest : more)
112 | | otherwise = Nothing
113 | where
114 | parseDigit = parseChar isDigit
115 | parseNonDigit = parseChar (\c -> isAlpha c || c == '_')
116 | parseStart :: Text -> Maybe (Text, Text)
117 | parseStart = parseFirstOf [parseChar (== '.') `parseSeq` parseDigit, parseDigit]
118 | parseSign :: Text -> Maybe (Text, Text)
119 | parseSign = parseChar (\c -> c == '-' || c == '+')
120 | parseSuffix :: Text -> Maybe (Text, Text)
121 | parseSuffix = parseFirstOf
122 | [ parseDigit
123 | , parseChar (== '\'') `parseSeq` parseDigit
124 | , parseChar (== '\'') `parseSeq` parseNonDigit
125 | , parseChar (`elem` ("eEpP"::String)) `parseSeq` parseSign
126 | , parseChar (== '.')
127 | , parseNonDigit
128 | ]
129 |
130 | parseLiteral :: LaTeX -> Maybe (LaTeX, LaTeX)
131 | parseLiteral x
132 | | Just (number, x') <- parseNumber x = Just ([TeXRaw number], x')
133 | | Just (lit, x') <- parseCharLiteral x = Just (lit, x')
134 | | Just (lit, x') <- parseStringLiteral x = Just (lit, x')
135 | | otherwise = Nothing
136 |
137 | parseComment :: LaTeX -> Maybe (LaTeX, LaTeX)
138 | parseComment x
139 | | Just x' <- texStripPrefix "/*" x, Just (comment, x'') <- texStripInfix "*/" x'
140 | = Just ([TeXRaw "/*"] ++ comment ++ [TeXRaw "*/"], x'')
141 | | Just x' <- texStripPrefix "/*" x
142 | = Just ([TeXRaw "/*"], x')
143 | | Just x' <- texStripPrefix "*/" x
144 | = Just ([TeXRaw "*/"], x')
145 | | Just (comment, x') <- parseSingleLineComment x
146 | = Just (comment, x')
147 | | otherwise = Nothing
148 |
149 | parseChar :: (Char -> Bool) -> Text -> Maybe (Text, Text)
150 | parseChar p t
151 | | t /= "", p (Text.head t) = Just (Text.take 1 t, Text.drop 1 t)
152 | | otherwise = Nothing
153 |
154 | parseSeq :: (Text -> Maybe (Text, Text)) -> (Text -> Maybe (Text, Text)) -> Text -> Maybe (Text, Text)
155 | parseSeq p q t
156 | | Just (x, t') <- p t
157 | , Just (y, t'') <- q t' = Just (x ++ y, t'')
158 | | otherwise = Nothing
159 |
160 | parseFirstOf :: [Text -> Maybe (a, Text)] -> Text -> Maybe (a, Text)
161 | parseFirstOf [] _ = Nothing
162 | parseFirstOf (p:pp) t
163 | | Just r <- p t = Just r
164 | | otherwise = parseFirstOf pp t
165 |
166 | parseMany :: (Text -> Maybe (Text, Text)) -> Text -> (Text, Text)
167 | parseMany p t = case p t of
168 | Nothing -> ("", t)
169 | Just (x, t') -> first (x++) (parseMany p t')
170 |
--------------------------------------------------------------------------------
/Document.hs:
--------------------------------------------------------------------------------
1 | {-# OPTIONS_GHC -fno-warn-tabs #-}
2 | {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}
3 |
4 | module Document (
5 | CellSpan(..), Cell(..), RowSepKind(..), Row(..), Element(..), Paragraph(..),
6 | Section(..), Chapter(..), Draft(..), Table(..), Figure(..), Item(..), Footnote(..),
7 | IndexPath, IndexComponent(..), IndexCategory, Index, IndexTree, IndexNode(..),
8 | ColumnSpec(..), TextAlignment(..), normative, Formula(..), chapterOfSection,
9 | IndexEntry(..), IndexKind(..), Note(..), Example(..), TeXPara(..), Sentence(..),
10 | texParaTex, texParaElems, XrefDelta, sectionByAbbr, isDefinitionSection, Abbreviation,
11 | indexKeyContent, indexCatName, Sections(sections), SectionKind(..), mergeIndices, SourceLocation(..),
12 | figures, tables, tableByAbbr, figureByAbbr, formulaByAbbr, elemTex, footnotes, allElements,
13 | LaTeX, makeAbbrMap, formulas) where
14 |
15 | import LaTeXBase (LaTeXUnit(..), LaTeX, MathType(Dollar))
16 | import Data.Text (Text, replace)
17 | import qualified Data.Text as Text
18 | import qualified Data.List as List
19 | import Data.IntMap (IntMap)
20 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
21 | import Data.Map (Map)
22 | import qualified Data.Map as Map
23 | import Data.String (IsString)
24 | import Util ((.), (++), greekAlphabet)
25 |
26 | -- Document structure:
27 |
28 | data CellSpan = Normal | Multicolumn { width :: Int, colspec :: ColumnSpec } deriving (Eq, Show)
29 | data Cell a = Cell { cellSpan :: CellSpan, content :: a } deriving (Eq, Show)
30 | data RowSepKind = RowSep | CapSep | Clines [(Int, Int)] | NoSep deriving (Eq, Show)
31 | data Row a = Row { rowSep :: RowSepKind, cells :: [Cell a] } deriving (Eq, Show)
32 |
33 | data TextAlignment = AlignLeft | AlignRight | AlignCenter | Justify
34 | deriving Eq
35 |
36 | instance Show TextAlignment where
37 | show AlignLeft = "left"
38 | show AlignRight = "right"
39 | show AlignCenter = "center"
40 | show Justify = "justify"
41 |
42 | data ColumnSpec = ColumnSpec
43 | { columnAlignment :: TextAlignment
44 | , columnBorder :: Bool
45 | , columnWidth :: Maybe Text}
46 | deriving (Eq, Show)
47 |
48 | data Table = Table
49 | { tableNumber :: Int
50 | , tableCaption :: LaTeX
51 | , columnSpec :: [ColumnSpec]
52 | , tableAbbr :: Abbreviation
53 | , tableBody :: [Row [TeXPara]]
54 | , tableSection :: Section }
55 |
56 | instance Show Table where
57 | show _ = "
"
58 |
59 | data Figure = Figure
60 | { figureNumber :: Int
61 | , figureName :: LaTeX
62 | , figureAbbr :: Abbreviation
63 | , figureSvg :: Text
64 | , figureSection :: Section }
65 |
66 | instance Show Figure where
67 | show _ = ""
68 |
69 | data Formula = Formula
70 | { formulaNumber :: Int
71 | , formulaAbbr :: Abbreviation
72 | , formulaContent :: LaTeX
73 | , formulaSection :: Section }
74 |
75 | instance Show Formula where
76 | show _ = ""
77 |
78 | data Item = Item
79 | { itemNumber :: Maybe [String]
80 | , itemLabel :: Maybe LaTeX
81 | , itemInlineContent :: [Element]
82 | , itemBlockContent :: [TeXPara] }
83 | deriving Show
84 |
85 | itemElements :: Item -> [Element]
86 | itemElements Item{..} = itemInlineContent ++ allElements itemBlockContent
87 |
88 | data Footnote = Footnote
89 | { footnoteNumber :: Int
90 | , footnoteContent :: [TeXPara] }
91 | deriving Show
92 |
93 | data Note = Note { noteNumber :: Int, noteLabel :: Text, noteContent :: [TeXPara] }
94 | deriving Show
95 |
96 | data Example = Example { exampleNumber :: Int, exampleContent :: [TeXPara] }
97 | deriving Show
98 |
99 | data Sentence = Sentence { sentenceNumber :: Maybe Int, sentenceElems :: [Element] }
100 | deriving Show
101 |
102 | newtype TeXPara = TeXPara { sentences :: [Sentence] }
103 | deriving Show
104 |
105 | data Element
106 | = LatexElement LaTeXUnit
107 | | Enumerated { enumCmd :: String, enumItems :: [Item] }
108 | | Bnf String LaTeX
109 | | TableElement Table
110 | | Tabbing LaTeX
111 | | FigureElement Figure
112 | | FormulaElement Formula
113 | | Codeblock LaTeXUnit
114 | | Itemdescr [TeXPara] -- needed because there can be notes in itemdescr envs
115 | | NoteElement Note
116 | | ExampleElement Example
117 | | HtmlElement Text
118 | deriving Show
119 |
120 | normative :: Element -> Bool
121 | normative (NoteElement _) = False
122 | normative (ExampleElement _) = False
123 | normative (LatexElement (TeXComm "index" _ _)) = False
124 | normative _ = True
125 |
126 | data SectionKind
127 | = NormalSection { _level :: Int }
128 | | DefinitionSection { _level :: Int }
129 | | InformativeAnnexSection
130 | | NormativeAnnexSection
131 | deriving (Eq, Show)
132 |
133 | isDefinitionSection :: SectionKind -> Bool
134 | isDefinitionSection (DefinitionSection _) = True
135 | isDefinitionSection _ = False
136 |
137 | data Chapter = NormalChapter | InformativeAnnex | NormativeAnnex
138 | deriving (Eq, Show)
139 |
140 | data SourceLocation = SourceLocation
141 | { sourceFile :: FilePath
142 | , sourceLine :: Int }
143 | deriving (Eq, Show)
144 |
145 | data Paragraph = Paragraph
146 | { paraNumber :: Maybe Int
147 | , paraInItemdescr :: Bool
148 | , paraElems :: [TeXPara]
149 | , paraSection :: Section
150 | , paraSourceLoc :: Maybe SourceLocation
151 | , allParaElems :: [Element] } -- derivable but stored for efficiency
152 | deriving Show
153 |
154 | type Abbreviation = Text -- of a section, figure, or table
155 |
156 | data Section = Section
157 | { abbreviation :: Abbreviation
158 | , sectionName :: LaTeX
159 | , paragraphs :: [Paragraph]
160 | , sectionFootnotes :: [Footnote]
161 | , subsections :: [Section]
162 | , sectionNumber :: Int
163 | , chapter :: Chapter
164 | , parents :: [Section] -- if empty, this is the chapter
165 | , sectionKind :: SectionKind
166 | , secIndexEntries :: IntMap IndexEntry
167 | , secIndexEntriesByPath :: Map IndexPath [(Int, IndexEntry)]
168 | }
169 | deriving Show
170 |
171 | chapterOfSection :: Section -> Section
172 | chapterOfSection s@Section{..}
173 | | null parents = s
174 | | otherwise = last parents
175 |
176 | instance Eq Section where
177 | x == y = abbreviation x == abbreviation y
178 |
179 | type XrefDelta = [(Abbreviation, [LaTeX])]
180 |
181 | data StablyNamedItem
182 | = StablyNamedTable Table
183 | | StablyNamedSection Section
184 | | StablyNamedFigure Figure
185 | | StablyNamedFormula Formula
186 |
187 | data Draft = Draft
188 | { commitUrl :: Text
189 | , chapters :: [Section]
190 | , index :: Index
191 | , indexEntryMap :: IntMap IndexEntry
192 | , indexEntriesByPath :: Map IndexPath [(Int, IndexEntry)]
193 | , xrefDelta :: XrefDelta
194 | , abbrMap :: Abbreviation -> Maybe StablyNamedItem
195 | , labels :: Map Text Section }
196 |
197 | -- (The index entry maps are derivable but stored for efficiency.)
198 |
199 | stablyNamedItems :: Draft -> [(Abbreviation, StablyNamedItem)]
200 | stablyNamedItems d =
201 | [(abbreviation s, StablyNamedSection s) | s <- sections d] ++
202 | [(tableAbbr t, StablyNamedTable t) | p <- allParagraphs d, TableElement t <- allParaElems p] ++
203 | [(formulaAbbr f, StablyNamedFormula f) | p <- allParagraphs d, FormulaElement f <- allParaElems p] ++
204 | [(figureAbbr f, StablyNamedFigure f) | p <- allParagraphs d, FigureElement f <- allParaElems p]
205 |
206 | makeAbbrMap :: Draft -> Abbreviation -> Maybe StablyNamedItem
207 | makeAbbrMap = flip Map.lookup . Map.fromList . stablyNamedItems
208 |
209 | -- Indices:
210 |
211 | data IndexComponent = IndexComponent { distinctIndexSortKey, indexKey :: LaTeX }
212 | deriving (Ord, Show)
213 |
214 | instance Eq IndexComponent where
215 | x == y =
216 | distinctIndexSortKey x == distinctIndexSortKey y &&
217 | indexKeyContent (indexKey x) == indexKeyContent (indexKey y)
218 |
219 | type IndexPath = [IndexComponent]
220 |
221 | data IndexKind = See { _also :: Bool, _ref :: LaTeX } | IndexOpen | IndexClose | DefinitionIndexEntry
222 | deriving (Eq, Show)
223 |
224 | type IndexCategory = Text
225 |
226 | type Index = Map IndexCategory IndexTree
227 |
228 | instance Show IndexEntry where
229 | show IndexEntry{..} =
230 | "IndexEntry"
231 | ++ "{indexSection=" ++ show indexEntrySection
232 | ++ ",indexCategory=" ++ show indexCategory
233 | ++ ",indexPath=" ++ show indexPath
234 | ++ ",indexEntryKind=" ++ show indexEntryKind
235 | ++ "}"
236 |
237 | data IndexEntry = IndexEntry
238 | { indexEntrySection :: Abbreviation
239 | , indexEntryKind :: Maybe IndexKind
240 | , indexPath :: IndexPath
241 | , indexEntryNr :: Maybe Int
242 | , indexCategory :: Text
243 | }
244 |
245 | type IndexTree = Map IndexComponent IndexNode
246 |
247 | data IndexNode = IndexNode
248 | { indexEntries :: [IndexEntry]
249 | , indexSubnodes :: IndexTree }
250 |
251 | mergeIndices :: [Index] -> Index
252 | mergeIndices = Map.unionsWith (Map.unionWith mergeIndexNodes)
253 |
254 | mergeIndexNodes :: IndexNode -> IndexNode -> IndexNode
255 | mergeIndexNodes x y = IndexNode
256 | { indexEntries = indexEntries x ++ indexEntries y
257 | , indexSubnodes = Map.unionWith mergeIndexNodes (indexSubnodes x) (indexSubnodes y) }
258 |
259 | indexKeyContent :: LaTeX -> Text
260 | indexKeyContent = mconcat . map ikc
261 | where
262 | ikc :: LaTeXUnit -> Text
263 | ikc (TeXRaw t) = replace "\n" " " t
264 | ikc (TeXComm "tcode" _ [(_, x)]) = indexKeyContent x
265 | ikc (TeXComm "idxcode" _ [(_, x)]) = indexKeyContent x
266 | ikc (TeXComm "noncxxtcode" _ [(_, x)]) = indexKeyContent x
267 | ikc (TeXComm "indexedspan" _ [(_, x), _]) = indexKeyContent x
268 | ikc (TeXComm "texttt" _ [(_, x)]) = indexKeyContent x
269 | ikc (TeXComm "textit" _ [(_, x)]) = indexKeyContent x
270 | ikc (TeXComm "textsc" _ [(_, x)]) = indexKeyContent x
271 | ikc (TeXComm "mathsf" _ [(_, x)]) = indexKeyContent x
272 | ikc (TeXComm "textsf" _ [(_, x)]) = indexKeyContent x
273 | ikc (TeXComm "textcolor" _ [_, (_, x)]) = indexKeyContent x
274 | ikc (TeXComm "xspace" _ []) = "_"
275 | ikc (TeXComm "Cpp" _ []) = "C++"
276 | ikc (TeXComm "&" _ []) = "&"
277 | ikc (TeXComm "%" _ []) = "%"
278 | ikc (TeXComm "-" _ []) = ""
279 | ikc (TeXComm "ell" _ []) = "ℓ"
280 | ikc (TeXComm "~" _ []) = "~"
281 | ikc (TeXComm "#" _ []) = "#"
282 | ikc (TeXComm "{" _ []) = "{"
283 | ikc (TeXComm "}" _ []) = "}"
284 | ikc (TeXComm "protect" _ []) = ""
285 | ikc (TeXComm "frenchspacing" _ []) = ""
286 | ikc (TeXComm "caret" _ []) = "^"
287 | ikc (TeXComm "tilde" _ []) = "~"
288 | ikc (TeXComm "^" _ []) = "^"
289 | ikc (TeXComm "\"" _ []) = "\""
290 | ikc (TeXComm "" _ []) = ""
291 | ikc (TeXComm "x" _ []) = "TODO"
292 | ikc (TeXComm "textbackslash" _ []) = "\\"
293 | ikc (TeXComm "textunderscore" _ []) = "_"
294 | ikc (TeXComm "discretionary" _ _) = ""
295 | ikc (TeXComm "texorpdfstring" _ [_, (_, x)]) = indexKeyContent x
296 | ikc (TeXComm s _ [])
297 | | Just c <- List.lookup s greekAlphabet = Text.pack [c]
298 | ikc (TeXBraces x) = indexKeyContent x
299 | ikc (TeXMath Dollar x) = indexKeyContent x
300 | ikc (TeXComm "index" _ _) = ""
301 | ikc (TeXComm "indexlink" _ ((_, x):_)) = indexKeyContent x
302 | ikc (TeXComm "hiddenindexlink" _ ((_, x):_)) = indexKeyContent x
303 | ikc x = error $ "indexKeyContent: unexpected: " ++ show x
304 |
305 | indexCatName :: (Eq b, Show b, IsString a, IsString b) => b -> a
306 | indexCatName "impldefindex" = "Index of implementation-defined behavior"
307 | indexCatName "libraryindex" = "Index of library names"
308 | indexCatName "headerindex" = "Index of library headers"
309 | indexCatName "generalindex" = "Index"
310 | indexCatName "grammarindex" = "Index of grammar productions"
311 | indexCatName "conceptindex" = "Index of library concepts"
312 | indexCatName "bibliography" = "Bibliography"
313 | indexCatName x = error $ "indexCatName: " ++ show x
314 |
315 | -- Gathering entities:
316 |
317 | class Sections a where sections :: a -> [Section]
318 |
319 | instance Sections Section where sections s = s : (subsections s >>= sections)
320 | instance Sections Draft where sections = concatMap sections . chapters
321 | instance Sections a => Sections (Maybe a) where sections = maybe [] sections
322 |
323 | allParagraphs :: Sections a => a -> [Paragraph]
324 | allParagraphs = (>>= paragraphs) . sections
325 |
326 | tables :: Sections a => a -> [(Paragraph, Table)]
327 | tables x = [(p, t) | p <- allParagraphs x, TableElement t <- allParaElems p]
328 |
329 | figures :: Sections a => a -> [(Paragraph, Figure)]
330 | figures x = [(p, f) | p <- allParagraphs x, FigureElement f <- allParaElems p]
331 |
332 | formulas :: Sections a => a -> [(Paragraph, Formula)]
333 | formulas x = [(p, f) | p <- allParagraphs x, FormulaElement f <- allParaElems p]
334 |
335 | footnotes :: Sections a => a -> [(Section, Footnote)]
336 | footnotes x = [(s, f) | s <- sections x, f <- sectionFootnotes s]
337 |
338 | allElements :: [TeXPara] -> [Element]
339 | allElements x = x >>= sentences >>= sentenceElems >>= f
340 | where
341 | f :: Element -> [Element]
342 | f e = e : case e of
343 | Enumerated {..} -> enumItems >>= itemElements
344 | TableElement Table{..} -> allElements $ tableBody >>= cells >>= content
345 | NoteElement Note{..} -> allElements noteContent
346 | Codeblock y -> [LatexElement y]
347 | ExampleElement Example{..} -> allElements exampleContent
348 | Tabbing y -> LatexElement . y
349 | Bnf _ y -> LatexElement . y
350 | _ -> []
351 |
352 | -- Misc:
353 |
354 | texParaElems :: TeXPara -> [Element]
355 | texParaElems = (>>= sentenceElems) . sentences
356 |
357 | texParaTex :: TeXPara -> LaTeX
358 | texParaTex = (>>= elemTex) . texParaElems
359 |
360 | itemTex :: Item -> LaTeX
361 | itemTex Item{..} = (itemInlineContent >>= elemTex) ++ (itemBlockContent >>= texParaTex)
362 |
363 | elemTex :: Element -> LaTeX
364 | elemTex (NoteElement n) = noteContent n >>= texParaTex
365 | elemTex (ExampleElement x) = exampleContent x >>= texParaTex
366 | elemTex (LatexElement l) = [l]
367 | elemTex (Enumerated _ e) = e >>= itemTex
368 | elemTex (Bnf _ l) = l
369 | elemTex (Tabbing t) = t
370 | elemTex (Codeblock t) = [t]
371 | elemTex (Itemdescr t) = t >>= texParaTex
372 | elemTex (TableElement Table{..}) = tableCaption ++ (tableBody >>= rowTex)
373 | where
374 | rowTex :: Row [TeXPara] -> LaTeX
375 | rowTex r = content . cells r >>= (>>= texParaTex)
376 | elemTex (FigureElement _) = []
377 | elemTex (FormulaElement f) = formulaContent f
378 | elemTex (HtmlElement _) = []
379 |
380 | tableByAbbr :: Draft -> Abbreviation -> Maybe Table
381 | -- only returns Maybe because some of our tables are broken
382 | tableByAbbr d a = case abbrMap d a of
383 | Just (StablyNamedTable t) -> Just t
384 | _ -> Nothing
385 |
386 | figureByAbbr :: Draft -> Abbreviation -> Figure
387 | figureByAbbr d a = case abbrMap d a of
388 | Just (StablyNamedFigure f) -> f
389 | _ -> error $ "figureByAbbr: " ++ show a
390 |
391 | formulaByAbbr :: Draft -> Abbreviation -> Formula
392 | formulaByAbbr d a = case abbrMap d a of
393 | Just (StablyNamedFormula f) -> f
394 | _ -> error $ "formulaByAbbr: " ++ show a
395 |
396 | sectionByAbbr :: Draft -> Abbreviation -> Maybe Section
397 | sectionByAbbr d a = case abbrMap d a of
398 | Just (StablyNamedSection s) -> Just s
399 | _ -> Nothing
400 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | All authors involved in the creation of the contents of this package have agreed to release their respective contributions into the Public Domain.
2 |
--------------------------------------------------------------------------------
/LaTeXBase.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ViewPatterns, OverloadedStrings #-}
2 |
3 | module LaTeXBase
4 | ( MathType(..), LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), concatRaws, hasCommand, isJustRaw
5 | , matchCommand, lookForCommand, matchEnv, mapTeX, mapCommandName, renderLaTeX, mapTeXRaw, isTeXEnv, texSpan, unconsRaw
6 | , trim, trimr, triml, texStripInfix, isCodeblock, isMath, texStripPrefix, texStripAnyPrefix, AllUnits(..) ) where
7 |
8 | import Data.String (fromString)
9 | import Prelude hiding ((.), (++), writeFile, dropWhile)
10 | import Data.Text (Text, pack)
11 | import qualified Data.Text as Text
12 | import Data.Char (isSpace)
13 | import Util ((.), (++), textStripInfix)
14 | import Control.Arrow (first, second)
15 |
16 | data MathType = Parentheses | Square | Dollar
17 | deriving (Eq, Show, Ord)
18 |
19 | data ArgKind = FixArg | OptArg
20 | deriving (Eq, Show, Ord)
21 |
22 | type TeXArg = (ArgKind, LaTeX)
23 |
24 | data LaTeXUnit
25 | = TeXRaw Text
26 | | TeXComm String String [TeXArg] -- first string is command name, second is trailing whitespace
27 | | TeXEnv String [TeXArg] LaTeX
28 | | TeXMath MathType LaTeX
29 | | TeXLineBreak
30 | | TeXBraces LaTeX
31 | deriving (Eq, Show, Ord)
32 |
33 | isTeXEnv :: String -> LaTeXUnit -> Bool
34 | isTeXEnv x (TeXEnv y _ _) = x == y
35 | isTeXEnv _ _ = False
36 |
37 | type LaTeX = [LaTeXUnit]
38 |
39 | lookForCommand :: String -> LaTeX -> [[TeXArg]]
40 | lookForCommand n = (snd .) . matchCommand (n ==)
41 |
42 | class AllUnits a where
43 | allUnits :: a -> [LaTeXUnit]
44 |
45 | instance AllUnits LaTeXUnit where
46 | allUnits u = u : case u of
47 | TeXMath _ l -> allUnits l
48 | TeXBraces l -> allUnits l
49 | TeXComm _ _ a -> (snd . a) >>= allUnits
50 | TeXEnv _ a l -> (l : snd . a) >>= allUnits
51 | _ -> []
52 |
53 | instance AllUnits a => AllUnits [a] where
54 | allUnits = concatMap allUnits
55 |
56 | matchCommand :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg])]
57 | matchCommand f x = [(str, as) | TeXComm str _ as <- allUnits x, f str]
58 |
59 | hasCommand :: (String -> Bool) -> LaTeX -> Bool
60 | hasCommand f = not . null . matchCommand f
61 |
62 | matchEnv :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg], LaTeX)]
63 | matchEnv f x = [(str, as, l) | TeXEnv str as l <- allUnits x, f str]
64 |
65 | mapTeX :: (LaTeXUnit -> Maybe LaTeX) -> LaTeX -> LaTeX
66 | mapTeX f = concatMap g
67 | where
68 | g :: LaTeXUnit -> LaTeX
69 | g (f -> Just x) = x
70 | g (TeXComm c ws a) = [TeXComm c ws (h . a)]
71 | g (TeXBraces x) = [TeXBraces (mapTeX f x)]
72 | g (TeXMath t b) = [TeXMath t (mapTeX f b)]
73 | g (TeXEnv n a b) = [TeXEnv n (h . a) (mapTeX f b)]
74 | g x = [x]
75 | h = second (mapTeX f)
76 |
77 | mapCommandName :: (String -> String) -> LaTeX -> LaTeX
78 | mapCommandName f = concatMap g
79 | where
80 | g :: LaTeXUnit -> LaTeX
81 | g (TeXComm c ws a) = [TeXComm (f c) ws (h . a)]
82 | g (TeXBraces x) = [TeXBraces (mapCommandName f x)]
83 | g (TeXMath t b) = [TeXMath t (mapCommandName f b)]
84 | g (TeXEnv n a b) = [TeXEnv n (h . a) (mapCommandName f b)]
85 | g x = [x]
86 | h = second (mapCommandName f)
87 |
88 | renderLaTeX :: LaTeX -> Text
89 | renderLaTeX = mconcat . (renderUnit .)
90 |
91 | renderUnit :: LaTeXUnit -> Text
92 | renderUnit (TeXRaw t) = t
93 | renderUnit (TeXComm "right" _ [(FixArg, [TeXRaw "."])]) = "\\right."
94 | renderUnit (TeXComm name ws [])
95 | | name `elem` ["left", "sum", "int", "sin", "cos", "right", "bigl", "bigr", "big", "small", "smaller"] = pack $ "\\" <> name <> ws
96 | | otherwise = "\\" <> fromString name <> "{}"
97 | renderUnit (TeXComm name ws args) = "\\" <> pack (fromString name) <> pack (fromString ws) <> renderArgs args
98 | renderUnit (TeXEnv name args c) =
99 | "\\begin{" <> fromString name <> "}"
100 | <> renderArgs args
101 | <> renderLaTeX c
102 | <> "\\end{" <> fromString name <> "}"
103 | renderUnit (TeXMath Dollar l) = "$" <> renderLaTeX l <> "$"
104 | renderUnit (TeXMath Square l) = "\\[" <> renderLaTeX l <> "\\]"
105 | renderUnit (TeXMath Parentheses l) = "\\(" <> renderLaTeX l <> "\\)"
106 | renderUnit TeXLineBreak = "\\\\"
107 | renderUnit (TeXBraces l) = "{" <> renderLaTeX l <> "}"
108 |
109 | renderArgs :: [TeXArg] -> Text
110 | renderArgs = mconcat . (renderArg .)
111 |
112 | renderArg :: TeXArg -> Text
113 | renderArg (FixArg, l) = "{" <> renderLaTeX l <> "}"
114 | renderArg (OptArg, l) = "[" <> renderLaTeX l <> "]"
115 |
116 | mapTeXRaw :: (Text -> LaTeXUnit) -> (LaTeX -> LaTeX)
117 | mapTeXRaw f = map go
118 | where
119 | go :: LaTeXUnit -> LaTeXUnit
120 | go (TeXRaw t) = f t
121 | go (TeXComm s ws args) = TeXComm s ws (second (go .) . args)
122 | go (TeXEnv s args body) = TeXEnv s (second (go .) . args) (go . body)
123 | go (TeXBraces l) = TeXBraces $ go . l
124 | go t@(TeXMath _ _) = t
125 | go t@TeXLineBreak = t
126 |
127 | concatRaws :: LaTeX -> LaTeX
128 | concatRaws (TeXRaw a : TeXRaw b : more) = concatRaws (TeXRaw (a ++ b) : more)
129 | concatRaws (TeXComm s ws args : more) = TeXComm s ws (second concatRaws . args) : concatRaws more
130 | concatRaws (TeXEnv s args bd : more) = TeXEnv s (second concatRaws . args) (concatRaws bd) : concatRaws more
131 | concatRaws (TeXBraces x : more) = TeXBraces (concatRaws x) : concatRaws more
132 | concatRaws (x : more) = x : concatRaws more
133 | concatRaws [] = []
134 |
135 | unconsRaw :: LaTeX -> (Text, LaTeX)
136 | unconsRaw (TeXRaw x : y) = first (x ++) (unconsRaw y)
137 | unconsRaw x = ("", x)
138 |
139 | texStripPrefix :: Text -> LaTeX -> Maybe LaTeX
140 | texStripPrefix t (TeXRaw s : y) = case Text.stripPrefix t s of
141 | Just "" -> Just y
142 | Just s' -> Just (TeXRaw s' : y)
143 | Nothing -> Nothing
144 | texStripPrefix _ _ = Nothing
145 |
146 | texStripAnyPrefix :: [Text] -> LaTeX -> Maybe (Text, LaTeX)
147 | texStripAnyPrefix [] _ = Nothing
148 | texStripAnyPrefix (x:y) z
149 | | Just a <- texStripPrefix x z = Just (x, a)
150 | | otherwise = texStripAnyPrefix y z
151 |
152 | texStripInfix :: Text -> LaTeX -> Maybe (LaTeX, LaTeX)
153 | texStripInfix t = go
154 | where
155 | go [] = Nothing
156 | go (x : rest)
157 | | TeXRaw s <- x
158 | , Just (y, z) <- textStripInfix t s
159 | = Just (h y, h z ++ rest)
160 | | otherwise = first (x :) . go rest
161 | h "" = []
162 | h x = [TeXRaw x]
163 |
164 | texSpan :: (Char -> Bool) -> LaTeX -> (Text, LaTeX)
165 | texSpan p (TeXRaw x : y) = case Text.span p x of
166 | (stuff, "") -> first (stuff ++) (texSpan p y)
167 | (stuff, rest) -> (stuff, TeXRaw rest : y)
168 | texSpan _ x = ("", x)
169 |
170 | invisible :: LaTeXUnit -> Bool
171 | invisible (TeXComm "index" _ _) = True
172 | invisible _ = False
173 |
174 | dropWhileEnd :: (Char -> Bool) -> LaTeX -> LaTeX
175 | dropWhileEnd _ [] = []
176 | dropWhileEnd p x
177 | | invisible (last x) = dropWhileEnd p (init x) ++ [last x]
178 | | TeXRaw y <- last x = init x ++ case Text.dropWhileEnd p y of
179 | "" -> []
180 | a -> [TeXRaw a]
181 | | otherwise = x
182 |
183 | trimr, trim :: LaTeX -> LaTeX
184 | trimr = dropWhileEnd isSpace
185 | trim = triml . trimr
186 |
187 | triml :: LaTeX -> LaTeX
188 | triml (TeXRaw x : y) = case Text.dropWhile isSpace x of
189 | "" -> triml y
190 | x' -> TeXRaw x' : y
191 | triml x = x
192 |
193 | isMath :: LaTeXUnit -> Bool
194 | isMath (TeXMath _ _) = True
195 | isMath (TeXComm "ensuremath" _ _) = True
196 | isMath (TeXEnv "eqnarray*" _ _) = True
197 | isMath (TeXEnv "equation*" _ _) = True
198 | isMath _ = False
199 |
200 | isCodeblock :: LaTeXUnit -> Bool
201 | isCodeblock (TeXEnv "codeblock" _ _) = True
202 | isCodeblock (TeXEnv "indexedcodeblock" _ _) = True
203 | isCodeblock (TeXEnv "codeblocktu" _ _) = True
204 | isCodeblock (TeXEnv "codeblockdigitsep" _ _) = True
205 | isCodeblock _ = False
206 |
207 | isJustRaw :: LaTeX -> Maybe Text
208 | isJustRaw [TeXRaw x] = Just x
209 | isJustRaw _ = Nothing
210 |
--------------------------------------------------------------------------------
/LaTeXParser.hs:
--------------------------------------------------------------------------------
1 | {-# OPTIONS_GHC -fno-warn-tabs #-}
2 | {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns, TupleSections #-}
3 |
4 | module LaTeXParser (parseString,
5 | Token(Token), Context(..), defaultContext, Signature(..), Macros(..), Environment(..), Command(..), ParseResult(ParseResult),
6 | defaultMacros,
7 | nullCmd, storeCmd, codeEnv, normalCmd,
8 | storeEnv) where
9 |
10 | import LaTeXBase (LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), MathType(..), concatRaws)
11 | import Data.Text (Text)
12 | import qualified Data.Text as Text
13 | import Data.Char (isAlphaNum, isSpace, isAlpha)
14 | import Data.Maybe (fromJust)
15 | import Control.Arrow (first)
16 | import Data.Map (Map)
17 | import qualified Data.Map as Map
18 | import Prelude hiding ((++), (.))
19 | import Util ((.), (++), getDigit, stripInfix)
20 |
21 | newtype Token = Token { tokenChars :: String }
22 | deriving (Eq, Show)
23 |
24 | data Environment = Environment (Context -> [Token] -> ParseResult)
25 | data Command = Command { runCommand :: Context -> String {- ws -} -> [Token] -> ParseResult }
26 |
27 | data Macros = Macros
28 | { commands :: Map Text Command
29 | , environments :: Map Text Environment
30 | , counters :: Map Text Int }
31 |
32 | newCommand :: Bool {- overwrite -} -> (Text, Command) -> Macros -> Macros
33 | newCommand True (name, cmd) Macros{..} = Macros{commands = Map.insert name cmd commands, ..}
34 | newCommand False (name, cmd) Macros{..} = Macros{commands = Map.insertWith (\_ y -> y) name cmd commands, ..}
35 |
36 | instance Semigroup Macros where
37 | x <> y = Macros
38 | (commands x ++ commands y)
39 | (environments x ++ environments y)
40 | (counters x ++ counters y)
41 |
42 | instance Monoid Macros where
43 | mempty = Macros mempty mempty mempty
44 |
45 | data ParseResult = ParseResult
46 | { content :: LaTeX
47 | , newMacros :: Macros
48 | , remainder :: [Token] }
49 |
50 | data Signature = Signature
51 | { nrFixArgs :: Int
52 | , defaultArg :: Maybe [Token] }
53 | deriving Show
54 |
55 | data Context = Context
56 | { commentsEnabled :: Bool
57 | , parsingOptArg :: Bool
58 | , macros :: Macros }
59 |
60 | prependContent :: LaTeX -> ParseResult -> ParseResult
61 | prependContent t p = p{content = t ++ content p}
62 |
63 | combineMacros :: Bool {- left biased -} -> Macros -> Macros -> Macros
64 | combineMacros b x y = if b then x ++ y else y ++ x
65 |
66 | addMacros :: Bool {- overwrite -} -> Macros -> ParseResult -> ParseResult
67 | addMacros b m p = p{newMacros = combineMacros b m (newMacros p)}
68 |
69 | defaultEnvs :: [(Text, Environment)]
70 | defaultEnvs = [outputblockEnv]
71 |
72 | codeEnv :: Text -> Signature -> (Text, Environment)
73 | codeEnv name sig = (name, Environment f)
74 | where
75 | f :: Context -> [Token] -> ParseResult
76 | f ctx toks = ParseResult [env] mempty rest'
77 | where
78 | (arguments, rest) = parseArgs sig toks
79 | Just (code, rest') = stripInfix [Token "\\end", Token "{", Token (Text.unpack name), Token "}"] rest
80 | env = TeXEnv (Text.unpack name) (map ((FixArg, ) . fullParse ctx) arguments) (parseCode name ctx code)
81 |
82 | outputblockEnv :: (Text, Environment)
83 | outputblockEnv = ("outputblock", Environment f)
84 | where
85 | f :: Context -> [Token] -> ParseResult
86 | f ctx toks = ParseResult [env] mempty rest
87 | where
88 | Just (content, rest) = stripInfix [Token "\\end", Token "{", Token "outputblock", Token "}"] toks
89 | env = TeXEnv "outputblock" [] (parseOutputBlock ctx content)
90 |
91 | parseOutputBlock :: Context -> [Token] -> LaTeX
92 | parseOutputBlock c = concatRaws . go
93 | where
94 | go :: [Token] -> LaTeX
95 | go [] = []
96 | go (Token "@" : rest) = fullParse c cmd ++ go rest'
97 | where (cmd, Token "@" : rest') = break (== Token "@") rest
98 | go s = TeXRaw (Text.pack $ concatMap tokenChars code) : go rest
99 | where (code, rest) = break (== Token "@") s
100 |
101 | storeEnv :: String -> Signature -> (Text, Environment)
102 | storeEnv name sig = (Text.pack name, Environment act)
103 | where
104 | act :: Context -> [Token] -> ParseResult
105 | act ctx toks = ParseResult [env] mempty afterend
106 | where
107 | (arguments, rest) = parseArgs sig toks
108 | ParseResult body _ afterend = parse ctx rest
109 | env = TeXEnv name (map ((FixArg, ) . fullParse ctx) arguments) (concatRaws body)
110 | -- todo: not all fixargs
111 |
112 | defaultCmds :: [(Text, Command)]
113 | defaultCmds =
114 | [ ("newcommand", newCommandCommand)
115 | , ("renewcommand", newCommandCommand)
116 | , ("DeclareMathOperator", declareMathOperator)
117 | , ("newcolumntype", newColumnTypeCommand)
118 | , ("newenvironment", newEnvCommand)
119 | , ("lstnewenvironment", newEnvCommand)
120 | , ("raisebox", raiseBoxCommand)
121 | , ("let", Command $ \ctx _ws rest -> parse ctx (drop 2 rest))
122 | , beginCommand
123 | , endCommand
124 | , oldDefCommand
125 | ]
126 |
127 | oldDefCommand :: (Text, Command)
128 | oldDefCommand = ("def", Command pars)
129 | where
130 | pars ctx@Context{..} _ws rest
131 | | (Token ('\\' : name) : rest') <- rest
132 | , Just (body, rest'') <- balanced ('{', '}') rest' =
133 | let
134 | m = Macros (Map.fromList [defCmd (Text.pack name) (Signature 0 Nothing) body]) mempty mempty
135 | ParseResult p mm r = parse ctx{macros=macros++m} rest''
136 | in
137 | ParseResult p (m ++ mm) r
138 | | otherwise = parse ctx $ snd $ fromJust $ balanced ('{', '}') $ dropWhile (/= Token "{") rest
139 |
140 | endCommand :: (Text, Command)
141 | endCommand = ("end", Command $ \c _ws rest ->
142 | let Just (_, rest') = parseFixArg c rest in ParseResult mempty mempty rest')
143 |
144 | beginCommand :: (Text, Command)
145 | beginCommand = ("begin", normalCmd $ Command pars)
146 | where
147 | pars c@Context{..} _ws rest
148 | | Just (Environment f) <- Map.lookup (envname) (environments macros) = f c rest'
149 | | otherwise = error $ "undefined env: " ++ Text.unpack envname
150 | where
151 | Just (arg, rest') = parseFixArg c rest
152 | [TeXRaw envname] = concatRaws arg
153 |
154 | raiseBoxCommand :: Command
155 | raiseBoxCommand = normalCmd $ Command $ \c@Context{..} _ws rest ->
156 | let
157 | Just (a0, rest') = balanced ('{', '}') rest
158 | (a1, rest'') = case parseOptArg rest' of
159 | Nothing -> (Nothing, rest')
160 | Just (x, y) -> (Just x, y)
161 | Just (a2, rest''') = balanced ('{', '}') rest''
162 | args = [(FixArg, fullParse c a0)]
163 | ++ case a1 of
164 | Nothing -> []
165 | Just x -> [(OptArg, fullParse c x)]
166 | ++ [(FixArg, fullParse c a2)]
167 | in
168 | ParseResult [TeXComm "raisebox" "" args] mempty rest'''
169 |
170 | newCommandCommand :: Command
171 | newCommandCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) ->
172 | let
173 | (sig, rest') = parseSignature rest
174 | Just (body, rest'') = balanced ('{', '}') rest'
175 | newMacros = newCommand True (defCmd (Text.pack name) sig body) mempty
176 | in
177 | ParseResult [] newMacros rest''
178 |
179 | declareMathOperator :: Command
180 | declareMathOperator = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) ->
181 | let
182 | Just (body, rest') = balanced ('{', '}') rest
183 | newBody = [Token "\\operatorname", Token "{"] ++ body ++ [Token "}"]
184 | newMacros = newCommand True (defCmd (Text.pack name) (Signature 0 Nothing) newBody) mempty
185 | in
186 | ParseResult [] newMacros rest'
187 |
188 | newColumnTypeCommand :: Command
189 | newColumnTypeCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token _ : Token "}" : rest) ->
190 | let
191 | (_, rest') = parseSignature rest
192 | Just (_, rest'') = balanced ('{', '}') rest'
193 | in
194 | ParseResult [] mempty rest''
195 |
196 | defaultMacros :: Macros
197 | defaultMacros = Macros (Map.fromList defaultCmds) (Map.fromList defaultEnvs) mempty
198 |
199 | defaultContext :: Context
200 | defaultContext = Context
201 | { commentsEnabled = True
202 | , parsingOptArg = False
203 | , macros = defaultMacros }
204 |
205 | rmLine :: [Token] -> [Token]
206 | rmLine s = case dropWhile (/= Token "\n") s of
207 | Token "\n" : x -> x
208 | x -> x
209 |
210 | parseOptArg :: [Token] -> Maybe ([Token], [Token])
211 | parseOptArg = balanced ('[', ']')
212 |
213 | parseOptArgs :: [Token] -> ([[Token]], [Token])
214 | parseOptArgs s
215 | | Just (r, s') <- parseOptArg s = first (r:) (parseOptArgs s')
216 | | otherwise = ([], s)
217 |
218 | parseFixArg :: Context -> [Token] -> Maybe (LaTeX, [Token])
219 | parseFixArg ctx (Token [c] : more) | isSpace c = parseFixArg ctx more
220 | parseFixArg ctx (Token "{" : more) =
221 | let ParseResult t _macros s = parse ctx more in Just (t, s)
222 | parseFixArg _ _ = Nothing
223 |
224 | parseSignature :: [Token] -> (Signature, [Token])
225 | parseSignature t = case optArgs of
226 | [] -> (Signature 0 Nothing, t')
227 | [[Token a]] -> (Signature (read a) Nothing, t')
228 | [[Token a], deflt] -> (Signature (read a) (Just deflt), t')
229 | _ -> error "unrecognized signature"
230 | where (optArgs, t') = parseOptArgs t
231 |
232 | balanced :: (Char, Char) -> [Token] -> Maybe ([Token], [Token])
233 | balanced (open, close) (dropWhile (all isSpace . tokenChars) -> (Token [o] : s))
234 | | o == open = Just $ go 0 s
235 | where
236 | go :: Int -> [Token] -> ([Token], [Token])
237 | go 0 [] = ([], [])
238 | go 0 (Token [c] : x) | c == close = ([], x)
239 | go n (Token "}" : x) = first (Token "}" :) (go (n-1) x)
240 | go n (Token "{" : x) = first (Token "{" :) (go (n+1) x)
241 | go n (x:y) = first (x :) (go n y)
242 | go n x = error $ "\n\nbalanced: " ++ show (n, x)
243 | balanced oc (dropWhile (all isSpace. tokenChars) -> (Token "%" : x)) = balanced oc (dropWhile (/= Token "\n") x)
244 | balanced _ _ = Nothing
245 |
246 | balanced_body :: Context -> String -> [Token] -> ([Token], [Token])
247 | balanced_body ctx end = go 0
248 | where
249 | go :: Int -> [Token] -> ([Token], [Token])
250 | go 0 [] = ([], [])
251 | go 0 (Token "\\end" : Token "{" : e : Token "}" : x) | fullParse ctx [e] == [TeXRaw $ Text.pack end] = ([], x)
252 | go n (Token "}" : x) = first (Token "}" :) (go (n-1) x)
253 | go n (Token "{" : x) = first (Token "{" :) (go (n+1) x)
254 | go n (x:y) = first (x :) (go n y)
255 | go n s = error $ "\n\nbalanced: " ++ show (n, s)
256 |
257 | parseArgs :: Signature -> [Token] -> ([[Token]], [Token])
258 | parseArgs Signature{..} s = case defaultArg of
259 | Nothing -> n_balanced ('{', '}') nrFixArgs s
260 | Just dfl -> case parseOptArg s of
261 | Nothing ->
262 | first (dfl :) (n_balanced ('{', '}') (nrFixArgs - 1) s)
263 | Just (optArg, s') ->
264 | first (optArg :) (n_balanced ('{', '}') (nrFixArgs - 1) s')
265 |
266 | parseArgs2 :: Context -> Signature -> [Token] -> ([TeXArg], [Token])
267 | parseArgs2 c Signature{..} s
268 | | defaultArg == Nothing = first (map fa) (n_balanced ('{', '}') nrFixArgs s)
269 | | Just (optArg, s') <- parseOptArg s =
270 | first (\a -> (OptArg, fullParse c optArg) : map fa a)
271 | (n_balanced ('{', '}') (nrFixArgs - 1) s')
272 | | otherwise = first (map fa) (n_balanced ('{', '}') (nrFixArgs - 1) s)
273 | where
274 | fa = (FixArg, ) . fullParse c
275 |
276 | -- todo: clean up parseArgs/parseArgs2 above
277 |
278 | n_balanced :: (Char, Char) -> Int -> [Token] -> ([[Token]], [Token])
279 | n_balanced oc n s
280 | | n > 0, Just (x, s') <- balanced oc s = first (x:) $ n_balanced oc (n-1) s'
281 | | otherwise = ([], s)
282 |
283 | newEnvCommand :: Command
284 | newEnvCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : (span (/= Token "}") -> (name, Token "}" : rest))) ->
285 | let
286 | nameStr = concatMap tokenChars name
287 | (sig, rest') = parseSignature rest
288 | Just (begin, rest'') = balanced ('{', '}') rest'
289 | Just (end, rest''') = balanced ('{', '}') rest''
290 | pa :: Context -> [Token] -> ParseResult
291 | pa c' toks = ParseResult replaced mempty toks''
292 | where
293 | replaced = fullParse c' $ replArgs args begin ++ body ++ end
294 | (args, toks') = parseArgs sig toks
295 | (body, toks'') = balanced_body c' nameStr toks'
296 | m = Macros mempty (Map.singleton (Text.pack nameStr) (Environment pa)) mempty
297 | in
298 | ParseResult [] m rest'''
299 |
300 | parseString :: Context -> String -> (LaTeX, Macros, [Token])
301 | parseString c s = (concatRaws x, y, z)
302 | where ParseResult x y z = parse c (tokenize s)
303 |
304 | literal :: String
305 | literal = " @_{}&,%-#/~>!$;:^"
306 |
307 | breakComment :: [Token] -> ([Token], [Token])
308 | breakComment x@(Token "\n" : _) = ([], x)
309 | breakComment (Token ('\\' : cmd) : xs)
310 | | (c, r@(_:_)) <- span (/= '\n') cmd = ([Token ('\\':c)], Token r : xs)
311 | breakComment (Token "%" : Token "\n" : x) = first ((Token "%" :) . (Token "\n" :)) (breakComment x)
312 | breakComment (x : xs) = first (x:) (breakComment xs)
313 | breakComment [] = ([], [])
314 |
315 | data LiteralKind = StringLiteral | CharLiteral
316 |
317 | parseCode :: Text -> Context -> [Token] -> LaTeX
318 | parseCode envname c = concatRaws . go Nothing
319 | where
320 | go :: Maybe LiteralKind -> [Token] -> LaTeX
321 | go _ [] = []
322 | go b (Token "@" : rest) = fullParse c cmd ++ go b rest'
323 | where (cmd, Token "@" : rest') = break (== Token "@") rest
324 | go (Just StringLiteral) (Token "\"" : rest) = TeXRaw "\"" : go Nothing rest
325 | go (Just CharLiteral) (Token "'" : rest) = TeXRaw "'" : go Nothing rest
326 | go Nothing (Token "\"" : rest) = TeXRaw "\"" : (go (Just StringLiteral) lit ++ go Nothing rest')
327 | where (lit, rest') = stringLiteral rest
328 | go Nothing (Token "'" : rest)
329 | | envname == "codeblockdigitsep" = TeXRaw "'" : go Nothing rest
330 | | otherwise = TeXRaw "'" : (go (Just CharLiteral) lit ++ go Nothing rest')
331 | where (lit, rest') = charLiteral rest
332 | go Nothing (Token "/" : Token "/" : (breakComment -> (comment, rest')))
333 | = TeXComm "comment" "" [(FixArg, TeXRaw "//" : noncode comment)] : go Nothing rest'
334 | go Nothing (Token "/" : Token "*" : rest)
335 | | Just (comment, rest') <- stripInfix [Token "*", Token "/"] rest
336 | = TeXComm "comment" "" [(FixArg, [TeXRaw "/*"] ++ noncode comment ++ [TeXRaw "*/"])] : go Nothing rest'
337 | go b (Token "/" : rest) = TeXRaw "/" : go b rest
338 | go b s = TeXRaw (Text.pack $ concatMap tokenChars code) : go b rest
339 | where
340 | breakToks = [Token "@", Token "/"] ++
341 | case b of
342 | Nothing -> [Token "\"", Token "'"]
343 | Just StringLiteral -> [Token "\""]
344 | Just CharLiteral -> [Token "'"]
345 | (code, rest) = break (`elem` breakToks) s
346 | noncode :: [Token] -> LaTeX
347 | noncode toks =
348 | fullParse c nc ++ case more of
349 | [] -> []
350 | Token "@" : (break (== Token "@") -> (code, _ : rest)) ->
351 | TeXComm "tcode" "" [(FixArg, fullParse c code)] : noncode rest
352 | _ -> error "no"
353 | where (nc, more) = span (/= Token "@") toks
354 | stringLiteral :: [Token] -> ([Token], [Token])
355 | stringLiteral (Token "\\" : Token "\"" : x) = first (Token "\\\"" :) (stringLiteral x)
356 | stringLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (stringLiteral x)
357 | stringLiteral (Token "\"" : x) = ([Token "\""], x)
358 | stringLiteral (y : x) = first (y :) (stringLiteral x)
359 | stringLiteral [] = ([], [])
360 | charLiteral :: [Token] -> ([Token], [Token])
361 | charLiteral (Token "\\" : Token "'" : x) = first (Token "\\'" :) (charLiteral x)
362 | charLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (charLiteral x)
363 | charLiteral (Token "'" : x) = ([Token "'"], x)
364 | charLiteral (y : x) = first (y :) (charLiteral x)
365 | charLiteral [] = ([], [])
366 |
367 | isCommandChar :: Char -> Bool
368 | isCommandChar c = isAlpha c || c == '*'
369 |
370 | tokenize :: String -> [Token]
371 | tokenize "" = []
372 | tokenize ('\\':'v':'e':'r':'b': delim : (break (== delim) -> (arg, _ : rest))) =
373 | Token ("\\verb:" ++ arg) : tokenize rest
374 | tokenize ('\\' : (span isCommandChar -> (cmd@(_:_), (span isSpace -> (ws, rest)))))
375 | = Token ('\\' : cmd ++ ws) : tokenize rest
376 | tokenize ('\\' : c : rest) = Token ['\\', c] : tokenize rest
377 | tokenize x@((isAlpha -> True): _) = let (a, b) = span isAlphaNum x in Token a : tokenize b
378 | tokenize (x:y) = Token [x] : tokenize y
379 |
380 | -- \verb is handled in tokenize so that the 'balanced' function doesn't
381 | -- get confused by \verb|{|
382 |
383 | -- Notice how the whitespace following a command like \bla is included in the Token
384 | -- This lets the parser include it in the TeXComm/TeXCommS's command field, so that
385 | -- the whitespace is not lost when serializing back to text when sending to MathJax.
386 |
387 | replArgs :: [[Token]] -> [Token] -> [Token]
388 | replArgs args = go
389 | where
390 | go [] = []
391 | go (Token "%" : (span (/= Token "\n") -> (x, y))) = Token "%" : x ++ go y
392 | go (Token "#" : Token "#" : y) = Token "#" : go y
393 | go (Token "#" : Token [getDigit -> Just i] : y)
394 | | length args >= i = (args !! (i-1)) ++ go y
395 | | otherwise = error $ "need more args than " ++ show args ++ " to replace in " ++ show (concatMap tokenChars y)
396 | go (x:y) = x : go y
397 |
398 | nullCmd :: Text -> Signature -> (Text, Command)
399 | nullCmd name sig = defCmd name sig []
400 |
401 | storeCmd :: String -> Signature -> (Text, Command)
402 | storeCmd name sig = (Text.pack name, normalCmd $ Command pars)
403 | where
404 | pars context ws tokens = ParseResult [TeXComm name ws args] mempty rest
405 | where (args, rest) = parseArgs2 context sig tokens
406 |
407 | defCmd :: Text -> Signature -> [Token] -> (Text, Command)
408 | defCmd name sig body = (name, normalCmd $ Command pars)
409 | where
410 | pars context _ws tokens = ParseResult (fullParse context $ replArgs args body) mempty rest
411 | where (args, rest) = parseArgs sig tokens
412 |
413 | normalCmd :: Command -> Command
414 | normalCmd (Command f) = Command $ \ctx ws toks ->
415 | let ParseResult content newMacros rest = f ctx ws toks
416 | in addMacros False newMacros (prependContent content (parse ctx{macros=macros ctx ++ newMacros} rest))
417 |
418 | consumeMath :: [Token] -> ([Token], [Token])
419 | consumeMath = f 0
420 | where
421 | f :: Integer -> [Token] -> ([Token], [Token])
422 | f 0 (Token "$" : rest) = ([], rest)
423 | f depth (Token "{" : rest) = first (Token "{" :) (f (depth + 1) rest)
424 | f depth (Token "}" : rest) = first (Token "}" :) (f (depth - 1) rest)
425 | f depth (tok : rest) = first (tok :) (f depth rest)
426 | f _ [] = error "unexpected end of math"
427 |
428 | parse :: Context -> [Token] -> ParseResult
429 | parse c (Token "$" : (consumeMath -> (math, rest))) =
430 | prependContent [TeXMath Dollar (fullParse c math)] (parse c rest)
431 | parse c (Token "\\[" : (span (/= Token "\\]") -> (math, Token "\\]" : rest))) =
432 | prependContent [TeXMath Square (fullParse c math)] (parse c rest)
433 | parse c (Token "]" : x)
434 | | parsingOptArg c = ParseResult mempty mempty x
435 | parse _ (Token "}" : x) = ParseResult mempty mempty x
436 | parse c (Token "{" : x) = prependContent [TeXBraces y] $ parse c rest
437 | where ParseResult y _ rest = parse c x
438 | parse c (Token "%" : x)
439 | | commentsEnabled c = parse c (rmLine x)
440 | parse _ [] = ParseResult mempty mempty mempty
441 | parse c (Token "\\\\" : x) = prependContent [TeXLineBreak] (parse c x)
442 | parse c (Token ['\\', ch] : x)
443 | | ch `elem` literal = prependContent [TeXComm [ch] "" []] (parse c x)
444 | parse c (Token ('\\':'v':'e':'r':'b':':':arg) : rest) =
445 | prependContent [TeXComm "verb" "" [(FixArg, [TeXRaw $ Text.pack arg])]] (parse c rest)
446 | parse c (Token "\\rSec" : Token [getDigit -> Just i] : s)
447 | = prependContent [TeXComm "rSec" "" args] $ parse c s''
448 | where
449 | Just (a, s') = parseOptArg s
450 | Just (b, s'') = parseFixArg c s'
451 | args = [(FixArg, [TeXRaw $ Text.pack $ show i]), (FixArg, fullParse c a), (FixArg, b)]
452 | parse c@Context{..} (Token ('\\' : (span (not . isSpace) -> (nos, w))) : rest)
453 | | Just f <- Map.lookup (Text.pack cmd) (commands macros) = runCommand f c ws rest
454 | | otherwise = error $
455 | "\n\nundefined command: " ++ show cmd ++ " at: " ++ take 50 (concatMap tokenChars rest)
456 | where (cmd, ws) | nos == "", (x : xx) <- w = ([x], xx)
457 | | otherwise = (nos, w)
458 | parse ctx (Token c : rest)
459 | | all isAlphaNum c
460 | = prependContent [TeXRaw $ Text.pack c] $ parse ctx rest
461 | parse ctx (Token [c] : rest)
462 | = prependContent [TeXRaw $ Text.pack [c]] $ parse ctx rest
463 | parse _ s = error $ "parse: unexpected: " ++ take 100 (concatMap tokenChars s)
464 |
465 | fullParse :: Context -> [Token] -> LaTeX
466 | fullParse c t
467 | | all isSpace (concatMap tokenChars remainder) = concatRaws content
468 | | otherwise = error $ "could not fully parse: "
469 | ++ concatMap tokenChars t
470 | ++ "\n\nremainder: "
471 | ++ concatMap tokenChars remainder
472 | where ParseResult{..} = parse c t
473 |
--------------------------------------------------------------------------------
/Load14882.hs:
--------------------------------------------------------------------------------
1 | {-# OPTIONS_GHC -fno-warn-tabs #-}
2 | {-# LANGUAGE
3 | OverloadedStrings,
4 | ScopedTypeVariables,
5 | RecordWildCards,
6 | ViewPatterns,
7 | LambdaCase,
8 | TupleSections,
9 | NamedFieldPuns,
10 | FlexibleInstances,
11 | FlexibleContexts,
12 | RankNTypes,
13 | MultiParamTypeClasses,
14 | FunctionalDependencies,
15 | UndecidableInstances,
16 | RecursiveDo #-}
17 |
18 | module Load14882 (parseIndex, load14882) where
19 |
20 | import qualified LaTeXParser as Parser
21 | import qualified Data.IntMap as IntMap
22 | import qualified Data.List as List
23 | import Data.IntMap (IntMap)
24 | import LaTeXBase
25 | ( LaTeXUnit(..), TeXArg, ArgKind(..), lookForCommand
26 | , mapTeX, mapTeXRaw, concatRaws, texStripInfix, allUnits)
27 | import Data.Text (Text, replace, isPrefixOf)
28 | import Data.Text.IO (readFile)
29 | import Text.Regex (mkRegex, matchRegexAll)
30 | import qualified Data.Text as Text
31 | import Control.Monad (forM, when)
32 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
33 | import Data.Char (isAlpha)
34 | import Control.Arrow (first)
35 | import Data.Map (Map)
36 | import Data.Maybe (isJust, fromJust)
37 | import qualified Data.Map as Map
38 | import Data.List (unfoldr, (\\), takeWhile)
39 | import System.Process (readProcess)
40 | import System.IO.Unsafe (unsafePerformIO)
41 | import Control.Monad.Fix (MonadFix)
42 | import Control.Monad.State (MonadState, evalState, get, put, liftM2, modify)
43 | import Util ((.), (++), mapLast, stripInfix, measure, textStripInfix)
44 | import RawDocument
45 | import Sentences (splitIntoSentences, isActualSentence, breakSentence)
46 | import Document
47 |
48 | getCommitUrl :: IO Text
49 | getCommitUrl = do
50 | url <- gitGetRemoteUrl
51 | commit <- gitGetCommitRef
52 | return $
53 | ( Text.replace "git@github.com:" "http://github.com/"
54 | $ Text.replace ".git" "/commit/" url)
55 | ++ commit
56 |
57 | gitGetRemoteUrl :: IO Text
58 | gitGetRemoteUrl = do
59 | x <- readProcess "git" ["ls-remote", "--get-url"] ""
60 | return $ Text.strip $ Text.pack x
61 |
62 | gitGetCommitRef :: IO Text
63 | gitGetCommitRef = do
64 | x <- readProcess "git" ["rev-parse", "HEAD"] ""
65 | return $ Text.strip $ Text.pack $ x
66 |
67 | -- In the LaTeX sources, \definition is often preceded by corresponding \indexdefns.
68 | -- Since we treat definitions like sections (and generate pages for them), we need
69 | -- to move the \indexdefns inside (after) the \definition, so that the index entries
70 | -- don't link to the page for the preceding section.
71 |
72 | moveIndexEntriesIntoDefs :: [Text] -> [Text]
73 | moveIndexEntriesIntoDefs [] = []
74 | moveIndexEntriesIntoDefs (x:xs)
75 | | "\\indexdefn{" `isPrefixOf` x = case moveIndexEntriesIntoDefs xs of
76 | [] -> [x]
77 | y:ys
78 | | "\\definition{" `isPrefixOf` y -> y : x : ys
79 | | otherwise -> x : y : ys
80 | | otherwise = x : moveIndexEntriesIntoDefs xs
81 |
82 | moveIndexEntriesIntoSecs :: [Text] -> [Text]
83 | moveIndexEntriesIntoSecs = go []
84 | where
85 | go x [] = x
86 | go x (h:t)
87 | | "\\indextext{" `isPrefixOf` h = go (h : x) t
88 | | "\\rSec" `isPrefixOf` h = h : reverse x ++ go [] t
89 | | otherwise = reverse x ++ [h] ++ go [] t
90 |
91 | {- The document has a ton of:
92 |
93 | \indexlibraryglobal{bla}%
94 | \begin{itemdecl}
95 | void bla();
96 | \end{itemdecl}
97 |
98 | To highlight the whole itemdecl, indexItemDecls converts this to:
99 |
100 | \begin{indexeditemdecl}{
101 | \indexlibraryglobal{bla}%
102 | }
103 | void bla();
104 | \end{indexeditemdecl}
105 | -}
106 |
107 | indexCodeEnvs :: [Text] -> [Text] -> [Text]
108 | indexCodeEnvs envs = go []
109 | where
110 | go collected [] = collected
111 | go collected (x:xs)
112 | | "\\index" `isPrefixOf` x = go (collected ++ [x]) xs
113 | | [e] <- [e | e <- envs, ("\\begin{" ++ e ++ "}") `isPrefixOf` x] =
114 | let (code, _ : rest) = span (not . (("\\end{" ++ e ++ "}") `isPrefixOf`)) xs
115 | in (if null collected then
116 | ["\\begin{" ++ e ++ "}"]
117 | ++ code
118 | ++ ["\\end{" ++ e ++ "}"]
119 | else
120 | ["\\begin{indexed" ++ e ++ "}{"] ++ collected ++ ["}"]
121 | ++ code
122 | ++ ["\\end{indexed" ++ e ++ "}"])
123 | ++ go [] rest
124 | | otherwise = collected ++ (x : go [] xs)
125 |
126 | data Numbers = Numbers
127 | { tableNr, figureNr, footnoteRefNr, footnoteNr, itemDeclNr
128 | , nextIndexEntryNr, noteNr, exampleNr, nextSentenceNr, formulaNr :: Int }
129 |
130 | class AssignNumbers a b | a -> b where
131 | assignNumbers :: forall m . (Functor m, MonadFix m, MonadState Numbers m) => Section -> a -> m b
132 |
133 | instance AssignNumbers TeXArg TeXArg where
134 | assignNumbers s (y, x) = (y, ) . assignNumbers s x
135 |
136 | instance AssignNumbers LaTeXUnit LaTeXUnit where
137 | assignNumbers s (TeXEnv "itemdecl" [] x) = do
138 | n <- get
139 | put n{itemDeclNr = itemDeclNr n + 1}
140 | TeXEnv "itemdecl" [(FixArg, [TeXRaw $ Text.pack $ show $ itemDeclNr n])] . assignNumbers s x
141 | assignNumbers s (TeXEnv "indexeditemdecl" indices x) = do
142 | n <- get
143 | put n{itemDeclNr = itemDeclNr n + 1}
144 | liftM2 (TeXEnv "indexeditemdecl") (assignNumbers s indices) (assignNumbers s x)
145 | assignNumbers s (TeXEnv x y z) = liftM2 (TeXEnv x) (assignNumbers s y) (assignNumbers s z)
146 | assignNumbers _ (TeXComm "index" ws args) = do
147 | n <- get
148 | put n{nextIndexEntryNr = nextIndexEntryNr n + 1}
149 | return $ TeXComm "index" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args
150 | assignNumbers _ (TeXComm "defnx" ws args) = do
151 | n <- get
152 | put n{nextIndexEntryNr = nextIndexEntryNr n + 1}
153 | return $ TeXComm "defnx" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args
154 | assignNumbers _ (TeXComm "footnoteref" ws []) = do
155 | Numbers{..} <- get
156 | put Numbers{footnoteRefNr = footnoteRefNr+1, ..}
157 | return $ TeXComm "footnoteref" ws [(FixArg, [TeXRaw $ Text.pack $ show footnoteRefNr])]
158 | assignNumbers s (TeXComm x ws args) = TeXComm x ws . assignNumbers s args
159 | assignNumbers _ x = return x
160 |
161 | instance AssignNumbers a b => AssignNumbers (Cell a) (Cell b) where
162 | assignNumbers s x@Cell{..} = do
163 | n <- get
164 | put n{nextSentenceNr=1}
165 | content' <- assignNumbers s content
166 | modify $ \m -> m{nextSentenceNr = nextSentenceNr n}
167 | return x{content=content'}
168 |
169 | instance AssignNumbers a b => AssignNumbers (Row a) (Row b) where
170 | assignNumbers s x@Row{..} = do
171 | cells' <- assignNumbers s cells
172 | return x{cells=cells'}
173 |
174 | instance AssignNumbers RawTexPara TeXPara where
175 | assignNumbers s (RawTexPara (splitIntoSentences -> x)) = TeXPara . f x
176 | where
177 | f [] = return []
178 | f (h:t) = do
179 | h' <- assignNumbers s h
180 | let actual = isActualSentence h
181 | n <- get
182 | put n{nextSentenceNr = nextSentenceNr n + (if actual then 1 else 0)}
183 | (Sentence (if actual then Just (nextSentenceNr n) else Nothing) h' :) . f t
184 |
185 | assignNonInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item
186 | assignNonInlineItem s (RawItem label content) = do
187 | n <- get
188 | put n{nextSentenceNr = 1}
189 | Item Nothing (if null label then Nothing else Just label) [] . assignNumbers s content
190 |
191 | breakFirstSentence :: [TeXPara] -> (Sentence, [TeXPara])
192 | breakFirstSentence (TeXPara [x] : z) = (x, z)
193 | breakFirstSentence (TeXPara (x:y) : z) = (x, TeXPara y : z)
194 | breakFirstSentence x = error $ "breakFirstSentence: " ++ show x
195 |
196 | assignInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item
197 | assignInlineItem s (RawItem label content) = do
198 | n <- get
199 | put n{nextSentenceNr = 1}
200 | content' <- assignNumbers s content
201 | let (Sentence _ x, y) = breakFirstSentence content'
202 | return $ Item Nothing (if null label then Nothing else Just label) x y
203 |
204 | endsWithFullStop :: [RawElement] -> Bool
205 | endsWithFullStop = isJust . breakSentence
206 |
207 | instance AssignNumbers RawElement Element where
208 | assignNumbers section RawFigure{..} = do
209 | Numbers{..} <- get
210 | put Numbers{figureNr = figureNr+1, ..}
211 | return $ FigureElement Figure
212 | { figureNumber = figureNr
213 | , figureName = rawFigureName
214 | , figureAbbr = "fig:" ++ rawFigureAbbr
215 | , figureSvg = rawFigureSvg
216 | , figureSection = section }
217 | assignNumbers section RawFormula{..} = do
218 | Numbers{..} <- get
219 | put Numbers{formulaNr = formulaNr + 1, ..}
220 | return $ FormulaElement Formula
221 | { formulaNumber = formulaNr
222 | , formulaAbbr = "eq:" ++ rawFormulaAbbr
223 | , formulaContent = rawFormulaContent
224 | , formulaSection = section }
225 | assignNumbers s RawTable{..} = do
226 | Numbers{..} <- get
227 | put Numbers{tableNr = tableNr+1, ..}
228 | tableCaption <- assignNumbers s rawTableCaption
229 | tableBody <- assignNumbers s rawTableBody
230 | return $ TableElement Table
231 | { tableNumber = tableNr
232 | , columnSpec = rawColumnSpec
233 | , tableAbbr = rawTableAbbr
234 | , tableCaption = tableCaption
235 | , tableSection = s
236 | , .. }
237 | assignNumbers s (RawEnumerated x p) = do
238 | origNum <- nextSentenceNr . get
239 | let c = length (filter (any (endsWithFullStop . rawTexParaElems) . rawItemContent) p)
240 | r <- mapM (if c > 1 then assignNonInlineItem s else assignInlineItem s) p
241 | modify $ \y -> y{nextSentenceNr = origNum}
242 | return $ Enumerated x r
243 | assignNumbers s (RawLatexElement x) = LatexElement . assignNumbers s x
244 | assignNumbers s (RawBnf x y) = Bnf x . assignNumbers s y
245 | assignNumbers _ (RawTabbing x) = return $ Tabbing x
246 | assignNumbers s (RawCodeblock x) = Codeblock . assignNumbers s x
247 | assignNumbers s (RawItemdescr x) = Itemdescr . assignNumbers s x
248 | assignNumbers s (RawNote label x) = do
249 | Numbers{..} <- get
250 | put Numbers{noteNr = noteNr+1, ..}
251 | x' <- assignNumbers s x
252 | return $ NoteElement $ Note noteNr label x'
253 | assignNumbers s (RawExample x) = do
254 | Numbers{..} <- get
255 | put Numbers{exampleNr = exampleNr+1, ..}
256 | x' <- assignNumbers s x
257 | return $ ExampleElement $ Example exampleNr x'
258 |
259 | instance AssignNumbers RawFootnote Footnote where
260 | assignNumbers s (RawFootnote t) = do
261 | Numbers{..} <- get
262 | put Numbers{footnoteNr = footnoteNr+1, nextSentenceNr = 1, ..}
263 | t' <- assignNumbers s t
264 | return $ Footnote{footnoteNumber=footnoteNr,footnoteContent=t'}
265 |
266 | lsectionLevel :: LinearSection -> Int
267 | lsectionLevel (lsectionKind -> NormalSection l) = l
268 | lsectionLevel (lsectionKind -> DefinitionSection l) = l
269 | lsectionLevel _ = 0
270 |
271 | paraNumbers :: [Bool] -> [Maybe Int]
272 | paraNumbers = f 1
273 | where
274 | f _ [] = []
275 | f i (True : x) = Just i : f (i + 1) x
276 | f i (False : x) = Nothing : f i x
277 |
278 | treeizeChapters :: forall m . (Functor m, MonadFix m, MonadState Numbers m) =>
279 | Bool -> Int -> [LinearSection] -> m [Section]
280 | treeizeChapters _ _ [] = return []
281 | treeizeChapters annexes secNumber (LinearSection{..} : more) = mdo
282 | nums <- get
283 | put nums{formulaNr = 1}
284 | sectionFootnotes <- assignNumbers newSec lsectionFootnotes
285 | let
286 | ie = rawIndexEntriesForSec newSec
287 | newSec = Section{sectionKind=lsectionKind, secIndexEntries=ie, secIndexEntriesByPath=reverseIndexEntryMap ie, ..}
288 | let pn = paraNumbers $ paraNumbered . lsectionParagraphs
289 | paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec
290 | subsections <- treeizeSections 1 chapter [newSec] lsubsections
291 | (newSec :) . treeizeChapters annexes' (sectionNumber + 1) more'
292 | where
293 | sectionNumber = if annexes' /= annexes then 0 else secNumber
294 | annexes' = chapter /= NormalChapter
295 | parents = []
296 | chapter
297 | | lsectionKind == InformativeAnnexSection = InformativeAnnex
298 | | lsectionKind == NormativeAnnexSection = NormativeAnnex
299 | | otherwise = NormalChapter
300 | abbreviation = lsectionAbbreviation
301 | sectionName = lsectionName
302 | (lsubsections, more') = span ((> 0) . lsectionLevel) more
303 |
304 | rawIndexEntriesForSec :: Section -> IntMap IndexEntry
305 | rawIndexEntriesForSec s = IntMap.fromList
306 | [(n, e) | e@IndexEntry{indexEntryNr=Just n} <- sectionIndexEntries s]
307 |
308 | reverseIndexEntryMap :: IntMap IndexEntry -> Map IndexPath [(Int, IndexEntry)]
309 | reverseIndexEntryMap m = Map.fromListWith (++) [(indexPath x, [(i, x)]) | (i, x) <- IntMap.assocs m]
310 |
311 | assignItemNumbers :: Paragraph -> Paragraph
312 | assignItemNumbers p
313 | | Just n <- paraNumber p = p{ paraElems = fst $ goParas [n, 1] $ paraElems p }
314 | | otherwise = p
315 | where
316 |
317 | goParas :: [Int] -> [TeXPara] -> ([TeXPara], [Int])
318 | goParas nn [] = ([], nn)
319 | goParas nn (TeXPara e : pp) = first (TeXPara e' :) (goParas nn' pp)
320 | where (e', nn') = goSentences nn e
321 |
322 | goSentences :: [Int] -> [Sentence] -> ([Sentence], [Int])
323 | goSentences nn [] = ([], nn)
324 | goSentences nn (Sentence m e : ss) = first (Sentence m e' :) (goSentences nn' ss)
325 | where (e', nn') = goElems nn e
326 |
327 | goElems :: [Int] -> [Element] -> ([Element], [Int])
328 | goElems nn [] = ([], nn)
329 | goElems nn (e:ee) = first (e' :) (goElems nn' ee)
330 | where (e', nn') = goElem nn e
331 |
332 | goElem :: [Int] -> Element -> (Element, [Int])
333 | goElem nn Enumerated{..} = (Enumerated enumCmd items', mapLast (+ length enumItems) nn)
334 | where
335 | items' = map (\(i, Item{..}) ->
336 | Item
337 | (Just (map show $ mapLast (+i) nn))
338 | itemLabel
339 | (fst $ goElems (mapLast (+i) nn ++ [1]) itemInlineContent)
340 | (fst $ goParas (mapLast (+i) nn ++ [1]) itemBlockContent)
341 | ) (zip [0..] enumItems)
342 | goElem nn (NoteElement (Note nr label paras)) = (NoteElement (Note nr label paras'), nn')
343 | where (paras', nn') = goParas nn paras
344 | goElem nn (ExampleElement (Example nr paras)) = (ExampleElement (Example nr paras'), nn')
345 | where (paras', nn') = goParas nn paras
346 | goElem nn x = (x, nn)
347 |
348 | instance AssignNumbers (Maybe Int, RawParagraph) Paragraph where
349 | assignNumbers paraSection (paraNumber, RawParagraph{..}) = do
350 | nums <- get
351 | put nums{nextSentenceNr=if paraNumbered then 1 else nextSentenceNr nums}
352 | paraElems <- assignNumbers paraSection rawParaElems
353 | when paraNumbered $ modify $ \newnums -> newnums{nextSentenceNr = nextSentenceNr nums}
354 | return $ assignItemNumbers Paragraph
355 | { paraInItemdescr = rawParaInItemdescr
356 | , paraSourceLoc = rawParaSourceLoc
357 | , allParaElems = allElements paraElems
358 | , .. }
359 |
360 | treeizeSections :: forall m . (Functor m, MonadFix m, MonadState Numbers m) =>
361 | Int -> Chapter -> [Section] -> [LinearSection] -> m [Section]
362 | treeizeSections _ _ _ [] = return []
363 | treeizeSections sectionNumber chapter parents
364 | (s@LinearSection{..} : (span ((> lsectionLevel s) . lsectionLevel) -> (lsubsections, more'))) = mdo
365 | let
366 | ie = rawIndexEntriesForSec newSec
367 | newSec = Section
368 | { sectionKind = lsectionKind
369 | , secIndexEntries = ie
370 | , secIndexEntriesByPath = reverseIndexEntryMap ie
371 | , sectionName = lsectionName
372 | , abbreviation = lsectionAbbreviation
373 | , .. }
374 | let pn = paraNumbers $ paraNumbered . lsectionParagraphs
375 | nums <- get
376 | put nums{noteNr=1, exampleNr=1, itemDeclNr=1}
377 | sectionFootnotes <- assignNumbers newSec lsectionFootnotes
378 | modify $ \n -> n{nextSentenceNr=1}
379 | paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec
380 | subsections <- treeizeSections 1 chapter (newSec : parents) lsubsections
381 | (newSec :) . treeizeSections (sectionNumber + 1) chapter parents more'
382 |
383 | instance AssignNumbers a b => AssignNumbers [a] [b] where
384 | assignNumbers s = mapM (assignNumbers s)
385 |
386 | resolveGrammarterms :: Parser.Macros -> [Text] -> LinearSection -> LinearSection
387 | resolveGrammarterms macros links LinearSection{..} =
388 | LinearSection{lsectionParagraphs = map resolve lsectionParagraphs, ..}
389 | where
390 | resolveTexPara :: RawTexPara -> RawTexPara
391 | resolveTexPara RawTexPara{..} = RawTexPara{rawTexParaElems = map resolveRawElem rawTexParaElems, ..}
392 | resolveRawElem :: RawElement -> RawElement
393 | resolveRawElem (RawBnf s tex) = RawBnf s (bnfGrammarterms macros links tex)
394 | resolveRawElem (RawEnumerated s items) = RawEnumerated s (map resolveItem items)
395 | resolveRawElem y = y
396 | resolveItem :: RawItem -> RawItem
397 | resolveItem (RawItem label content) = RawItem label (map resolveTexPara content)
398 | resolve :: RawParagraph -> RawParagraph
399 | resolve RawParagraph{..} = RawParagraph{rawParaElems = map resolveTexPara rawParaElems, ..}
400 |
401 | bnfGrammarterms :: Parser.Macros -> [Text] -> LaTeX -> LaTeX
402 | bnfGrammarterms macros links = mapTeX go . mapTeX wordify
403 | where
404 | wordify :: LaTeXUnit -> Maybe LaTeX
405 | wordify (TeXRaw stuff) = Just $ map TeXRaw $ unfoldr f stuff
406 | where
407 | f s | Text.null s = Nothing
408 | f s | isName $ Text.head s = Just $ Text.span isName s
409 | f s = Just $ Text.break isName s
410 |
411 | isName c = isAlpha c || c `elem` ['-', '_']
412 | wordify _ = Nothing
413 |
414 | go :: LaTeXUnit -> Maybe LaTeX
415 | go d@(TeXComm cmd _ _) | cmd `elem` ["tcode", "index", "textnormal", "indexlink", "hiddenindexlink", "indexedspan", "terminal", "literalterminal", "noncxxterminal"] = Just [d]
416 | go (TeXRaw name)
417 | | name `elem` links = Just $ fst $ RawDocument.doParse macros $ "\\grammarterm{" ++ name ++ "}"
418 | go _ = Nothing
419 |
420 | parseIndex :: LaTeX -> (IndexPath, Maybe IndexKind)
421 | parseIndex = go . mapTeXRaw unescapeIndexPath . concatRaws
422 | where
423 | go (texStripInfix "|seealso" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See True y)
424 | go (texStripInfix "|see " -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y)
425 | go (texStripInfix "|see" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y)
426 | go (texStripInfix "|(" -> Just (t, _)) = (parseIndexPath t, Just IndexOpen)
427 | go (texStripInfix "|)" -> Just (t, _)) = (parseIndexPath t, Just IndexClose)
428 | go (texStripInfix "|idxbfpage" -> Just (t, _)) = (parseIndexPath t, Just DefinitionIndexEntry)
429 | go t = (parseIndexPath t, Nothing)
430 |
431 | unescapeIndexPath :: Text -> LaTeXUnit
432 | unescapeIndexPath = TeXRaw
433 | . replace "\5" "\""
434 |
435 | . replace "\2" "!"
436 | . replace "!" "\1"
437 | . replace "\"!" "\2"
438 |
439 | . replace "\4" "@"
440 | . replace "@" "\3"
441 | . replace "\"@" "\4"
442 |
443 | . replace "\"|" "|"
444 | . replace "\"\"" "\5"
445 |
446 | . (!! 10) . iterate (replace " " " ")
447 | . replace "\n" " "
448 |
449 | parseIndexPath :: LaTeX -> IndexPath
450 | parseIndexPath (texStripInfix "\1" -> Just (x, y)) = parseIndexPath x ++ parseIndexPath y
451 | parseIndexPath (texStripInfix "\3" -> Just (x, y)) = [IndexComponent x y]
452 | parseIndexPath t = [IndexComponent [] t]
453 |
454 | sectionTexParas :: Section -> [TeXPara]
455 | sectionTexParas s = (paragraphs s >>= paraElems) ++ (sectionFootnotes s >>= footnoteContent)
456 |
457 | sectionTex :: Section -> LaTeX
458 | sectionTex s = sectionTexParas s >>= texParaTex
459 |
460 | sectionIndexEntries :: Section -> [IndexEntry]
461 | sectionIndexEntries s =
462 | [ IndexEntry{indexEntrySection=abbreviation sec, ..}
463 | | sec <- sections s
464 | , [ (FixArg, [TeXRaw (Text.unpack -> read -> Just -> indexEntryNr)])
465 | , (OptArg, [TeXRaw indexCategory]), (FixArg, (parseIndex -> (indexPath, indexEntryKind)))
466 | ] <- lookForCommand "index" (sectionTex sec)]
467 |
468 | sectionLabels :: Section -> [(Text, Section)]
469 | sectionLabels s =
470 | [ (label, sec) | sec <- sections s
471 | , [ (FixArg, [TeXRaw label]) ] <- lookForCommand "label" (sectionTex sec)]
472 |
473 | toIndex :: IndexEntry -> Index
474 | toIndex IndexEntry{..} = Map.singleton indexCategory $ go indexPath
475 | where
476 | go :: [IndexComponent] -> IndexTree
477 | go [c] = Map.singleton c (IndexNode [IndexEntry indexEntrySection indexEntryKind indexPath indexEntryNr indexCategory] Map.empty)
478 | go (c:cs) = Map.singleton c $ IndexNode [] $ go cs
479 | go _ = error "toIndex"
480 |
481 | trackPnums :: FilePath -> Text -> Text
482 | -- Replaces \pnum with \pnum{file}{line}
483 | trackPnums file = Text.pack . unlines . map (uncurry f) . zip [1..] . lines . Text.unpack
484 | where
485 | f :: Integer -> String -> String
486 | f lineNr line
487 | | Just (pre, post) <- stripInfix "\\pnum" line
488 | = pre ++ "\\pnum{" ++ file ++ "}{" ++ show lineNr ++ "}" ++ (if null post then "%" else post)
489 | | otherwise = line
490 |
491 | getFileList :: IO [FilePath]
492 | getFileList =
493 | (\\ ["front", "back"]) .
494 | map (Text.unpack . Text.dropEnd 1 . Text.drop (Text.length pre)) .
495 | filter (pre `isPrefixOf`) .
496 | Text.lines . readFile "std.tex"
497 | where pre = "\\include{"
498 |
499 | grabBnf :: [String] -> [String]
500 | grabBnf [] = []
501 | grabBnf (line : rest)
502 | | "\\begin{bnf}" `List.isPrefixOf` line =
503 | let (x, end : more) = break ("\\end{bnf}" `List.isPrefixOf`) rest
504 | in ["", line] ++ x ++ [end] ++ grabBnf more
505 | | "\\gramSec" `List.isPrefixOf` line = ["", line] ++ grabBnf rest
506 | | otherwise = grabBnf rest
507 |
508 | generateStdGramExt :: [FilePath] -> IO Text
509 | generateStdGramExt files =
510 | Text.pack . unlines . grabBnf . lines . Text.unpack .
511 | Text.concat . mapM readFile ((++ ".tex") . files)
512 |
513 | importExampleFile :: FilePath -> IO Text
514 | importExampleFile =
515 | (Text.strip .
516 | Text.unlines .
517 | takeWhile (/= "\\end{document}") .
518 | tail .
519 | dropWhile (/= "\\begin{document}") .
520 | Text.lines .) .
521 | readFile
522 |
523 | importExamples :: Text -> Text
524 | importExamples x = case matchRegexAll r (Text.unpack x) of
525 | Nothing -> x
526 | Just (before, _match, after, subs) ->
527 | Text.pack before ++
528 | unsafePerformIO (importExampleFile $ "assets/" ++ (subs !! 1) ++ ".tex") ++
529 | importExamples (Text.pack after)
530 | where r = mkRegex "\\\\importexample(\\[[0-9a-zA-Z.-]*\\])?{([a-zA-Z0-9_-]*)}"
531 |
532 | parseFiles :: Parser.Macros -> IO ([LinearSection], Parser.Macros)
533 | parseFiles m = do
534 | files <- getFileList
535 | stdGramExt <- generateStdGramExt files
536 | let
537 | go [] macros = return ([], macros)
538 | go (c:cc) macros = do
539 | let p = c ++ ".tex"
540 |
541 | stuff <-
542 | importExamples .
543 | replace "multicolfloattable" "floattable" .
544 | replace "\\indeximpldef{" "\\index[impldefindex]{" .
545 | Text.unlines .
546 | indexCodeEnvs ["codeblock", "itemdecl"] .
547 | moveIndexEntriesIntoSecs .
548 | moveIndexEntriesIntoDefs .
549 | Text.lines .
550 | trackPnums p .
551 | replace "\\nodiffref\n\\change" "\n\\pnum\\textbf{Change:}\\space" .
552 | replace "\n\\diffref" "\n\\pnum\\nopnumdiffref" .
553 | -- Done here because (1) the real \nodiffref is defined with \def in a way
554 | -- we don't support yet, and (2) this way a source link is generated for the pnum.
555 | readFile p
556 |
557 | let extra = if c /= "grammar" then "" else replace "\\gramSec" "\\rSec1" stdGramExt
558 | let (r, macros') = parseFile macros (stuff ++ extra)
559 | if length r == 0 then undefined else
560 | first (r ++) . go cc (macros ++ macros')
561 |
562 | bib <- fst . parseFile m .
563 | fst . fromJust .
564 | textStripInfix "\\clearpage" .
565 | ("\\rSec0[bibliography]{Bibliography}\n" ++) .
566 | readFile "back.tex"
567 |
568 | first (++ bib) . go files m
569 |
570 | load14882 :: Text -> IO Draft
571 | load14882 extraMacros = do
572 |
573 | commitUrl <- getCommitUrl
574 |
575 | (macros@Parser.Macros{..}, took) <- measure (loadMacros extraMacros)
576 | putStrLn $ "Loaded macros in " ++ show (took * 1000) ++ "ms."
577 |
578 | (secs :: [LinearSection], took2) <- measure $ fst . parseFiles macros
579 | putStrLn $ "Parsed LaTeX in " ++ show (took2 * 1000) ++ "ms."
580 |
581 | xrefDelta <- loadXrefDelta
582 |
583 | (r, took3) <- measure $ if length (show secs) == 0 then undefined else do
584 | -- force eval before we leave the dir
585 | let
586 | grammarNames = [n |
587 | TeXComm "index" _ [
588 | (OptArg, [TeXRaw "grammarindex"]) ,
589 | (FixArg, [TeXRaw _
590 | ,TeXComm "textcolor" "" [(FixArg,[TeXRaw "grammar-gray"]),(FixArg,[TeXComm "textsf" _ [(FixArg,[TeXComm "textit" "" [(FixArg,[TeXRaw n])]])]])]
591 | ,TeXRaw "|idxbfpage"]
592 | )] <- allUnits secs]
593 |
594 | secs' = map (resolveGrammarterms macros grammarNames) secs
595 | chapters = evalState (treeizeChapters False 1 secs') (Numbers 1 1 1 1 0 0 1 1 1 1)
596 | allEntries :: [IndexEntry]
597 | allEntries = chapters >>= sectionIndexEntries
598 | index = mergeIndices $ map toIndex allEntries
599 | indexEntryMap = IntMap.fromList [(n, e) | e@IndexEntry{indexEntryNr=Just n} <- allEntries]
600 | indexEntriesByPath = reverseIndexEntryMap indexEntryMap
601 | labels = Map.fromList $ chapters >>= sectionLabels
602 |
603 | abbrMap = makeAbbrMap dr
604 | dr = Draft{..}
605 | return dr
606 |
607 | putStrLn $ "Processed in " ++ show (took3 * 1000) ++ "ms."
608 | return r
609 |
--------------------------------------------------------------------------------
/MathJax.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings, ViewPatterns #-}
2 |
3 | module MathJax (render) where
4 |
5 | import Control.Concurrent.MVar (takeMVar, putMVar, newMVar)
6 | import Data.Text (Text)
7 | import qualified Data.Text as Text
8 | import System.IO.Unsafe (unsafePerformIO)
9 | import System.Process (shell, CreateProcess(..), createProcess, StdStream(CreatePipe))
10 | import System.IO (BufferMode(..), hGetLine, hPutStrLn, hSetBuffering)
11 | import Text.Regex (mkRegex, subRegex)
12 | import Prelude hiding ((++))
13 | import Util ((++))
14 | import qualified Data.Map as Map
15 | import Data.Map (Map)
16 |
17 | rmTrailingNewline :: Text -> Text
18 | rmTrailingNewline (Text.stripSuffix "\n" -> Just x) = x
19 | rmTrailingNewline x = x
20 |
21 | type Renderer = String {- formula -} -> Bool {- inline -} -> Text
22 |
23 | data Input = Input { _formula :: String, _inline :: Bool }
24 | deriving (Eq, Ord)
25 |
26 | makeRenderer :: IO Renderer
27 | makeRenderer = do
28 |
29 | (Just stdinPipe, Just stdoutPipe, _, _) <- createProcess (shell "./mathjax-batch")
30 | {std_in = CreatePipe, std_out = CreatePipe}
31 |
32 | hSetBuffering stdinPipe LineBuffering
33 | hSetBuffering stdoutPipe LineBuffering
34 |
35 | let
36 | rm r s = subRegex (mkRegex r) s ""
37 | readResult = do
38 | line <- hGetLine stdoutPipe
39 | if line == "DONE"
40 | then return ""
41 | else do
42 | more <- readResult
43 | return $ line ++ "\n" ++ more
44 |
45 | mutex <- newMVar (Map.empty :: Map Input Text)
46 |
47 | return $ \formula inline -> unsafePerformIO $ do
48 | let input = Input formula inline
49 | cache <- takeMVar mutex
50 | (result, cache') <- case Map.lookup input cache of
51 | Just output -> return (output, cache)
52 | Nothing -> do
53 | hPutStrLn stdinPipe formula
54 | hPutStrLn stdinPipe (if inline then "INLINE" else "NONINLINE")
55 | rawResult <- readResult
56 | let
57 | output
58 | = Text.replace " focusable=\"false\"" ""
59 | $ rmTrailingNewline -- Prevents artifacts in [rand.adapt.ibits]#4
60 | $ Text.pack
61 | $ rm " id=\"(MJXc|MathJax)-[0-9A-Za-z-]+\""
62 | $ rm " style=\"\""
63 | $ rawResult
64 | return (output, Map.insert input output cache)
65 | putMVar mutex cache'
66 | return result
67 |
68 | render :: Renderer
69 | render = unsafePerformIO $ makeRenderer
70 |
--------------------------------------------------------------------------------
/Pages.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-}
2 |
3 | module Pages (fileContent, pageContent, pagePath, writePage, applyPageStyle, Link(..), outputDir, PageStyle(..)) where
4 |
5 | import Prelude hiding ((++), (.), writeFile)
6 | import System.Directory (createDirectoryIfMissing)
7 | import Control.Monad (when)
8 | import qualified Data.Text as Text
9 | import qualified Data.Text.Lazy as LazyText
10 | import qualified Data.Text.Lazy.Builder as TextBuilder
11 | import Util ((++), (.), Text, writeFile)
12 |
13 | outputDir :: FilePath
14 | outputDir = "14882/"
15 |
16 | data PageStyle = Bare | WithExtension | InSubdir
17 | deriving (Eq, Read)
18 |
19 | fileContent :: TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder
20 | fileContent pathHome title extraHead body =
21 | "" ++
22 | "" ++
23 | "" ++
24 | "" ++ title ++ "" ++
25 | "" ++
26 | "" ++
27 | "" ++
28 | "" ++
29 | "" ++
30 | "" ++
31 | extraHead ++
32 | "" ++
33 | "" ++ body ++ "
" ++
34 | ""
35 |
36 | data Link = TocToSection | SectionToToc | SectionToSection
37 | deriving Show
38 |
39 | doLink :: PageStyle -> Link -> Text -> Text
40 | doLink sfs l = LazyText.toStrict . TextBuilder.toLazyText . go . Text.splitOn (Text.pack (show l) ++ "/")
41 | where
42 | go :: [Text] -> TextBuilder.Builder
43 | go (x : (Text.break (`elem` ("'#" :: String)) -> (a, b)) : z) = TextBuilder.fromText x ++ f (TextBuilder.fromText a) ++ go (b : z)
44 | go [x] = TextBuilder.fromText x
45 | go _ = undefined
46 | f :: TextBuilder.Builder -> TextBuilder.Builder
47 | f = case (sfs, l) of
48 | (Bare, SectionToToc) -> ("./#" ++)
49 | (Bare, TocToSection) -> id
50 | (Bare, SectionToSection) -> id
51 | (InSubdir, SectionToToc) -> ("../#" ++)
52 | (InSubdir, TocToSection) -> (++ "/")
53 | (InSubdir, SectionToSection) -> ("../" ++)
54 | (WithExtension, SectionToToc) -> ("index.html#" ++)
55 | (WithExtension, TocToSection) -> (++ ".html")
56 | (WithExtension, SectionToSection) -> (++ ".html")
57 |
58 | applyPageStyle :: PageStyle -> Text -> Text
59 | applyPageStyle sfs =
60 | doLink sfs SectionToSection
61 | . doLink sfs SectionToToc
62 | . doLink sfs TocToSection
63 |
64 | pagePath :: FilePath -> PageStyle -> String
65 | pagePath n Bare = outputDir ++ n
66 | pagePath n WithExtension = outputDir ++ n ++ ".html"
67 | pagePath n InSubdir = outputDir ++ n ++ "/index.html"
68 |
69 | pageContent :: PageStyle -> TextBuilder.Builder -> Text
70 | pageContent sfs content = applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $ content
71 |
72 | writePage :: FilePath -> PageStyle -> Text -> IO ()
73 | writePage n sfs content = do
74 | when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n)
75 | writeFile (pagePath n sfs) content
76 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Introduction
2 |
3 | cxxdraft-htmlgen parses the LaTeX sources of the draft,
4 | and generates static HTML pages from them.
5 |
6 | Prerequisites
7 |
8 | - Git
9 | - The Haskell Platform (https://www.haskell.org/platform/)
10 | - Graphviz
11 | - Node.js
12 | - The 'split' NPM package
13 | - mathjax-node-cli (https://github.com/mathjax/mathjax-node-cli/)
14 |
15 | Usage
16 |
17 | Do:
18 | git clone https://github.com/Eelis/cxxdraft-htmlgen.git
19 | cd cxxdraft-htmlgen
20 | cabal build
21 | dist/build/cxxdraft-htmlgen/cxxdraft-htmlgen path/to/draft [sectionfilestyle]
22 |
23 | Or with stack:
24 | stack build
25 | stack exec cxxdraft-htmlgen path/to/draft [sectionfilestyle]
26 |
27 | The sectionfilestyle parameter is one of:
28 |
29 | Bare (to generate e.g. intro.execution)
30 | WithExtension (to generate e.g. intro.execution.html)
31 | InSubdir (to generate e.g. intro.execution/index.html)
32 |
33 | The default is WithExtension, since this is suitable
34 | for direct browsing on a filesystem without a web server.
35 |
36 | Bare may be used in conjunction with web server configuration
37 | specifying a default text/html mime type for the directory
38 | containing the section pages, to get URLs such as:
39 |
40 | temp.res#temp.dep
41 | temp.dep#3
42 |
43 | InSubdir only requires defaulting to index.html, to give:
44 |
45 | temp.res/#temp.dep
46 | temp.dep/#3
47 |
48 | Custom draft branch
49 |
50 | While cxxdraft-htmlgen works with the official draft sources as-is,
51 | better results can be obtained by using the following branch:
52 |
53 | https://github.com/Eelis/draft/tree/cxxdraft-htmlgen-fixes
54 |
55 | This branch tracks the official draft sources, but makes some changes to:
56 | - improve syntax highlighting
57 | - clean up hyperlinks
58 | - work around MathJax limitations
59 | - work around cxxdraft-htmlgen limitations
60 |
61 | Output
62 |
63 | The following will be created in ./14882/ :
64 |
65 | - index.html A table of contents with links to...
66 |
67 | - ~2300 interlinked section pages
68 |
69 | These are named after the section abbreviation, which for
70 | the Bare section file style look like:
71 |
72 | stmt.goto
73 | class.member.lookup
74 | cpp
75 | iterator.requirements.general
76 | locale.moneypunct.virtuals
77 |
78 | Since sections nest, content is duplicated at every level.
79 | This allows one to specify more or less context for a given
80 | citation. For example, one can link to:
81 |
82 | basic.scope.hiding (section 6.4.10 "Name hiding" on
83 | a page of its own)
84 |
85 | basic.scope#hiding (the same section highlighted on
86 | the page for section 6.4 "Scope")
87 |
88 | basic#scope.hiding (the same section highlighted on
89 | the page for chapter 6 "Basics")
90 |
91 | - full The entire document (~24 mbyte, or ~2 mbyte compressed).
92 |
93 | - 14882.css Used by all of the above.
94 |
95 | Hidden links
96 |
97 | On any page:
98 |
99 | - defined terms/concepts/nonterminals are links that select themselves;
100 |
101 | - a full stop at the end of a sentence is a link that selects the sentence;
102 |
103 | - moving the mouse over the right margin of a numbered paragraph reveals a link
104 | to the LaTeX source for that paragraph;
105 |
106 | - moving the mouse over the left margin of an itemdecl or table row reveals a link
107 | that selects it.
108 |
--------------------------------------------------------------------------------
/RawDocument.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE
2 | OverloadedStrings,
3 | RecordWildCards,
4 | ViewPatterns,
5 | LambdaCase,
6 | TupleSections,
7 | NamedFieldPuns,
8 | FlexibleInstances,
9 | FlexibleContexts,
10 | RankNTypes,
11 | MultiParamTypeClasses,
12 | FunctionalDependencies,
13 | UndecidableInstances,
14 | RecursiveDo #-}
15 |
16 | module RawDocument
17 | ( RawElement(..), RawTexPara(..), RawFootnote(..), RawParagraph(..), LinearSection(..), RawItem(..)
18 | , loadMacros, parseFile, loadXrefDelta, doParse) where
19 |
20 | import qualified LaTeXParser as Parser
21 | import qualified Data.Text as Text
22 | import Data.Text (Text, replace)
23 | import Document (Row(..), SourceLocation(..), RowSepKind(..), SectionKind(..), Cell(..), CellSpan(..), XrefDelta, Abbreviation, ColumnSpec(..), TextAlignment(..))
24 | import Data.Maybe (isJust, fromJust)
25 | import LaTeXParser (Macros(..), Signature(..), nullCmd, storeCmd, storeEnv, Environment(..), Command(..), codeEnv, Token(..), normalCmd, ParseResult(..))
26 | import Data.Text.IO (readFile)
27 | import Text.Regex (mkRegex)
28 | import qualified Data.Map as Map
29 | import Data.Map (Map)
30 | import Data.List (transpose, take, isPrefixOf)
31 | import Util ((.), (++), mapHead, textStripInfix, textSubRegex, splitOn)
32 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile)
33 | import System.IO.Unsafe (unsafePerformIO)
34 | import System.Process (readProcess)
35 | import Control.Arrow (first)
36 | import Data.Char (isSpace, isDigit)
37 | import LaTeXBase
38 |
39 | data RawItem = RawItem
40 | { rawItemLabel :: LaTeX
41 | , rawItemContent :: [RawTexPara] }
42 | deriving (Eq, Show)
43 |
44 | data RawElement
45 | = RawLatexElement LaTeXUnit
46 | | RawEnumerated String [RawItem]
47 | | RawCodeblock LaTeXUnit
48 | | RawExample [RawTexPara]
49 | | RawNote Text [RawTexPara]
50 | | RawItemdescr [RawTexPara]
51 | | RawBnf String LaTeX
52 | | RawTable
53 | { rawTableCaption :: LaTeX
54 | , rawColumnSpec :: [ColumnSpec]
55 | , rawTableAbbr :: Abbreviation
56 | , rawTableBody :: [Row [RawTexPara]] }
57 | | RawTabbing LaTeX
58 | | RawFormula { rawFormulaAbbr :: Abbreviation, rawFormulaContent :: LaTeX }
59 | | RawFigure { rawFigureName :: LaTeX, rawFigureAbbr :: Abbreviation, rawFigureSvg :: Text }
60 | deriving (Eq, Show)
61 |
62 | newtype RawTexPara = RawTexPara { rawTexParaElems :: [RawElement] }
63 | deriving (Eq, Show)
64 |
65 | newtype RawFootnote = RawFootnote [RawTexPara]
66 | deriving Show
67 |
68 | data RawParagraph = RawParagraph
69 | { paraNumbered :: Bool
70 | , rawParaInItemdescr :: Bool
71 | , rawParaElems :: [RawTexPara]
72 | , rawParaSourceLoc :: Maybe SourceLocation }
73 | deriving Show
74 |
75 | data LinearSection = LinearSection
76 | { lsectionAbbreviation :: Abbreviation
77 | , lsectionKind :: SectionKind
78 | , lsectionName :: LaTeX
79 | , lsectionParagraphs :: [RawParagraph]
80 | , lsectionFootnotes :: [RawFootnote] }
81 | deriving Show
82 |
83 | instance AllUnits RawElement where
84 | allUnits (RawLatexElement x) = allUnits x
85 | allUnits (RawBnf _ x) = allUnits x
86 | allUnits (RawTabbing x) = allUnits x
87 | allUnits (RawNote _ x) = allUnits x
88 | allUnits (RawExample x) = allUnits x
89 | allUnits (RawCodeblock x) = allUnits x
90 | allUnits (RawItemdescr x) = allUnits x
91 | allUnits (RawEnumerated _ x) = allUnits x
92 | allUnits (RawFormula _ x) = allUnits x
93 | allUnits RawFigure{} = []
94 | allUnits RawTable{..} = allUnits rawTableCaption ++ concatMap (allUnits . concat . map content) (map cells rawTableBody)
95 |
96 | instance AllUnits RawTexPara where
97 | allUnits = allUnits . rawTexParaElems
98 |
99 | instance AllUnits RawItem where
100 | allUnits RawItem{..} = allUnits rawItemLabel ++ allUnits rawItemContent
101 |
102 | instance AllUnits LinearSection where
103 | allUnits LinearSection{..} = allUnits lsectionName ++ allUnits lsectionParagraphs ++ allUnits lsectionFootnotes
104 |
105 | instance AllUnits RawParagraph where
106 | allUnits RawParagraph{..} = allUnits rawParaElems
107 |
108 | instance AllUnits RawFootnote where
109 | allUnits (RawFootnote x) = allUnits x
110 |
111 | bnfEnvs :: [String]
112 | bnfEnvs = ["bnf", "ncbnf", "bnfkeywordtab", "simplebnf", "ncsimplebnf", "ncrebnf"]
113 |
114 | isBnf :: LaTeXUnit -> Bool
115 | isBnf (TeXEnv s _ _)
116 | | s `elem` bnfEnvs = True
117 | isBnf _ = False
118 |
119 | isTable, isTabbing, isFigure :: LaTeXUnit -> Bool
120 | isTable x = isTeXEnv "floattablebasex" x || isTeXEnv "htmlTable" x
121 | isTabbing = isTeXEnv "tabbing"
122 | isFigure = isTeXEnv "importgraphic"
123 |
124 | isEnumerate :: LaTeXUnit -> Maybe String
125 | isEnumerate (TeXEnv s _ _)
126 | | s `elem` ["enumerate", "itemize", "description", "thebibliography"] = Just s
127 | isEnumerate _ = Nothing
128 |
129 | isParaEnd :: LaTeXUnit -> Bool
130 | isParaEnd (TeXEnv "itemdecl" _ _) = True
131 | isParaEnd (TeXEnv "indexeditemdecl" _ _) = True
132 | isParaEnd (TeXEnv "itemdescr" _ _) = True
133 | isParaEnd (TeXComm "pnum" _ _) = True
134 | isParaEnd x = isParasEnd x
135 |
136 | isParasEnd :: LaTeXUnit -> Bool
137 | isParasEnd (TeXComm "definition" _ _) = True
138 | isParasEnd (TeXComm "rSec" _ _) = True
139 | isParasEnd (TeXComm "infannex" _ _) = True
140 | isParasEnd (TeXComm "normannex" _ _) = True
141 | isParasEnd _ = False
142 |
143 | isJunk :: LaTeXUnit -> Bool
144 | isJunk (TeXRaw x) = all isSpace (Text.unpack x)
145 | isJunk (TeXComm "index" _ _) = True
146 | isJunk (TeXComm "setlength" _ _) = True
147 | isJunk _ = False
148 |
149 | isItem :: LaTeXUnit -> Maybe LaTeX
150 | isItem (TeXComm "item" _ []) = Just []
151 | isItem (TeXComm "item" _ [(_, label)]) = Just label
152 | isItem (TeXComm "bibitem" _ [(_, [TeXRaw label])]) = Just [TeXRaw $ "bib:" ++ label]
153 | isItem _ = Nothing
154 |
155 | parseItems :: LaTeX -> [RawItem]
156 | parseItems [] = []
157 | parseItems (x : rest)
158 | | isJunk x = mapHead (mapItemContent (mapHead addJunk)) (parseItems rest)
159 | | Just label <- isItem x, (item, rest') <- break (isJust . isItem) rest =
160 | RawItem label (parsePara item) : parseItems rest'
161 | where
162 | mapItemContent f (RawItem l c) = RawItem l (f c)
163 | addJunk :: RawTexPara -> RawTexPara
164 | addJunk (RawTexPara z) = RawTexPara (dropWhile isOnlySpace $ RawLatexElement x : z)
165 | parseItems _ = error "need items or nothing"
166 |
167 | doParse :: Macros -> Text -> (LaTeX, Macros)
168 | doParse m t = (x, y)
169 | where
170 | (x, y, []) = Parser.parseString ctx (Text.unpack t)
171 | ctx = initialContext{Parser.macros=m}
172 |
173 | nullCmds :: [(Int, String)]
174 | nullCmds =
175 | [ (0, "clearpage kill rmfamily hfill vfill nocorr small larger noindent itcorrwidth itletterwidth global")
176 | , (1, "enlargethispage lstset newsavebox vspace input")
177 | , (2, "glossary settowidth addtolength")
178 | , (3, "definecolor")
179 | ]
180 |
181 | storeCmds :: [(Int, String)]
182 | storeCmds =
183 | [ (0, "today def makeatletter bottomline makeatother Sec bmod mod long prime " ++
184 | "chapter section paragraph subparagraph fi otextup linebreak newpage log " ++
185 | "textup edef x BnfIndent par leq " ++
186 | "leftmargini BnfInc BnfRest protect caret sum " ++
187 | "xspace onelineskip textlangle textrangle tilde raggedright = " ++
188 | "space copyright textregistered textbackslash hsize br Gamma " ++
189 | "frenchspacing list leftmargin listparindent itemindent itshape relax " ++
190 | "nonfrenchspacing endlist upshape ttfamily baselineskip nobreak " ++
191 | "endfirsthead quad qquad cdot cdots dotsc bnfindentinc footnotemark ldots capsep max min " ++
192 | "continuedcaption hline endhead footnotesize le times dotsb rightarrow to equiv " ++
193 | "lfloor rfloor pi geq neq ge lceil rceil ell alpha bigl bigr mu lambda beta " ++
194 | "tabularnewline exp sigma big delta rho Pi nu infty displaystyle lim sin cos " ++
195 | "phi int theta zeta FlushAndPrintGrammar break backslash centering " ++
196 | "normalbaselineskip land lor mapsto normalfont textmu tablerefname figurerefname newline " ++
197 | "obeyspaces bnfindent vdots tabcolsep columnbreak emergencystretch commentellip " ++
198 | "gamma widowpenalties sffamily parskip left right `")
199 | , (1, "hspace footnote textit textrm textnormal texttt textbf ensuremath ref ref* mbox bibitem mathop " ++
200 | "terminal literalterminal noncxxterminal textsl textsc textsf text term overline " ++
201 | "tcode noncxxtcode literaltcode footnotetext microtypesetup cline mathtt mathit mathrm mathsf " ++
202 | "label newlength uline value newcounter mathscr c uppercase iref operatorname " ++
203 | "phantom hphantom sqrt ln emph minipage url indexescape changeglossnumformat textasciitilde " ++
204 | "removedxref deprxref textsuperscript rlap mathrel mathbin nopnumdiffref color ucode uname")
205 | , (2, "pnum definition addtocounter setcounter frac " ++
206 | "binom infannex normannex parbox link weblink indexedspan movedxref movedxrefs " ++
207 | "equal setlength textcolor")
208 | , (3, "multicolumn discretionary movedxrefii ifthenelse PackageError NewEnviron")
209 | , (4, "movedxrefiii indexlink hiddenindexlink")
210 | ]
211 |
212 | initialCmds :: Map Text Command
213 | initialCmds = Map.fromList $
214 | [ storeCmd "item" (Signature 0 (Just []))
215 | , storeCmd "caption" (Signature 2 (Just []))
216 | , storeCmd "index" (Signature 2 (Just []))
217 | , storeCmd "hyperref" (Signature 2 (Just []))
218 | , nullCmd "makebox" (Signature 2 (Just []))
219 | , storeCmd "\n" (Signature 0 Nothing)
220 | , storeCmd "nolinebreak" (Signature 0 (Just []))
221 | , storeCmd "textsmaller" (Signature 2 (Just []))
222 | , nullCmd "gramSec" (Signature 2 (Just []))
223 | , ("kern", normalCmd $ Command $ \_ctx _ws -> ParseResult [] mempty . snd . parseDimen)
224 | ]
225 | ++ [storeCmd c (Signature a Nothing) | (a, l) <- storeCmds, c <- words l]
226 | ++ [nullCmd (Text.pack c) (Signature a Nothing) | (a, l) <- nullCmds, c <- words l]
227 |
228 | parseDimen :: [Token] -> ([Token], [Token])
229 | parseDimen toks
230 | | t@(Token txt) : more <- toks, txt `elem` [".", "pt", "-", "em"] || all isDigit txt = first (t :) (parseDimen more)
231 | | otherwise = ([], toks)
232 |
233 | initialEnvs :: Map Text Environment
234 | initialEnvs = Map.fromList $
235 | [ (storeEnv e (Signature 0 Nothing))
236 | | e <- bnfEnvs ++
237 | words "indented description itemize center tabbing defnote enumerate eqnarray* equation* itemdescr footnote matrix"
238 | ] ++
239 | [ storeEnv "example" (Signature 1 (Just []))
240 | , storeEnv "tailexample" (Signature 1 (Just []))
241 | , storeEnv "note" (Signature 0 (Just [Token "Note"]))
242 | , storeEnv "tailnote" (Signature 0 (Just [Token "Note"]))
243 | , storeEnv "table" (Signature 1 Nothing)
244 | , storeEnv "tabular" (Signature 1 Nothing)
245 | , storeEnv "longtable" (Signature 1 Nothing)
246 | , storeEnv "importgraphic" (Signature 3 Nothing)
247 | , storeEnv "formula" (Signature 1 Nothing)
248 | , storeEnv "minipage" (Signature 1 Nothing)
249 | , storeEnv "thebibliography" (Signature 1 Nothing)
250 | , codeEnv "indexeditemdecl" (Signature 1 Nothing)
251 | , codeEnv "itemdecl" (Signature 0 Nothing)
252 | , codeEnv "indexedcodeblock" (Signature 1 Nothing)
253 | , codeEnv "codeblock" (Signature 0 Nothing)
254 | , codeEnv "codeblockdigitsep" (Signature 0 Nothing)
255 | , codeEnv "codeblocktu" (Signature 1 Nothing)
256 | , storeEnv "array" (Signature 1 Nothing)
257 | , storeEnv "floattablebasex" (Signature 4 Nothing)
258 | , storeEnv "htmlTable" (Signature 3 Nothing)
259 | ]
260 |
261 | initialMacros :: Parser.Macros
262 | initialMacros = Parser.defaultMacros ++ mempty{Parser.commands=initialCmds, Parser.environments=initialEnvs}
263 |
264 | initialContext :: Parser.Context
265 | initialContext = Parser.defaultContext{Parser.macros=initialMacros}
266 |
267 | parseFile :: Macros -> Text -> ([LinearSection], Macros)
268 | parseFile macros =
269 | first (parseSections 0)
270 | . doParse macros
271 | . replace "$$" "$"
272 | . replace "\\hspace*" "\\hspace"
273 | . replace "``" "“"
274 | . textSubRegex (mkRegex "(\\grammarterm\\{[A-Za-z-]*\\})(\\{s\\}|s)") "\\1\\textit{s}"
275 | -- Mixing italic and upright looks okay in the PDF, but looks bad in browsers,
276 | -- and our linkification makes clear enough that the plural 's' is not part
277 | -- of the grammarterm.
278 |
279 | loadFigure :: Text -> Text
280 | loadFigure f = unsafePerformIO $ do
281 | dot <- readFile $ "assets/" ++ p
282 | svg <- readProcess "dot" ["-Tsvg",
283 | "-Gbgcolor=transparent",
284 | "-Gsize=8",
285 | "-Nfontsize=10",
286 | "-Gfontsize=10",
287 | "-Efontsize=10",
288 | "-Nfontname=Noto Serif",
289 | "-Efontname=Noto Serif",
290 | "-Gfontname=Noto Serif"] (Text.unpack $ Text.replace "Courier New" "Noto Sans Mono" $ Text.replace ", fontsize=24" "" dot)
291 | return $ rmIds $ snd $ Text.breakOn "