├── .gitignore ├── 14882.css ├── CxxParser.hs ├── Document.hs ├── LICENSE ├── LaTeXBase.hs ├── LaTeXParser.hs ├── Load14882.hs ├── MathJax.hs ├── Pages.hs ├── README ├── RawDocument.hs ├── Render.hs ├── SectionPages.hs ├── Sentences.hs ├── Setup.hs ├── Toc.hs ├── Util.hs ├── colored.css ├── cxxdraft-htmlgen.cabal ├── expanded.css ├── fulltoc.css ├── genhtml.hs ├── icon.png ├── macros.tex ├── mathjax-batch ├── normative-only.css ├── stack.yaml └── toc.css /.gitignore: -------------------------------------------------------------------------------- 1 | 14882 2 | dist 3 | tags 4 | node_modules 5 | .stack-work/ 6 | -------------------------------------------------------------------------------- /14882.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Serif'; 3 | hyphens: auto; 4 | line-height: 1.5; 5 | margin-left: 20mm; 6 | margin-right: 16mm; 7 | margin-top: 12mm; 8 | margin-bottom: 12mm; 9 | font-size: 10pt; 10 | } 11 | 12 | div { 13 | background: inherit; 14 | } 15 | 16 | div.wrapper { 17 | max-width: 20cm; 18 | margin: auto; 19 | } 20 | 21 | div.texpara { 22 | margin-top: 3pt; 23 | margin-bottom: 3pt; 24 | } 25 | 26 | table div.texpara { 27 | margin-top: 0; 28 | margin-bottom: 0; 29 | } 30 | 31 | table.enumerate div.texpara { 32 | margin-top: 3pt; 33 | margin-bottom: 3pt; 34 | } 35 | 36 | ul { 37 | list-style-type: none; 38 | padding-left: 9mm; 39 | margin-top: 0; 40 | margin-bottom: 0; 41 | } 42 | 43 | ol { 44 | margin-top: 0; 45 | margin-bottom: 0; 46 | } 47 | 48 | a { text-decoration: none; } 49 | 50 | a.hidden_link { 51 | text-decoration: none; 52 | color: inherit; 53 | } 54 | 55 | li { 56 | margin-top: 3pt; 57 | margin-bottom: 3pt; 58 | } 59 | 60 | h1 { 61 | line-height: 1; 62 | margin-top: 10pt; 63 | margin-bottom: 10pt; 64 | } 65 | 66 | h2 { 67 | line-height: 1; 68 | font-size: 14pt; 69 | margin-top: 10pt; 70 | margin-bottom: 10pt; 71 | } 72 | 73 | h2::after { 74 | content: ""; 75 | clear: both; 76 | display: table; 77 | } 78 | 79 | h3 { 80 | line-height: 1; 81 | margin-top: 10pt; 82 | margin-bottom: 10pt; 83 | } 84 | 85 | h3::after { 86 | content: ""; 87 | clear: both; 88 | display: table; 89 | } 90 | 91 | h4 { 92 | line-height: 1; 93 | margin-top: 10pt; 94 | margin-bottom: 10pt; 95 | } 96 | 97 | h4::after { 98 | content: ""; 99 | clear: both; 100 | display: table; 101 | } 102 | 103 | ul > li:before { 104 | content: "\2014"; 105 | position: absolute; 106 | margin-left: -1.5em; 107 | } 108 | 109 | .shy:before { 110 | content: "\00ad"; 111 | /* This is U+00AD SOFT HYPHEN, same as ­, but we put it in :before 112 | to stop it from being included when the text is copied to the clipboard 113 | with Firefox, which is especially annoying when copying to a terminal, 114 | where the hyphen characters will show up. */ 115 | } 116 | 117 | :target { background-color: #C9FBC9; } 118 | :target .codeblock { background-color: #C9FBC9; } 119 | :target ul { background-color: #C9FBC9; } 120 | 121 | .abbr_ref { float: right; } 122 | 123 | .folded_abbr_ref { float: right; } 124 | :target .folded_abbr_ref { display: none; } 125 | 126 | :target .unfolded_abbr_ref { float: right; display: inherit; } 127 | .unfolded_abbr_ref { display: none; } 128 | 129 | .secnum { display: inline-block; min-width: 35pt; } 130 | .annexnum { display: block; } 131 | 132 | div.sourceLinkParent { 133 | float: right; 134 | } 135 | 136 | a.sourceLink { 137 | position: absolute; 138 | opacity: 0; 139 | margin-left: 10pt; 140 | } 141 | 142 | a.sourceLink:hover { 143 | opacity: 1; 144 | } 145 | 146 | a.itemDeclLink { 147 | position: absolute; 148 | font-size: 75%; 149 | text-align: right; 150 | width: 5em; 151 | opacity: 0; 152 | } 153 | a.itemDeclLink:hover { opacity: 1; } 154 | 155 | div.marginalizedparent { 156 | position: relative; 157 | text-align: left; 158 | left: -18mm; 159 | } 160 | 161 | a.marginalized { 162 | width: 15mm; 163 | position: absolute; 164 | font-size: 7pt; 165 | text-align: right; 166 | } 167 | 168 | a.enumerated_item_num { 169 | display: block; 170 | margin-top: 3pt; 171 | margin-bottom: 3pt; 172 | margin-right: 6pt; 173 | } 174 | 175 | div.para { 176 | margin-bottom: 6pt; 177 | margin-top: 6pt; 178 | text-align: justify; 179 | min-height: 1.2em; 180 | } 181 | 182 | div.section { text-align: justify; } 183 | div.sentence { display: inline; } 184 | 185 | a.index { 186 | position: relative; 187 | float: right; 188 | right: -1em; 189 | display: none; 190 | } 191 | 192 | a.index:before { 193 | position: absolute; 194 | content: "⟵"; 195 | background-color: #C9FBC9; 196 | } 197 | 198 | a.index:target { 199 | display: inline; 200 | } 201 | 202 | .indexitems { 203 | margin-left: 2em; 204 | text-indent: -2em; 205 | } 206 | 207 | div.itemdescr { 208 | margin-left: 12mm; 209 | } 210 | 211 | .bnf { 212 | font-family: 'Noto Sans'; 213 | font-size: 10pt; 214 | font-style: italic; 215 | margin-left: 25pt; 216 | margin-right: -15mm; 217 | margin-top: 0.5em; 218 | margin-bottom: 0.5em; 219 | text-indent: -3em; 220 | padding-left: 3em; 221 | line-height: 1.5; 222 | } 223 | 224 | div.bnf span.texttt { font-family: 'Noto Sans Mono'; font-style: normal; } 225 | 226 | .rebnf { 227 | font-family: 'Noto Serif'; 228 | font-style: italic; 229 | margin-top: 0.5em; 230 | margin-bottom: 0.5em; 231 | margin-left: 30pt; 232 | text-indent: -3em; 233 | padding-left: 3em; 234 | line-height: 1.5; 235 | } 236 | 237 | .simplebnf { 238 | font-family: 'Noto Serif'; 239 | font-style: italic; 240 | font-size: 10pt; 241 | margin-top: 0.5em; 242 | margin-bottom: 0.5em; 243 | margin-left: 30pt; 244 | line-height: 1.5; 245 | } 246 | 247 | span.textnormal { 248 | font-style: normal; 249 | font-family: 'Noto Serif'; 250 | font-size: 10pt; 251 | white-space: normal; 252 | } 253 | 254 | .bnf span.textnormal { 255 | font-style: normal; 256 | font-family: 'Noto Serif'; 257 | font-size: 10pt; 258 | white-space: normal; 259 | } 260 | 261 | p { 262 | margin-top: 4pt; 263 | margin-bottom: 4pt; 264 | } 265 | 266 | span.rlap { 267 | display: inline-block; 268 | width: 0px; 269 | text-indent: 0; 270 | } 271 | 272 | span.terminal { 273 | font-family: 'Noto Sans Mono'; 274 | font-style: normal; 275 | font-size: 9pt; 276 | white-space: pre-wrap; 277 | } 278 | 279 | span.noncxxterminal { 280 | font-family: 'Noto Sans Mono'; 281 | font-style: normal; 282 | font-size: 9pt; 283 | } 284 | 285 | span.term { font-style: italic; } 286 | span.tcode { font-family: 'Noto Sans Mono'; font-style: normal; } 287 | span.textbf { font-weight: bold; } 288 | span.textsf { font-family: 'Noto Sans'; font-size: 10pt; } 289 | div.footnote span.textsf { font-family: 'Noto Sans'; font-size: 8pt; } 290 | .bnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; } 291 | .simplebnf span.textsf { font-family: 'Noto Sans'; font-size: 10pt; } 292 | .example span.textsf { font-family: 'Noto Sans'; font-size: 10pt; } 293 | span.textsc { font-variant: small-caps; } 294 | span.nontermdef { font-style: italic; font-family: 'Noto Sans'; font-size: 10pt; } 295 | span.emph { font-style: italic; } 296 | span.techterm { font-style: italic; } 297 | span.mathit { font-style: italic; } 298 | span.mathsf { font-family: 'Noto Sans'; } 299 | span.mathrm { font-family: 'Noto Serif'; font-style: normal; } 300 | span.textrm { font-family: 'Noto Serif'; font-size: 10pt; } 301 | span.textsl { font-style: italic; } 302 | span.mathtt { font-family: 'Noto Sans Mono'; font-style: normal; } 303 | span.mbox { font-family: 'Noto Serif'; font-style: normal; } 304 | span.ungap { display: inline-block; width: 2pt; } 305 | span.texttt { font-family: 'Noto Sans Mono'; } 306 | span.textit { font-style: italic; } 307 | div.footnote span.texttt { font-family: 'Noto Sans Mono'; } 308 | span.tcode_in_codeblock { font-family: 'Noto Sans Mono'; font-style: normal; font-size: 9pt; } 309 | 310 | span.phantom { color: white; } 311 | /* Unfortunately, this way the text is still selectable. Another 312 | option is display:none, but then we lose the nice layout. 313 | Todo: find proper solution. */ 314 | 315 | span.math { 316 | font-style: normal; 317 | font-family: 'Noto Serif'; 318 | font-size: 10pt; 319 | } 320 | 321 | span.mathblock { 322 | display: block; 323 | margin-left: auto; 324 | margin-right: auto; 325 | margin-top: 1.2em; 326 | margin-bottom: 1.2em; 327 | text-align: center; 328 | } 329 | 330 | span.mathalpha { 331 | font-style: italic; 332 | } 333 | 334 | span.synopsis { 335 | font-weight: bold; 336 | margin-top: 0.5em; 337 | display: block; 338 | } 339 | 340 | span.definition { 341 | font-weight: bold; 342 | display: block; 343 | } 344 | 345 | .codeblock { 346 | font-family: 'Noto Sans Mono'; 347 | margin-left: 1.2em; 348 | line-height: 1.5; 349 | font-size: 9pt; 350 | white-space: pre; 351 | display: block; 352 | margin-top: 3pt; 353 | margin-bottom: 3pt; 354 | overflow: auto; 355 | margin-right: -15mm; 356 | } 357 | 358 | table .codeblock { margin-right: 0; } 359 | 360 | .outputblock { 361 | margin-left: 1.2em; 362 | line-height: 1.5; 363 | font-family: 'Noto Sans Mono'; 364 | font-size: 9pt; 365 | } 366 | 367 | code { 368 | font-family: 'Noto Sans Mono'; 369 | font-style: normal; 370 | } 371 | 372 | div.itemdecl { 373 | margin-top: 2ex; 374 | } 375 | 376 | code.itemdeclcode { 377 | white-space: pre; 378 | font-family: 'Noto Sans Mono'; 379 | font-size: 9pt; 380 | display: block; 381 | overflow: auto; 382 | margin-right: -15mm; 383 | } 384 | 385 | .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 10pt; } 386 | .footnote .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 8pt; } 387 | .example .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; } 388 | .note .comment { color: green; font-style: italic; font-family: 'Noto Serif'; font-size: 9pt; } 389 | 390 | span.keyword { color: #00607c; font-style: normal; } 391 | span.parenthesis { color: #af1915; } 392 | span.curlybracket { color: #af1915; } 393 | span.squarebracket { color: #af1915; } 394 | span.literal { color: #9F6807; } 395 | span.literalterminal { color: #9F6807; font-family: 'Noto Sans Mono'; font-style: normal; } 396 | span.operator { color: #570057; } 397 | span.anglebracket { color: #570057; } 398 | span.preprocessordirective { color: #6F4E37; } 399 | 400 | span.textsuperscript { 401 | vertical-align: super; 402 | font-size: smaller; 403 | line-height: 0; 404 | } 405 | 406 | .footnoteref { 407 | vertical-align: super; 408 | font-size: smaller; 409 | line-height: 0; 410 | } 411 | 412 | .footnote { 413 | font-size: 8pt; 414 | } 415 | 416 | .footnote .math { 417 | font-size: 8pt; 418 | } 419 | 420 | .footnotenum { 421 | display: inline-block; 422 | text-align: right; 423 | margin-right: 1mm; 424 | width: 4ch; 425 | } 426 | 427 | .footnoteBacklink { 428 | display: none; 429 | } 430 | 431 | :target .footnoteBacklink { 432 | display: inline-block; 433 | text-align: right; 434 | margin-right: 1mm; 435 | width: 4ch; 436 | } 437 | 438 | :target .footnotenum { 439 | display: none; 440 | } 441 | 442 | .footnoteSeparator { 443 | background: black; 444 | margin-top: 5mm; 445 | height: 1px; 446 | width: 6cm; 447 | } 448 | 449 | div.minipage { 450 | display: inline-block; 451 | margin-right: 3em; 452 | } 453 | 454 | div.numberedTable { 455 | text-align: center; 456 | margin-left: 1em; 457 | margin-right: 1em; 458 | margin-bottom: 12pt; 459 | margin-top: 8pt; 460 | } 461 | 462 | div.figure { 463 | text-align: center; 464 | margin-left: 2em; 465 | margin-right: 2em; 466 | margin-bottom: 12pt; 467 | margin-top: 3pt; 468 | } 469 | 470 | table { 471 | border: 1px solid black; 472 | border-collapse: collapse; 473 | margin-left: auto; 474 | margin-right: auto; 475 | margin-top: 7pt; 476 | text-align: left; 477 | } 478 | 479 | td, th { 480 | padding-left: 8pt; 481 | padding-right: 8pt; 482 | vertical-align: top; 483 | } 484 | 485 | td.empty { 486 | padding: 0px; 487 | padding-left: 1px; 488 | } 489 | 490 | td.left { 491 | text-align: left; 492 | } 493 | 494 | td.hidden { 495 | padding: 0; 496 | width: 0; 497 | } 498 | 499 | td.right { 500 | text-align: right; 501 | } 502 | 503 | td.center { 504 | text-align: center; 505 | } 506 | 507 | td.justify { 508 | text-align: justify; 509 | } 510 | 511 | td.border { 512 | border-left: 1px solid black; 513 | } 514 | 515 | tr.rowsep, td.cline { 516 | border-top: 1px solid black; 517 | } 518 | 519 | tr.capsep { 520 | border-top: 3px solid black; 521 | border-top-style: double; 522 | } 523 | 524 | th { 525 | border-bottom: 1px solid black; 526 | } 527 | 528 | span.centry { 529 | font-weight: bold; 530 | } 531 | 532 | div.table { 533 | display: block; 534 | margin-left: auto; 535 | margin-right: auto; 536 | text-align: center; 537 | width: 90%; 538 | } 539 | 540 | span.indented { 541 | background: inherit; 542 | display: block; 543 | margin-left: 2em; 544 | margin-bottom: 1em; 545 | margin-top: 1em; 546 | } 547 | 548 | span.uppercase { 549 | text-transform: uppercase; 550 | } 551 | 552 | span.ucode { 553 | font-variant: small-caps; 554 | text-transform: uppercase; 555 | font-size: 90%; 556 | } 557 | 558 | span.uname { 559 | font-variant: small-caps; 560 | text-transform: uppercase; 561 | font-size: 90%; 562 | } 563 | 564 | table.enumerate { 565 | border: 0; 566 | margin: 0; 567 | } 568 | 569 | table.enumerate td { 570 | padding: 0; 571 | } 572 | 573 | table.enumerate td:first-child { 574 | width: 1cm; 575 | text-align: right; 576 | } 577 | 578 | @media (prefers-color-scheme: dark) { 579 | body { 580 | background-color: #171717; 581 | color: #d0d0d0; 582 | } 583 | 584 | span.mjx-mstyle { color: #d0d0d0 !important } 585 | 586 | a:link { color: #64adff; } 587 | a:visited { color: #a36ae6; } 588 | 589 | a.hidden_link { 590 | text-decoration: none; 591 | color: inherit; 592 | } 593 | 594 | span.phantom { color: #171717; } 595 | 596 | a.index:before { color: #d0d0d0; background-color: #4b6353; } 597 | 598 | .comment { color: #35da00; } 599 | .footnote .comment { color: #35da00; } 600 | .example .comment { color: #35da00; } 601 | .note .comment { color: #35da00; } 602 | 603 | span.keyword { color: #12cabe; } 604 | span.parenthesis { color: #ff1515; } 605 | span.curlybracket { color: #ff1515; } 606 | span.squarebracket { color: #ff1515; } 607 | span.literal { color: #dfa837; } 608 | span.literalterminal { color: #dfa837; } 609 | span.operator { color: #baa6b9; } 610 | span.anglebracket { color: #baa6b9; } 611 | span.preprocessordirective { color: #b27c58; } 612 | 613 | table { border-color: #d0d0d0; } 614 | td.border { border-color: #d0d0d0; } 615 | td.border { border-left-color: #d0d0d0; } 616 | tr.rowsep, td.cline { border-top-color: #d0d0d0; } 617 | tr.capsep { border-top-color: #d0d0d0; } 618 | th { border-bottom-color: #d0d0d0; } 619 | 620 | .footnoteSeparator { background-color: #d0d0d0; } 621 | 622 | text { fill: #d0d0d0; } 623 | path { stroke: #d0d0d0; } 624 | polygon { stroke: #d0d0d0; fill: #d0d0d0; } 625 | ellipse { stroke: #d0d0d0; } 626 | 627 | :target { background-color: #4b6345; color: #ffffff; } 628 | :target .codeblock { background-color: #4b6345; } 629 | :target ul { background-color: #4b6345; } 630 | :target a:link { color: #9fcdff; } 631 | :target a:visited { color: #d39aff; } 632 | :target a.hidden_link { text-decoration: none; color: inherit; } 633 | :target span.keyword { color: #32eade; } 634 | :target span.parenthesis { color: #ff4060; font-weight: bold; } 635 | :target span.curlybracket { color: #ff4060; font-weight: bold; } 636 | :target span.squarebracket { color: #ff4060; font-weight: bold; } 637 | :target span.literal { color: #f0d060; } 638 | :target span.literalterminal { color: #f0d060; } 639 | :target span.operator { color: #dac6d9; } 640 | :target span.anglebracket { color: #dac6d9; } 641 | :target span.preprocessordirective { color: #e0968f; } 642 | :target .comment { color: #55ff00; } 643 | :target .footnote .comment { color: #55ff00; } 644 | :target .example .comment { color: #55ff00; } 645 | :target .note .comment { color: #55ff00; } 646 | } 647 | -------------------------------------------------------------------------------- /CxxParser.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE 3 | OverloadedStrings, 4 | RecordWildCards, 5 | TupleSections, 6 | ViewPatterns, 7 | LambdaCase, 8 | TypeSynonymInstances, 9 | FlexibleInstances #-} 10 | 11 | module CxxParser (parseLiteral, parseComment, parseCppDirective) where 12 | 13 | import LaTeXBase (LaTeX, LaTeXUnit(..), ArgKind(..), concatRaws, 14 | texStripPrefix, texStripAnyPrefix, texStripInfix, texSpan, unconsRaw) 15 | import qualified Data.Text as Text 16 | import Data.Char (isAlpha, isSpace, isAlphaNum, isDigit) 17 | import Control.Arrow (first) 18 | import Prelude hiding ((.), (++)) 19 | import Util ((.), (++), Text) 20 | 21 | texStripHash :: LaTeX -> Maybe LaTeX 22 | texStripHash x 23 | | Just x' <- texStripPrefix "#" x = Just x' 24 | | TeXComm "#" _ [] : x' <- x = Just x' 25 | | otherwise = Nothing 26 | 27 | cppDirectives :: [Text] 28 | cppDirectives = Text.words "include define elifndef elifdef ifndef endif ifdef pragma error undef line elif warning else if embed" 29 | 30 | spanLiteralChars :: String -> Maybe (String, String {- rest without the closing ' -}) 31 | spanLiteralChars [] = Nothing 32 | spanLiteralChars ('\\' : '\'' : rest) = first ("\\'"++) . spanLiteralChars rest 33 | spanLiteralChars ('\'' : x) = Just ([], x) 34 | spanLiteralChars (c : rest) = first (c :) . spanLiteralChars rest 35 | 36 | parseLiteralChars :: LaTeX -> Maybe (LaTeX, LaTeX) 37 | parseLiteralChars [] = Nothing 38 | parseLiteralChars (TeXRaw s : rest) = case spanLiteralChars (Text.unpack s) of 39 | Nothing -> Nothing 40 | Just (_, []) -> first (TeXRaw s :) . (parseLiteralChars rest) 41 | Just (x, more) -> Just ([TeXRaw (Text.pack x)], TeXRaw (Text.pack more) : rest) 42 | parseLiteralChars (x : rest) = first (x :) . (parseLiteralChars rest) 43 | 44 | parseCharLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -}) 45 | parseCharLiteral x 46 | | Just (pre, x') <- texStripAnyPrefix ["'", "u'", "L'", "U'", "u8'"] x 47 | , Just (before, x'') <- parseLiteralChars x' 48 | , (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x'' 49 | = Just ([TeXRaw pre] ++ before ++ [TeXRaw $ "'" ++ suffix], x''') 50 | | otherwise = Nothing 51 | 52 | parseCppDirective :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -}) 53 | parseCppDirective x 54 | | Just x'' <- texStripHash x 55 | , (spaces, x''') <- texSpan isSpace x'' 56 | , Just (directive, x'''') <- texStripAnyPrefix cppDirectives x''' 57 | = Just ([TeXRaw ("#" ++ spaces ++ directive)], x'''') 58 | | otherwise = Nothing 59 | 60 | parseSingleLineComment :: LaTeX -> Maybe (LaTeX {- comment -}, LaTeX {- subsequent lines -}) 61 | parseSingleLineComment x 62 | | Just x' <- texStripPrefix "//" x = Just $ case texStripInfix "\n" x' of 63 | Just (commentLine, moreLines) -> (TeXRaw "//" : commentLine, TeXRaw "\n" : moreLines) 64 | Nothing -> (x, []) 65 | | rlap@(TeXComm "rlap" _ [(FixArg, [TeXComm "textnormal" _ [(FixArg,[TeXComm "textit" _ [(FixArg,[TeXRaw "//"])]])]])]) : more <- x 66 | , Just (commentLine, moreLines) <- texStripInfix "\n" more 67 | = Just ([rlap, TeXComm "tcode" "" [(FixArg, commentLine)]], TeXRaw "\n" : moreLines) 68 | | TeXComm "comment" _ [(FixArg, c)] : x' <- x = Just (c, x') 69 | | otherwise = Nothing 70 | 71 | fromTeXRaw :: LaTeXUnit -> Text 72 | fromTeXRaw (TeXRaw x) = x 73 | fromTeXRaw x = error $ "fromTeXRaw (" ++ show x ++ ")" 74 | 75 | parseStringLiteral :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -}) 76 | parseStringLiteral x 77 | -- raw: 78 | | Just (pre, x') <- texStripAnyPrefix ["R\"", "u8R\"", "uR\"", "UR\"", "LR\""] x 79 | , Just (delim, x'') <- texStripInfix "(" x' 80 | , Just (body, x''') <- texStripInfix (")" ++ Text.concat (map fromTeXRaw delim) ++ "\"") (concatRaws $ f x'') 81 | , (suffix, x'''') <- texSpan (\c -> isAlphaNum c || c == '_') x''' 82 | = Just ([TeXRaw pre] ++ delim ++ [TeXRaw "("] ++ body ++ [TeXRaw ")"] ++ delim ++ [TeXRaw $ "\"" ++ suffix], x'''') 83 | -- normal: 84 | | Just (pre, x') <- texStripAnyPrefix ["\"", "u\"", "U\"", "L\"", "u8\""] x 85 | , Just (body, x'') <- parseBody x' 86 | , (suffix, x''') <- texSpan (\c -> isAlphaNum c || c == '_') x'' 87 | = Just ([TeXRaw pre] ++ body ++ [TeXRaw $ "\"" ++ suffix], x''') 88 | | otherwise = Nothing 89 | where 90 | f :: LaTeX -> LaTeX 91 | f [] = [] 92 | f (TeXComm "~" _ [] : more) = TeXRaw "~" : f more 93 | f (TeXBraces [] : more) = f more 94 | f (hd : t) = hd : f t 95 | parseBody :: LaTeX -> Maybe (LaTeX, LaTeX {- rest -}) 96 | parseBody [] = Nothing 97 | parseBody (TeXComm "textbackslash" _ [] : more) = parseBody $ concatRaws $ TeXRaw "\\" : more 98 | parseBody (TeXRaw (Text.unpack -> raw) : more) 99 | | '\\':'"':t <- raw = first (TeXRaw "\\\"" :) . parseBody (TeXRaw (Text.pack t) : more) 100 | | "\"" <- raw = Just ([], more) 101 | | '"':t <- raw = Just ([], TeXRaw (Text.pack t) : more) 102 | | raw == "" = parseBody more 103 | | hd:t <- raw = first (TeXRaw (Text.pack [hd]) :) . parseBody (TeXRaw (Text.pack t) : more) 104 | parseBody (TeXComm "%" ws [] : more) = first (TeXComm "%" ws [] :) . parseBody more 105 | parseBody (y : more) = first (y :) . parseBody more 106 | 107 | parseNumber :: LaTeX -> Maybe (Text, LaTeX) 108 | parseNumber x 109 | | (raw, more) <- unconsRaw x 110 | , Just (n, rest) <- (parseStart `parseSeq` (\t -> Just (parseMany parseSuffix t))) raw 111 | = Just (n, TeXRaw rest : more) 112 | | otherwise = Nothing 113 | where 114 | parseDigit = parseChar isDigit 115 | parseNonDigit = parseChar (\c -> isAlpha c || c == '_') 116 | parseStart :: Text -> Maybe (Text, Text) 117 | parseStart = parseFirstOf [parseChar (== '.') `parseSeq` parseDigit, parseDigit] 118 | parseSign :: Text -> Maybe (Text, Text) 119 | parseSign = parseChar (\c -> c == '-' || c == '+') 120 | parseSuffix :: Text -> Maybe (Text, Text) 121 | parseSuffix = parseFirstOf 122 | [ parseDigit 123 | , parseChar (== '\'') `parseSeq` parseDigit 124 | , parseChar (== '\'') `parseSeq` parseNonDigit 125 | , parseChar (`elem` ("eEpP"::String)) `parseSeq` parseSign 126 | , parseChar (== '.') 127 | , parseNonDigit 128 | ] 129 | 130 | parseLiteral :: LaTeX -> Maybe (LaTeX, LaTeX) 131 | parseLiteral x 132 | | Just (number, x') <- parseNumber x = Just ([TeXRaw number], x') 133 | | Just (lit, x') <- parseCharLiteral x = Just (lit, x') 134 | | Just (lit, x') <- parseStringLiteral x = Just (lit, x') 135 | | otherwise = Nothing 136 | 137 | parseComment :: LaTeX -> Maybe (LaTeX, LaTeX) 138 | parseComment x 139 | | Just x' <- texStripPrefix "/*" x, Just (comment, x'') <- texStripInfix "*/" x' 140 | = Just ([TeXRaw "/*"] ++ comment ++ [TeXRaw "*/"], x'') 141 | | Just x' <- texStripPrefix "/*" x 142 | = Just ([TeXRaw "/*"], x') 143 | | Just x' <- texStripPrefix "*/" x 144 | = Just ([TeXRaw "*/"], x') 145 | | Just (comment, x') <- parseSingleLineComment x 146 | = Just (comment, x') 147 | | otherwise = Nothing 148 | 149 | parseChar :: (Char -> Bool) -> Text -> Maybe (Text, Text) 150 | parseChar p t 151 | | t /= "", p (Text.head t) = Just (Text.take 1 t, Text.drop 1 t) 152 | | otherwise = Nothing 153 | 154 | parseSeq :: (Text -> Maybe (Text, Text)) -> (Text -> Maybe (Text, Text)) -> Text -> Maybe (Text, Text) 155 | parseSeq p q t 156 | | Just (x, t') <- p t 157 | , Just (y, t'') <- q t' = Just (x ++ y, t'') 158 | | otherwise = Nothing 159 | 160 | parseFirstOf :: [Text -> Maybe (a, Text)] -> Text -> Maybe (a, Text) 161 | parseFirstOf [] _ = Nothing 162 | parseFirstOf (p:pp) t 163 | | Just r <- p t = Just r 164 | | otherwise = parseFirstOf pp t 165 | 166 | parseMany :: (Text -> Maybe (Text, Text)) -> Text -> (Text, Text) 167 | parseMany p t = case p t of 168 | Nothing -> ("", t) 169 | Just (x, t') -> first (x++) (parseMany p t') 170 | -------------------------------------------------------------------------------- /Document.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-} 3 | 4 | module Document ( 5 | CellSpan(..), Cell(..), RowSepKind(..), Row(..), Element(..), Paragraph(..), 6 | Section(..), Chapter(..), Draft(..), Table(..), Figure(..), Item(..), Footnote(..), 7 | IndexPath, IndexComponent(..), IndexCategory, Index, IndexTree, IndexNode(..), 8 | ColumnSpec(..), TextAlignment(..), normative, Formula(..), chapterOfSection, 9 | IndexEntry(..), IndexKind(..), Note(..), Example(..), TeXPara(..), Sentence(..), 10 | texParaTex, texParaElems, XrefDelta, sectionByAbbr, isDefinitionSection, Abbreviation, 11 | indexKeyContent, indexCatName, Sections(sections), SectionKind(..), mergeIndices, SourceLocation(..), 12 | figures, tables, tableByAbbr, figureByAbbr, formulaByAbbr, elemTex, footnotes, allElements, 13 | LaTeX, makeAbbrMap, formulas) where 14 | 15 | import LaTeXBase (LaTeXUnit(..), LaTeX, MathType(Dollar)) 16 | import Data.Text (Text, replace) 17 | import qualified Data.Text as Text 18 | import qualified Data.List as List 19 | import Data.IntMap (IntMap) 20 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile) 21 | import Data.Map (Map) 22 | import qualified Data.Map as Map 23 | import Data.String (IsString) 24 | import Util ((.), (++), greekAlphabet) 25 | 26 | -- Document structure: 27 | 28 | data CellSpan = Normal | Multicolumn { width :: Int, colspec :: ColumnSpec } deriving (Eq, Show) 29 | data Cell a = Cell { cellSpan :: CellSpan, content :: a } deriving (Eq, Show) 30 | data RowSepKind = RowSep | CapSep | Clines [(Int, Int)] | NoSep deriving (Eq, Show) 31 | data Row a = Row { rowSep :: RowSepKind, cells :: [Cell a] } deriving (Eq, Show) 32 | 33 | data TextAlignment = AlignLeft | AlignRight | AlignCenter | Justify 34 | deriving Eq 35 | 36 | instance Show TextAlignment where 37 | show AlignLeft = "left" 38 | show AlignRight = "right" 39 | show AlignCenter = "center" 40 | show Justify = "justify" 41 | 42 | data ColumnSpec = ColumnSpec 43 | { columnAlignment :: TextAlignment 44 | , columnBorder :: Bool 45 | , columnWidth :: Maybe Text} 46 | deriving (Eq, Show) 47 | 48 | data Table = Table 49 | { tableNumber :: Int 50 | , tableCaption :: LaTeX 51 | , columnSpec :: [ColumnSpec] 52 | , tableAbbr :: Abbreviation 53 | , tableBody :: [Row [TeXPara]] 54 | , tableSection :: Section } 55 | 56 | instance Show Table where 57 | show _ = "" 58 | 59 | data Figure = Figure 60 | { figureNumber :: Int 61 | , figureName :: LaTeX 62 | , figureAbbr :: Abbreviation 63 | , figureSvg :: Text 64 | , figureSection :: Section } 65 | 66 | instance Show Figure where 67 | show _ = "
" 68 | 69 | data Formula = Formula 70 | { formulaNumber :: Int 71 | , formulaAbbr :: Abbreviation 72 | , formulaContent :: LaTeX 73 | , formulaSection :: Section } 74 | 75 | instance Show Formula where 76 | show _ = "" 77 | 78 | data Item = Item 79 | { itemNumber :: Maybe [String] 80 | , itemLabel :: Maybe LaTeX 81 | , itemInlineContent :: [Element] 82 | , itemBlockContent :: [TeXPara] } 83 | deriving Show 84 | 85 | itemElements :: Item -> [Element] 86 | itemElements Item{..} = itemInlineContent ++ allElements itemBlockContent 87 | 88 | data Footnote = Footnote 89 | { footnoteNumber :: Int 90 | , footnoteContent :: [TeXPara] } 91 | deriving Show 92 | 93 | data Note = Note { noteNumber :: Int, noteLabel :: Text, noteContent :: [TeXPara] } 94 | deriving Show 95 | 96 | data Example = Example { exampleNumber :: Int, exampleContent :: [TeXPara] } 97 | deriving Show 98 | 99 | data Sentence = Sentence { sentenceNumber :: Maybe Int, sentenceElems :: [Element] } 100 | deriving Show 101 | 102 | newtype TeXPara = TeXPara { sentences :: [Sentence] } 103 | deriving Show 104 | 105 | data Element 106 | = LatexElement LaTeXUnit 107 | | Enumerated { enumCmd :: String, enumItems :: [Item] } 108 | | Bnf String LaTeX 109 | | TableElement Table 110 | | Tabbing LaTeX 111 | | FigureElement Figure 112 | | FormulaElement Formula 113 | | Codeblock LaTeXUnit 114 | | Itemdescr [TeXPara] -- needed because there can be notes in itemdescr envs 115 | | NoteElement Note 116 | | ExampleElement Example 117 | | HtmlElement Text 118 | deriving Show 119 | 120 | normative :: Element -> Bool 121 | normative (NoteElement _) = False 122 | normative (ExampleElement _) = False 123 | normative (LatexElement (TeXComm "index" _ _)) = False 124 | normative _ = True 125 | 126 | data SectionKind 127 | = NormalSection { _level :: Int } 128 | | DefinitionSection { _level :: Int } 129 | | InformativeAnnexSection 130 | | NormativeAnnexSection 131 | deriving (Eq, Show) 132 | 133 | isDefinitionSection :: SectionKind -> Bool 134 | isDefinitionSection (DefinitionSection _) = True 135 | isDefinitionSection _ = False 136 | 137 | data Chapter = NormalChapter | InformativeAnnex | NormativeAnnex 138 | deriving (Eq, Show) 139 | 140 | data SourceLocation = SourceLocation 141 | { sourceFile :: FilePath 142 | , sourceLine :: Int } 143 | deriving (Eq, Show) 144 | 145 | data Paragraph = Paragraph 146 | { paraNumber :: Maybe Int 147 | , paraInItemdescr :: Bool 148 | , paraElems :: [TeXPara] 149 | , paraSection :: Section 150 | , paraSourceLoc :: Maybe SourceLocation 151 | , allParaElems :: [Element] } -- derivable but stored for efficiency 152 | deriving Show 153 | 154 | type Abbreviation = Text -- of a section, figure, or table 155 | 156 | data Section = Section 157 | { abbreviation :: Abbreviation 158 | , sectionName :: LaTeX 159 | , paragraphs :: [Paragraph] 160 | , sectionFootnotes :: [Footnote] 161 | , subsections :: [Section] 162 | , sectionNumber :: Int 163 | , chapter :: Chapter 164 | , parents :: [Section] -- if empty, this is the chapter 165 | , sectionKind :: SectionKind 166 | , secIndexEntries :: IntMap IndexEntry 167 | , secIndexEntriesByPath :: Map IndexPath [(Int, IndexEntry)] 168 | } 169 | deriving Show 170 | 171 | chapterOfSection :: Section -> Section 172 | chapterOfSection s@Section{..} 173 | | null parents = s 174 | | otherwise = last parents 175 | 176 | instance Eq Section where 177 | x == y = abbreviation x == abbreviation y 178 | 179 | type XrefDelta = [(Abbreviation, [LaTeX])] 180 | 181 | data StablyNamedItem 182 | = StablyNamedTable Table 183 | | StablyNamedSection Section 184 | | StablyNamedFigure Figure 185 | | StablyNamedFormula Formula 186 | 187 | data Draft = Draft 188 | { commitUrl :: Text 189 | , chapters :: [Section] 190 | , index :: Index 191 | , indexEntryMap :: IntMap IndexEntry 192 | , indexEntriesByPath :: Map IndexPath [(Int, IndexEntry)] 193 | , xrefDelta :: XrefDelta 194 | , abbrMap :: Abbreviation -> Maybe StablyNamedItem 195 | , labels :: Map Text Section } 196 | 197 | -- (The index entry maps are derivable but stored for efficiency.) 198 | 199 | stablyNamedItems :: Draft -> [(Abbreviation, StablyNamedItem)] 200 | stablyNamedItems d = 201 | [(abbreviation s, StablyNamedSection s) | s <- sections d] ++ 202 | [(tableAbbr t, StablyNamedTable t) | p <- allParagraphs d, TableElement t <- allParaElems p] ++ 203 | [(formulaAbbr f, StablyNamedFormula f) | p <- allParagraphs d, FormulaElement f <- allParaElems p] ++ 204 | [(figureAbbr f, StablyNamedFigure f) | p <- allParagraphs d, FigureElement f <- allParaElems p] 205 | 206 | makeAbbrMap :: Draft -> Abbreviation -> Maybe StablyNamedItem 207 | makeAbbrMap = flip Map.lookup . Map.fromList . stablyNamedItems 208 | 209 | -- Indices: 210 | 211 | data IndexComponent = IndexComponent { distinctIndexSortKey, indexKey :: LaTeX } 212 | deriving (Ord, Show) 213 | 214 | instance Eq IndexComponent where 215 | x == y = 216 | distinctIndexSortKey x == distinctIndexSortKey y && 217 | indexKeyContent (indexKey x) == indexKeyContent (indexKey y) 218 | 219 | type IndexPath = [IndexComponent] 220 | 221 | data IndexKind = See { _also :: Bool, _ref :: LaTeX } | IndexOpen | IndexClose | DefinitionIndexEntry 222 | deriving (Eq, Show) 223 | 224 | type IndexCategory = Text 225 | 226 | type Index = Map IndexCategory IndexTree 227 | 228 | instance Show IndexEntry where 229 | show IndexEntry{..} = 230 | "IndexEntry" 231 | ++ "{indexSection=" ++ show indexEntrySection 232 | ++ ",indexCategory=" ++ show indexCategory 233 | ++ ",indexPath=" ++ show indexPath 234 | ++ ",indexEntryKind=" ++ show indexEntryKind 235 | ++ "}" 236 | 237 | data IndexEntry = IndexEntry 238 | { indexEntrySection :: Abbreviation 239 | , indexEntryKind :: Maybe IndexKind 240 | , indexPath :: IndexPath 241 | , indexEntryNr :: Maybe Int 242 | , indexCategory :: Text 243 | } 244 | 245 | type IndexTree = Map IndexComponent IndexNode 246 | 247 | data IndexNode = IndexNode 248 | { indexEntries :: [IndexEntry] 249 | , indexSubnodes :: IndexTree } 250 | 251 | mergeIndices :: [Index] -> Index 252 | mergeIndices = Map.unionsWith (Map.unionWith mergeIndexNodes) 253 | 254 | mergeIndexNodes :: IndexNode -> IndexNode -> IndexNode 255 | mergeIndexNodes x y = IndexNode 256 | { indexEntries = indexEntries x ++ indexEntries y 257 | , indexSubnodes = Map.unionWith mergeIndexNodes (indexSubnodes x) (indexSubnodes y) } 258 | 259 | indexKeyContent :: LaTeX -> Text 260 | indexKeyContent = mconcat . map ikc 261 | where 262 | ikc :: LaTeXUnit -> Text 263 | ikc (TeXRaw t) = replace "\n" " " t 264 | ikc (TeXComm "tcode" _ [(_, x)]) = indexKeyContent x 265 | ikc (TeXComm "idxcode" _ [(_, x)]) = indexKeyContent x 266 | ikc (TeXComm "noncxxtcode" _ [(_, x)]) = indexKeyContent x 267 | ikc (TeXComm "indexedspan" _ [(_, x), _]) = indexKeyContent x 268 | ikc (TeXComm "texttt" _ [(_, x)]) = indexKeyContent x 269 | ikc (TeXComm "textit" _ [(_, x)]) = indexKeyContent x 270 | ikc (TeXComm "textsc" _ [(_, x)]) = indexKeyContent x 271 | ikc (TeXComm "mathsf" _ [(_, x)]) = indexKeyContent x 272 | ikc (TeXComm "textsf" _ [(_, x)]) = indexKeyContent x 273 | ikc (TeXComm "textcolor" _ [_, (_, x)]) = indexKeyContent x 274 | ikc (TeXComm "xspace" _ []) = "_" 275 | ikc (TeXComm "Cpp" _ []) = "C++" 276 | ikc (TeXComm "&" _ []) = "&" 277 | ikc (TeXComm "%" _ []) = "%" 278 | ikc (TeXComm "-" _ []) = "" 279 | ikc (TeXComm "ell" _ []) = "ℓ" 280 | ikc (TeXComm "~" _ []) = "~" 281 | ikc (TeXComm "#" _ []) = "#" 282 | ikc (TeXComm "{" _ []) = "{" 283 | ikc (TeXComm "}" _ []) = "}" 284 | ikc (TeXComm "protect" _ []) = "" 285 | ikc (TeXComm "frenchspacing" _ []) = "" 286 | ikc (TeXComm "caret" _ []) = "^" 287 | ikc (TeXComm "tilde" _ []) = "~" 288 | ikc (TeXComm "^" _ []) = "^" 289 | ikc (TeXComm "\"" _ []) = "\"" 290 | ikc (TeXComm "" _ []) = "" 291 | ikc (TeXComm "x" _ []) = "TODO" 292 | ikc (TeXComm "textbackslash" _ []) = "\\" 293 | ikc (TeXComm "textunderscore" _ []) = "_" 294 | ikc (TeXComm "discretionary" _ _) = "" 295 | ikc (TeXComm "texorpdfstring" _ [_, (_, x)]) = indexKeyContent x 296 | ikc (TeXComm s _ []) 297 | | Just c <- List.lookup s greekAlphabet = Text.pack [c] 298 | ikc (TeXBraces x) = indexKeyContent x 299 | ikc (TeXMath Dollar x) = indexKeyContent x 300 | ikc (TeXComm "index" _ _) = "" 301 | ikc (TeXComm "indexlink" _ ((_, x):_)) = indexKeyContent x 302 | ikc (TeXComm "hiddenindexlink" _ ((_, x):_)) = indexKeyContent x 303 | ikc x = error $ "indexKeyContent: unexpected: " ++ show x 304 | 305 | indexCatName :: (Eq b, Show b, IsString a, IsString b) => b -> a 306 | indexCatName "impldefindex" = "Index of implementation-defined behavior" 307 | indexCatName "libraryindex" = "Index of library names" 308 | indexCatName "headerindex" = "Index of library headers" 309 | indexCatName "generalindex" = "Index" 310 | indexCatName "grammarindex" = "Index of grammar productions" 311 | indexCatName "conceptindex" = "Index of library concepts" 312 | indexCatName "bibliography" = "Bibliography" 313 | indexCatName x = error $ "indexCatName: " ++ show x 314 | 315 | -- Gathering entities: 316 | 317 | class Sections a where sections :: a -> [Section] 318 | 319 | instance Sections Section where sections s = s : (subsections s >>= sections) 320 | instance Sections Draft where sections = concatMap sections . chapters 321 | instance Sections a => Sections (Maybe a) where sections = maybe [] sections 322 | 323 | allParagraphs :: Sections a => a -> [Paragraph] 324 | allParagraphs = (>>= paragraphs) . sections 325 | 326 | tables :: Sections a => a -> [(Paragraph, Table)] 327 | tables x = [(p, t) | p <- allParagraphs x, TableElement t <- allParaElems p] 328 | 329 | figures :: Sections a => a -> [(Paragraph, Figure)] 330 | figures x = [(p, f) | p <- allParagraphs x, FigureElement f <- allParaElems p] 331 | 332 | formulas :: Sections a => a -> [(Paragraph, Formula)] 333 | formulas x = [(p, f) | p <- allParagraphs x, FormulaElement f <- allParaElems p] 334 | 335 | footnotes :: Sections a => a -> [(Section, Footnote)] 336 | footnotes x = [(s, f) | s <- sections x, f <- sectionFootnotes s] 337 | 338 | allElements :: [TeXPara] -> [Element] 339 | allElements x = x >>= sentences >>= sentenceElems >>= f 340 | where 341 | f :: Element -> [Element] 342 | f e = e : case e of 343 | Enumerated {..} -> enumItems >>= itemElements 344 | TableElement Table{..} -> allElements $ tableBody >>= cells >>= content 345 | NoteElement Note{..} -> allElements noteContent 346 | Codeblock y -> [LatexElement y] 347 | ExampleElement Example{..} -> allElements exampleContent 348 | Tabbing y -> LatexElement . y 349 | Bnf _ y -> LatexElement . y 350 | _ -> [] 351 | 352 | -- Misc: 353 | 354 | texParaElems :: TeXPara -> [Element] 355 | texParaElems = (>>= sentenceElems) . sentences 356 | 357 | texParaTex :: TeXPara -> LaTeX 358 | texParaTex = (>>= elemTex) . texParaElems 359 | 360 | itemTex :: Item -> LaTeX 361 | itemTex Item{..} = (itemInlineContent >>= elemTex) ++ (itemBlockContent >>= texParaTex) 362 | 363 | elemTex :: Element -> LaTeX 364 | elemTex (NoteElement n) = noteContent n >>= texParaTex 365 | elemTex (ExampleElement x) = exampleContent x >>= texParaTex 366 | elemTex (LatexElement l) = [l] 367 | elemTex (Enumerated _ e) = e >>= itemTex 368 | elemTex (Bnf _ l) = l 369 | elemTex (Tabbing t) = t 370 | elemTex (Codeblock t) = [t] 371 | elemTex (Itemdescr t) = t >>= texParaTex 372 | elemTex (TableElement Table{..}) = tableCaption ++ (tableBody >>= rowTex) 373 | where 374 | rowTex :: Row [TeXPara] -> LaTeX 375 | rowTex r = content . cells r >>= (>>= texParaTex) 376 | elemTex (FigureElement _) = [] 377 | elemTex (FormulaElement f) = formulaContent f 378 | elemTex (HtmlElement _) = [] 379 | 380 | tableByAbbr :: Draft -> Abbreviation -> Maybe Table 381 | -- only returns Maybe because some of our tables are broken 382 | tableByAbbr d a = case abbrMap d a of 383 | Just (StablyNamedTable t) -> Just t 384 | _ -> Nothing 385 | 386 | figureByAbbr :: Draft -> Abbreviation -> Figure 387 | figureByAbbr d a = case abbrMap d a of 388 | Just (StablyNamedFigure f) -> f 389 | _ -> error $ "figureByAbbr: " ++ show a 390 | 391 | formulaByAbbr :: Draft -> Abbreviation -> Formula 392 | formulaByAbbr d a = case abbrMap d a of 393 | Just (StablyNamedFormula f) -> f 394 | _ -> error $ "formulaByAbbr: " ++ show a 395 | 396 | sectionByAbbr :: Draft -> Abbreviation -> Maybe Section 397 | sectionByAbbr d a = case abbrMap d a of 398 | Just (StablyNamedSection s) -> Just s 399 | _ -> Nothing 400 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | All authors involved in the creation of the contents of this package have agreed to release their respective contributions into the Public Domain. 2 | -------------------------------------------------------------------------------- /LaTeXBase.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ViewPatterns, OverloadedStrings #-} 2 | 3 | module LaTeXBase 4 | ( MathType(..), LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), concatRaws, hasCommand, isJustRaw 5 | , matchCommand, lookForCommand, matchEnv, mapTeX, mapCommandName, renderLaTeX, mapTeXRaw, isTeXEnv, texSpan, unconsRaw 6 | , trim, trimr, triml, texStripInfix, isCodeblock, isMath, texStripPrefix, texStripAnyPrefix, AllUnits(..) ) where 7 | 8 | import Data.String (fromString) 9 | import Prelude hiding ((.), (++), writeFile, dropWhile) 10 | import Data.Text (Text, pack) 11 | import qualified Data.Text as Text 12 | import Data.Char (isSpace) 13 | import Util ((.), (++), textStripInfix) 14 | import Control.Arrow (first, second) 15 | 16 | data MathType = Parentheses | Square | Dollar 17 | deriving (Eq, Show, Ord) 18 | 19 | data ArgKind = FixArg | OptArg 20 | deriving (Eq, Show, Ord) 21 | 22 | type TeXArg = (ArgKind, LaTeX) 23 | 24 | data LaTeXUnit 25 | = TeXRaw Text 26 | | TeXComm String String [TeXArg] -- first string is command name, second is trailing whitespace 27 | | TeXEnv String [TeXArg] LaTeX 28 | | TeXMath MathType LaTeX 29 | | TeXLineBreak 30 | | TeXBraces LaTeX 31 | deriving (Eq, Show, Ord) 32 | 33 | isTeXEnv :: String -> LaTeXUnit -> Bool 34 | isTeXEnv x (TeXEnv y _ _) = x == y 35 | isTeXEnv _ _ = False 36 | 37 | type LaTeX = [LaTeXUnit] 38 | 39 | lookForCommand :: String -> LaTeX -> [[TeXArg]] 40 | lookForCommand n = (snd .) . matchCommand (n ==) 41 | 42 | class AllUnits a where 43 | allUnits :: a -> [LaTeXUnit] 44 | 45 | instance AllUnits LaTeXUnit where 46 | allUnits u = u : case u of 47 | TeXMath _ l -> allUnits l 48 | TeXBraces l -> allUnits l 49 | TeXComm _ _ a -> (snd . a) >>= allUnits 50 | TeXEnv _ a l -> (l : snd . a) >>= allUnits 51 | _ -> [] 52 | 53 | instance AllUnits a => AllUnits [a] where 54 | allUnits = concatMap allUnits 55 | 56 | matchCommand :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg])] 57 | matchCommand f x = [(str, as) | TeXComm str _ as <- allUnits x, f str] 58 | 59 | hasCommand :: (String -> Bool) -> LaTeX -> Bool 60 | hasCommand f = not . null . matchCommand f 61 | 62 | matchEnv :: AllUnits a => (String -> Bool) -> a -> [(String, [TeXArg], LaTeX)] 63 | matchEnv f x = [(str, as, l) | TeXEnv str as l <- allUnits x, f str] 64 | 65 | mapTeX :: (LaTeXUnit -> Maybe LaTeX) -> LaTeX -> LaTeX 66 | mapTeX f = concatMap g 67 | where 68 | g :: LaTeXUnit -> LaTeX 69 | g (f -> Just x) = x 70 | g (TeXComm c ws a) = [TeXComm c ws (h . a)] 71 | g (TeXBraces x) = [TeXBraces (mapTeX f x)] 72 | g (TeXMath t b) = [TeXMath t (mapTeX f b)] 73 | g (TeXEnv n a b) = [TeXEnv n (h . a) (mapTeX f b)] 74 | g x = [x] 75 | h = second (mapTeX f) 76 | 77 | mapCommandName :: (String -> String) -> LaTeX -> LaTeX 78 | mapCommandName f = concatMap g 79 | where 80 | g :: LaTeXUnit -> LaTeX 81 | g (TeXComm c ws a) = [TeXComm (f c) ws (h . a)] 82 | g (TeXBraces x) = [TeXBraces (mapCommandName f x)] 83 | g (TeXMath t b) = [TeXMath t (mapCommandName f b)] 84 | g (TeXEnv n a b) = [TeXEnv n (h . a) (mapCommandName f b)] 85 | g x = [x] 86 | h = second (mapCommandName f) 87 | 88 | renderLaTeX :: LaTeX -> Text 89 | renderLaTeX = mconcat . (renderUnit .) 90 | 91 | renderUnit :: LaTeXUnit -> Text 92 | renderUnit (TeXRaw t) = t 93 | renderUnit (TeXComm "right" _ [(FixArg, [TeXRaw "."])]) = "\\right." 94 | renderUnit (TeXComm name ws []) 95 | | name `elem` ["left", "sum", "int", "sin", "cos", "right", "bigl", "bigr", "big", "small", "smaller"] = pack $ "\\" <> name <> ws 96 | | otherwise = "\\" <> fromString name <> "{}" 97 | renderUnit (TeXComm name ws args) = "\\" <> pack (fromString name) <> pack (fromString ws) <> renderArgs args 98 | renderUnit (TeXEnv name args c) = 99 | "\\begin{" <> fromString name <> "}" 100 | <> renderArgs args 101 | <> renderLaTeX c 102 | <> "\\end{" <> fromString name <> "}" 103 | renderUnit (TeXMath Dollar l) = "$" <> renderLaTeX l <> "$" 104 | renderUnit (TeXMath Square l) = "\\[" <> renderLaTeX l <> "\\]" 105 | renderUnit (TeXMath Parentheses l) = "\\(" <> renderLaTeX l <> "\\)" 106 | renderUnit TeXLineBreak = "\\\\" 107 | renderUnit (TeXBraces l) = "{" <> renderLaTeX l <> "}" 108 | 109 | renderArgs :: [TeXArg] -> Text 110 | renderArgs = mconcat . (renderArg .) 111 | 112 | renderArg :: TeXArg -> Text 113 | renderArg (FixArg, l) = "{" <> renderLaTeX l <> "}" 114 | renderArg (OptArg, l) = "[" <> renderLaTeX l <> "]" 115 | 116 | mapTeXRaw :: (Text -> LaTeXUnit) -> (LaTeX -> LaTeX) 117 | mapTeXRaw f = map go 118 | where 119 | go :: LaTeXUnit -> LaTeXUnit 120 | go (TeXRaw t) = f t 121 | go (TeXComm s ws args) = TeXComm s ws (second (go .) . args) 122 | go (TeXEnv s args body) = TeXEnv s (second (go .) . args) (go . body) 123 | go (TeXBraces l) = TeXBraces $ go . l 124 | go t@(TeXMath _ _) = t 125 | go t@TeXLineBreak = t 126 | 127 | concatRaws :: LaTeX -> LaTeX 128 | concatRaws (TeXRaw a : TeXRaw b : more) = concatRaws (TeXRaw (a ++ b) : more) 129 | concatRaws (TeXComm s ws args : more) = TeXComm s ws (second concatRaws . args) : concatRaws more 130 | concatRaws (TeXEnv s args bd : more) = TeXEnv s (second concatRaws . args) (concatRaws bd) : concatRaws more 131 | concatRaws (TeXBraces x : more) = TeXBraces (concatRaws x) : concatRaws more 132 | concatRaws (x : more) = x : concatRaws more 133 | concatRaws [] = [] 134 | 135 | unconsRaw :: LaTeX -> (Text, LaTeX) 136 | unconsRaw (TeXRaw x : y) = first (x ++) (unconsRaw y) 137 | unconsRaw x = ("", x) 138 | 139 | texStripPrefix :: Text -> LaTeX -> Maybe LaTeX 140 | texStripPrefix t (TeXRaw s : y) = case Text.stripPrefix t s of 141 | Just "" -> Just y 142 | Just s' -> Just (TeXRaw s' : y) 143 | Nothing -> Nothing 144 | texStripPrefix _ _ = Nothing 145 | 146 | texStripAnyPrefix :: [Text] -> LaTeX -> Maybe (Text, LaTeX) 147 | texStripAnyPrefix [] _ = Nothing 148 | texStripAnyPrefix (x:y) z 149 | | Just a <- texStripPrefix x z = Just (x, a) 150 | | otherwise = texStripAnyPrefix y z 151 | 152 | texStripInfix :: Text -> LaTeX -> Maybe (LaTeX, LaTeX) 153 | texStripInfix t = go 154 | where 155 | go [] = Nothing 156 | go (x : rest) 157 | | TeXRaw s <- x 158 | , Just (y, z) <- textStripInfix t s 159 | = Just (h y, h z ++ rest) 160 | | otherwise = first (x :) . go rest 161 | h "" = [] 162 | h x = [TeXRaw x] 163 | 164 | texSpan :: (Char -> Bool) -> LaTeX -> (Text, LaTeX) 165 | texSpan p (TeXRaw x : y) = case Text.span p x of 166 | (stuff, "") -> first (stuff ++) (texSpan p y) 167 | (stuff, rest) -> (stuff, TeXRaw rest : y) 168 | texSpan _ x = ("", x) 169 | 170 | invisible :: LaTeXUnit -> Bool 171 | invisible (TeXComm "index" _ _) = True 172 | invisible _ = False 173 | 174 | dropWhileEnd :: (Char -> Bool) -> LaTeX -> LaTeX 175 | dropWhileEnd _ [] = [] 176 | dropWhileEnd p x 177 | | invisible (last x) = dropWhileEnd p (init x) ++ [last x] 178 | | TeXRaw y <- last x = init x ++ case Text.dropWhileEnd p y of 179 | "" -> [] 180 | a -> [TeXRaw a] 181 | | otherwise = x 182 | 183 | trimr, trim :: LaTeX -> LaTeX 184 | trimr = dropWhileEnd isSpace 185 | trim = triml . trimr 186 | 187 | triml :: LaTeX -> LaTeX 188 | triml (TeXRaw x : y) = case Text.dropWhile isSpace x of 189 | "" -> triml y 190 | x' -> TeXRaw x' : y 191 | triml x = x 192 | 193 | isMath :: LaTeXUnit -> Bool 194 | isMath (TeXMath _ _) = True 195 | isMath (TeXComm "ensuremath" _ _) = True 196 | isMath (TeXEnv "eqnarray*" _ _) = True 197 | isMath (TeXEnv "equation*" _ _) = True 198 | isMath _ = False 199 | 200 | isCodeblock :: LaTeXUnit -> Bool 201 | isCodeblock (TeXEnv "codeblock" _ _) = True 202 | isCodeblock (TeXEnv "indexedcodeblock" _ _) = True 203 | isCodeblock (TeXEnv "codeblocktu" _ _) = True 204 | isCodeblock (TeXEnv "codeblockdigitsep" _ _) = True 205 | isCodeblock _ = False 206 | 207 | isJustRaw :: LaTeX -> Maybe Text 208 | isJustRaw [TeXRaw x] = Just x 209 | isJustRaw _ = Nothing 210 | -------------------------------------------------------------------------------- /LaTeXParser.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns, TupleSections #-} 3 | 4 | module LaTeXParser (parseString, 5 | Token(Token), Context(..), defaultContext, Signature(..), Macros(..), Environment(..), Command(..), ParseResult(ParseResult), 6 | defaultMacros, 7 | nullCmd, storeCmd, codeEnv, normalCmd, 8 | storeEnv) where 9 | 10 | import LaTeXBase (LaTeXUnit(..), LaTeX, TeXArg, ArgKind(..), MathType(..), concatRaws) 11 | import Data.Text (Text) 12 | import qualified Data.Text as Text 13 | import Data.Char (isAlphaNum, isSpace, isAlpha) 14 | import Data.Maybe (fromJust) 15 | import Control.Arrow (first) 16 | import Data.Map (Map) 17 | import qualified Data.Map as Map 18 | import Prelude hiding ((++), (.)) 19 | import Util ((.), (++), getDigit, stripInfix) 20 | 21 | newtype Token = Token { tokenChars :: String } 22 | deriving (Eq, Show) 23 | 24 | data Environment = Environment (Context -> [Token] -> ParseResult) 25 | data Command = Command { runCommand :: Context -> String {- ws -} -> [Token] -> ParseResult } 26 | 27 | data Macros = Macros 28 | { commands :: Map Text Command 29 | , environments :: Map Text Environment 30 | , counters :: Map Text Int } 31 | 32 | newCommand :: Bool {- overwrite -} -> (Text, Command) -> Macros -> Macros 33 | newCommand True (name, cmd) Macros{..} = Macros{commands = Map.insert name cmd commands, ..} 34 | newCommand False (name, cmd) Macros{..} = Macros{commands = Map.insertWith (\_ y -> y) name cmd commands, ..} 35 | 36 | instance Semigroup Macros where 37 | x <> y = Macros 38 | (commands x ++ commands y) 39 | (environments x ++ environments y) 40 | (counters x ++ counters y) 41 | 42 | instance Monoid Macros where 43 | mempty = Macros mempty mempty mempty 44 | 45 | data ParseResult = ParseResult 46 | { content :: LaTeX 47 | , newMacros :: Macros 48 | , remainder :: [Token] } 49 | 50 | data Signature = Signature 51 | { nrFixArgs :: Int 52 | , defaultArg :: Maybe [Token] } 53 | deriving Show 54 | 55 | data Context = Context 56 | { commentsEnabled :: Bool 57 | , parsingOptArg :: Bool 58 | , macros :: Macros } 59 | 60 | prependContent :: LaTeX -> ParseResult -> ParseResult 61 | prependContent t p = p{content = t ++ content p} 62 | 63 | combineMacros :: Bool {- left biased -} -> Macros -> Macros -> Macros 64 | combineMacros b x y = if b then x ++ y else y ++ x 65 | 66 | addMacros :: Bool {- overwrite -} -> Macros -> ParseResult -> ParseResult 67 | addMacros b m p = p{newMacros = combineMacros b m (newMacros p)} 68 | 69 | defaultEnvs :: [(Text, Environment)] 70 | defaultEnvs = [outputblockEnv] 71 | 72 | codeEnv :: Text -> Signature -> (Text, Environment) 73 | codeEnv name sig = (name, Environment f) 74 | where 75 | f :: Context -> [Token] -> ParseResult 76 | f ctx toks = ParseResult [env] mempty rest' 77 | where 78 | (arguments, rest) = parseArgs sig toks 79 | Just (code, rest') = stripInfix [Token "\\end", Token "{", Token (Text.unpack name), Token "}"] rest 80 | env = TeXEnv (Text.unpack name) (map ((FixArg, ) . fullParse ctx) arguments) (parseCode name ctx code) 81 | 82 | outputblockEnv :: (Text, Environment) 83 | outputblockEnv = ("outputblock", Environment f) 84 | where 85 | f :: Context -> [Token] -> ParseResult 86 | f ctx toks = ParseResult [env] mempty rest 87 | where 88 | Just (content, rest) = stripInfix [Token "\\end", Token "{", Token "outputblock", Token "}"] toks 89 | env = TeXEnv "outputblock" [] (parseOutputBlock ctx content) 90 | 91 | parseOutputBlock :: Context -> [Token] -> LaTeX 92 | parseOutputBlock c = concatRaws . go 93 | where 94 | go :: [Token] -> LaTeX 95 | go [] = [] 96 | go (Token "@" : rest) = fullParse c cmd ++ go rest' 97 | where (cmd, Token "@" : rest') = break (== Token "@") rest 98 | go s = TeXRaw (Text.pack $ concatMap tokenChars code) : go rest 99 | where (code, rest) = break (== Token "@") s 100 | 101 | storeEnv :: String -> Signature -> (Text, Environment) 102 | storeEnv name sig = (Text.pack name, Environment act) 103 | where 104 | act :: Context -> [Token] -> ParseResult 105 | act ctx toks = ParseResult [env] mempty afterend 106 | where 107 | (arguments, rest) = parseArgs sig toks 108 | ParseResult body _ afterend = parse ctx rest 109 | env = TeXEnv name (map ((FixArg, ) . fullParse ctx) arguments) (concatRaws body) 110 | -- todo: not all fixargs 111 | 112 | defaultCmds :: [(Text, Command)] 113 | defaultCmds = 114 | [ ("newcommand", newCommandCommand) 115 | , ("renewcommand", newCommandCommand) 116 | , ("DeclareMathOperator", declareMathOperator) 117 | , ("newcolumntype", newColumnTypeCommand) 118 | , ("newenvironment", newEnvCommand) 119 | , ("lstnewenvironment", newEnvCommand) 120 | , ("raisebox", raiseBoxCommand) 121 | , ("let", Command $ \ctx _ws rest -> parse ctx (drop 2 rest)) 122 | , beginCommand 123 | , endCommand 124 | , oldDefCommand 125 | ] 126 | 127 | oldDefCommand :: (Text, Command) 128 | oldDefCommand = ("def", Command pars) 129 | where 130 | pars ctx@Context{..} _ws rest 131 | | (Token ('\\' : name) : rest') <- rest 132 | , Just (body, rest'') <- balanced ('{', '}') rest' = 133 | let 134 | m = Macros (Map.fromList [defCmd (Text.pack name) (Signature 0 Nothing) body]) mempty mempty 135 | ParseResult p mm r = parse ctx{macros=macros++m} rest'' 136 | in 137 | ParseResult p (m ++ mm) r 138 | | otherwise = parse ctx $ snd $ fromJust $ balanced ('{', '}') $ dropWhile (/= Token "{") rest 139 | 140 | endCommand :: (Text, Command) 141 | endCommand = ("end", Command $ \c _ws rest -> 142 | let Just (_, rest') = parseFixArg c rest in ParseResult mempty mempty rest') 143 | 144 | beginCommand :: (Text, Command) 145 | beginCommand = ("begin", normalCmd $ Command pars) 146 | where 147 | pars c@Context{..} _ws rest 148 | | Just (Environment f) <- Map.lookup (envname) (environments macros) = f c rest' 149 | | otherwise = error $ "undefined env: " ++ Text.unpack envname 150 | where 151 | Just (arg, rest') = parseFixArg c rest 152 | [TeXRaw envname] = concatRaws arg 153 | 154 | raiseBoxCommand :: Command 155 | raiseBoxCommand = normalCmd $ Command $ \c@Context{..} _ws rest -> 156 | let 157 | Just (a0, rest') = balanced ('{', '}') rest 158 | (a1, rest'') = case parseOptArg rest' of 159 | Nothing -> (Nothing, rest') 160 | Just (x, y) -> (Just x, y) 161 | Just (a2, rest''') = balanced ('{', '}') rest'' 162 | args = [(FixArg, fullParse c a0)] 163 | ++ case a1 of 164 | Nothing -> [] 165 | Just x -> [(OptArg, fullParse c x)] 166 | ++ [(FixArg, fullParse c a2)] 167 | in 168 | ParseResult [TeXComm "raisebox" "" args] mempty rest''' 169 | 170 | newCommandCommand :: Command 171 | newCommandCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) -> 172 | let 173 | (sig, rest') = parseSignature rest 174 | Just (body, rest'') = balanced ('{', '}') rest' 175 | newMacros = newCommand True (defCmd (Text.pack name) sig body) mempty 176 | in 177 | ParseResult [] newMacros rest'' 178 | 179 | declareMathOperator :: Command 180 | declareMathOperator = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token ('\\' : name) : Token "}" : rest) -> 181 | let 182 | Just (body, rest') = balanced ('{', '}') rest 183 | newBody = [Token "\\operatorname", Token "{"] ++ body ++ [Token "}"] 184 | newMacros = newCommand True (defCmd (Text.pack name) (Signature 0 Nothing) newBody) mempty 185 | in 186 | ParseResult [] newMacros rest' 187 | 188 | newColumnTypeCommand :: Command 189 | newColumnTypeCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : Token _ : Token "}" : rest) -> 190 | let 191 | (_, rest') = parseSignature rest 192 | Just (_, rest'') = balanced ('{', '}') rest' 193 | in 194 | ParseResult [] mempty rest'' 195 | 196 | defaultMacros :: Macros 197 | defaultMacros = Macros (Map.fromList defaultCmds) (Map.fromList defaultEnvs) mempty 198 | 199 | defaultContext :: Context 200 | defaultContext = Context 201 | { commentsEnabled = True 202 | , parsingOptArg = False 203 | , macros = defaultMacros } 204 | 205 | rmLine :: [Token] -> [Token] 206 | rmLine s = case dropWhile (/= Token "\n") s of 207 | Token "\n" : x -> x 208 | x -> x 209 | 210 | parseOptArg :: [Token] -> Maybe ([Token], [Token]) 211 | parseOptArg = balanced ('[', ']') 212 | 213 | parseOptArgs :: [Token] -> ([[Token]], [Token]) 214 | parseOptArgs s 215 | | Just (r, s') <- parseOptArg s = first (r:) (parseOptArgs s') 216 | | otherwise = ([], s) 217 | 218 | parseFixArg :: Context -> [Token] -> Maybe (LaTeX, [Token]) 219 | parseFixArg ctx (Token [c] : more) | isSpace c = parseFixArg ctx more 220 | parseFixArg ctx (Token "{" : more) = 221 | let ParseResult t _macros s = parse ctx more in Just (t, s) 222 | parseFixArg _ _ = Nothing 223 | 224 | parseSignature :: [Token] -> (Signature, [Token]) 225 | parseSignature t = case optArgs of 226 | [] -> (Signature 0 Nothing, t') 227 | [[Token a]] -> (Signature (read a) Nothing, t') 228 | [[Token a], deflt] -> (Signature (read a) (Just deflt), t') 229 | _ -> error "unrecognized signature" 230 | where (optArgs, t') = parseOptArgs t 231 | 232 | balanced :: (Char, Char) -> [Token] -> Maybe ([Token], [Token]) 233 | balanced (open, close) (dropWhile (all isSpace . tokenChars) -> (Token [o] : s)) 234 | | o == open = Just $ go 0 s 235 | where 236 | go :: Int -> [Token] -> ([Token], [Token]) 237 | go 0 [] = ([], []) 238 | go 0 (Token [c] : x) | c == close = ([], x) 239 | go n (Token "}" : x) = first (Token "}" :) (go (n-1) x) 240 | go n (Token "{" : x) = first (Token "{" :) (go (n+1) x) 241 | go n (x:y) = first (x :) (go n y) 242 | go n x = error $ "\n\nbalanced: " ++ show (n, x) 243 | balanced oc (dropWhile (all isSpace. tokenChars) -> (Token "%" : x)) = balanced oc (dropWhile (/= Token "\n") x) 244 | balanced _ _ = Nothing 245 | 246 | balanced_body :: Context -> String -> [Token] -> ([Token], [Token]) 247 | balanced_body ctx end = go 0 248 | where 249 | go :: Int -> [Token] -> ([Token], [Token]) 250 | go 0 [] = ([], []) 251 | go 0 (Token "\\end" : Token "{" : e : Token "}" : x) | fullParse ctx [e] == [TeXRaw $ Text.pack end] = ([], x) 252 | go n (Token "}" : x) = first (Token "}" :) (go (n-1) x) 253 | go n (Token "{" : x) = first (Token "{" :) (go (n+1) x) 254 | go n (x:y) = first (x :) (go n y) 255 | go n s = error $ "\n\nbalanced: " ++ show (n, s) 256 | 257 | parseArgs :: Signature -> [Token] -> ([[Token]], [Token]) 258 | parseArgs Signature{..} s = case defaultArg of 259 | Nothing -> n_balanced ('{', '}') nrFixArgs s 260 | Just dfl -> case parseOptArg s of 261 | Nothing -> 262 | first (dfl :) (n_balanced ('{', '}') (nrFixArgs - 1) s) 263 | Just (optArg, s') -> 264 | first (optArg :) (n_balanced ('{', '}') (nrFixArgs - 1) s') 265 | 266 | parseArgs2 :: Context -> Signature -> [Token] -> ([TeXArg], [Token]) 267 | parseArgs2 c Signature{..} s 268 | | defaultArg == Nothing = first (map fa) (n_balanced ('{', '}') nrFixArgs s) 269 | | Just (optArg, s') <- parseOptArg s = 270 | first (\a -> (OptArg, fullParse c optArg) : map fa a) 271 | (n_balanced ('{', '}') (nrFixArgs - 1) s') 272 | | otherwise = first (map fa) (n_balanced ('{', '}') (nrFixArgs - 1) s) 273 | where 274 | fa = (FixArg, ) . fullParse c 275 | 276 | -- todo: clean up parseArgs/parseArgs2 above 277 | 278 | n_balanced :: (Char, Char) -> Int -> [Token] -> ([[Token]], [Token]) 279 | n_balanced oc n s 280 | | n > 0, Just (x, s') <- balanced oc s = first (x:) $ n_balanced oc (n-1) s' 281 | | otherwise = ([], s) 282 | 283 | newEnvCommand :: Command 284 | newEnvCommand = normalCmd $ Command $ \Context{..} _ws (Token "{" : (span (/= Token "}") -> (name, Token "}" : rest))) -> 285 | let 286 | nameStr = concatMap tokenChars name 287 | (sig, rest') = parseSignature rest 288 | Just (begin, rest'') = balanced ('{', '}') rest' 289 | Just (end, rest''') = balanced ('{', '}') rest'' 290 | pa :: Context -> [Token] -> ParseResult 291 | pa c' toks = ParseResult replaced mempty toks'' 292 | where 293 | replaced = fullParse c' $ replArgs args begin ++ body ++ end 294 | (args, toks') = parseArgs sig toks 295 | (body, toks'') = balanced_body c' nameStr toks' 296 | m = Macros mempty (Map.singleton (Text.pack nameStr) (Environment pa)) mempty 297 | in 298 | ParseResult [] m rest''' 299 | 300 | parseString :: Context -> String -> (LaTeX, Macros, [Token]) 301 | parseString c s = (concatRaws x, y, z) 302 | where ParseResult x y z = parse c (tokenize s) 303 | 304 | literal :: String 305 | literal = " @_{}&,%-#/~>!$;:^" 306 | 307 | breakComment :: [Token] -> ([Token], [Token]) 308 | breakComment x@(Token "\n" : _) = ([], x) 309 | breakComment (Token ('\\' : cmd) : xs) 310 | | (c, r@(_:_)) <- span (/= '\n') cmd = ([Token ('\\':c)], Token r : xs) 311 | breakComment (Token "%" : Token "\n" : x) = first ((Token "%" :) . (Token "\n" :)) (breakComment x) 312 | breakComment (x : xs) = first (x:) (breakComment xs) 313 | breakComment [] = ([], []) 314 | 315 | data LiteralKind = StringLiteral | CharLiteral 316 | 317 | parseCode :: Text -> Context -> [Token] -> LaTeX 318 | parseCode envname c = concatRaws . go Nothing 319 | where 320 | go :: Maybe LiteralKind -> [Token] -> LaTeX 321 | go _ [] = [] 322 | go b (Token "@" : rest) = fullParse c cmd ++ go b rest' 323 | where (cmd, Token "@" : rest') = break (== Token "@") rest 324 | go (Just StringLiteral) (Token "\"" : rest) = TeXRaw "\"" : go Nothing rest 325 | go (Just CharLiteral) (Token "'" : rest) = TeXRaw "'" : go Nothing rest 326 | go Nothing (Token "\"" : rest) = TeXRaw "\"" : (go (Just StringLiteral) lit ++ go Nothing rest') 327 | where (lit, rest') = stringLiteral rest 328 | go Nothing (Token "'" : rest) 329 | | envname == "codeblockdigitsep" = TeXRaw "'" : go Nothing rest 330 | | otherwise = TeXRaw "'" : (go (Just CharLiteral) lit ++ go Nothing rest') 331 | where (lit, rest') = charLiteral rest 332 | go Nothing (Token "/" : Token "/" : (breakComment -> (comment, rest'))) 333 | = TeXComm "comment" "" [(FixArg, TeXRaw "//" : noncode comment)] : go Nothing rest' 334 | go Nothing (Token "/" : Token "*" : rest) 335 | | Just (comment, rest') <- stripInfix [Token "*", Token "/"] rest 336 | = TeXComm "comment" "" [(FixArg, [TeXRaw "/*"] ++ noncode comment ++ [TeXRaw "*/"])] : go Nothing rest' 337 | go b (Token "/" : rest) = TeXRaw "/" : go b rest 338 | go b s = TeXRaw (Text.pack $ concatMap tokenChars code) : go b rest 339 | where 340 | breakToks = [Token "@", Token "/"] ++ 341 | case b of 342 | Nothing -> [Token "\"", Token "'"] 343 | Just StringLiteral -> [Token "\""] 344 | Just CharLiteral -> [Token "'"] 345 | (code, rest) = break (`elem` breakToks) s 346 | noncode :: [Token] -> LaTeX 347 | noncode toks = 348 | fullParse c nc ++ case more of 349 | [] -> [] 350 | Token "@" : (break (== Token "@") -> (code, _ : rest)) -> 351 | TeXComm "tcode" "" [(FixArg, fullParse c code)] : noncode rest 352 | _ -> error "no" 353 | where (nc, more) = span (/= Token "@") toks 354 | stringLiteral :: [Token] -> ([Token], [Token]) 355 | stringLiteral (Token "\\" : Token "\"" : x) = first (Token "\\\"" :) (stringLiteral x) 356 | stringLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (stringLiteral x) 357 | stringLiteral (Token "\"" : x) = ([Token "\""], x) 358 | stringLiteral (y : x) = first (y :) (stringLiteral x) 359 | stringLiteral [] = ([], []) 360 | charLiteral :: [Token] -> ([Token], [Token]) 361 | charLiteral (Token "\\" : Token "'" : x) = first (Token "\\'" :) (charLiteral x) 362 | charLiteral (Token "\\" : Token "\\" : x) = first (Token "\\\\" :) (charLiteral x) 363 | charLiteral (Token "'" : x) = ([Token "'"], x) 364 | charLiteral (y : x) = first (y :) (charLiteral x) 365 | charLiteral [] = ([], []) 366 | 367 | isCommandChar :: Char -> Bool 368 | isCommandChar c = isAlpha c || c == '*' 369 | 370 | tokenize :: String -> [Token] 371 | tokenize "" = [] 372 | tokenize ('\\':'v':'e':'r':'b': delim : (break (== delim) -> (arg, _ : rest))) = 373 | Token ("\\verb:" ++ arg) : tokenize rest 374 | tokenize ('\\' : (span isCommandChar -> (cmd@(_:_), (span isSpace -> (ws, rest))))) 375 | = Token ('\\' : cmd ++ ws) : tokenize rest 376 | tokenize ('\\' : c : rest) = Token ['\\', c] : tokenize rest 377 | tokenize x@((isAlpha -> True): _) = let (a, b) = span isAlphaNum x in Token a : tokenize b 378 | tokenize (x:y) = Token [x] : tokenize y 379 | 380 | -- \verb is handled in tokenize so that the 'balanced' function doesn't 381 | -- get confused by \verb|{| 382 | 383 | -- Notice how the whitespace following a command like \bla is included in the Token 384 | -- This lets the parser include it in the TeXComm/TeXCommS's command field, so that 385 | -- the whitespace is not lost when serializing back to text when sending to MathJax. 386 | 387 | replArgs :: [[Token]] -> [Token] -> [Token] 388 | replArgs args = go 389 | where 390 | go [] = [] 391 | go (Token "%" : (span (/= Token "\n") -> (x, y))) = Token "%" : x ++ go y 392 | go (Token "#" : Token "#" : y) = Token "#" : go y 393 | go (Token "#" : Token [getDigit -> Just i] : y) 394 | | length args >= i = (args !! (i-1)) ++ go y 395 | | otherwise = error $ "need more args than " ++ show args ++ " to replace in " ++ show (concatMap tokenChars y) 396 | go (x:y) = x : go y 397 | 398 | nullCmd :: Text -> Signature -> (Text, Command) 399 | nullCmd name sig = defCmd name sig [] 400 | 401 | storeCmd :: String -> Signature -> (Text, Command) 402 | storeCmd name sig = (Text.pack name, normalCmd $ Command pars) 403 | where 404 | pars context ws tokens = ParseResult [TeXComm name ws args] mempty rest 405 | where (args, rest) = parseArgs2 context sig tokens 406 | 407 | defCmd :: Text -> Signature -> [Token] -> (Text, Command) 408 | defCmd name sig body = (name, normalCmd $ Command pars) 409 | where 410 | pars context _ws tokens = ParseResult (fullParse context $ replArgs args body) mempty rest 411 | where (args, rest) = parseArgs sig tokens 412 | 413 | normalCmd :: Command -> Command 414 | normalCmd (Command f) = Command $ \ctx ws toks -> 415 | let ParseResult content newMacros rest = f ctx ws toks 416 | in addMacros False newMacros (prependContent content (parse ctx{macros=macros ctx ++ newMacros} rest)) 417 | 418 | consumeMath :: [Token] -> ([Token], [Token]) 419 | consumeMath = f 0 420 | where 421 | f :: Integer -> [Token] -> ([Token], [Token]) 422 | f 0 (Token "$" : rest) = ([], rest) 423 | f depth (Token "{" : rest) = first (Token "{" :) (f (depth + 1) rest) 424 | f depth (Token "}" : rest) = first (Token "}" :) (f (depth - 1) rest) 425 | f depth (tok : rest) = first (tok :) (f depth rest) 426 | f _ [] = error "unexpected end of math" 427 | 428 | parse :: Context -> [Token] -> ParseResult 429 | parse c (Token "$" : (consumeMath -> (math, rest))) = 430 | prependContent [TeXMath Dollar (fullParse c math)] (parse c rest) 431 | parse c (Token "\\[" : (span (/= Token "\\]") -> (math, Token "\\]" : rest))) = 432 | prependContent [TeXMath Square (fullParse c math)] (parse c rest) 433 | parse c (Token "]" : x) 434 | | parsingOptArg c = ParseResult mempty mempty x 435 | parse _ (Token "}" : x) = ParseResult mempty mempty x 436 | parse c (Token "{" : x) = prependContent [TeXBraces y] $ parse c rest 437 | where ParseResult y _ rest = parse c x 438 | parse c (Token "%" : x) 439 | | commentsEnabled c = parse c (rmLine x) 440 | parse _ [] = ParseResult mempty mempty mempty 441 | parse c (Token "\\\\" : x) = prependContent [TeXLineBreak] (parse c x) 442 | parse c (Token ['\\', ch] : x) 443 | | ch `elem` literal = prependContent [TeXComm [ch] "" []] (parse c x) 444 | parse c (Token ('\\':'v':'e':'r':'b':':':arg) : rest) = 445 | prependContent [TeXComm "verb" "" [(FixArg, [TeXRaw $ Text.pack arg])]] (parse c rest) 446 | parse c (Token "\\rSec" : Token [getDigit -> Just i] : s) 447 | = prependContent [TeXComm "rSec" "" args] $ parse c s'' 448 | where 449 | Just (a, s') = parseOptArg s 450 | Just (b, s'') = parseFixArg c s' 451 | args = [(FixArg, [TeXRaw $ Text.pack $ show i]), (FixArg, fullParse c a), (FixArg, b)] 452 | parse c@Context{..} (Token ('\\' : (span (not . isSpace) -> (nos, w))) : rest) 453 | | Just f <- Map.lookup (Text.pack cmd) (commands macros) = runCommand f c ws rest 454 | | otherwise = error $ 455 | "\n\nundefined command: " ++ show cmd ++ " at: " ++ take 50 (concatMap tokenChars rest) 456 | where (cmd, ws) | nos == "", (x : xx) <- w = ([x], xx) 457 | | otherwise = (nos, w) 458 | parse ctx (Token c : rest) 459 | | all isAlphaNum c 460 | = prependContent [TeXRaw $ Text.pack c] $ parse ctx rest 461 | parse ctx (Token [c] : rest) 462 | = prependContent [TeXRaw $ Text.pack [c]] $ parse ctx rest 463 | parse _ s = error $ "parse: unexpected: " ++ take 100 (concatMap tokenChars s) 464 | 465 | fullParse :: Context -> [Token] -> LaTeX 466 | fullParse c t 467 | | all isSpace (concatMap tokenChars remainder) = concatRaws content 468 | | otherwise = error $ "could not fully parse: " 469 | ++ concatMap tokenChars t 470 | ++ "\n\nremainder: " 471 | ++ concatMap tokenChars remainder 472 | where ParseResult{..} = parse c t 473 | -------------------------------------------------------------------------------- /Load14882.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE 3 | OverloadedStrings, 4 | ScopedTypeVariables, 5 | RecordWildCards, 6 | ViewPatterns, 7 | LambdaCase, 8 | TupleSections, 9 | NamedFieldPuns, 10 | FlexibleInstances, 11 | FlexibleContexts, 12 | RankNTypes, 13 | MultiParamTypeClasses, 14 | FunctionalDependencies, 15 | UndecidableInstances, 16 | RecursiveDo #-} 17 | 18 | module Load14882 (parseIndex, load14882) where 19 | 20 | import qualified LaTeXParser as Parser 21 | import qualified Data.IntMap as IntMap 22 | import qualified Data.List as List 23 | import Data.IntMap (IntMap) 24 | import LaTeXBase 25 | ( LaTeXUnit(..), TeXArg, ArgKind(..), lookForCommand 26 | , mapTeX, mapTeXRaw, concatRaws, texStripInfix, allUnits) 27 | import Data.Text (Text, replace, isPrefixOf) 28 | import Data.Text.IO (readFile) 29 | import Text.Regex (mkRegex, matchRegexAll) 30 | import qualified Data.Text as Text 31 | import Control.Monad (forM, when) 32 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile) 33 | import Data.Char (isAlpha) 34 | import Control.Arrow (first) 35 | import Data.Map (Map) 36 | import Data.Maybe (isJust, fromJust) 37 | import qualified Data.Map as Map 38 | import Data.List (unfoldr, (\\), takeWhile) 39 | import System.Process (readProcess) 40 | import System.IO.Unsafe (unsafePerformIO) 41 | import Control.Monad.Fix (MonadFix) 42 | import Control.Monad.State (MonadState, evalState, get, put, liftM2, modify) 43 | import Util ((.), (++), mapLast, stripInfix, measure, textStripInfix) 44 | import RawDocument 45 | import Sentences (splitIntoSentences, isActualSentence, breakSentence) 46 | import Document 47 | 48 | getCommitUrl :: IO Text 49 | getCommitUrl = do 50 | url <- gitGetRemoteUrl 51 | commit <- gitGetCommitRef 52 | return $ 53 | ( Text.replace "git@github.com:" "http://github.com/" 54 | $ Text.replace ".git" "/commit/" url) 55 | ++ commit 56 | 57 | gitGetRemoteUrl :: IO Text 58 | gitGetRemoteUrl = do 59 | x <- readProcess "git" ["ls-remote", "--get-url"] "" 60 | return $ Text.strip $ Text.pack x 61 | 62 | gitGetCommitRef :: IO Text 63 | gitGetCommitRef = do 64 | x <- readProcess "git" ["rev-parse", "HEAD"] "" 65 | return $ Text.strip $ Text.pack $ x 66 | 67 | -- In the LaTeX sources, \definition is often preceded by corresponding \indexdefns. 68 | -- Since we treat definitions like sections (and generate pages for them), we need 69 | -- to move the \indexdefns inside (after) the \definition, so that the index entries 70 | -- don't link to the page for the preceding section. 71 | 72 | moveIndexEntriesIntoDefs :: [Text] -> [Text] 73 | moveIndexEntriesIntoDefs [] = [] 74 | moveIndexEntriesIntoDefs (x:xs) 75 | | "\\indexdefn{" `isPrefixOf` x = case moveIndexEntriesIntoDefs xs of 76 | [] -> [x] 77 | y:ys 78 | | "\\definition{" `isPrefixOf` y -> y : x : ys 79 | | otherwise -> x : y : ys 80 | | otherwise = x : moveIndexEntriesIntoDefs xs 81 | 82 | moveIndexEntriesIntoSecs :: [Text] -> [Text] 83 | moveIndexEntriesIntoSecs = go [] 84 | where 85 | go x [] = x 86 | go x (h:t) 87 | | "\\indextext{" `isPrefixOf` h = go (h : x) t 88 | | "\\rSec" `isPrefixOf` h = h : reverse x ++ go [] t 89 | | otherwise = reverse x ++ [h] ++ go [] t 90 | 91 | {- The document has a ton of: 92 | 93 | \indexlibraryglobal{bla}% 94 | \begin{itemdecl} 95 | void bla(); 96 | \end{itemdecl} 97 | 98 | To highlight the whole itemdecl, indexItemDecls converts this to: 99 | 100 | \begin{indexeditemdecl}{ 101 | \indexlibraryglobal{bla}% 102 | } 103 | void bla(); 104 | \end{indexeditemdecl} 105 | -} 106 | 107 | indexCodeEnvs :: [Text] -> [Text] -> [Text] 108 | indexCodeEnvs envs = go [] 109 | where 110 | go collected [] = collected 111 | go collected (x:xs) 112 | | "\\index" `isPrefixOf` x = go (collected ++ [x]) xs 113 | | [e] <- [e | e <- envs, ("\\begin{" ++ e ++ "}") `isPrefixOf` x] = 114 | let (code, _ : rest) = span (not . (("\\end{" ++ e ++ "}") `isPrefixOf`)) xs 115 | in (if null collected then 116 | ["\\begin{" ++ e ++ "}"] 117 | ++ code 118 | ++ ["\\end{" ++ e ++ "}"] 119 | else 120 | ["\\begin{indexed" ++ e ++ "}{"] ++ collected ++ ["}"] 121 | ++ code 122 | ++ ["\\end{indexed" ++ e ++ "}"]) 123 | ++ go [] rest 124 | | otherwise = collected ++ (x : go [] xs) 125 | 126 | data Numbers = Numbers 127 | { tableNr, figureNr, footnoteRefNr, footnoteNr, itemDeclNr 128 | , nextIndexEntryNr, noteNr, exampleNr, nextSentenceNr, formulaNr :: Int } 129 | 130 | class AssignNumbers a b | a -> b where 131 | assignNumbers :: forall m . (Functor m, MonadFix m, MonadState Numbers m) => Section -> a -> m b 132 | 133 | instance AssignNumbers TeXArg TeXArg where 134 | assignNumbers s (y, x) = (y, ) . assignNumbers s x 135 | 136 | instance AssignNumbers LaTeXUnit LaTeXUnit where 137 | assignNumbers s (TeXEnv "itemdecl" [] x) = do 138 | n <- get 139 | put n{itemDeclNr = itemDeclNr n + 1} 140 | TeXEnv "itemdecl" [(FixArg, [TeXRaw $ Text.pack $ show $ itemDeclNr n])] . assignNumbers s x 141 | assignNumbers s (TeXEnv "indexeditemdecl" indices x) = do 142 | n <- get 143 | put n{itemDeclNr = itemDeclNr n + 1} 144 | liftM2 (TeXEnv "indexeditemdecl") (assignNumbers s indices) (assignNumbers s x) 145 | assignNumbers s (TeXEnv x y z) = liftM2 (TeXEnv x) (assignNumbers s y) (assignNumbers s z) 146 | assignNumbers _ (TeXComm "index" ws args) = do 147 | n <- get 148 | put n{nextIndexEntryNr = nextIndexEntryNr n + 1} 149 | return $ TeXComm "index" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args 150 | assignNumbers _ (TeXComm "defnx" ws args) = do 151 | n <- get 152 | put n{nextIndexEntryNr = nextIndexEntryNr n + 1} 153 | return $ TeXComm "defnx" ws $ (FixArg, [TeXRaw $ Text.pack $ show $ nextIndexEntryNr n]) : args 154 | assignNumbers _ (TeXComm "footnoteref" ws []) = do 155 | Numbers{..} <- get 156 | put Numbers{footnoteRefNr = footnoteRefNr+1, ..} 157 | return $ TeXComm "footnoteref" ws [(FixArg, [TeXRaw $ Text.pack $ show footnoteRefNr])] 158 | assignNumbers s (TeXComm x ws args) = TeXComm x ws . assignNumbers s args 159 | assignNumbers _ x = return x 160 | 161 | instance AssignNumbers a b => AssignNumbers (Cell a) (Cell b) where 162 | assignNumbers s x@Cell{..} = do 163 | n <- get 164 | put n{nextSentenceNr=1} 165 | content' <- assignNumbers s content 166 | modify $ \m -> m{nextSentenceNr = nextSentenceNr n} 167 | return x{content=content'} 168 | 169 | instance AssignNumbers a b => AssignNumbers (Row a) (Row b) where 170 | assignNumbers s x@Row{..} = do 171 | cells' <- assignNumbers s cells 172 | return x{cells=cells'} 173 | 174 | instance AssignNumbers RawTexPara TeXPara where 175 | assignNumbers s (RawTexPara (splitIntoSentences -> x)) = TeXPara . f x 176 | where 177 | f [] = return [] 178 | f (h:t) = do 179 | h' <- assignNumbers s h 180 | let actual = isActualSentence h 181 | n <- get 182 | put n{nextSentenceNr = nextSentenceNr n + (if actual then 1 else 0)} 183 | (Sentence (if actual then Just (nextSentenceNr n) else Nothing) h' :) . f t 184 | 185 | assignNonInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item 186 | assignNonInlineItem s (RawItem label content) = do 187 | n <- get 188 | put n{nextSentenceNr = 1} 189 | Item Nothing (if null label then Nothing else Just label) [] . assignNumbers s content 190 | 191 | breakFirstSentence :: [TeXPara] -> (Sentence, [TeXPara]) 192 | breakFirstSentence (TeXPara [x] : z) = (x, z) 193 | breakFirstSentence (TeXPara (x:y) : z) = (x, TeXPara y : z) 194 | breakFirstSentence x = error $ "breakFirstSentence: " ++ show x 195 | 196 | assignInlineItem :: (MonadState Numbers m, MonadFix m) => Section -> RawItem -> m Item 197 | assignInlineItem s (RawItem label content) = do 198 | n <- get 199 | put n{nextSentenceNr = 1} 200 | content' <- assignNumbers s content 201 | let (Sentence _ x, y) = breakFirstSentence content' 202 | return $ Item Nothing (if null label then Nothing else Just label) x y 203 | 204 | endsWithFullStop :: [RawElement] -> Bool 205 | endsWithFullStop = isJust . breakSentence 206 | 207 | instance AssignNumbers RawElement Element where 208 | assignNumbers section RawFigure{..} = do 209 | Numbers{..} <- get 210 | put Numbers{figureNr = figureNr+1, ..} 211 | return $ FigureElement Figure 212 | { figureNumber = figureNr 213 | , figureName = rawFigureName 214 | , figureAbbr = "fig:" ++ rawFigureAbbr 215 | , figureSvg = rawFigureSvg 216 | , figureSection = section } 217 | assignNumbers section RawFormula{..} = do 218 | Numbers{..} <- get 219 | put Numbers{formulaNr = formulaNr + 1, ..} 220 | return $ FormulaElement Formula 221 | { formulaNumber = formulaNr 222 | , formulaAbbr = "eq:" ++ rawFormulaAbbr 223 | , formulaContent = rawFormulaContent 224 | , formulaSection = section } 225 | assignNumbers s RawTable{..} = do 226 | Numbers{..} <- get 227 | put Numbers{tableNr = tableNr+1, ..} 228 | tableCaption <- assignNumbers s rawTableCaption 229 | tableBody <- assignNumbers s rawTableBody 230 | return $ TableElement Table 231 | { tableNumber = tableNr 232 | , columnSpec = rawColumnSpec 233 | , tableAbbr = rawTableAbbr 234 | , tableCaption = tableCaption 235 | , tableSection = s 236 | , .. } 237 | assignNumbers s (RawEnumerated x p) = do 238 | origNum <- nextSentenceNr . get 239 | let c = length (filter (any (endsWithFullStop . rawTexParaElems) . rawItemContent) p) 240 | r <- mapM (if c > 1 then assignNonInlineItem s else assignInlineItem s) p 241 | modify $ \y -> y{nextSentenceNr = origNum} 242 | return $ Enumerated x r 243 | assignNumbers s (RawLatexElement x) = LatexElement . assignNumbers s x 244 | assignNumbers s (RawBnf x y) = Bnf x . assignNumbers s y 245 | assignNumbers _ (RawTabbing x) = return $ Tabbing x 246 | assignNumbers s (RawCodeblock x) = Codeblock . assignNumbers s x 247 | assignNumbers s (RawItemdescr x) = Itemdescr . assignNumbers s x 248 | assignNumbers s (RawNote label x) = do 249 | Numbers{..} <- get 250 | put Numbers{noteNr = noteNr+1, ..} 251 | x' <- assignNumbers s x 252 | return $ NoteElement $ Note noteNr label x' 253 | assignNumbers s (RawExample x) = do 254 | Numbers{..} <- get 255 | put Numbers{exampleNr = exampleNr+1, ..} 256 | x' <- assignNumbers s x 257 | return $ ExampleElement $ Example exampleNr x' 258 | 259 | instance AssignNumbers RawFootnote Footnote where 260 | assignNumbers s (RawFootnote t) = do 261 | Numbers{..} <- get 262 | put Numbers{footnoteNr = footnoteNr+1, nextSentenceNr = 1, ..} 263 | t' <- assignNumbers s t 264 | return $ Footnote{footnoteNumber=footnoteNr,footnoteContent=t'} 265 | 266 | lsectionLevel :: LinearSection -> Int 267 | lsectionLevel (lsectionKind -> NormalSection l) = l 268 | lsectionLevel (lsectionKind -> DefinitionSection l) = l 269 | lsectionLevel _ = 0 270 | 271 | paraNumbers :: [Bool] -> [Maybe Int] 272 | paraNumbers = f 1 273 | where 274 | f _ [] = [] 275 | f i (True : x) = Just i : f (i + 1) x 276 | f i (False : x) = Nothing : f i x 277 | 278 | treeizeChapters :: forall m . (Functor m, MonadFix m, MonadState Numbers m) => 279 | Bool -> Int -> [LinearSection] -> m [Section] 280 | treeizeChapters _ _ [] = return [] 281 | treeizeChapters annexes secNumber (LinearSection{..} : more) = mdo 282 | nums <- get 283 | put nums{formulaNr = 1} 284 | sectionFootnotes <- assignNumbers newSec lsectionFootnotes 285 | let 286 | ie = rawIndexEntriesForSec newSec 287 | newSec = Section{sectionKind=lsectionKind, secIndexEntries=ie, secIndexEntriesByPath=reverseIndexEntryMap ie, ..} 288 | let pn = paraNumbers $ paraNumbered . lsectionParagraphs 289 | paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec 290 | subsections <- treeizeSections 1 chapter [newSec] lsubsections 291 | (newSec :) . treeizeChapters annexes' (sectionNumber + 1) more' 292 | where 293 | sectionNumber = if annexes' /= annexes then 0 else secNumber 294 | annexes' = chapter /= NormalChapter 295 | parents = [] 296 | chapter 297 | | lsectionKind == InformativeAnnexSection = InformativeAnnex 298 | | lsectionKind == NormativeAnnexSection = NormativeAnnex 299 | | otherwise = NormalChapter 300 | abbreviation = lsectionAbbreviation 301 | sectionName = lsectionName 302 | (lsubsections, more') = span ((> 0) . lsectionLevel) more 303 | 304 | rawIndexEntriesForSec :: Section -> IntMap IndexEntry 305 | rawIndexEntriesForSec s = IntMap.fromList 306 | [(n, e) | e@IndexEntry{indexEntryNr=Just n} <- sectionIndexEntries s] 307 | 308 | reverseIndexEntryMap :: IntMap IndexEntry -> Map IndexPath [(Int, IndexEntry)] 309 | reverseIndexEntryMap m = Map.fromListWith (++) [(indexPath x, [(i, x)]) | (i, x) <- IntMap.assocs m] 310 | 311 | assignItemNumbers :: Paragraph -> Paragraph 312 | assignItemNumbers p 313 | | Just n <- paraNumber p = p{ paraElems = fst $ goParas [n, 1] $ paraElems p } 314 | | otherwise = p 315 | where 316 | 317 | goParas :: [Int] -> [TeXPara] -> ([TeXPara], [Int]) 318 | goParas nn [] = ([], nn) 319 | goParas nn (TeXPara e : pp) = first (TeXPara e' :) (goParas nn' pp) 320 | where (e', nn') = goSentences nn e 321 | 322 | goSentences :: [Int] -> [Sentence] -> ([Sentence], [Int]) 323 | goSentences nn [] = ([], nn) 324 | goSentences nn (Sentence m e : ss) = first (Sentence m e' :) (goSentences nn' ss) 325 | where (e', nn') = goElems nn e 326 | 327 | goElems :: [Int] -> [Element] -> ([Element], [Int]) 328 | goElems nn [] = ([], nn) 329 | goElems nn (e:ee) = first (e' :) (goElems nn' ee) 330 | where (e', nn') = goElem nn e 331 | 332 | goElem :: [Int] -> Element -> (Element, [Int]) 333 | goElem nn Enumerated{..} = (Enumerated enumCmd items', mapLast (+ length enumItems) nn) 334 | where 335 | items' = map (\(i, Item{..}) -> 336 | Item 337 | (Just (map show $ mapLast (+i) nn)) 338 | itemLabel 339 | (fst $ goElems (mapLast (+i) nn ++ [1]) itemInlineContent) 340 | (fst $ goParas (mapLast (+i) nn ++ [1]) itemBlockContent) 341 | ) (zip [0..] enumItems) 342 | goElem nn (NoteElement (Note nr label paras)) = (NoteElement (Note nr label paras'), nn') 343 | where (paras', nn') = goParas nn paras 344 | goElem nn (ExampleElement (Example nr paras)) = (ExampleElement (Example nr paras'), nn') 345 | where (paras', nn') = goParas nn paras 346 | goElem nn x = (x, nn) 347 | 348 | instance AssignNumbers (Maybe Int, RawParagraph) Paragraph where 349 | assignNumbers paraSection (paraNumber, RawParagraph{..}) = do 350 | nums <- get 351 | put nums{nextSentenceNr=if paraNumbered then 1 else nextSentenceNr nums} 352 | paraElems <- assignNumbers paraSection rawParaElems 353 | when paraNumbered $ modify $ \newnums -> newnums{nextSentenceNr = nextSentenceNr nums} 354 | return $ assignItemNumbers Paragraph 355 | { paraInItemdescr = rawParaInItemdescr 356 | , paraSourceLoc = rawParaSourceLoc 357 | , allParaElems = allElements paraElems 358 | , .. } 359 | 360 | treeizeSections :: forall m . (Functor m, MonadFix m, MonadState Numbers m) => 361 | Int -> Chapter -> [Section] -> [LinearSection] -> m [Section] 362 | treeizeSections _ _ _ [] = return [] 363 | treeizeSections sectionNumber chapter parents 364 | (s@LinearSection{..} : (span ((> lsectionLevel s) . lsectionLevel) -> (lsubsections, more'))) = mdo 365 | let 366 | ie = rawIndexEntriesForSec newSec 367 | newSec = Section 368 | { sectionKind = lsectionKind 369 | , secIndexEntries = ie 370 | , secIndexEntriesByPath = reverseIndexEntryMap ie 371 | , sectionName = lsectionName 372 | , abbreviation = lsectionAbbreviation 373 | , .. } 374 | let pn = paraNumbers $ paraNumbered . lsectionParagraphs 375 | nums <- get 376 | put nums{noteNr=1, exampleNr=1, itemDeclNr=1} 377 | sectionFootnotes <- assignNumbers newSec lsectionFootnotes 378 | modify $ \n -> n{nextSentenceNr=1} 379 | paragraphs <- forM (zip pn lsectionParagraphs) $ assignNumbers newSec 380 | subsections <- treeizeSections 1 chapter (newSec : parents) lsubsections 381 | (newSec :) . treeizeSections (sectionNumber + 1) chapter parents more' 382 | 383 | instance AssignNumbers a b => AssignNumbers [a] [b] where 384 | assignNumbers s = mapM (assignNumbers s) 385 | 386 | resolveGrammarterms :: Parser.Macros -> [Text] -> LinearSection -> LinearSection 387 | resolveGrammarterms macros links LinearSection{..} = 388 | LinearSection{lsectionParagraphs = map resolve lsectionParagraphs, ..} 389 | where 390 | resolveTexPara :: RawTexPara -> RawTexPara 391 | resolveTexPara RawTexPara{..} = RawTexPara{rawTexParaElems = map resolveRawElem rawTexParaElems, ..} 392 | resolveRawElem :: RawElement -> RawElement 393 | resolveRawElem (RawBnf s tex) = RawBnf s (bnfGrammarterms macros links tex) 394 | resolveRawElem (RawEnumerated s items) = RawEnumerated s (map resolveItem items) 395 | resolveRawElem y = y 396 | resolveItem :: RawItem -> RawItem 397 | resolveItem (RawItem label content) = RawItem label (map resolveTexPara content) 398 | resolve :: RawParagraph -> RawParagraph 399 | resolve RawParagraph{..} = RawParagraph{rawParaElems = map resolveTexPara rawParaElems, ..} 400 | 401 | bnfGrammarterms :: Parser.Macros -> [Text] -> LaTeX -> LaTeX 402 | bnfGrammarterms macros links = mapTeX go . mapTeX wordify 403 | where 404 | wordify :: LaTeXUnit -> Maybe LaTeX 405 | wordify (TeXRaw stuff) = Just $ map TeXRaw $ unfoldr f stuff 406 | where 407 | f s | Text.null s = Nothing 408 | f s | isName $ Text.head s = Just $ Text.span isName s 409 | f s = Just $ Text.break isName s 410 | 411 | isName c = isAlpha c || c `elem` ['-', '_'] 412 | wordify _ = Nothing 413 | 414 | go :: LaTeXUnit -> Maybe LaTeX 415 | go d@(TeXComm cmd _ _) | cmd `elem` ["tcode", "index", "textnormal", "indexlink", "hiddenindexlink", "indexedspan", "terminal", "literalterminal", "noncxxterminal"] = Just [d] 416 | go (TeXRaw name) 417 | | name `elem` links = Just $ fst $ RawDocument.doParse macros $ "\\grammarterm{" ++ name ++ "}" 418 | go _ = Nothing 419 | 420 | parseIndex :: LaTeX -> (IndexPath, Maybe IndexKind) 421 | parseIndex = go . mapTeXRaw unescapeIndexPath . concatRaws 422 | where 423 | go (texStripInfix "|seealso" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See True y) 424 | go (texStripInfix "|see " -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y) 425 | go (texStripInfix "|see" -> Just (x, [TeXBraces y])) = (parseIndexPath x, Just $ See False y) 426 | go (texStripInfix "|(" -> Just (t, _)) = (parseIndexPath t, Just IndexOpen) 427 | go (texStripInfix "|)" -> Just (t, _)) = (parseIndexPath t, Just IndexClose) 428 | go (texStripInfix "|idxbfpage" -> Just (t, _)) = (parseIndexPath t, Just DefinitionIndexEntry) 429 | go t = (parseIndexPath t, Nothing) 430 | 431 | unescapeIndexPath :: Text -> LaTeXUnit 432 | unescapeIndexPath = TeXRaw 433 | . replace "\5" "\"" 434 | 435 | . replace "\2" "!" 436 | . replace "!" "\1" 437 | . replace "\"!" "\2" 438 | 439 | . replace "\4" "@" 440 | . replace "@" "\3" 441 | . replace "\"@" "\4" 442 | 443 | . replace "\"|" "|" 444 | . replace "\"\"" "\5" 445 | 446 | . (!! 10) . iterate (replace " " " ") 447 | . replace "\n" " " 448 | 449 | parseIndexPath :: LaTeX -> IndexPath 450 | parseIndexPath (texStripInfix "\1" -> Just (x, y)) = parseIndexPath x ++ parseIndexPath y 451 | parseIndexPath (texStripInfix "\3" -> Just (x, y)) = [IndexComponent x y] 452 | parseIndexPath t = [IndexComponent [] t] 453 | 454 | sectionTexParas :: Section -> [TeXPara] 455 | sectionTexParas s = (paragraphs s >>= paraElems) ++ (sectionFootnotes s >>= footnoteContent) 456 | 457 | sectionTex :: Section -> LaTeX 458 | sectionTex s = sectionTexParas s >>= texParaTex 459 | 460 | sectionIndexEntries :: Section -> [IndexEntry] 461 | sectionIndexEntries s = 462 | [ IndexEntry{indexEntrySection=abbreviation sec, ..} 463 | | sec <- sections s 464 | , [ (FixArg, [TeXRaw (Text.unpack -> read -> Just -> indexEntryNr)]) 465 | , (OptArg, [TeXRaw indexCategory]), (FixArg, (parseIndex -> (indexPath, indexEntryKind))) 466 | ] <- lookForCommand "index" (sectionTex sec)] 467 | 468 | sectionLabels :: Section -> [(Text, Section)] 469 | sectionLabels s = 470 | [ (label, sec) | sec <- sections s 471 | , [ (FixArg, [TeXRaw label]) ] <- lookForCommand "label" (sectionTex sec)] 472 | 473 | toIndex :: IndexEntry -> Index 474 | toIndex IndexEntry{..} = Map.singleton indexCategory $ go indexPath 475 | where 476 | go :: [IndexComponent] -> IndexTree 477 | go [c] = Map.singleton c (IndexNode [IndexEntry indexEntrySection indexEntryKind indexPath indexEntryNr indexCategory] Map.empty) 478 | go (c:cs) = Map.singleton c $ IndexNode [] $ go cs 479 | go _ = error "toIndex" 480 | 481 | trackPnums :: FilePath -> Text -> Text 482 | -- Replaces \pnum with \pnum{file}{line} 483 | trackPnums file = Text.pack . unlines . map (uncurry f) . zip [1..] . lines . Text.unpack 484 | where 485 | f :: Integer -> String -> String 486 | f lineNr line 487 | | Just (pre, post) <- stripInfix "\\pnum" line 488 | = pre ++ "\\pnum{" ++ file ++ "}{" ++ show lineNr ++ "}" ++ (if null post then "%" else post) 489 | | otherwise = line 490 | 491 | getFileList :: IO [FilePath] 492 | getFileList = 493 | (\\ ["front", "back"]) . 494 | map (Text.unpack . Text.dropEnd 1 . Text.drop (Text.length pre)) . 495 | filter (pre `isPrefixOf`) . 496 | Text.lines . readFile "std.tex" 497 | where pre = "\\include{" 498 | 499 | grabBnf :: [String] -> [String] 500 | grabBnf [] = [] 501 | grabBnf (line : rest) 502 | | "\\begin{bnf}" `List.isPrefixOf` line = 503 | let (x, end : more) = break ("\\end{bnf}" `List.isPrefixOf`) rest 504 | in ["", line] ++ x ++ [end] ++ grabBnf more 505 | | "\\gramSec" `List.isPrefixOf` line = ["", line] ++ grabBnf rest 506 | | otherwise = grabBnf rest 507 | 508 | generateStdGramExt :: [FilePath] -> IO Text 509 | generateStdGramExt files = 510 | Text.pack . unlines . grabBnf . lines . Text.unpack . 511 | Text.concat . mapM readFile ((++ ".tex") . files) 512 | 513 | importExampleFile :: FilePath -> IO Text 514 | importExampleFile = 515 | (Text.strip . 516 | Text.unlines . 517 | takeWhile (/= "\\end{document}") . 518 | tail . 519 | dropWhile (/= "\\begin{document}") . 520 | Text.lines .) . 521 | readFile 522 | 523 | importExamples :: Text -> Text 524 | importExamples x = case matchRegexAll r (Text.unpack x) of 525 | Nothing -> x 526 | Just (before, _match, after, subs) -> 527 | Text.pack before ++ 528 | unsafePerformIO (importExampleFile $ "assets/" ++ (subs !! 1) ++ ".tex") ++ 529 | importExamples (Text.pack after) 530 | where r = mkRegex "\\\\importexample(\\[[0-9a-zA-Z.-]*\\])?{([a-zA-Z0-9_-]*)}" 531 | 532 | parseFiles :: Parser.Macros -> IO ([LinearSection], Parser.Macros) 533 | parseFiles m = do 534 | files <- getFileList 535 | stdGramExt <- generateStdGramExt files 536 | let 537 | go [] macros = return ([], macros) 538 | go (c:cc) macros = do 539 | let p = c ++ ".tex" 540 | 541 | stuff <- 542 | importExamples . 543 | replace "multicolfloattable" "floattable" . 544 | replace "\\indeximpldef{" "\\index[impldefindex]{" . 545 | Text.unlines . 546 | indexCodeEnvs ["codeblock", "itemdecl"] . 547 | moveIndexEntriesIntoSecs . 548 | moveIndexEntriesIntoDefs . 549 | Text.lines . 550 | trackPnums p . 551 | replace "\\nodiffref\n\\change" "\n\\pnum\\textbf{Change:}\\space" . 552 | replace "\n\\diffref" "\n\\pnum\\nopnumdiffref" . 553 | -- Done here because (1) the real \nodiffref is defined with \def in a way 554 | -- we don't support yet, and (2) this way a source link is generated for the pnum. 555 | readFile p 556 | 557 | let extra = if c /= "grammar" then "" else replace "\\gramSec" "\\rSec1" stdGramExt 558 | let (r, macros') = parseFile macros (stuff ++ extra) 559 | if length r == 0 then undefined else 560 | first (r ++) . go cc (macros ++ macros') 561 | 562 | bib <- fst . parseFile m . 563 | fst . fromJust . 564 | textStripInfix "\\clearpage" . 565 | ("\\rSec0[bibliography]{Bibliography}\n" ++) . 566 | readFile "back.tex" 567 | 568 | first (++ bib) . go files m 569 | 570 | load14882 :: Text -> IO Draft 571 | load14882 extraMacros = do 572 | 573 | commitUrl <- getCommitUrl 574 | 575 | (macros@Parser.Macros{..}, took) <- measure (loadMacros extraMacros) 576 | putStrLn $ "Loaded macros in " ++ show (took * 1000) ++ "ms." 577 | 578 | (secs :: [LinearSection], took2) <- measure $ fst . parseFiles macros 579 | putStrLn $ "Parsed LaTeX in " ++ show (took2 * 1000) ++ "ms." 580 | 581 | xrefDelta <- loadXrefDelta 582 | 583 | (r, took3) <- measure $ if length (show secs) == 0 then undefined else do 584 | -- force eval before we leave the dir 585 | let 586 | grammarNames = [n | 587 | TeXComm "index" _ [ 588 | (OptArg, [TeXRaw "grammarindex"]) , 589 | (FixArg, [TeXRaw _ 590 | ,TeXComm "textcolor" "" [(FixArg,[TeXRaw "grammar-gray"]),(FixArg,[TeXComm "textsf" _ [(FixArg,[TeXComm "textit" "" [(FixArg,[TeXRaw n])]])]])] 591 | ,TeXRaw "|idxbfpage"] 592 | )] <- allUnits secs] 593 | 594 | secs' = map (resolveGrammarterms macros grammarNames) secs 595 | chapters = evalState (treeizeChapters False 1 secs') (Numbers 1 1 1 1 0 0 1 1 1 1) 596 | allEntries :: [IndexEntry] 597 | allEntries = chapters >>= sectionIndexEntries 598 | index = mergeIndices $ map toIndex allEntries 599 | indexEntryMap = IntMap.fromList [(n, e) | e@IndexEntry{indexEntryNr=Just n} <- allEntries] 600 | indexEntriesByPath = reverseIndexEntryMap indexEntryMap 601 | labels = Map.fromList $ chapters >>= sectionLabels 602 | 603 | abbrMap = makeAbbrMap dr 604 | dr = Draft{..} 605 | return dr 606 | 607 | putStrLn $ "Processed in " ++ show (took3 * 1000) ++ "ms." 608 | return r 609 | -------------------------------------------------------------------------------- /MathJax.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, ViewPatterns #-} 2 | 3 | module MathJax (render) where 4 | 5 | import Control.Concurrent.MVar (takeMVar, putMVar, newMVar) 6 | import Data.Text (Text) 7 | import qualified Data.Text as Text 8 | import System.IO.Unsafe (unsafePerformIO) 9 | import System.Process (shell, CreateProcess(..), createProcess, StdStream(CreatePipe)) 10 | import System.IO (BufferMode(..), hGetLine, hPutStrLn, hSetBuffering) 11 | import Text.Regex (mkRegex, subRegex) 12 | import Prelude hiding ((++)) 13 | import Util ((++)) 14 | import qualified Data.Map as Map 15 | import Data.Map (Map) 16 | 17 | rmTrailingNewline :: Text -> Text 18 | rmTrailingNewline (Text.stripSuffix "\n" -> Just x) = x 19 | rmTrailingNewline x = x 20 | 21 | type Renderer = String {- formula -} -> Bool {- inline -} -> Text 22 | 23 | data Input = Input { _formula :: String, _inline :: Bool } 24 | deriving (Eq, Ord) 25 | 26 | makeRenderer :: IO Renderer 27 | makeRenderer = do 28 | 29 | (Just stdinPipe, Just stdoutPipe, _, _) <- createProcess (shell "./mathjax-batch") 30 | {std_in = CreatePipe, std_out = CreatePipe} 31 | 32 | hSetBuffering stdinPipe LineBuffering 33 | hSetBuffering stdoutPipe LineBuffering 34 | 35 | let 36 | rm r s = subRegex (mkRegex r) s "" 37 | readResult = do 38 | line <- hGetLine stdoutPipe 39 | if line == "DONE" 40 | then return "" 41 | else do 42 | more <- readResult 43 | return $ line ++ "\n" ++ more 44 | 45 | mutex <- newMVar (Map.empty :: Map Input Text) 46 | 47 | return $ \formula inline -> unsafePerformIO $ do 48 | let input = Input formula inline 49 | cache <- takeMVar mutex 50 | (result, cache') <- case Map.lookup input cache of 51 | Just output -> return (output, cache) 52 | Nothing -> do 53 | hPutStrLn stdinPipe formula 54 | hPutStrLn stdinPipe (if inline then "INLINE" else "NONINLINE") 55 | rawResult <- readResult 56 | let 57 | output 58 | = Text.replace " focusable=\"false\"" "" 59 | $ rmTrailingNewline -- Prevents artifacts in [rand.adapt.ibits]#4 60 | $ Text.pack 61 | $ rm " id=\"(MJXc|MathJax)-[0-9A-Za-z-]+\"" 62 | $ rm " style=\"\"" 63 | $ rawResult 64 | return (output, Map.insert input output cache) 65 | putMVar mutex cache' 66 | return result 67 | 68 | render :: Renderer 69 | render = unsafePerformIO $ makeRenderer 70 | -------------------------------------------------------------------------------- /Pages.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, RecordWildCards, ViewPatterns #-} 2 | 3 | module Pages (fileContent, pageContent, pagePath, writePage, applyPageStyle, Link(..), outputDir, PageStyle(..)) where 4 | 5 | import Prelude hiding ((++), (.), writeFile) 6 | import System.Directory (createDirectoryIfMissing) 7 | import Control.Monad (when) 8 | import qualified Data.Text as Text 9 | import qualified Data.Text.Lazy as LazyText 10 | import qualified Data.Text.Lazy.Builder as TextBuilder 11 | import Util ((++), (.), Text, writeFile) 12 | 13 | outputDir :: FilePath 14 | outputDir = "14882/" 15 | 16 | data PageStyle = Bare | WithExtension | InSubdir 17 | deriving (Eq, Read) 18 | 19 | fileContent :: TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder -> TextBuilder.Builder 20 | fileContent pathHome title extraHead body = 21 | "" ++ 22 | "" ++ 23 | "" ++ 24 | "" ++ title ++ "" ++ 25 | "" ++ 26 | "" ++ 27 | "" ++ 28 | "" ++ 29 | "" ++ 30 | "" ++ 31 | extraHead ++ 32 | "" ++ 33 | "
" ++ body ++ "
" ++ 34 | "" 35 | 36 | data Link = TocToSection | SectionToToc | SectionToSection 37 | deriving Show 38 | 39 | doLink :: PageStyle -> Link -> Text -> Text 40 | doLink sfs l = LazyText.toStrict . TextBuilder.toLazyText . go . Text.splitOn (Text.pack (show l) ++ "/") 41 | where 42 | go :: [Text] -> TextBuilder.Builder 43 | go (x : (Text.break (`elem` ("'#" :: String)) -> (a, b)) : z) = TextBuilder.fromText x ++ f (TextBuilder.fromText a) ++ go (b : z) 44 | go [x] = TextBuilder.fromText x 45 | go _ = undefined 46 | f :: TextBuilder.Builder -> TextBuilder.Builder 47 | f = case (sfs, l) of 48 | (Bare, SectionToToc) -> ("./#" ++) 49 | (Bare, TocToSection) -> id 50 | (Bare, SectionToSection) -> id 51 | (InSubdir, SectionToToc) -> ("../#" ++) 52 | (InSubdir, TocToSection) -> (++ "/") 53 | (InSubdir, SectionToSection) -> ("../" ++) 54 | (WithExtension, SectionToToc) -> ("index.html#" ++) 55 | (WithExtension, TocToSection) -> (++ ".html") 56 | (WithExtension, SectionToSection) -> (++ ".html") 57 | 58 | applyPageStyle :: PageStyle -> Text -> Text 59 | applyPageStyle sfs = 60 | doLink sfs SectionToSection 61 | . doLink sfs SectionToToc 62 | . doLink sfs TocToSection 63 | 64 | pagePath :: FilePath -> PageStyle -> String 65 | pagePath n Bare = outputDir ++ n 66 | pagePath n WithExtension = outputDir ++ n ++ ".html" 67 | pagePath n InSubdir = outputDir ++ n ++ "/index.html" 68 | 69 | pageContent :: PageStyle -> TextBuilder.Builder -> Text 70 | pageContent sfs content = applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $ content 71 | 72 | writePage :: FilePath -> PageStyle -> Text -> IO () 73 | writePage n sfs content = do 74 | when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n) 75 | writeFile (pagePath n sfs) content 76 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Introduction 2 | 3 | cxxdraft-htmlgen parses the LaTeX sources of the draft, 4 | and generates static HTML pages from them. 5 | 6 | Prerequisites 7 | 8 | - Git 9 | - The Haskell Platform (https://www.haskell.org/platform/) 10 | - Graphviz 11 | - Node.js 12 | - The 'split' NPM package 13 | - mathjax-node-cli (https://github.com/mathjax/mathjax-node-cli/) 14 | 15 | Usage 16 | 17 | Do: 18 | git clone https://github.com/Eelis/cxxdraft-htmlgen.git 19 | cd cxxdraft-htmlgen 20 | cabal build 21 | dist/build/cxxdraft-htmlgen/cxxdraft-htmlgen path/to/draft [sectionfilestyle] 22 | 23 | Or with stack: 24 | stack build 25 | stack exec cxxdraft-htmlgen path/to/draft [sectionfilestyle] 26 | 27 | The sectionfilestyle parameter is one of: 28 | 29 | Bare (to generate e.g. intro.execution) 30 | WithExtension (to generate e.g. intro.execution.html) 31 | InSubdir (to generate e.g. intro.execution/index.html) 32 | 33 | The default is WithExtension, since this is suitable 34 | for direct browsing on a filesystem without a web server. 35 | 36 | Bare may be used in conjunction with web server configuration 37 | specifying a default text/html mime type for the directory 38 | containing the section pages, to get URLs such as: 39 | 40 | temp.res#temp.dep 41 | temp.dep#3 42 | 43 | InSubdir only requires defaulting to index.html, to give: 44 | 45 | temp.res/#temp.dep 46 | temp.dep/#3 47 | 48 | Custom draft branch 49 | 50 | While cxxdraft-htmlgen works with the official draft sources as-is, 51 | better results can be obtained by using the following branch: 52 | 53 | https://github.com/Eelis/draft/tree/cxxdraft-htmlgen-fixes 54 | 55 | This branch tracks the official draft sources, but makes some changes to: 56 | - improve syntax highlighting 57 | - clean up hyperlinks 58 | - work around MathJax limitations 59 | - work around cxxdraft-htmlgen limitations 60 | 61 | Output 62 | 63 | The following will be created in ./14882/ : 64 | 65 | - index.html A table of contents with links to... 66 | 67 | - ~2300 interlinked section pages 68 | 69 | These are named after the section abbreviation, which for 70 | the Bare section file style look like: 71 | 72 | stmt.goto 73 | class.member.lookup 74 | cpp 75 | iterator.requirements.general 76 | locale.moneypunct.virtuals 77 | 78 | Since sections nest, content is duplicated at every level. 79 | This allows one to specify more or less context for a given 80 | citation. For example, one can link to: 81 | 82 | basic.scope.hiding (section 6.4.10 "Name hiding" on 83 | a page of its own) 84 | 85 | basic.scope#hiding (the same section highlighted on 86 | the page for section 6.4 "Scope") 87 | 88 | basic#scope.hiding (the same section highlighted on 89 | the page for chapter 6 "Basics") 90 | 91 | - full The entire document (~24 mbyte, or ~2 mbyte compressed). 92 | 93 | - 14882.css Used by all of the above. 94 | 95 | Hidden links 96 | 97 | On any page: 98 | 99 | - defined terms/concepts/nonterminals are links that select themselves; 100 | 101 | - a full stop at the end of a sentence is a link that selects the sentence; 102 | 103 | - moving the mouse over the right margin of a numbered paragraph reveals a link 104 | to the LaTeX source for that paragraph; 105 | 106 | - moving the mouse over the left margin of an itemdecl or table row reveals a link 107 | that selects it. 108 | -------------------------------------------------------------------------------- /RawDocument.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE 2 | OverloadedStrings, 3 | RecordWildCards, 4 | ViewPatterns, 5 | LambdaCase, 6 | TupleSections, 7 | NamedFieldPuns, 8 | FlexibleInstances, 9 | FlexibleContexts, 10 | RankNTypes, 11 | MultiParamTypeClasses, 12 | FunctionalDependencies, 13 | UndecidableInstances, 14 | RecursiveDo #-} 15 | 16 | module RawDocument 17 | ( RawElement(..), RawTexPara(..), RawFootnote(..), RawParagraph(..), LinearSection(..), RawItem(..) 18 | , loadMacros, parseFile, loadXrefDelta, doParse) where 19 | 20 | import qualified LaTeXParser as Parser 21 | import qualified Data.Text as Text 22 | import Data.Text (Text, replace) 23 | import Document (Row(..), SourceLocation(..), RowSepKind(..), SectionKind(..), Cell(..), CellSpan(..), XrefDelta, Abbreviation, ColumnSpec(..), TextAlignment(..)) 24 | import Data.Maybe (isJust, fromJust) 25 | import LaTeXParser (Macros(..), Signature(..), nullCmd, storeCmd, storeEnv, Environment(..), Command(..), codeEnv, Token(..), normalCmd, ParseResult(..)) 26 | import Data.Text.IO (readFile) 27 | import Text.Regex (mkRegex) 28 | import qualified Data.Map as Map 29 | import Data.Map (Map) 30 | import Data.List (transpose, take, isPrefixOf) 31 | import Util ((.), (++), mapHead, textStripInfix, textSubRegex, splitOn) 32 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile) 33 | import System.IO.Unsafe (unsafePerformIO) 34 | import System.Process (readProcess) 35 | import Control.Arrow (first) 36 | import Data.Char (isSpace, isDigit) 37 | import LaTeXBase 38 | 39 | data RawItem = RawItem 40 | { rawItemLabel :: LaTeX 41 | , rawItemContent :: [RawTexPara] } 42 | deriving (Eq, Show) 43 | 44 | data RawElement 45 | = RawLatexElement LaTeXUnit 46 | | RawEnumerated String [RawItem] 47 | | RawCodeblock LaTeXUnit 48 | | RawExample [RawTexPara] 49 | | RawNote Text [RawTexPara] 50 | | RawItemdescr [RawTexPara] 51 | | RawBnf String LaTeX 52 | | RawTable 53 | { rawTableCaption :: LaTeX 54 | , rawColumnSpec :: [ColumnSpec] 55 | , rawTableAbbr :: Abbreviation 56 | , rawTableBody :: [Row [RawTexPara]] } 57 | | RawTabbing LaTeX 58 | | RawFormula { rawFormulaAbbr :: Abbreviation, rawFormulaContent :: LaTeX } 59 | | RawFigure { rawFigureName :: LaTeX, rawFigureAbbr :: Abbreviation, rawFigureSvg :: Text } 60 | deriving (Eq, Show) 61 | 62 | newtype RawTexPara = RawTexPara { rawTexParaElems :: [RawElement] } 63 | deriving (Eq, Show) 64 | 65 | newtype RawFootnote = RawFootnote [RawTexPara] 66 | deriving Show 67 | 68 | data RawParagraph = RawParagraph 69 | { paraNumbered :: Bool 70 | , rawParaInItemdescr :: Bool 71 | , rawParaElems :: [RawTexPara] 72 | , rawParaSourceLoc :: Maybe SourceLocation } 73 | deriving Show 74 | 75 | data LinearSection = LinearSection 76 | { lsectionAbbreviation :: Abbreviation 77 | , lsectionKind :: SectionKind 78 | , lsectionName :: LaTeX 79 | , lsectionParagraphs :: [RawParagraph] 80 | , lsectionFootnotes :: [RawFootnote] } 81 | deriving Show 82 | 83 | instance AllUnits RawElement where 84 | allUnits (RawLatexElement x) = allUnits x 85 | allUnits (RawBnf _ x) = allUnits x 86 | allUnits (RawTabbing x) = allUnits x 87 | allUnits (RawNote _ x) = allUnits x 88 | allUnits (RawExample x) = allUnits x 89 | allUnits (RawCodeblock x) = allUnits x 90 | allUnits (RawItemdescr x) = allUnits x 91 | allUnits (RawEnumerated _ x) = allUnits x 92 | allUnits (RawFormula _ x) = allUnits x 93 | allUnits RawFigure{} = [] 94 | allUnits RawTable{..} = allUnits rawTableCaption ++ concatMap (allUnits . concat . map content) (map cells rawTableBody) 95 | 96 | instance AllUnits RawTexPara where 97 | allUnits = allUnits . rawTexParaElems 98 | 99 | instance AllUnits RawItem where 100 | allUnits RawItem{..} = allUnits rawItemLabel ++ allUnits rawItemContent 101 | 102 | instance AllUnits LinearSection where 103 | allUnits LinearSection{..} = allUnits lsectionName ++ allUnits lsectionParagraphs ++ allUnits lsectionFootnotes 104 | 105 | instance AllUnits RawParagraph where 106 | allUnits RawParagraph{..} = allUnits rawParaElems 107 | 108 | instance AllUnits RawFootnote where 109 | allUnits (RawFootnote x) = allUnits x 110 | 111 | bnfEnvs :: [String] 112 | bnfEnvs = ["bnf", "ncbnf", "bnfkeywordtab", "simplebnf", "ncsimplebnf", "ncrebnf"] 113 | 114 | isBnf :: LaTeXUnit -> Bool 115 | isBnf (TeXEnv s _ _) 116 | | s `elem` bnfEnvs = True 117 | isBnf _ = False 118 | 119 | isTable, isTabbing, isFigure :: LaTeXUnit -> Bool 120 | isTable x = isTeXEnv "floattablebasex" x || isTeXEnv "htmlTable" x 121 | isTabbing = isTeXEnv "tabbing" 122 | isFigure = isTeXEnv "importgraphic" 123 | 124 | isEnumerate :: LaTeXUnit -> Maybe String 125 | isEnumerate (TeXEnv s _ _) 126 | | s `elem` ["enumerate", "itemize", "description", "thebibliography"] = Just s 127 | isEnumerate _ = Nothing 128 | 129 | isParaEnd :: LaTeXUnit -> Bool 130 | isParaEnd (TeXEnv "itemdecl" _ _) = True 131 | isParaEnd (TeXEnv "indexeditemdecl" _ _) = True 132 | isParaEnd (TeXEnv "itemdescr" _ _) = True 133 | isParaEnd (TeXComm "pnum" _ _) = True 134 | isParaEnd x = isParasEnd x 135 | 136 | isParasEnd :: LaTeXUnit -> Bool 137 | isParasEnd (TeXComm "definition" _ _) = True 138 | isParasEnd (TeXComm "rSec" _ _) = True 139 | isParasEnd (TeXComm "infannex" _ _) = True 140 | isParasEnd (TeXComm "normannex" _ _) = True 141 | isParasEnd _ = False 142 | 143 | isJunk :: LaTeXUnit -> Bool 144 | isJunk (TeXRaw x) = all isSpace (Text.unpack x) 145 | isJunk (TeXComm "index" _ _) = True 146 | isJunk (TeXComm "setlength" _ _) = True 147 | isJunk _ = False 148 | 149 | isItem :: LaTeXUnit -> Maybe LaTeX 150 | isItem (TeXComm "item" _ []) = Just [] 151 | isItem (TeXComm "item" _ [(_, label)]) = Just label 152 | isItem (TeXComm "bibitem" _ [(_, [TeXRaw label])]) = Just [TeXRaw $ "bib:" ++ label] 153 | isItem _ = Nothing 154 | 155 | parseItems :: LaTeX -> [RawItem] 156 | parseItems [] = [] 157 | parseItems (x : rest) 158 | | isJunk x = mapHead (mapItemContent (mapHead addJunk)) (parseItems rest) 159 | | Just label <- isItem x, (item, rest') <- break (isJust . isItem) rest = 160 | RawItem label (parsePara item) : parseItems rest' 161 | where 162 | mapItemContent f (RawItem l c) = RawItem l (f c) 163 | addJunk :: RawTexPara -> RawTexPara 164 | addJunk (RawTexPara z) = RawTexPara (dropWhile isOnlySpace $ RawLatexElement x : z) 165 | parseItems _ = error "need items or nothing" 166 | 167 | doParse :: Macros -> Text -> (LaTeX, Macros) 168 | doParse m t = (x, y) 169 | where 170 | (x, y, []) = Parser.parseString ctx (Text.unpack t) 171 | ctx = initialContext{Parser.macros=m} 172 | 173 | nullCmds :: [(Int, String)] 174 | nullCmds = 175 | [ (0, "clearpage kill rmfamily hfill vfill nocorr small larger noindent itcorrwidth itletterwidth global") 176 | , (1, "enlargethispage lstset newsavebox vspace input") 177 | , (2, "glossary settowidth addtolength") 178 | , (3, "definecolor") 179 | ] 180 | 181 | storeCmds :: [(Int, String)] 182 | storeCmds = 183 | [ (0, "today def makeatletter bottomline makeatother Sec bmod mod long prime " ++ 184 | "chapter section paragraph subparagraph fi otextup linebreak newpage log " ++ 185 | "textup edef x BnfIndent par leq " ++ 186 | "leftmargini BnfInc BnfRest protect caret sum " ++ 187 | "xspace onelineskip textlangle textrangle tilde raggedright = " ++ 188 | "space copyright textregistered textbackslash hsize br Gamma " ++ 189 | "frenchspacing list leftmargin listparindent itemindent itshape relax " ++ 190 | "nonfrenchspacing endlist upshape ttfamily baselineskip nobreak " ++ 191 | "endfirsthead quad qquad cdot cdots dotsc bnfindentinc footnotemark ldots capsep max min " ++ 192 | "continuedcaption hline endhead footnotesize le times dotsb rightarrow to equiv " ++ 193 | "lfloor rfloor pi geq neq ge lceil rceil ell alpha bigl bigr mu lambda beta " ++ 194 | "tabularnewline exp sigma big delta rho Pi nu infty displaystyle lim sin cos " ++ 195 | "phi int theta zeta FlushAndPrintGrammar break backslash centering " ++ 196 | "normalbaselineskip land lor mapsto normalfont textmu tablerefname figurerefname newline " ++ 197 | "obeyspaces bnfindent vdots tabcolsep columnbreak emergencystretch commentellip " ++ 198 | "gamma widowpenalties sffamily parskip left right `") 199 | , (1, "hspace footnote textit textrm textnormal texttt textbf ensuremath ref ref* mbox bibitem mathop " ++ 200 | "terminal literalterminal noncxxterminal textsl textsc textsf text term overline " ++ 201 | "tcode noncxxtcode literaltcode footnotetext microtypesetup cline mathtt mathit mathrm mathsf " ++ 202 | "label newlength uline value newcounter mathscr c uppercase iref operatorname " ++ 203 | "phantom hphantom sqrt ln emph minipage url indexescape changeglossnumformat textasciitilde " ++ 204 | "removedxref deprxref textsuperscript rlap mathrel mathbin nopnumdiffref color ucode uname") 205 | , (2, "pnum definition addtocounter setcounter frac " ++ 206 | "binom infannex normannex parbox link weblink indexedspan movedxref movedxrefs " ++ 207 | "equal setlength textcolor") 208 | , (3, "multicolumn discretionary movedxrefii ifthenelse PackageError NewEnviron") 209 | , (4, "movedxrefiii indexlink hiddenindexlink") 210 | ] 211 | 212 | initialCmds :: Map Text Command 213 | initialCmds = Map.fromList $ 214 | [ storeCmd "item" (Signature 0 (Just [])) 215 | , storeCmd "caption" (Signature 2 (Just [])) 216 | , storeCmd "index" (Signature 2 (Just [])) 217 | , storeCmd "hyperref" (Signature 2 (Just [])) 218 | , nullCmd "makebox" (Signature 2 (Just [])) 219 | , storeCmd "\n" (Signature 0 Nothing) 220 | , storeCmd "nolinebreak" (Signature 0 (Just [])) 221 | , storeCmd "textsmaller" (Signature 2 (Just [])) 222 | , nullCmd "gramSec" (Signature 2 (Just [])) 223 | , ("kern", normalCmd $ Command $ \_ctx _ws -> ParseResult [] mempty . snd . parseDimen) 224 | ] 225 | ++ [storeCmd c (Signature a Nothing) | (a, l) <- storeCmds, c <- words l] 226 | ++ [nullCmd (Text.pack c) (Signature a Nothing) | (a, l) <- nullCmds, c <- words l] 227 | 228 | parseDimen :: [Token] -> ([Token], [Token]) 229 | parseDimen toks 230 | | t@(Token txt) : more <- toks, txt `elem` [".", "pt", "-", "em"] || all isDigit txt = first (t :) (parseDimen more) 231 | | otherwise = ([], toks) 232 | 233 | initialEnvs :: Map Text Environment 234 | initialEnvs = Map.fromList $ 235 | [ (storeEnv e (Signature 0 Nothing)) 236 | | e <- bnfEnvs ++ 237 | words "indented description itemize center tabbing defnote enumerate eqnarray* equation* itemdescr footnote matrix" 238 | ] ++ 239 | [ storeEnv "example" (Signature 1 (Just [])) 240 | , storeEnv "tailexample" (Signature 1 (Just [])) 241 | , storeEnv "note" (Signature 0 (Just [Token "Note"])) 242 | , storeEnv "tailnote" (Signature 0 (Just [Token "Note"])) 243 | , storeEnv "table" (Signature 1 Nothing) 244 | , storeEnv "tabular" (Signature 1 Nothing) 245 | , storeEnv "longtable" (Signature 1 Nothing) 246 | , storeEnv "importgraphic" (Signature 3 Nothing) 247 | , storeEnv "formula" (Signature 1 Nothing) 248 | , storeEnv "minipage" (Signature 1 Nothing) 249 | , storeEnv "thebibliography" (Signature 1 Nothing) 250 | , codeEnv "indexeditemdecl" (Signature 1 Nothing) 251 | , codeEnv "itemdecl" (Signature 0 Nothing) 252 | , codeEnv "indexedcodeblock" (Signature 1 Nothing) 253 | , codeEnv "codeblock" (Signature 0 Nothing) 254 | , codeEnv "codeblockdigitsep" (Signature 0 Nothing) 255 | , codeEnv "codeblocktu" (Signature 1 Nothing) 256 | , storeEnv "array" (Signature 1 Nothing) 257 | , storeEnv "floattablebasex" (Signature 4 Nothing) 258 | , storeEnv "htmlTable" (Signature 3 Nothing) 259 | ] 260 | 261 | initialMacros :: Parser.Macros 262 | initialMacros = Parser.defaultMacros ++ mempty{Parser.commands=initialCmds, Parser.environments=initialEnvs} 263 | 264 | initialContext :: Parser.Context 265 | initialContext = Parser.defaultContext{Parser.macros=initialMacros} 266 | 267 | parseFile :: Macros -> Text -> ([LinearSection], Macros) 268 | parseFile macros = 269 | first (parseSections 0) 270 | . doParse macros 271 | . replace "$$" "$" 272 | . replace "\\hspace*" "\\hspace" 273 | . replace "``" "“" 274 | . textSubRegex (mkRegex "(\\grammarterm\\{[A-Za-z-]*\\})(\\{s\\}|s)") "\\1\\textit{s}" 275 | -- Mixing italic and upright looks okay in the PDF, but looks bad in browsers, 276 | -- and our linkification makes clear enough that the plural 's' is not part 277 | -- of the grammarterm. 278 | 279 | loadFigure :: Text -> Text 280 | loadFigure f = unsafePerformIO $ do 281 | dot <- readFile $ "assets/" ++ p 282 | svg <- readProcess "dot" ["-Tsvg", 283 | "-Gbgcolor=transparent", 284 | "-Gsize=8", 285 | "-Nfontsize=10", 286 | "-Gfontsize=10", 287 | "-Efontsize=10", 288 | "-Nfontname=Noto Serif", 289 | "-Efontname=Noto Serif", 290 | "-Gfontname=Noto Serif"] (Text.unpack $ Text.replace "Courier New" "Noto Sans Mono" $ Text.replace ", fontsize=24" "" dot) 291 | return $ rmIds $ snd $ Text.breakOn " Bool 300 | isOnlySpace (RawLatexElement x) = triml [x] == [] 301 | isOnlySpace _ = False 302 | 303 | parsePara :: LaTeX -> [RawTexPara] 304 | parsePara u = RawTexPara . dropWhile isOnlySpace . fmap f . splitElems (trim (filter (not . kill) u)) 305 | where 306 | kill (TeXComm "hline" _ []) = True 307 | kill (TeXComm "capsep" _ []) = True 308 | kill (TeXComm "endhead" _ _) = True 309 | kill _ = False 310 | f :: LaTeXUnit -> RawElement 311 | f e@(TeXEnv k a stuff) 312 | | isFigure e 313 | , [(FixArg, rawFigureName), (FixArg, [TeXRaw rawFigureAbbr]), (FixArg, [TeXRaw figureFile])] <- a 314 | = RawFigure{rawFigureSvg=loadFigure figureFile, ..} 315 | | k == "formula", [(FixArg, [TeXRaw rawFormulaAbbr])] <- a = RawFormula{rawFormulaContent = stuff, ..} 316 | | isTable e 317 | , ((_, cap) : (_, [TeXRaw abbr]) : (_, y) : _) <- a 318 | = RawTable 319 | { rawTableCaption = cap 320 | , rawColumnSpec = parseColspec y 321 | , rawTableAbbr = "tab:" ++ abbr 322 | , rawTableBody = breakMultiCols $ parseTable stuff } 323 | | isTable e = error $ "other table: " ++ show e 324 | | isTabbing e = RawTabbing stuff 325 | | isBnf e = RawBnf (if "nc" `isPrefixOf` k then drop 2 k else k) stuff 326 | | Just ek <- isEnumerate e = RawEnumerated ek (parseItems stuff) 327 | | isCodeblock e = RawCodeblock e 328 | | k `elem` ["note", "defnote", "tailnote"] = 329 | let label = case a of [(FixArg, [TeXRaw x])] -> x; _ -> "Note" 330 | in RawNote label $ parsePara stuff 331 | | k `elem` ["example", "tailexample"] = RawExample $ parsePara stuff 332 | | k == "itemdecl" || k == "minipage" || k == "indexeditemdecl" = RawLatexElement e 333 | | k == "itemdescr" = RawItemdescr $ parsePara stuff 334 | f x = RawLatexElement x 335 | splitElems :: LaTeX -> [LaTeX] 336 | splitElems [] = [] 337 | splitElems (x:xs) 338 | | TeXRaw (textStripInfix "\n\n" -> Just (a, (Text.stripStart -> b))) <- x = 339 | (if a == "" then ([] :) else ([TeXRaw a] :)) $ 340 | splitElems (if b == "" then xs else TeXRaw b : xs) 341 | | otherwise = case splitElems xs of 342 | [] -> [[x]] 343 | a:b -> ((x:a):b) 344 | 345 | class ExtractFootnotes a where extractFootnotes :: a -> (a, [RawFootnote]) 346 | 347 | instance ExtractFootnotes LaTeX where 348 | extractFootnotes [] = ([], []) 349 | extractFootnotes (TeXRaw x : t@(TeXEnv "footnote" _ _ : _)) 350 | = (TeXRaw (Text.stripEnd x) : t', ft) 351 | where (t', ft) = extractFootnotes t 352 | -- stripEnd here implements the footnote's \unskip 353 | extractFootnotes (h:t) = (h' : t', fh ++ ft) 354 | where 355 | (h', fh) = extractFootnotes h 356 | (t', ft) = extractFootnotes t 357 | 358 | instance ExtractFootnotes LaTeXUnit where 359 | extractFootnotes (TeXEnv "footnote" [] content) = 360 | (TeXComm "footnoteref" "" [], [RawFootnote $ parsePara content]) 361 | extractFootnotes (TeXComm "footnotemark" _ []) = 362 | (TeXComm "footnoteref" "" [], []) 363 | extractFootnotes (TeXComm "footnotetext" _ [(_, content)]) = 364 | (TeXRaw "" {- todo.. -}, [RawFootnote $ parsePara content]) 365 | extractFootnotes (TeXComm a ws [(FixArg, content)]) = 366 | first (\c -> TeXComm a ws [(FixArg, c)]) (extractFootnotes content) 367 | extractFootnotes (TeXEnv env args content) = first (TeXEnv env args) (extractFootnotes content) 368 | extractFootnotes other = (other, []) 369 | 370 | parseParas :: LaTeX -> ([RawParagraph], [RawFootnote], LaTeX {- rest -}) 371 | parseParas (break isParasEnd -> (extractFootnotes -> (stuff, fs), rest)) 372 | = (collectParas stuff, fs, rest) 373 | where 374 | collectParas :: LaTeX -> [RawParagraph] 375 | collectParas (t@(TeXEnv "indexeditemdecl" _ _) : more) = 376 | RawParagraph False False (parsePara [t]) Nothing : collectParas more 377 | collectParas (t@(TeXEnv "itemdecl" _ _) : more) = 378 | RawParagraph False False (parsePara [t]) Nothing : collectParas more 379 | collectParas (TeXEnv "itemdescr" _ desc : more) = 380 | map (\p -> p{rawParaInItemdescr=True}) (collectParas desc) 381 | ++ collectParas more 382 | collectParas (TeXComm "pnum" _ 383 | [ (FixArg, [TeXRaw (Text.unpack -> file)]) 384 | , (FixArg, [TeXRaw (Text.unpack -> read -> lineNr)])] : more) = 385 | (\(p : x) -> p{paraNumbered=True, rawParaSourceLoc=Just (SourceLocation file lineNr)} : x) 386 | (collectParas more) 387 | collectParas (TeXComm "pnum" _ [] : more) = 388 | (\(p : x) -> p{paraNumbered=True, rawParaSourceLoc=Nothing} : x) 389 | (collectParas more) 390 | collectParas [] = [] 391 | collectParas x = (if null p then id else (RawParagraph False False p Nothing :)) (collectParas more) 392 | where (parsePara -> p, more) = break isParaEnd x 393 | 394 | parseSections :: Int -> LaTeX -> [LinearSection] 395 | parseSections level 396 | (TeXComm c _ args : (parseParas -> (lsectionParagraphs, lsectionFootnotes, more))) 397 | | ((FixArg, isJustRaw -> fromJust -> lsectionAbbreviation), (FixArg, lsectionName), lsectionKind, level') <- case (c, args) of 398 | ("normannex", [abbr, name]) -> (abbr, name, NormativeAnnexSection, level) 399 | ("infannex", [abbr, name]) -> (abbr, name, InformativeAnnexSection, level) 400 | ("definition", [name, abbr]) -> (abbr, name, DefinitionSection (level + 1), level) 401 | ("rSec", [(FixArg, [TeXRaw (Text.unpack -> read -> l)]), abbr, name]) -> 402 | (abbr, name, NormalSection l, l) 403 | _ -> error $ "not a section command: " ++ show (c, args) 404 | = LinearSection{..} : parseSections level' more 405 | parseSections _ [] = [] 406 | parseSections l (x:xx) 407 | | TeXRaw t <- x, all isSpace (Text.unpack t) = parseSections l xx 408 | | otherwise = error $ "parseSections: " ++ show x 409 | 410 | parseTable :: LaTeX -> [Row [RawTexPara]] 411 | parseTable latex 412 | | triml latex == [] = [] 413 | | triml row == [] = parseTable $ tail rest 414 | | hasCommand (== "endfirsthead") row = parseTable $ findEndHead rest 415 | | hasCommand (`elem` ["caption", "bottomline"]) row = parseTable rest 416 | | otherwise = makeRow row : parseTable rest 417 | where 418 | (row, rest) = break (== TeXLineBreak) latex 419 | findEndHead l 420 | | row' == [] = findEndHead $ tail rest' 421 | | hasCommand (== "endhead") row' = l 422 | | otherwise = findEndHead rest' 423 | where 424 | (row', rest') = break (== TeXLineBreak) l 425 | 426 | columnBreakCell :: Cell [RawTexPara] 427 | columnBreakCell = Cell Normal [RawTexPara [RawLatexElement (TeXComm "columnbreak" "" [])]] 428 | isColumnBreakCell :: Cell [RawTexPara] -> Bool 429 | isColumnBreakCell (Cell Normal [RawTexPara [RawLatexElement (TeXComm "columnbreak" _ [])]]) = True 430 | isColumnBreakCell _ = False 431 | 432 | makeRectangular :: a -> [[a]] -> [[a]] 433 | makeRectangular filler rows = (take numCols . (++ repeat filler)) . rows 434 | where numCols = maximum (length . rows) 435 | -- Todo: Remove this when the bugs in Chrome's collapsed border rendering are fixed. 436 | 437 | breakMultiCols :: [Row [RawTexPara]] -> [Row [RawTexPara]] 438 | -- implements the multicolfloattable environment's \columnbreak, which is left intact by parseTable 439 | breakMultiCols rows 440 | | all (\Row{..} -> length cells == 1 && rowSep == NoSep) rows = 441 | Row NoSep . makeRectangular (Cell Normal []) (transpose $ splitOn isColumnBreakCell $ separateColumnBreaks $ (head . cells) . rows) 442 | | otherwise = rows 443 | where 444 | separateColumnBreaks :: [Cell [RawTexPara]] -> [Cell [RawTexPara]] 445 | separateColumnBreaks = concatMap f 446 | where 447 | f :: Cell [RawTexPara] -> [Cell [RawTexPara]] 448 | f c@Cell{..} | [RawTexPara (RawLatexElement (TeXComm "columnbreak" _ []) : rest)] <- content = 449 | [columnBreakCell, c{content = [RawTexPara rest]}] 450 | | otherwise = [c] 451 | 452 | makeRow :: LaTeX -> Row [RawTexPara] 453 | makeRow l = Row sep $ makeRowCells l 454 | where 455 | sep 456 | | hasCommand (== "hline") l = RowSep 457 | | hasCommand (== "capsep") l = CapSep 458 | | hasCommand (== "cline") l = Clines $ clines $ lookForCommand "cline" l 459 | | otherwise = NoSep 460 | 461 | clines [] = [] 462 | clines (([(FixArg, [TeXRaw c])]) : rest) = (begin, end) : clines rest 463 | where 464 | (begin', end') = Text.breakOn "-" c 465 | begin = read $ Text.unpack begin' :: Int 466 | end = read $ Text.unpack $ Text.tail end' :: Int 467 | clines other = error $ "Unexpected \\clines syntax: " ++ show other 468 | 469 | parseWidth :: LaTeX -> (Maybe Text, LaTeX) 470 | parseWidth (TeXRaw "" : x) = parseWidth x 471 | parseWidth (TeXBraces [TeXRaw x] : rest) = (Just x, rest) 472 | parseWidth (TeXBraces [TeXRaw x, TeXComm "hsize" "" []] : rest) = 473 | (Just $ Text.pack (show (round ((read ("0" ++ Text.unpack x) :: Double) * 100) :: Int)) ++ "%", rest) 474 | parseWidth (TeXBraces _ : rest) = (Nothing, rest) -- remaining cases unsupported for now 475 | parseWidth x = (Nothing, x) 476 | 477 | parseColspec :: LaTeX -> [ColumnSpec] 478 | parseColspec = \x -> case x of 479 | [] -> [] 480 | TeXRaw (Text.unpack -> '|' : z) : y -> go (TeXRaw (Text.pack z) : y) 481 | _ -> go x 482 | where 483 | go :: LaTeX -> [ColumnSpec] 484 | go [] = [] 485 | go [TeXRaw "|"] = [] 486 | go (TeXRaw "@" : TeXBraces _ : x) = go x -- unimplemented 487 | go (TeXRaw ">" : TeXBraces _ : x) = go x -- unimplemented 488 | go (TeXRaw "" : y) = go y 489 | go (TeXRaw (Text.uncons -> Just (letter, rest)) : y) 490 | | letter == ' ' = go (TeXRaw rest : y) 491 | | letter == '|' = mapHead (\(ColumnSpec x _ z) -> ColumnSpec x True z) $ go (TeXRaw rest : y) 492 | | otherwise = 493 | let (w, rest') = parseWidth (TeXRaw rest : y) 494 | in ColumnSpec (colClass letter) False w : go rest' 495 | go x = error ("parseColspec: " ++ show x) 496 | 497 | colClass :: Char -> TextAlignment 498 | colClass x | x `elem` ['l', 'm', 'x'] = AlignLeft 499 | colClass 'p' = Justify 500 | colClass 'r' = AlignRight 501 | colClass 'c' = AlignCenter 502 | colClass other = error $ "Unexpected column type " ++ (other : []) 503 | 504 | makeRowCells :: LaTeX -> [Cell [RawTexPara]] 505 | makeRowCells [] = [] 506 | makeRowCells latex = 507 | case rest of 508 | [] -> [makeCell cell] 509 | _ : r -> 510 | (makeCell $ cell <> [TeXRaw cell']) : makeRowCells (TeXRaw rest'' : r) 511 | where 512 | (cell, rest) = break isColEnd latex 513 | isColEnd (TeXRaw c) = isJust $ Text.find (== '&') c 514 | isColEnd _ = False 515 | 516 | (cell', rest') = Text.break (== '&') $ getText rest 517 | rest'' = Text.drop 1 rest' 518 | getText (TeXRaw s : _) = s 519 | getText other = error $ "Didn't expect " ++ show other 520 | 521 | makeCell content 522 | | [[(FixArg, [TeXRaw w]), (FixArg, cs), (FixArg, content')]] <- lookForCommand "multicolumn" content = 523 | Cell (Multicolumn (read $ Text.unpack w) (head $ parseColspec cs)) $ parsePara content' 524 | | otherwise = 525 | Cell Normal $ parsePara content 526 | 527 | rmExplSyntax :: Text -> Text 528 | rmExplSyntax = Text.unlines . f . Text.lines 529 | where 530 | f [] = [] 531 | f ("\\ExplSyntaxOn" : (dropWhile (/= "\\ExplSyntaxOff") -> (_ : x))) = f x 532 | f (h : t) = h : f t 533 | 534 | loadMacros :: Text -> IO Macros 535 | loadMacros extraMacros = 536 | (initialMacros ++) 537 | . snd 538 | . doParse initialMacros 539 | . replace "\\indeximpldef{" "\\index[impldefindex]{" 540 | . textSubRegex (mkRegex "\\\\penalty[0-9]+{}") "" 541 | . textSubRegex (mkRegex "\\\\verbtocs{[\\a-zA-Z]*}\\|[^|]*\\|") "" 542 | . rmExplSyntax 543 | . (++ extraMacros) 544 | . mconcat 545 | . mapM readFile 546 | ["config.tex", "macros.tex", "tables.tex"] 547 | 548 | loadXrefDelta :: IO XrefDelta 549 | loadXrefDelta = do 550 | (tex, _, _) <- Parser.parseString initialContext . Text.unpack . readFile "xrefdelta.tex" 551 | let lfc c = lookForCommand c tex 552 | return $ 553 | [ (fromJust $ isJustRaw $ snd from, [snd to]) 554 | | [from, to] <- lfc "movedxrefs" ] ++ 555 | [ (fromJust $ isJustRaw $ snd from, (:[]) . TeXComm "ref" "" . (:[]) . tos) 556 | | from : tos <- lfc "movedxref" ++ lfc "movedxrefii" ++ lfc "movedxrefiii" ] ++ 557 | [ (abbr, []) 558 | | [(_, [TeXRaw abbr])] <- lfc "removedxref" ] ++ 559 | [ (abbr, [[TeXComm "ref" "" [(FixArg, [TeXRaw ("depr." ++ abbr)])]]]) 560 | | [(_, [TeXRaw abbr])] <- lfc "deprxref" ] 561 | -------------------------------------------------------------------------------- /SectionPages.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE OverloadedStrings, RecordWildCards, TupleSections, ViewPatterns #-} 3 | 4 | module SectionPages 5 | ( writeSectionFiles 6 | , writeSingleSectionFile 7 | , writeFiguresFile 8 | , writeFigureFiles 9 | , writeTablesFile 10 | , writeTableFiles 11 | , writeIndexFiles 12 | , writeFootnotesFile 13 | , writeCssFile 14 | , writeXrefDeltaFiles 15 | ) where 16 | 17 | import Prelude hiding ((++), (.), writeFile) 18 | import System.Directory (createDirectoryIfMissing) 19 | import Control.Monad (when, forM_) 20 | import Control.Arrow (first) 21 | import Data.Maybe (fromJust) 22 | import System.Process (readProcess) 23 | import qualified Data.Map as Map 24 | import qualified Data.Text as Text 25 | import qualified Data.Text.Lazy.Builder as TextBuilder 26 | import Pages (writePage, pageContent, pagePath, PageStyle(..), fileContent, outputDir, Link(..)) 27 | import Render (render, concatRender, simpleRender2, renderFig, 28 | defaultRenderContext, renderTab, RenderContext(..), Page(..),linkToSection, squareAbbr, 29 | secnum, renderLatexParas, isSectionPage, parentLink, renderIndex) 30 | import Document 31 | import Util (urlChars, (++), (.), h, anchor, xml, Anchor(..), Text, writeFile, intercalateBuilders) 32 | 33 | renderParagraph :: RenderContext -> TextBuilder.Builder 34 | renderParagraph ctx@RenderContext{nearestEnclosing=Left Paragraph{..}, draft=Draft{..}} = 35 | (case paraNumber of 36 | Just i -> renderNumbered (Text.pack $ show i) 37 | Nothing -> id) 38 | $ (if paraInItemdescr then xml "div" [("class", "itemdescr")] else id) 39 | $ (sourceLink 40 | ++ renderLatexParas paraElems ctx'{extraIndentation=if paraInItemdescr then 12 else 0}) 41 | -- the 12 here must match div.itemdescr's margin-left value in mm 42 | where 43 | urlBase = Text.replace "/commit/" "/tree/" commitUrl ++ "/source/" 44 | sourceLink :: TextBuilder.Builder 45 | sourceLink 46 | | Just SourceLocation{..} <- paraSourceLoc = 47 | xml "div" [("class", "sourceLinkParent")] 48 | $ simpleRender2 $ anchor 49 | { aClass = "sourceLink" 50 | , aText = "#" 51 | , aHref = urlBase ++ Text.pack (sourceFile ++ "#L" ++ show sourceLine) } 52 | | otherwise = "" 53 | 54 | renderNumbered :: Text -> TextBuilder.Builder -> TextBuilder.Builder 55 | renderNumbered n = 56 | let 57 | idTag = if isSectionPage (page ctx) then [("id", mconcat (idPrefixes ctx) ++ n)] else [] 58 | a = anchor 59 | { aClass = "marginalized" 60 | , aHref = 61 | if isSectionPage (page ctx) 62 | then "#" ++ urlChars (mconcat (idPrefixes ctx)) ++ n 63 | else "SectionToSection/" ++ urlChars (abbreviation paraSection) ++ "#" ++ n 64 | , aText = TextBuilder.fromText n } 65 | classes = "para" ++ 66 | (if all (not . normative) (paraElems >>= sentences >>= sentenceElems) 67 | then " nonNormativeOnly" 68 | else "") 69 | in 70 | xml "div" (("class", classes) : idTag) . 71 | (xml "div" [("class", "marginalizedparent")] (render a ctx') ++) 72 | ctx' = case paraNumber of 73 | Just n -> ctx{ idPrefixes = idPrefixes ctx ++ [Text.pack (show n) ++ "."] } 74 | Nothing -> ctx 75 | renderParagraph _ = undefined 76 | 77 | renderSection :: RenderContext -> Maybe Section -> Bool -> Section -> (TextBuilder.Builder, Bool) 78 | renderSection context specific parasEmitted s@Section{..} 79 | | full = (, True) $ 80 | idDiv $ header ++ 81 | mconcat (map 82 | (\p -> renderParagraph (context{nearestEnclosing=Left p,idPrefixes=if parasEmitted then [secOnPage ++ "-"] else []})) 83 | paragraphs) ++ 84 | (if null sectionFootnotes then "" else "
") ++ 85 | concatRender sectionFootnotes context{nearestEnclosing=Right s} ++ 86 | mconcat (fst . renderSection context Nothing True . subsections) 87 | | not anysubcontent = ("", False) 88 | | otherwise = 89 | ( header ++ 90 | mconcat (fst . renderSection context specific False . subsections) 91 | , anysubcontent ) 92 | where 93 | idDiv 94 | | specific == Just s = id 95 | | otherwise = xml "div" [("id", secOnPage), ("class", "section")] 96 | secOnPage :: Text 97 | secOnPage = case page context of 98 | SectionPage parent -> parentLink parent abbreviation 99 | _ -> abbreviation 100 | full = specific == Nothing || specific == Just s 101 | header = sectionHeader (min 4 $ 1 + length parents) s 102 | (if specific == Nothing && isSectionPage (page context) then "#" ++ urlChars secOnPage else "") 103 | abbr context 104 | abbr 105 | | specific == Just s && not (null parents) 106 | = anchor 107 | | Just sp <- specific, sp /= s, not (null parents) 108 | = anchor{aHref = "SectionToSection/" ++ urlChars abbreviation ++ "#" ++ parentLink s (Document.abbreviation sp)} 109 | | otherwise = linkToSection 110 | (if null parents then SectionToToc else SectionToSection) 111 | abbreviation 112 | anysubcontent = 113 | or $ map (snd . renderSection context specific True) 114 | $ subsections 115 | 116 | sectionFileContent :: PageStyle -> TextBuilder.Builder -> TextBuilder.Builder -> Text 117 | sectionFileContent sfs title body = pageContent sfs $ fileContent pathHome title sectionPageCss body 118 | where 119 | pathHome = if sfs == InSubdir then "../" else "" 120 | sectionPageCss = 121 | "" ++ 122 | "" ++ 123 | "" 124 | 125 | writeSectionFile :: FilePath -> PageStyle -> TextBuilder.Builder -> TextBuilder.Builder -> IO () 126 | writeSectionFile n sfs title body = writePage n sfs (sectionFileContent sfs title body) 127 | 128 | sectionHeader :: Int -> Section -> Text -> Anchor -> RenderContext -> TextBuilder.Builder 129 | sectionHeader hLevel s@Section{..} secnumHref abbr_ref ctx 130 | | isDef = xml "h4" [("style", "margin-bottom:3pt")] $ num ++ abbrR ++ name 131 | | abbreviation == "bibliography" = h hLevel name 132 | | otherwise = h hLevel $ num ++ " " ++ name ++ " " ++ abbrR 133 | where 134 | num = secnum secnumHref s 135 | abbrR = simpleRender2 abbr_ref{aClass = "abbr_ref", aText = squareAbbr False abbreviation} 136 | name = render sectionName ctx{inSectionTitle=True} 137 | isDef = isDefinitionSection sectionKind 138 | 139 | writeFiguresFile :: PageStyle -> Draft -> IO () 140 | writeFiguresFile sfs draft = writeSectionFile "fig" sfs "14882: Figures" $ 141 | "

Figures [fig]

" 142 | ++ mconcat (uncurry r . figures draft) 143 | where 144 | r :: Paragraph -> Figure -> TextBuilder.Builder 145 | r p f@Figure{..} = 146 | renderFig True f ("./SectionToSection/" ++ urlChars figureAbbr) False True ctx 147 | where ctx = defaultRenderContext{draft=draft, nearestEnclosing=Left p, page=FiguresPage} 148 | 149 | writeTablesFile :: PageStyle -> Draft -> IO () 150 | writeTablesFile sfs draft = writeSectionFile "tab" sfs "14882: Tables" $ 151 | "

Tables [tab]

" 152 | ++ mconcat (uncurry r . tables draft) 153 | where 154 | r :: Paragraph -> Table -> TextBuilder.Builder 155 | r p t@Table{tableSection=Section{..}, ..} = 156 | renderTab True t ("./SectionToSection/" ++ urlChars tableAbbr) False True ctx 157 | where ctx = defaultRenderContext{ 158 | draft = draft, 159 | nearestEnclosing = Left p, 160 | page = TablesPage, 161 | idPrefixes = [fromJust (Text.stripPrefix "tab:" tableAbbr) ++ "-"]} 162 | 163 | writeFootnotesFile :: PageStyle -> Draft -> IO () 164 | writeFootnotesFile sfs draft = writeSectionFile "footnotes" sfs "14882: Footnotes" $ 165 | "

List of Footnotes

" 166 | ++ mconcat (uncurry r . footnotes draft) 167 | where 168 | r :: Section -> Footnote -> TextBuilder.Builder 169 | r s fn = render fn defaultRenderContext{draft=draft, nearestEnclosing = Right s, page=FootnotesPage} 170 | 171 | writeSingleSectionFile :: PageStyle -> Draft -> String -> IO () 172 | writeSingleSectionFile sfs draft abbr = do 173 | let Just section@Section{..} = Document.sectionByAbbr draft (Text.pack abbr) 174 | let baseFilename = Text.unpack abbreviation 175 | writeSectionFile baseFilename sfs (squareAbbr False abbreviation) $ mconcat $ fst . renderSection (defaultRenderContext{draft=draft,page=SectionPage section}) (Just section) False . chapters draft 176 | putStrLn $ " " ++ baseFilename 177 | 178 | writeTableFiles :: PageStyle -> Draft -> IO () 179 | writeTableFiles sfs draft = 180 | forM_ (snd . tables draft) $ \tab@Table{..} -> do 181 | let 182 | context = defaultRenderContext{draft=draft, page=TablePage tab, nearestEnclosing=Right tableSection} 183 | header :: Section -> TextBuilder.Builder 184 | header sec = sectionHeader (min 4 $ 1 + length (parents sec)) sec "" anchor{aHref=href} context 185 | where href="SectionToSection/" ++ urlChars (abbreviation sec) ++ "#" ++ urlChars tableAbbr 186 | headers = mconcat $ map header $ reverse $ tableSection : parents tableSection 187 | writeSectionFile (Text.unpack tableAbbr) sfs (TextBuilder.fromText $ "[" ++ tableAbbr ++ "]") $ 188 | headers ++ renderTab True tab "" True False context 189 | 190 | writeFigureFiles :: PageStyle -> Draft -> IO () 191 | writeFigureFiles sfs draft = 192 | forM_ (snd . figures draft) $ \fig@Figure{..} -> do 193 | let 194 | context = defaultRenderContext{draft=draft, page=FigurePage fig, nearestEnclosing=Right figureSection} 195 | header :: Section -> TextBuilder.Builder 196 | header sec = sectionHeader (min 4 $ 1 + length (parents sec)) sec "" anchor{aHref=href} context 197 | where href="SectionToSection/" ++ urlChars (abbreviation sec) ++ "#" ++ urlChars figureAbbr 198 | headers = mconcat $ map header $ reverse $ figureSection : parents figureSection 199 | writeSectionFile (Text.unpack figureAbbr) sfs (TextBuilder.fromText $ "[" ++ figureAbbr ++ "]") $ 200 | headers ++ renderFig True fig "" True False context 201 | 202 | writeSectionFiles :: PageStyle -> Draft -> [IO ()] 203 | writeSectionFiles sfs draft = flip map (zip names contents) $ \(n, content) -> do 204 | when (sfs == InSubdir) $ createDirectoryIfMissing True (outputDir ++ n) 205 | writeFile (pagePath n sfs) content 206 | where 207 | secs = Document.sections draft 208 | renSec section@Section{..} = (Text.unpack abbreviation, sectionFileContent sfs title body) 209 | where 210 | title = squareAbbr False abbreviation 211 | body = mconcat $ fst . renderSection (defaultRenderContext{draft=draft,page=SectionPage section}) (Just section) False . chapters draft 212 | fullbody = mconcat $ fst . renderSection defaultRenderContext{draft=draft, page=FullPage} Nothing True . chapters draft 213 | fullfile = ("full", sectionFileContent sfs "14882" fullbody) 214 | files = fullfile : map renSec secs 215 | names = fst . files 216 | contents = snd . files 217 | 218 | writeIndexFile :: PageStyle -> Draft -> String -> IndexTree -> IO () 219 | writeIndexFile sfs draft cat index = 220 | writeSectionFile cat sfs ("14882: " ++ indexCatName cat) $ 221 | h 1 (indexCatName cat) ++ renderIndex defaultRenderContext{page=IndexPage (Text.pack cat), draft=draft} index 222 | 223 | writeIndexFiles :: PageStyle -> Draft -> Index -> [IO ()] 224 | writeIndexFiles sfs draft index = flip map (Map.toList index) $ uncurry (writeIndexFile sfs draft) . first Text.unpack 225 | 226 | writeCssFile :: IO () 227 | writeCssFile = do 228 | base <- Text.pack . readFile "14882.css" 229 | let 230 | replaceFonts = 231 | Text.replace 232 | ".MJXc-TeX-sans-R {font-family: MJXc-TeX-sans-R,MJXc-TeX-sans-Rw}" 233 | ".MJXc-TeX-sans-R {font-family: 'Noto Sans'; font-size: 10pt; }" . 234 | Text.replace 235 | ".MJXc-TeX-type-R {font-family: MJXc-TeX-type-R,MJXc-TeX-type-Rw}" 236 | ".MJXc-TeX-type-R {font-family: 'Noto Sans Mono'; font-size: 10pt; }" . 237 | Text.replace 238 | ".MJXc-TeX-main-R {font-family: MJXc-TeX-main-R,MJXc-TeX-main-Rw}" 239 | ".MJXc-TeX-main-R {}" . 240 | Text.replace 241 | ".MJXc-TeX-math-I {font-family: MJXc-TeX-math-I,MJXc-TeX-math-Ix,MJXc-TeX-math-Iw}" 242 | ".MJXc-TeX-math-I {font-style: italic}" . 243 | Text.replace 244 | ".MJXc-TeX-main-I {font-family: MJXc-TeX-main-I,MJXc-TeX-main-Ix,MJXc-TeX-main-Iw}" 245 | ".MJXc-TeX-main-I {font-style: italic}" 246 | -- Replace fonts to make sure code in formulas matches code in code blocks, etc. 247 | mjx <- Text.replace "display: block" "display: block;background:inherit" . replaceFonts . Text.pack . 248 | readProcess "tex2html" ["--css", ""] "" 249 | writeFile (outputDir ++ "/14882.css") (base ++ mjx) 250 | 251 | writeXrefDeltaFiles :: PageStyle -> Draft -> [IO ()] 252 | writeXrefDeltaFiles sfs draft = flip map (xrefDelta draft) $ \(from, to) -> 253 | writeSectionFile (Text.unpack from) sfs (squareAbbr False from) $ 254 | if to == [] 255 | then "Subclause " ++ squareAbbr False from ++ " was removed." 256 | else "See " ++ intercalateBuilders ", " (flip render ctx . to) ++ "." 257 | where ctx = defaultRenderContext{draft=draft, page=XrefDeltaPage} 258 | -------------------------------------------------------------------------------- /Sentences.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, ViewPatterns, LambdaCase, TypeSynonymInstances, FlexibleInstances #-} 2 | 3 | module Sentences (splitIntoSentences, isActualSentence, linkifyFullStop, breakSentence) where 4 | 5 | import LaTeXBase (LaTeXUnit(..), triml, ArgKind(FixArg)) 6 | import Data.Text (isPrefixOf, isSuffixOf, stripPrefix, Text) 7 | import qualified Data.Text as Text 8 | import Prelude hiding (take, (.), takeWhile, (++), lookup, readFile) 9 | import Data.Char (isSpace, isDigit, isAlphaNum, isUpper, isLower) 10 | import Control.Arrow (first) 11 | import Data.Maybe (isNothing) 12 | import Util ((++), textStripInfix, dropTrailingWs, (.)) 13 | import RawDocument 14 | import Document 15 | 16 | startsSentence :: RawElement -> Bool 17 | startsSentence (RawLatexElement e) | [TeXRaw x] <- triml [e], x /= "" = isUpper (Text.head x) 18 | startsSentence _ = False 19 | 20 | unitContinuesSentence :: LaTeXUnit -> Bool 21 | unitContinuesSentence (TeXComm " " _ []) = True 22 | unitContinuesSentence (TeXRaw txt) = "," `isPrefixOf` txt 23 | unitContinuesSentence _ = False 24 | 25 | elemContinuesSentence :: RawElement -> Bool 26 | elemContinuesSentence (RawLatexElement u) = unitContinuesSentence u 27 | elemContinuesSentence _ = False 28 | 29 | elemsContinueSentence :: [RawElement] -> Bool 30 | elemsContinueSentence (RawLatexElement (TeXRaw "") : more) = elemsContinueSentence more 31 | elemsContinueSentence (x : _) = elemContinuesSentence x 32 | elemsContinueSentence _ = False 33 | 34 | simpleHead :: [RawElement] -> Maybe Char 35 | simpleHead [] = Nothing 36 | simpleHead (RawLatexElement (TeXRaw x) : more) 37 | | x == "" = simpleHead more 38 | | otherwise = Just (Text.head x) 39 | simpleHead (RawLatexElement (TeXComm " " "" []) : _) = Just ' ' 40 | simpleHead (RawLatexElement (TeXComm "tcode" _ [(_, x)]) : more) = simpleHead (map RawLatexElement x ++ more) 41 | simpleHead (RawLatexElement (TeXComm "index" _ _) : more) = simpleHead more 42 | simpleHead (RawLatexElement (TeXComm "footnoteref" _ _) : _) = Nothing -- hmm 43 | simpleHead (RawLatexElement TeXLineBreak : _) = Nothing 44 | simpleHead (RawLatexElement (TeXComm "br" _ _) : _) = Nothing 45 | simpleHead (RawLatexElement (TeXComm "linebreak" _ _) : _) = Nothing 46 | simpleHead (RawLatexElement (TeXComm "newline" _ _) : _) = Nothing 47 | simpleHead (RawLatexElement (TeXComm "par" _ _) : _) = Nothing 48 | simpleHead (RawLatexElement (TeXComm "nolinebreak" _ _) : _) = Nothing 49 | simpleHead (RawLatexElement (TeXComm "iref" _ _) : _) = Nothing 50 | simpleHead (RawLatexElement (TeXComm "," _ _) : _) = Just ',' 51 | simpleHead x = error $ "simpleHead: " ++ show x 52 | 53 | splitIntoSentences :: [RawElement] -> [[RawElement]] 54 | splitIntoSentences = go [] 55 | where 56 | go [] [] = [] 57 | go [] (RawLatexElement (TeXRaw "\n") : y) = go [] y 58 | go [] (x@(RawExample _) : y) = [x] : go [] y 59 | go [] (x@(RawNote _ _) : y) = [x] : go [] y 60 | go partial (x@(RawCodeblock _) : y) | z : _ <- rmIndices y, startsSentence z = (partial ++ [x]) : go [] y 61 | go x [] = [x] 62 | go x z@(e : y) 63 | | Just (s, rest) <- breakSentence z = (x ++ s) : go [] rest 64 | | otherwise = go (x ++ [e]) y 65 | rmIndices (RawLatexElement (TeXRaw "\n") : RawLatexElement (TeXComm "index" _ _) : x) = rmIndices x 66 | rmIndices x = x 67 | 68 | breakSentence :: [RawElement] -> Maybe ([RawElement] {- sentence -}, [RawElement] {- remainder -}) 69 | breakSentence (e@(RawLatexElement (TeXMath _ math)) : more) 70 | | f (reverse math) = Just ([e], more) 71 | | otherwise = first (e :) . breakSentence more 72 | where 73 | f :: LaTeX -> Bool 74 | f (TeXRaw y : z) | all isSpace (Text.unpack y) = f z 75 | f (TeXComm "text" _ [(FixArg, a)] : _) = f (reverse a) 76 | f (TeXComm "mbox" _ [(FixArg, a)] : _) = f (reverse a) 77 | f (TeXRaw ".\n" : TeXComm "right" "" [] : y) = f y 78 | f (TeXRaw y : _) = "." `isSuffixOf` (Text.pack $ dropTrailingWs $ Text.unpack y) 79 | f _ = False 80 | breakSentence (b@(RawLatexElement TeXLineBreak) : more) = Just ([b], more) 81 | breakSentence (RawLatexElement (TeXBraces x) : more) = breakSentence (map RawLatexElement x ++ more) 82 | breakSentence (e@(RawLatexElement (TeXEnv "eqnarray*" _ _)) : more) = first (e :) . breakSentence more 83 | breakSentence (b@(RawLatexElement (TeXComm cmd _ _)) : more) = 84 | if cmd `elem` ["break"] 85 | then Just ([b], more) 86 | else (first (b :)) . breakSentence more 87 | breakSentence (e@(RawLatexElement (TeXRaw (textStripInfix "." -> (Just ((++ ".") -> pr, po))))) : more) 88 | = f pr po 89 | where 90 | f :: Text -> Text -> Maybe ([RawElement], [RawElement]) 91 | f pre post 92 | | "''" `isPrefixOf` post = f (pre ++ "''") (Text.drop 2 post) 93 | | not (("(." `isSuffixOf` pre) && (")" `isPrefixOf` post)) 94 | , not ("" == post && maybe False (\c -> isLower c || isDigit c) (simpleHead more)) 95 | , not ("" == post && length more /= 0 && head more == RawLatexElement (TeXComm " " "" [])) 96 | , not (Text.length post > 0 && ((Text.head post == '.') 97 | || isLower (Text.head post) 98 | || isDigit (Text.head post))) 99 | , not (Text.length pre > 1 && Text.length post > 0 && isAlphaNum (Text.last $ Text.init pre) && isDigit (Text.head post)) 100 | , not (elemsContinueSentence (RawLatexElement (TeXRaw post) : more)) 101 | , not (Text.length pre >= 2 && ("." `isSuffixOf` pre) && isUpper (Text.last $ Text.init pre)) 102 | , not ("e.g." `isSuffixOf` pre) 103 | , not ("i.e." `isSuffixOf` pre) = 104 | let 105 | post' = Text.stripStart post 106 | (pre', post'') = case stripPrefix ")" post' of 107 | Just z -> (pre ++ ")" , Text.stripStart z) 108 | Nothing -> (pre, post') 109 | more' = if post'' == "" then more else RawLatexElement (TeXRaw post'') : more 110 | (maybefootnote, more'') = case more' of 111 | fn@(RawLatexElement (TeXComm "footnoteref" _ _)) : z -> ([fn], z) 112 | _ -> ([], more') 113 | sentence = [RawLatexElement (TeXRaw pre')] ++ maybefootnote 114 | in 115 | Just (sentence, more'') 116 | | Just ((++ ".") -> pre', post') <- textStripInfix "." post = f (pre ++ pre') post' 117 | | otherwise = first (e :) . breakSentence more 118 | breakSentence (e@(RawLatexElement (TeXRaw _)) : more) = first (e :) . breakSentence more 119 | breakSentence (enum@(RawEnumerated _ (last -> rawItemContent -> (_ : _ : _))) : more) 120 | = Just ([enum], more) 121 | breakSentence (enum@(RawEnumerated _ (last -> rawItemContent -> [RawTexPara y])) : more) 122 | | Just _ <- breakSentence y = Just ([enum], more) 123 | breakSentence _ = Nothing 124 | 125 | isActualSentence :: [RawElement] -> Bool 126 | isActualSentence (RawEnumerated _ _ : _) = False 127 | isActualSentence l = any p l 128 | where 129 | yes = words $ 130 | "link tcode noncxxtcode textit ref grammarterm indexedspan " ++ 131 | "defnx textbf textrm textsl textsc indexlink hiddenindexlink" 132 | 133 | q :: LaTeXUnit -> Bool 134 | q (TeXRaw s) = not $ all isSpace $ Text.unpack s 135 | q (TeXComm c _ _) | c `elem` yes = True 136 | q (TeXEnv c _ _) | c `elem` yes = True 137 | q (TeXEnv "indexed" _ body) = any q body 138 | q (TeXBraces body) = any q body 139 | q _ = False 140 | 141 | p :: RawElement -> Bool 142 | p (RawLatexElement u) = q u 143 | p RawEnumerated{} = True 144 | p _ = False 145 | 146 | class LinkifyFullStop a where 147 | linkifyFullStop :: LaTeXUnit -> a -> Maybe a 148 | 149 | instance LinkifyFullStop LaTeX where 150 | linkifyFullStop link l = reverse . f (reverse l) 151 | where 152 | f [] = Nothing 153 | f (x@(TeXRaw ".\n") : y@(TeXComm "right" _ _) : more) = ([x, y] ++) . f more 154 | f (u : uu) 155 | | Just u' <- inUnit u = Just (reverse u' ++ uu) 156 | | otherwise = (u :) . f uu 157 | inUnit :: LaTeXUnit -> Maybe LaTeX -- returns content in regular order 158 | inUnit (TeXEnv "array" args body) 159 | | Just body' <- linkifyFullStop link body = Just [TeXEnv "array" args body'] 160 | inUnit (TeXEnv "indented" [] body) 161 | | Just body' <- linkifyFullStop link body = Just [TeXEnv "indented" [] body'] 162 | inUnit (TeXComm "text" ws [(FixArg, x)]) 163 | | Just x' <- linkifyFullStop link x = Just (moveStuffOutsideText (TeXComm "text" ws [(FixArg, x')])) 164 | | otherwise = Nothing 165 | inUnit (TeXComm "mbox" ws [(FixArg, x)]) 166 | | Just x' <- linkifyFullStop link x = Just (moveStuffOutsideText (TeXComm "mbox" ws [(FixArg, x')])) 167 | | otherwise = Nothing 168 | inUnit (TeXMath kind m) 169 | | Just m' <- linkifyFullStop link m = Just [TeXMath kind m'] 170 | inUnit (TeXRaw (Text.dropWhileEnd (=='\n') -> Text.stripSuffix "." -> Just s)) = Just [TeXRaw s, link] 171 | inUnit (TeXRaw (Text.stripSuffix ".)" -> Just s)) = Just [TeXRaw s, link, TeXRaw ")"] 172 | inUnit (TeXRaw (Text.stripSuffix ".''" -> Just s)) = Just [TeXRaw s, link, TeXRaw "''"] 173 | inUnit _ = Nothing 174 | 175 | instance LinkifyFullStop Item where 176 | linkifyFullStop link it@Item{itemInlineContent=e} 177 | | Just y <- linkifyFullStop link e 178 | = Just it{itemInlineContent=y} 179 | linkifyFullStop _ _ = Nothing 180 | 181 | instance LinkifyFullStop [Element] where 182 | linkifyFullStop link = (reverse .) . f . reverse 183 | where 184 | f :: [Element] -> Maybe [Element] 185 | f (Enumerated cmd (reverse -> (lastItem : moreItems)) : more) 186 | | all (isNothing . linkifyFullStop link) moreItems 187 | , Just lastItem' <- linkifyFullStop link lastItem 188 | = Just $ Enumerated cmd (reverse (lastItem' : moreItems)) : more 189 | f (LatexElement u : more) 190 | | Just u' <- linkifyFullStop link [u] = Just $ map LatexElement (reverse u') ++ more 191 | | otherwise = (LatexElement u :) . f more 192 | f _ = Nothing 193 | 194 | moveStuffOutsideText :: LaTeXUnit -> LaTeX 195 | -- Turns \text{ \class{bla} } into \text{ }\class{\text{bla}}\text{ }, and similar for \href, 196 | -- because MathJax does not support \class and \href in \text. 197 | moveStuffOutsideText (TeXComm parent pws [(FixArg, [TeXComm nested nws [x, y]])]) 198 | | parent `elem` ["text", "mbox"] 199 | , nested `elem` ["class", "href"] = [TeXComm nested nws [x, (FixArg, moveStuffOutsideText (TeXComm parent pws [y]))]] 200 | moveStuffOutsideText (TeXComm parent pws [(FixArg, t)]) 201 | | parent `elem` ["text", "mbox"] 202 | , length t >= 2 = concatMap (\u -> moveStuffOutsideText $ TeXComm parent pws [(FixArg, [u])]) t 203 | moveStuffOutsideText u = [u] 204 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /Toc.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE RecordWildCards, OverloadedStrings, ViewPatterns, NamedFieldPuns #-} 3 | 4 | module Toc (writeTocFiles) where 5 | 6 | import qualified Data.Text as Text 7 | import qualified Data.Text.Lazy as LazyText 8 | import qualified Data.Text.Lazy.Builder as TextBuilder 9 | import Data.Time.Format (formatTime, defaultTimeLocale) 10 | import Data.Time.Clock (getCurrentTime, UTCTime) 11 | import Prelude hiding ((.), (++), writeFile) 12 | import LaTeXBase (LaTeXUnit(..)) 13 | import Pages (Link(..), fileContent, applyPageStyle, PageStyle(..), outputDir, writePage) 14 | import Render (secnum, linkToSection, simpleRender2, RenderContext(..), render, defaultRenderContext, Page(..)) 15 | import Util 16 | import Document (Section(..), Draft(..), SectionKind(..), indexCatName, isDefinitionSection) 17 | 18 | tocSection :: Draft -> Bool -> Section -> TextBuilder.Builder 19 | tocSection _ _ Section{sectionKind=DefinitionSection _} = "" 20 | tocSection draft expanded s@Section{..} = 21 | xml "div" [("id", abbreviation)] $ header ++ mconcat (tocSection draft expanded . subsections) 22 | where 23 | header = h (min 4 $ 2 + length parents) $ 24 | secnum (if expanded then "#" ++ urlChars abbreviation else "") s ++ " " 25 | ++ render ( sectionName ++ [TeXRaw " "] 26 | , (linkToSection (if expanded then SectionToSection else TocToSection) abbreviation){aClass="abbr_ref"}) 27 | defaultRenderContext{page=if expanded then ExpandedTocPage else TocPage, inSectionTitle=True, draft=draft} 28 | ++ "
" 29 | 30 | tocChapter :: Draft -> Bool -> Section -> TextBuilder.Builder 31 | tocChapter draft expanded s@Section{abbreviation, sectionName, subsections, parents} = 32 | xml "div" [("id", abbreviation)] $ 33 | h (min 4 $ 2 + length parents) header ++ 34 | xml "div" [("class", "tocChapter")] (mconcat (tocSection draft expanded . subsections)) 35 | where 36 | href 37 | | expanded = "SectionToSection/" ++ urlChars abbreviation 38 | | otherwise = (if any (not . isDefinitionSection . sectionKind) subsections then "#" else "TocToSection/") ++ urlChars abbreviation 39 | link = anchor{ 40 | aClass = "folded_abbr_ref", 41 | aText = TextBuilder.fromText $ "[" ++ abbreviation ++ "]", 42 | aHref = href} 43 | header 44 | | abbreviation == "bibliography" = 45 | render anchor{aText = "Bibliography", aHref = href} 46 | defaultRenderContext{inSectionTitle=True, draft=draft} 47 | | otherwise = 48 | secnum (if expanded then "#" ++ urlChars abbreviation else "") s ++ " " ++ 49 | render (sectionName ++ [TeXRaw " "], link) defaultRenderContext{inSectionTitle=True, draft=draft} ++ 50 | (if expanded then "" else simpleRender2 (linkToSection TocToSection abbreviation){aClass="unfolded_abbr_ref"}) 51 | 52 | tocHeader :: UTCTime -> Text -> Text 53 | tocHeader date commitUrl = 54 | "(Generated on " ++ Text.pack (formatTime defaultTimeLocale "%F" date) 55 | ++ " from the LaTeX sources" 56 | ++ " by cxxdraft-htmlgen." 57 | ++ " This is not an ISO publication.)" 58 | ++ "

" 59 | ++ "Note: this is an early draft. It's known to be incomplet and incorrekt, and it has lots of" 60 | ++ " bad" 61 | ++ " formatting." 62 | 63 | writeTocFiles :: PageStyle -> Draft -> IO () 64 | writeTocFiles sfs draft@Draft{..} = do 65 | date <- getCurrentTime 66 | tocCss <- readFile "toc.css" 67 | let 68 | descMeta = "" 69 | tocStyle = "" 70 | writeFile (outputDir ++ "/index.html") $ applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $ 71 | fileContent "" "Draft C++ Standard: Contents" (descMeta ++ tocStyle) $ 72 | "

Working Draft
Programming Languages — C++

" ++ 73 | xml "div" [("class", "tocHeader")] (TextBuilder.fromText $ tocHeader date commitUrl) ++ 74 | "

Contents

" ++ 75 | mconcat (tocChapter draft False . chapters) ++ 76 | mconcat (h 2 77 | . (\cat -> simpleRender2 anchor{aHref="TocToSection/" ++ cat, aText=indexCatName cat}) 78 | . ["generalindex", "grammarindex", "headerindex", "libraryindex", "conceptindex", "impldefindex"]) 79 | 80 | fullTocCss <- readFile "fulltoc.css" 81 | let 82 | fullTocStyle = "" 83 | pathHome = if sfs == InSubdir then "../" else "" 84 | writePage "fulltoc" sfs $ applyPageStyle sfs $ LazyText.toStrict $ TextBuilder.toLazyText $ 85 | fileContent pathHome "Draft C++ Standard: Contents" (descMeta ++ fullTocStyle) $ 86 | "

Working Draft
Programming Languages — C++

" ++ 87 | xml "div" [("class", "tocHeader")] (TextBuilder.fromText $ tocHeader date commitUrl) ++ 88 | "

Contents

" ++ 89 | mconcat (tocChapter draft True . chapters) ++ 90 | mconcat (h 2 91 | . (\cat -> simpleRender2 anchor{aHref="SectionToSection/" ++ cat, aText=indexCatName cat}) 92 | . ["generalindex", "grammarindex", "headerindex", "libraryindex", "conceptindex", "impldefindex"]) 93 | -------------------------------------------------------------------------------- /Util.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE TupleSections, OverloadedStrings, ViewPatterns #-} 3 | 4 | module Util ( 5 | mconcat, (.), (++), Text, replace, xml, spanTag, h, getDigit, startsWith, urlChars, 6 | anchor, Anchor(..), writeFile, readFile, greekAlphabet, mapLast, mapHead, stripInfix, dropTrailingWs, 7 | textStripInfix, textSubRegex, splitOn, intercalateBuilders, replaceXmlChars, stripAnyPrefix, trimString, 8 | spanJust, measure, partitionBy 9 | ) where 10 | 11 | import Prelude hiding ((.), (++), writeFile) 12 | import qualified Data.Text as Text 13 | import qualified Data.Map as Map 14 | import Data.List (stripPrefix, intersperse) 15 | import Data.Char (ord, isDigit, isSpace) 16 | import Data.Text (Text, replace) 17 | import Data.Text.IO (writeFile) 18 | import Data.Time (getCurrentTime, diffUTCTime) 19 | import Control.Arrow (first) 20 | import Text.Regex (subRegex, Regex) 21 | import qualified Data.Text.Lazy.Builder as TextBuilder 22 | 23 | (.) :: Functor f => (a -> b) -> (f a -> f b) 24 | (.) = fmap 25 | 26 | (++) :: Monoid a => a -> a -> a 27 | (++) = mappend 28 | 29 | xml :: Text -> [(Text, Text)] -> TextBuilder.Builder -> TextBuilder.Builder 30 | xml t attrs = (TextBuilder.fromText ("<" ++ t ++ " " ++ Text.unwords (map f attrs) ++ ">") ++) . (++ TextBuilder.fromText ("")) 31 | where 32 | f (n, v) = n ++ "='" ++ v ++ "'" 33 | 34 | spanTag :: Text -> TextBuilder.Builder -> TextBuilder.Builder 35 | spanTag = xml "span" . (:[]) . ("class",) 36 | 37 | h :: Int -> TextBuilder.Builder -> TextBuilder.Builder 38 | h = flip xml [] . ("h" ++) . Text.pack . show 39 | 40 | data Anchor = Anchor 41 | { aClass, aId, aHref :: Text 42 | , aText :: TextBuilder.Builder 43 | , aStyle, aTitle :: Text } 44 | 45 | intercalateBuilders :: TextBuilder.Builder -> [TextBuilder.Builder] -> TextBuilder.Builder 46 | intercalateBuilders x y = mconcat $ intersperse x y 47 | 48 | anchor :: Anchor 49 | anchor = Anchor{aClass="", aId="", aHref="", aText=TextBuilder.fromText "", aStyle="", aTitle=""} 50 | 51 | greekAlphabet :: [(String, Char)] 52 | greekAlphabet = 53 | [ ("alpha" , 'α') 54 | , ("beta" , 'β') 55 | , ("gamma" , 'γ') 56 | , ("delta" , 'δ') 57 | , ("mu" , 'μ') 58 | , ("nu" , 'ν') 59 | , ("lambda" , 'λ') 60 | , ("pi" , 'π') 61 | , ("phi" , 'φ') 62 | , ("rho" , 'ρ') 63 | , ("sigma" , 'σ') 64 | , ("theta" , 'θ') 65 | , ("zeta" , 'ζ') 66 | 67 | , ("Gamma" , 'Γ') 68 | , ("Pi" , 'Π') ] 69 | 70 | mapLast :: (a -> a) -> [a] -> [a] 71 | mapLast _ [] = [] 72 | mapLast f [x] = [f x] 73 | mapLast f (x:xx) = x : mapLast f xx 74 | 75 | mapHead :: (a -> a) -> [a] -> [a] 76 | mapHead f (x:y) = f x : y 77 | mapHead _ [] = [] 78 | 79 | getDigit :: Char -> Maybe Int 80 | getDigit c 81 | | isDigit c = Just $ ord c - ord '0' 82 | | otherwise = Nothing 83 | 84 | stripInfix :: Eq a => [a] -> [a] -> Maybe ([a], [a]) 85 | stripInfix p s | Just r <- stripPrefix p s = Just ([], r) 86 | stripInfix p (hd:t) = first (hd:) . stripInfix p t 87 | stripInfix _ _ = Nothing 88 | 89 | textStripInfix :: Text -> Text -> Maybe (Text, Text) 90 | textStripInfix inf (Text.breakOn inf -> (a, b)) 91 | | b == "" = Nothing 92 | | otherwise = Just (a, Text.drop (Text.length inf) b) 93 | 94 | startsWith :: (Char -> Bool) -> (Text -> Bool) 95 | startsWith _ "" = False 96 | startsWith p t = p (Text.head t) 97 | 98 | dropTrailingWs :: String -> String 99 | dropTrailingWs = reverse . dropWhile isSpace . reverse 100 | 101 | urlChars :: Text -> Text 102 | urlChars = 103 | replace "'" "'" . 104 | replace "<" "%3c" . 105 | replace ">" "%3e" . 106 | replace "\"" "%22" . 107 | replace "#" "%23" . 108 | replace "{" "%7b" . 109 | replace "|" "%7c" . 110 | replace "}" "%7d" . 111 | replace "[" "%5b" . 112 | replace "\\" "%5c" . 113 | replace "]" "%5d" . 114 | replace "^" "%5e" . 115 | replace " " "%20" . 116 | replace "%" "%25" 117 | 118 | textSubRegex :: Regex -> String -> Text -> Text 119 | textSubRegex pat repl txt = Text.pack $ subRegex pat (Text.unpack txt) repl 120 | 121 | splitOn :: (a -> Bool) -> [a] -> [[a]] 122 | splitOn _ [] = [[]] 123 | splitOn sep (x:y) 124 | | sep x = [] : splitOn sep y 125 | | otherwise = mapHead (x :) $ splitOn sep y 126 | 127 | replaceXmlChars :: Text -> Text 128 | replaceXmlChars = 129 | replace ">" ">" . 130 | replace "<" "<" . 131 | replace "&" "&" 132 | 133 | stripAnyPrefix :: [Text] -> Text -> Maybe (Text, Text) 134 | stripAnyPrefix [] _ = Nothing 135 | stripAnyPrefix (x:y) z 136 | | Just a <- Text.stripPrefix x z = Just (x, a) 137 | | otherwise = stripAnyPrefix y z 138 | 139 | trimString :: String -> String 140 | trimString = reverse . dropWhile isSpace . reverse . dropWhile isSpace 141 | 142 | spanJust :: [a] -> (a -> Maybe b) -> ([b], [a]) 143 | spanJust (x : z) f 144 | | Just y <- f x = first (y :) (spanJust z f) 145 | spanJust z _ = ([], z) 146 | 147 | measure :: IO a -> IO (a, Float) 148 | measure f = do 149 | start <- getCurrentTime 150 | r <- f 151 | end <- getCurrentTime 152 | return (r, realToFrac $ diffUTCTime end start) 153 | 154 | partitionBy :: (Ord b, Eq b) => (a -> b) -> [a] -> [(b, [a])] 155 | partitionBy f l = Map.assocs $ Map.fromListWith (flip (++)) [(f x, [x]) | x <- l] 156 | -------------------------------------------------------------------------------- /colored.css: -------------------------------------------------------------------------------- 1 | div.example { 2 | display: block; 3 | margin-top: 5pt; 4 | margin-bottom: 5pt; 5 | font-size: 11pt; 6 | color: #bb00bb; 7 | } 8 | 9 | div.note { 10 | display: block; 11 | margin-top: 5pt; 12 | margin-bottom: 5pt; 13 | font-size: 11pt; 14 | color: #bb00bb; 15 | } 16 | -------------------------------------------------------------------------------- /cxxdraft-htmlgen.cabal: -------------------------------------------------------------------------------- 1 | name: cxxdraft-htmlgen 2 | version: 0 3 | synopsis: Converts C++ Standard draft documents from their LaTeX sources to HTML 4 | license: PublicDomain 5 | license-file: LICENSE 6 | author: Eelis 7 | maintainer: eelis@eelis.net 8 | category: Text 9 | build-type: Simple 10 | extra-source-files: README 11 | cabal-version: >=1.10 12 | 13 | executable cxxdraft-htmlgen 14 | main-is: genhtml.hs 15 | other-modules: Load14882, Render, Util, SectionPages, Toc, Document, LaTeXBase, LaTeXParser, RawDocument, MathJax, Sentences, CxxParser, Pages 16 | other-extensions: OverloadedStrings, RecordWildCards, TupleSections, ViewPatterns 17 | build-depends: base >=4.6 18 | , text >=1.2 19 | , process >=1.1 20 | , directory >=1.2 21 | , hashable >=1.2 22 | , containers >=0.5 23 | , mtl >=2.2 24 | , time >=1.4 25 | , regex-compat-tdfa 26 | , temporary 27 | , parallel 28 | , tagsoup 29 | , monad-parallel 30 | hs-source-dirs: . 31 | default-language: Haskell2010 32 | ghc-options: -Wall -fno-warn-tabs -threaded "-with-rtsopts=-N" 33 | -------------------------------------------------------------------------------- /expanded.css: -------------------------------------------------------------------------------- 1 | div.example { 2 | display: block; 3 | margin-top: 5pt; 4 | margin-bottom: 5pt; 5 | font-size: 9pt; 6 | } 7 | 8 | div.note { 9 | display: block; 10 | margin-top: 5pt; 11 | margin-bottom: 5pt; 12 | font-size: 9pt; 13 | } 14 | 15 | div.note .texttt { font-size: 9pt; } 16 | div.example .texttt { font-size: 9pt; } 17 | 18 | div.note .textsf { font-family: 'Noto Sans'; font-size: 9pt; } 19 | div.example .textsf { font-family: 'Noto Sans'; font-size: 9pt; } 20 | 21 | div.note .math { font-size: 9pt; } 22 | div.example .math { font-size: 9pt; } 23 | -------------------------------------------------------------------------------- /fulltoc.css: -------------------------------------------------------------------------------- 1 | h1 { margin: 0.2em 5pt 0.2em 5pt; line-height: 1.5; } 2 | h2 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; } 3 | h3 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; } 4 | h4 { margin: 0.1em 5pt 0.1em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; } 5 | :target h2 { border-bottom: none; } 6 | .tocHeader { text-align: center; } 7 | :target > div.tocChapter { display: block; } 8 | 9 | @media (prefers-color-scheme: dark) { 10 | h2 { border-bottom-color: #b0b0b05a; } 11 | h3 { border-bottom-color: #b0b0b05a; } 12 | h4 { border-bottom-color: #b0b0b05a; } 13 | } 14 | -------------------------------------------------------------------------------- /genhtml.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-tabs #-} 2 | {-# LANGUAGE LambdaCase, ViewPatterns, RecordWildCards, OverloadedStrings #-} 3 | 4 | import Document (Draft(..)) 5 | import Load14882 (load14882) 6 | import Prelude hiding ((++), (.), writeFile, readFile) 7 | import System.Directory (createDirectoryIfMissing, setCurrentDirectory, getCurrentDirectory, copyFile) 8 | import System.Environment (getArgs) 9 | import Control.Monad (forM_) 10 | import Data.Text.IO (readFile) 11 | import qualified Control.Monad.Parallel as ParallelMonad 12 | import Util hiding (readFile) 13 | import Toc (writeTocFiles) 14 | import Pages (outputDir, PageStyle(..)) 15 | import SectionPages 16 | 17 | data CmdLineArgs = CmdLineArgs 18 | { repo :: FilePath 19 | , sectionFileStyle :: PageStyle 20 | , sectionToWrite :: Maybe String } 21 | 22 | readCmdLineArgs :: [String] -> CmdLineArgs 23 | readCmdLineArgs = \case 24 | [repo, read -> sectionFileStyle, sec] -> CmdLineArgs{sectionToWrite=Just sec, ..} 25 | [repo, read -> sectionFileStyle] -> CmdLineArgs{sectionToWrite=Nothing,..} 26 | [repo] -> CmdLineArgs{sectionFileStyle=WithExtension,sectionToWrite=Nothing,..} 27 | _ -> error "param: path/to/repo" 28 | 29 | main :: IO () 30 | main = do 31 | cwd <- getCurrentDirectory 32 | CmdLineArgs{..} <- readCmdLineArgs . getArgs 33 | 34 | extraMacros <- readFile "macros.tex" 35 | 36 | setCurrentDirectory $ repo ++ "/source" 37 | draft@Draft{..} <- load14882 extraMacros 38 | 39 | setCurrentDirectory cwd 40 | createDirectoryIfMissing True outputDir 41 | copyFile "icon.png" (outputDir ++ "/icon.png") 42 | forM_ ["expanded.css", "colored.css", "normative-only.css"] $ 43 | \f -> do 44 | copyFile f (outputDir ++ "/" ++ f) 45 | case sectionToWrite of 46 | Just abbr -> writeSingleSectionFile sectionFileStyle draft abbr 47 | Nothing -> do 48 | let acts = 49 | [ writeTocFiles sectionFileStyle draft 50 | , writeCssFile 51 | , writeFiguresFile sectionFileStyle draft 52 | , writeFigureFiles sectionFileStyle draft 53 | , writeFootnotesFile sectionFileStyle draft 54 | , writeTablesFile sectionFileStyle draft 55 | , writeTableFiles sectionFileStyle draft 56 | ] ++ 57 | writeXrefDeltaFiles sectionFileStyle draft ++ 58 | writeIndexFiles sectionFileStyle draft index ++ 59 | writeSectionFiles sectionFileStyle draft 60 | 61 | ((), took) <- measure $ ParallelMonad.sequence_ acts 62 | putStrLn $ "Wrote files to " ++ outputDir ++ " in " ++ show (took * 1000) ++ "ms." 63 | -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eelis/cxxdraft-htmlgen/eee5307c45c04761ef5c27956cbed742e08e5fb4/icon.png -------------------------------------------------------------------------------- /macros.tex: -------------------------------------------------------------------------------- 1 | %% cxxdraft-htmlgen builtins: 2 | % 3 | % \link 4 | % Link to section. 5 | % arg 0: link text 6 | % arg 1: section abbreviation 7 | % 8 | % \weblink 9 | % arg 0: link text 10 | % arg 1: URL 11 | % 12 | % \indexlink 13 | % Link to indexed position. 14 | % arg 0: link text 15 | % arg 1: index category 16 | % arg 2: index key 17 | % arg 3: abbreviation of section to link to (empty to auto-resolve) 18 | % 19 | % \hiddenindexlink 20 | % Hidden link to indexed position. 21 | % arg 0: link text 22 | % arg 1: index category 23 | % arg 2: index key 24 | % arg 3: abbreviation of section to link to (empty to auto-resolve) 25 | % 26 | % \indexedspan 27 | % arg 0: text 28 | % arg 1: indices (zero or more \index commands) 29 | 30 | %% cxxdraft-htmlgen derived macros: 31 | 32 | \newcommand{\linkx}[3]{\indexlink{#1}{generalindex}{#2}{#3}} 33 | % Link to indexed position. 34 | % arg 0: link text 35 | % arg 1: generalindex key 36 | % arg 2: section abbreviation 37 | 38 | \newcommand{\deflinkx}[3]{\indexlink{#1}{generalindex}{#2|idxbfpage}{#3}} 39 | % Link to definition. 40 | % arg 0: link text 41 | % arg 1: definition key 42 | % arg 2: section abbreviation 43 | 44 | \newcommand{\deflink}[2]{\deflinkx{#1}{#1}{#2}} 45 | % Convenience macro for when the link 46 | % text is also the definition key. 47 | 48 | \newcommand{\libmemberrefx}[3]{\indexlink{\tcode{#1}}{libraryindex}{\idxcode{#2}!\idxcode{#3}}{}} 49 | \newcommand{\libglobalref}[1]{\libglobalrefx{#1}{#1}} 50 | \newcommand{\libglobalrefx}[2]{\indexlink{\tcode{#1}}{libraryindex}{\idxcode{#2}}{}} 51 | \newcommand{\noncxxtcode}[1]{\tcode{#1}} 52 | \newcommand{\literaltcode}[1]{\tcode{#1}} 53 | \newcommand{\literalterminal}[1]{\terminal{##1}} 54 | \newcommand{\noncxxterminal}[1]{\terminal{##1}} 55 | \newcommand{\oldconceptref}[1]{\indexlink{\oldconcept{#1}}{generalindex}{\idxoldconcept{#1}}{}} 56 | 57 | %% replacements for existing macros: 58 | 59 | \newcommand{\defnoldconcept}[1]{\indexedspan{\oldconcept{#1}}{\indextext{\idxoldconcept{#1}}}} 60 | \newcommand{\indexdefn}[1]{\indextext{#1|idxbfpage}} 61 | \newcommand{\idxcode}[1]{#1@\tcode{#1}} 62 | \newcommand{\nontermdef}[1]{\hiddenindexlink{\indexedspan{#1\textnormal{:}}{\indexgrammar{\idxgram{#1}}}}{grammarindex}{\idxgram{#1}|idxbfpage}{}} 63 | \newcommand{\renontermdef}[1]{#1\,\textnormal{::}} 64 | \newcommand{\fmtnontermdef}[1]{#1\,\textnormal{:}} 65 | \newcommand{\locnontermdef}[1]{#1\,\textnormal{:}} 66 | \newcommand{\grammarterm}[1]{\indexlink{\indexedspan{\gterm{#1}}{\indexgram{\idxgram{#1}}}}{grammarindex}{\idxgram{#1}|idxbfpage}{}} 67 | \newcommand{\cite}[1]{\indexlink{[bib]}{bibliography}{#1}{bibliography}} 68 | \newcommand{\libglobal}[1]{\indexedspan{\hiddenindexlink{#1}{libraryindex}{\idxcode{#1}}{}}{\indexlibraryglobal{#1}}} 69 | \newcommand{\libmember}[2]{\indexedspan{\hiddenindexlink{#1}{libraryindex}{\idxcode{#2}!\idxcode{#1}}{}}{\indexlibrarymember{#1}{#2}}} 70 | \newcommand{\libheader}[1]{\indexlink{\indexedspan{\tcode{<#1>}}{\indexhdr{#1}}}{headerindex}{\idxhdr{#1}|idxbfpage}{}} 71 | \newcommand{\libheaderdef}[1]{\indexedspan{\tcode{<#1>}}{\indexheader{#1}}} 72 | \newcommand{\libheaderrefx}[2]{\libheader{#1}} 73 | \newcommand{\libconceptx}[2]{\indexlink{\indexedspan{\cname{#1}}{\indexconcept{\idxconcept{#2}}}}{conceptindex}{\idxconcept{#2}|idxbfpage}{}} 74 | \newcommand{\libmacro}[1]{\indexedspan{\tcode{#1}}{\indexlibraryglobal{#1}}} 75 | \newcommand{\libxmacro}[1]{\indexedspan{\tcode{__#1}}{\indexlibraryglobal{__#1}}} 76 | \newcommand{\Range}[4]{#1\tcode{#3,\penalty2000{} #4}#2} 77 | \newcommand{\deflibconcept}[1]{\hiddenindexlink{\indexedspan{\cname{#1}}{\indexlibrary{\idxconcept{#1}}\indexconcept{\idxconcept{#1}|idxbfpage}}}{conceptindex}{\idxconcept{#1}|idxbfpage}{}} 78 | \newcommand{\defexposconcept}[1]{\hiddenindexlink{\indexedspan{\ecname{#1}}{\indexconcept{\idxexposconcept{#1}|idxbfpage}}}{conceptindex}{\idxexposconcept{#1}|idxbfpage}{}} 79 | \newcommand{\defexposconceptnc}[1]{\defexposconcept{#1}} 80 | \newcommand{\exposconcept}[1]{\indexlink{\indexedspan{\ecname{#1}}{\indexconcept{\idxexposconcept{#1}}}}{conceptindex}{\idxexposconcept{#1}|idxbfpage}{}} 81 | \newcommand{\exposconceptx}[2]{\indexedspan{\ecname{#1}}{\indexconcept{\idxexposconcept{#2}}}} 82 | \newcommand{\exposconceptnc}[1]{\exposconcept{#1}} 83 | \newcommand{\keyword}[1]{\indexedspan{\tcode{#1}}{\indextext{\idxcode{#1}}}} 84 | \newcommand{\itcorr}[1][]{} 85 | \newcommand{\diffdef}[1]{\break\diffhead{#1}} 86 | \newcommand{\defnx}[2]{\hiddenindexlink{\indexedspan{\textit{#1}}{\indexdefn{#2}}}{generalindex}{#2|idxbfpage}{}} 87 | \newcommand{\defnxname}[1]{\indexedspan{\xname{#1}}{\indextext{\idxxname{#1}}}} 88 | \newcommand{\defnadj}[2]{\indextext{#1 #2|see{#2, #1}}\defnx{#1 #2}{#2!#1}} 89 | \newcommand{\defnadjx}[3]{\indextext{#1 #3|see{#3, #1}}\defnx{#1 #2}{#3!#1}} 90 | \newcommand{\defnlibxname}[1]{\indexedspan{\xname{#1}}{\indexlibrary{\idxxname{#1}}}} 91 | \newcommand{\descr}[1]{\textnormal{#1}} 92 | \newcommand{\cv}{\mathit{cv}} 93 | \newcommand{\texorpdfstring}[2]{#2} 94 | \newcommand{\textunderscore}{_} 95 | \newcommand{\emo}[1]{#1} 96 | \newcommand{\bm}[1]{\textbf{#1}} 97 | 98 | \newenvironment{LongTable}[3] 99 | { 100 | \newcommand{\continuedcaption}{\caption[]{#1 (continued)}} 101 | \begin{htmlTable}{#1}{#2}{#3} 102 | \begin{TableBase} 103 | } 104 | { 105 | \bottomline 106 | \end{TableBase} 107 | \end{htmltable} 108 | } 109 | -------------------------------------------------------------------------------- /mathjax-batch: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env node 2 | 3 | var mjAPI = require("mathjax-node"); 4 | var split = require("split"); 5 | 6 | mjAPI.config( 7 | { extensions: "" 8 | , fontURL: "https://cdn.mathjax.org/mathjax/latest/fonts/HTML-CSS" }); 9 | mjAPI.start(); 10 | 11 | var math = ''; 12 | function processLine(line) 13 | { 14 | var format; 15 | if (line == "NONINLINE") format = "TeX"; 16 | else if (line == "INLINE") format = "inline-TeX"; 17 | else 18 | { 19 | if (math != '') math += '\n'; 20 | math += line; 21 | return; 22 | } 23 | 24 | mjAPI.typeset({ 25 | math: math, format: format, html: true, css: false, 26 | speakText: true, ex: 6, width: 100, linebreaks: true 27 | }, function (data) { 28 | // todo: if (data.errors) abort 29 | console.log(data.html) 30 | console.log("DONE") 31 | }); 32 | 33 | math = ''; 34 | } 35 | 36 | process.stdin.pipe(split(/\n/, null, {trailing: false})).on('data', processLine) 37 | -------------------------------------------------------------------------------- /normative-only.css: -------------------------------------------------------------------------------- 1 | div.example { display: none; } 2 | div.note { display: none; } 3 | 4 | a.footnotenum { display: none; } 5 | div.footnote { display: none; } 6 | div.footnoteSeparator { display: none; } 7 | .footnoteref { display: none; } 8 | 9 | div.nonNormativeOnly { display: none; } 10 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-12.17 2 | packages: 3 | - . 4 | 5 | extra-deps: [] 6 | 7 | flags: {} 8 | 9 | extra-package-dbs: [] 10 | -------------------------------------------------------------------------------- /toc.css: -------------------------------------------------------------------------------- 1 | h1 { margin: 0.2em 5pt 0.2em 5pt; line-height: 1.5; } 2 | h2 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; } 3 | h3 { margin: 0.2em 5pt 0.2em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; } 4 | h4 { margin: 0.1em 5pt 0.1em 5pt; border-bottom: 1px dashed rgba(0, 0, 0, 0.2); line-height: 1.5; } 5 | :target h2 { border-bottom: none; } 6 | .tocHeader { text-align: center; } 7 | div.tocChapter { display: none; } 8 | :target > div.tocChapter { display: block; } 9 | 10 | @media (prefers-color-scheme: dark) { 11 | h2 { border-bottom-color: #b0b0b05a; } 12 | h3 { border-bottom-color: #b0b0b05a; } 13 | h4 { border-bottom-color: #b0b0b05a; } 14 | } 15 | --------------------------------------------------------------------------------