├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── build.rs
├── doc
    └── docs.yml
├── linter_notes.md
└── src
    ├── ast.rs
    ├── default_transformations.rs
    ├── error.rs
    ├── grammar.rs
    ├── grammar.rustpeg
    ├── lib.rs
    ├── main.rs
    ├── tests
        └── mod.rs
    ├── transformations.rs
    ├── traversion.rs
    └── util.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | target/
3 | src/generated_tests.rs
4 | testfiles
5 | test.md
6 | Cargo.lock
7 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: rust
 2 | sudo: required
 3 | 
 4 | rust:
 5 |     - stable
 6 |     - nightly
 7 | 
 8 | script:
 9 |     - cargo build --all --verbose
10 |     - cargo test --all --verbose
11 |     - cargo doc --all --verbose
12 | 
13 | after_success:
14 |     - |
15 |         bash <(curl https://raw.githubusercontent.com/xd009642/tarpaulin/master/travis-install.sh)
16 |         cargo tarpaulin --out Xml
17 |         bash <(curl -s https://codecov.io/bash)
18 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "mediawiki_parser"
 3 | version = "0.4.2"
 4 | authors = ["Valentin Roland <valentin@vroland.de>"]
 5 | description = "A strict parser for MediaWiki markdown."
 6 | repository = "https://github.com/vroland/mediawiki-parser"
 7 | documentation = "https://docs.rs/mediawiki_parser/"
 8 | build = "build.rs"
 9 | readme = "README.md"
10 | keywords = ["mediawiki", "parser", "wikipedia", "wikibooks", "markdown"]
11 | categories = ["parsing", "text-processing"]
12 | license = "MIT"
13 | edition = "2018"
14 | 
15 | [lib]
16 | name = "mediawiki_parser"
17 | path = "src/lib.rs"
18 | doc = true
19 | 
20 | [features]
21 | default = []
22 | no_position = []
23 | ptime = ["time"]
24 | 
25 | [[bin]]
26 | name = "mwtoast"
27 | path = "src/main.rs"
28 | doc = true
29 | 
30 | [dependencies]
31 | serde = "1.0"
32 | serde_yaml = "0.8"
33 | serde_json = "1.0"
34 | serde_derive = "1.0"
35 | structopt = "0.2"
36 | colored = "1.6"
37 | time = { version = "0.1", optional = true }
38 | 
39 | [build-dependencies]
40 | serde = "1.0"
41 | serde_derive = "1.0"
42 | peg = "0.5"
43 | serde_yaml = "0.8"
44 | 
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 vroland
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # mediawiki-parser
  2 | This project aims to develop a parser for a subset of mediawiki markdown on the basis of Parsing Expression Grammars. 
  3 | It currently features a generated parser and test generation from a specification document. A simple binary to read from a file and write yaml to stdout is provided.
  4 | 
  5 | ## Disclaimer
  6 | 
  7 | The goal of mediawiki-parser is *not* full compatibility with MediaWiki and all of its quirks. It is intended to be used if rejecting exotic or malformed input is fine.
  8 | The markup supported is currently largely oriented towards the need of a specific MediaWiki Project and will likely not change drastically without external contributions. 
  9 | 
 10 | If you want to parse any MediaWiki with all its weirdness, take a look at [Parse Wiki Text](https://github.com/portstrom/parse_wiki_text) instead.
 11 | 
 12 | ## Currently supported MediaWiki:
 13 | 
 14 | * Text formatting: `''italic'', '''bold''', <math>\LaTex</math>, <code></code>, ...`
 15 | * Paragraphs
 16 | * Heading hierarchies
 17 | * Lists
 18 | * Internal references (files) `[[File.ext|option|caption]]`
 19 | * External references `[https://example.com/ example]`
 20 | * Tables
 21 | * Generic templates `{{name|anon_arg|arg=value}}`
 22 | * Galleries
 23 | * Generic html tags and comments `<thing>content</thing>`
 24 | 
 25 | ## Known Limitations
 26 | 
 27 | This project has some known limitations, which might or might not be lifted in the future. 
 28 | Part of this comes from treating WikiText as a context-free formal language, which is not entirely true.
 29 | 
 30 | * `{,},[,]`  cannot be used in plain text, as they normally indicate special syntax. However, using them in math or `<nowiki>` is fine.
 31 | * Indentation is currently not parsed as `pre`.
 32 | * Templates are only parsed on a syntactical level; they have no effect on their content whatsoever.
 33 | 
 34 | 
 35 | ## Example
 36 | 
 37 | Parsing will result in either a syntax tree with position information (mostly omitted here for conciseness):
 38 | 
 39 | Input:
 40 | ``` markdown
 41 | this is some ''formatted'' [https://example.com example] text.
 42 | ```
 43 | Output (as pseudo-YAML):
 44 | ``` yaml
 45 | ---
 46 | type: document
 47 | position: ...
 48 | content:
 49 |   - type: paragraph
 50 |     position: ...
 51 |     content:
 52 |       - type: text
 53 |         position: ...
 54 |         text: "this is some "
 55 |       - type: formatted
 56 |         position: ...
 57 |         markup: italic
 58 |         content:
 59 |           - type: text
 60 |             position:
 61 |               start:
 62 |                 offset: 15
 63 |                 line: 1
 64 |                 col: 16
 65 |               end:
 66 |                 offset: 24
 67 |                 line: 1
 68 |                 col: 25
 69 |             text: formatted
 70 |       - type: text
 71 |         position: ...
 72 |         text: " "
 73 |       - type: externalreference
 74 |         position: ...
 75 |         target: "https://example.com"
 76 |         caption:
 77 |           - type: text
 78 |             position: ...
 79 |             text: example
 80 |       - type: text
 81 |         position: ...
 82 |         text: " text."
 83 | ```
 84 | 
 85 | Or a syntax error (here is a pretty representation):
 86 | ```
 87 | ERROR in line 1 at column 57: Could not continue to parse, expected one of: ''', [, <!--, '', [[, EOF, "\n", {{, [ 	], opening html tag, <, normal text
 88 | 1 | this is some ''formatted'' [https://example.com example]] text.
 89 | 2 |
 90 | ``` 
 91 | 
 92 | ## API
 93 | 
 94 | The library provides a straightforward `parse()` function:
 95 | 
 96 | ```rust
 97 | let input = "Hello World";
 98 | let result = mediawiki_parser::parse(&input)
 99 |     .expect("Parsing of the input failed!");
100 | println!("{}", &serde_yaml::to_string(&result).unwrap());
101 | ```
102 | 
103 | The result is a custom abstract syntax tree (AST). See the documentation for details.
104 | 
105 | 


--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
  1 | use peg;
  2 | use serde_derive::{Deserialize, Serialize};
  3 | use serde_yaml;
  4 | use std::env;
  5 | use std::fs;
  6 | use std::io;
  7 | use std::io::{Read, Write};
  8 | use std::path::{Path, PathBuf};
  9 | 
 10 | #[allow(dead_code)]
 11 | mod ast {
 12 |     include!("src/ast.rs");
 13 | }
 14 | 
 15 | macro_rules! TEST_SOUCE {
 16 |     () => {
 17 |         "
 18 | // {}
 19 | #[test]
 20 | fn {} () {{
 21 |     let input = {:?};
 22 |     let target_source = {:?};
 23 | 
 24 |     let result = parse(&input)
 25 |         .expect(\"Parsing of the input for {} failed!\");
 26 |     eprintln!(\"{{}}\", &serde_yaml::to_string(&result).unwrap());
 27 |     let target: ast::Element = serde_yaml::from_str(&target_source)
 28 |         .expect(\"Parsing the documentation of {} failed!\");
 29 |     assert_eq!(&target, &result,
 30 |         \"comparing documentation (left) with parse result (right) failed!\");
 31 | }}
 32 | "
 33 |     };
 34 | }
 35 | 
 36 | macro_rules! TEST_HEADER {
 37 |     () => {
 38 |         "
 39 | // THIS DOCUMENT IS AUTO-GENERATED AND SHOULD NOT BE EDITED BY HAND!
 40 | 
 41 | use crate::ast;
 42 | use serde_yaml;
 43 | use crate::parse;
 44 | 
 45 | "
 46 |     };
 47 | }
 48 | 
 49 | #[derive(Debug, Serialize, Deserialize)]
 50 | struct Test {
 51 |     case: String,
 52 |     input: String,
 53 |     out: ast::Element,
 54 | }
 55 | 
 56 | fn escape_test_name(input: String) -> String {
 57 |     input.replace(" ", "_").to_lowercase()
 58 | }
 59 | 
 60 | impl Test {
 61 |     fn write_code(&self, file: &mut fs::File) -> io::Result<()> {
 62 |         writeln!(
 63 |             file,
 64 |             TEST_SOUCE!(),
 65 |             self.case,
 66 |             escape_test_name(self.case.clone()),
 67 |             self.input,
 68 |             serde_yaml::to_string(&self.out).expect("Error serializing test input!"),
 69 |             self.case,
 70 |             self.case
 71 |         )
 72 |     }
 73 | }
 74 | 
 75 | fn generate_tests() {
 76 |     // tell cargo to rerun if the documentation changes.
 77 |     println!("cargo:rerun-if-changed=doc/docs.yml");
 78 |     let out_dir: PathBuf = env::var_os("OUT_DIR").unwrap().into();
 79 |     let out_path = out_dir
 80 |         .join(Path::new("tests_generated.rs"))
 81 |         .with_extension("rs");
 82 | 
 83 |     let mut in_file = fs::File::open(Path::new("doc/docs.yml"))
 84 |         .ok()
 85 |         .expect("Could not open input file!");
 86 |     let mut out_file = fs::File::create(Path::new(&out_path))
 87 |         .ok()
 88 |         .expect("Could not open output file!");
 89 | 
 90 |     let mut content = String::new();
 91 |     in_file
 92 |         .read_to_string(&mut content)
 93 |         .ok()
 94 |         .expect("Could not read file!");
 95 | 
 96 |     let tests: Vec<Test> =
 97 |         serde_yaml::from_str(&content).expect("Could not parse the documentation!");
 98 | 
 99 |     write!(out_file, TEST_HEADER!()).unwrap();
100 | 
101 |     for test in &tests {
102 |         test.write_code(&mut out_file).unwrap();
103 |     }
104 | }
105 | 
106 | fn main() {
107 |     peg::cargo_build("src/grammar.rustpeg");
108 |     generate_tests();
109 | }
110 | 


--------------------------------------------------------------------------------
/doc/docs.yml:
--------------------------------------------------------------------------------
   1 | # This document specifies valid MFNF wiki syntax.
   2 | # It serves as documentation and is used to generate tests for the parser.
   3 | # The whole document is structured as an array of test cases:
   4 | #
   5 | # - case: descriptive name of a test here.
   6 | #   input: |
   7 | #     Here goes the input ''wikitext'' to parse.
   8 | #   out:
   9 | #     type: document
  10 | #     offset: 0
  11 | #     content:
  12 | #       - type: paragraph
  13 | #         ...
  14 | #
  15 | # If the position of an element is not important for the test,
  16 | # you may omit its position info:
  17 | # 
  18 | # ...
  19 | # type: paragraph
  20 | # position: {}
  21 | # content: []
  22 | 
  23 | # An empty document should parse fine.
  24 |   - case: empty document
  25 |     input: ""
  26 |     out:
  27 |       type: document
  28 |       position:
  29 |         start:
  30 |           offset: 0
  31 |           line: 1
  32 |           col: 1
  33 |         end:
  34 |           offset: 0
  35 |           line: 1
  36 |           col: 1
  37 |       content: []
  38 | 
  39 | # A single paragraph without newlines should be parsed as a document
  40 | # with a single paragraph.
  41 |   - case: single unicode paragraph
  42 |     input: this is a äüöß == test. "" ʕ•ᴥ•ʔ
  43 |     out:
  44 |       type: document
  45 |       content:
  46 |         - type: paragraph
  47 |           content: 
  48 |             - type: text
  49 |               text: this is a äüöß == test. "" ʕ•ᴥ•ʔ
  50 |               
  51 | # A single paragraph with some special characters.
  52 |   - case: special chars in inline context
  53 |     input: hey!!! this is a test! <3 |x|
  54 |     out:
  55 |       type: document
  56 |       content:
  57 |         - type: paragraph
  58 |           content:
  59 |             - type: text
  60 |               text: "hey!!! this is a test! <3 |x|"
  61 | 
  62 | # A multiline paragraph.
  63 |   - case: multiline paragraph
  64 |     input: |
  65 |       This
  66 |       should
  67 |       be
  68 |       a single
  69 |       paragraph.
  70 |         
  71 | 
  72 |         
  73 |       This is the next one.
  74 |     out:
  75 |       type: document
  76 |       content:
  77 |         - type: paragraph
  78 |           content:
  79 |             - type: text
  80 |               text: This should be a single paragraph.
  81 |         - type: paragraph
  82 |           content:
  83 |             - type: text
  84 |               text: This is the next one.
  85 |               
  86 | # some simple headings.
  87 |   - case: some simple headings
  88 |     input: |
  89 |       == Heading 1 ==
  90 |       === Heading 2
  91 |       === Heading 3
  92 |       ==== Heading 4
  93 |       = Heading 5 =
  94 |       = Heading 7
  95 |       == Heading 8
  96 |       == Heading 9
  97 |     out:
  98 |       type: document
  99 |       content:
 100 |         - type: heading
 101 |           depth: 2
 102 |           caption:
 103 |             - type: text
 104 |               text: "Heading 1 "
 105 |           content:
 106 |             - type: heading
 107 |               depth: 3
 108 |               caption:
 109 |                 - type: text
 110 |                   text: Heading 2
 111 |               content: []
 112 |             - type: heading
 113 |               depth: 3
 114 |               caption:
 115 |                 - type: text
 116 |                   text: Heading 3
 117 |               content:
 118 |                 - type: heading
 119 |                   depth: 4
 120 |                   caption:
 121 |                     - type: text
 122 |                       text: Heading 4
 123 |                   content: []
 124 |         - type: heading
 125 |           depth: 1
 126 |           caption:
 127 |             - type: text
 128 |               text: "Heading 5 "
 129 |           content: []
 130 |         - type: heading
 131 |           depth: 1
 132 |           caption:
 133 |             - type: text
 134 |               text: Heading 7
 135 |           content:
 136 |             - type: heading
 137 |               depth: 2
 138 |               caption:
 139 |                 - type: text
 140 |                   text: Heading 8
 141 |               content: []
 142 |             - type: heading
 143 |               depth: 2
 144 |               caption:
 145 |                 - type: text
 146 |                   text: Heading 9
 147 |               content: []
 148 |           
 149 | # heading with an anchor
 150 |   - case: heading with ancor
 151 |     input: |
 152 |       == Heading {{Anker|foo}} ==
 153 |     out:
 154 |       type: document
 155 |       content:
 156 |         - type: heading
 157 |           depth: 2
 158 |           caption:
 159 |             - type: text
 160 |               text: "Heading "
 161 |             - type: template
 162 |               name:
 163 |                 - type: text
 164 |                   text: Anker
 165 |               content:
 166 |                 - type: templateargument
 167 |                   name: "1"
 168 |                   value:
 169 |                     - type: text
 170 |                       text: foo
 171 |             - type: text
 172 |               text: " "
 173 |           content: []
 174 | 
 175 | # heading with equal sign
 176 |   - case: heading with equal sign
 177 |     input: |
 178 |       == Heading == Structure ==
 179 |     out:
 180 |       type: document
 181 |       content:
 182 |         - type: heading
 183 |           depth: 2
 184 |           caption:
 185 |             - type: text
 186 |               text: "Heading == Structure "
 187 |           content: []
 188 |           
 189 | # A paragraph with italic words contains a plain formatted element
 190 | # wrapped by ''italic'' formatting.
 191 |   - case: single italic text
 192 |     input: "''this is italic''"
 193 |     out:
 194 |       type: document
 195 |       content:
 196 |         - type: paragraph
 197 |           content:
 198 |             - type: formatted
 199 |               markup: italic
 200 |               content:
 201 |                 - type: text
 202 |                   text: this is italic
 203 | 
 204 | # A paragraph with bold words contains a plain formatted element
 205 | # wrapped by '''bold''' formatting.
 206 |   - case: single bold text
 207 |     input: "'''this is bold'''"
 208 |     out:
 209 |       type: document
 210 |       content:
 211 |         - type: paragraph
 212 |           content: 
 213 |             - type: formatted
 214 |               markup: bold
 215 |               content: 
 216 |                 - type: text
 217 |                   text: this is bold
 218 | 
 219 | # Multiline bold markup
 220 |   - case: multiline bold markup
 221 |     input: |
 222 |       '''
 223 |       bla
 224 |       ''italic''
 225 |       bla2
 226 |       '''
 227 |     out:
 228 |       type: document
 229 |       content:
 230 |         - type: paragraph
 231 |           content:
 232 |             - type: formatted
 233 |               markup: bold
 234 |               content:
 235 |                 - type: paragraph
 236 |                   content:
 237 |                     - type: text
 238 |                       text: "bla "
 239 |                     - type: formatted
 240 |                       markup: italic
 241 |                       content:
 242 |                         - type: text
 243 |                           text: italic
 244 |                     - type: text
 245 |                       text: " bla2"
 246 | 
 247 | # A paragraph with nested markup.
 248 |   - case: nested markup italic
 249 |     input: "prelude ''italic paragraph with '''''nested'' bold and italic''''' and normal text."
 250 |     out:
 251 |         type: document
 252 |         position: {} 
 253 |         content: 
 254 |           - type: paragraph
 255 |             content:
 256 |               - type: text
 257 |                 text: "prelude "
 258 |               - type: formatted
 259 |                 markup: italic
 260 |                 content: 
 261 |                   - type: text
 262 |                     text: "italic paragraph with "
 263 |                 
 264 |                   - type: formatted
 265 |                     markup: bold
 266 |                     content: 
 267 |                       - type: formatted
 268 |                         position: {} 
 269 |                         markup: italic
 270 |                         content: 
 271 |                           - type: text
 272 |                             text: nested
 273 |                       - type: text
 274 |                         position: {} 
 275 |                         text: " bold and italic"
 276 |               - type: text
 277 |                 text: " and normal text."
 278 | 
 279 | # An apostrophe after markup
 280 |   - case: markup and apostrophe
 281 |     input: "The '''''dog''''''s bone"
 282 |     out:
 283 |         type: document
 284 |         position: {} 
 285 |         content: 
 286 |           - type: paragraph
 287 |             content:
 288 |               - type: text
 289 |                 text: "The "
 290 |               - type: formatted
 291 |                 markup: bold
 292 |                 content: 
 293 |                   - type: formatted
 294 |                     markup: italic
 295 |                     content: 
 296 |                       - type: text
 297 |                         position: {} 
 298 |                         text: "dog"
 299 |               - type: text
 300 |                 text: "\'s bone"
 301 |                 
 302 | # A simple inline math tag.
 303 |   - case: simple inline math
 304 |     input: "<math>x^2</math>"
 305 |     out:
 306 |       type: document
 307 |       content:
 308 |         - type: paragraph
 309 |           content: 
 310 |             - type: formatted
 311 |               markup: math
 312 |               content:
 313 |                   - type: text
 314 |                     text: x^2
 315 | 
 316 | # Math tag with special characters
 317 |   - case: special inline math
 318 |     input: "<math>$\\{{bla}} x'=c</math>"
 319 |     out:
 320 |       type: document
 321 |       content:
 322 |         - type: paragraph
 323 |           content: 
 324 |             - type: formatted
 325 |               markup: math
 326 |               content:
 327 |                 - type: text
 328 |                   text: "$\\{{bla}} x'=c"
 329 | 
 330 | # An empty math tag.
 331 |   - case: empty math
 332 |     input: "<math></math>"
 333 |     out:
 334 |       type: document
 335 |       content:
 336 |         - type: paragraph
 337 |           content: 
 338 |           - type: formatted
 339 |             markup: math
 340 |             content: []
 341 | 
 342 | # Simple hyperlink without a caption.
 343 |   - case: hyperlink without caption
 344 |     input: "[https://www.example.com/]"
 345 |     out:
 346 |       type: document
 347 |       content:
 348 |         - type: paragraph
 349 |           content:
 350 |             - type: externalreference
 351 |               target: https://www.example.com/
 352 |               caption: []
 353 | 
 354 | # Normal text with square brackets
 355 |   - case: text with brackets
 356 |     input: "see reference [1]!"
 357 |     out:
 358 |       type: document
 359 |       content:
 360 |         - type: paragraph
 361 |           content:
 362 |             - type: text
 363 |               text: "see reference [1]!"
 364 | 
 365 | # Simple hyperlink with a caption.
 366 |   - case: hyperlink with caption
 367 |     input: "[https://www.example.com/ Example dot com]"
 368 |     out:
 369 |       type: document
 370 |       content:
 371 |         - type: paragraph
 372 |           content:
 373 |             - type: externalreference
 374 |               target: https://www.example.com/
 375 |               caption:
 376 |                 - type: text
 377 |                   text: "Example dot com"
 378 | 
 379 | # Simple hyperlink with a formatted caption.
 380 |   - case: hyperlink with formatted caption
 381 |     input: "[https://www.example.com/ Example '''dot com''']"
 382 |     out:
 383 |       type: document
 384 |       content:
 385 |         - type: paragraph
 386 |           content:
 387 |             - type: externalreference
 388 |               target: https://www.example.com/
 389 |               caption:
 390 |                 - type: text
 391 |                   text: "Example "
 392 | 
 393 |                 - type: formatted
 394 |                   markup: bold
 395 |                   content:
 396 |                     - type: text
 397 |                       text: dot com
 398 | 
 399 | # An arbitrary html tag
 400 |   - case: nested html tag
 401 |     input: |
 402 |       <translate language="test" attr2=bla attröbüte='this is a "test"'>
 403 |       <translate >Fréttinga is a small municipality in MungoLand.< / translate></translate>
 404 |     out:
 405 |       type: document
 406 |       content:
 407 |         - type: paragraph
 408 |           content:
 409 |             - type: htmltag
 410 |               name: translate
 411 |               attributes:
 412 |                 - key: language
 413 |                   value: test
 414 |                 - key: attr2
 415 |                   value: bla
 416 |                 - key: attröbüte
 417 |                   value: "this is a \"test\""
 418 |               content:
 419 |                 - type: htmltag
 420 |                   name: translate
 421 |                   attributes: []
 422 |                   content:
 423 |                     - type: text
 424 |                       text: "Fréttinga is a small municipality in MungoLand."
 425 |               
 426 | # An empty nowiki tag
 427 |   - case: empty nowiki
 428 |     input: "<nowiki></nowiki>"
 429 |     out:
 430 |       type: document
 431 |       content:
 432 |         - type: paragraph
 433 |           content: 
 434 |           - type: formatted
 435 |             markup: nowiki
 436 |             content: []
 437 | 
 438 | # A self-closing tag
 439 |   - case: self closing tag
 440 |     input: "<section/><section/>"
 441 |     out:
 442 |       type: document
 443 |       content:
 444 |         - type: paragraph
 445 |           content:
 446 |             - type: htmltag
 447 |               name: section
 448 |               attributes: []
 449 |               content: []
 450 |             - type: htmltag
 451 |               name: section
 452 |               attributes: []
 453 |               content: []
 454 |               
 455 | # A HTML comment
 456 |   - case: html comment
 457 |     input: "bla <!-- comment ³½}³¹ðđ æđ||đð@³¼¶²{{}} content -->"
 458 |     out:
 459 |       type: document
 460 |       content:
 461 |         - type: paragraph
 462 |           content:
 463 |             - type: text
 464 |               text: "bla "
 465 |             - type: comment
 466 |               text: " comment ³½}³¹ðđ æđ||đð@³¼¶²{{}} content "
 467 |               
 468 | # The nowiki tag should ignore everything.
 469 |   - case: nowiki
 470 |     input: |
 471 |         <nowiki>abc {{bla}} ''' 
 472 |         <another> [</nowiki>
 473 |     out:
 474 |       type: document
 475 |       content:
 476 |         - type: paragraph
 477 |           content:
 478 |             - type: formatted
 479 |               markup: nowiki
 480 |               content:
 481 |                 - type: text
 482 |                   text: "abc {{bla}} ''' \n<another> ["
 483 | 
 484 | # Strikethrough text.
 485 |   - case: strikethrough
 486 |     input: |
 487 |       <del>strikethrough text</del>
 488 |       <s>strikethrough text</s>
 489 |     out:
 490 |       type: document
 491 |       content:
 492 |         - type: paragraph
 493 |           content:
 494 |             - type: formatted
 495 |               markup: strikethrough
 496 |               content:
 497 |                 - type: text
 498 |                   text: strikethrough text
 499 |             - type: text
 500 |               text: " "
 501 |             - type: formatted
 502 |               markup: strikethrough
 503 |               content:
 504 |                 - type: text
 505 |                   text: strikethrough text
 506 |                       
 507 | # Definition markup
 508 |   - case: definition markup
 509 |     input: <dfn title="Riemannsumme">Riemannsumme</dfn>
 510 |     out:
 511 |       type: document
 512 |       content:
 513 |         - type: paragraph
 514 |           content:
 515 |           - type: htmltag
 516 |             name: dfn
 517 |             attributes:
 518 |               - key: title
 519 |                 value: Riemannsumme
 520 |             content:
 521 |               - type: text
 522 |                 text: Riemannsumme
 523 |                       
 524 | # Underline text
 525 |   - case: underline markup
 526 |     input: <ins>Inserted</ins>
 527 |     out:
 528 |       type: document
 529 |       content:
 530 |         - type: paragraph
 531 |           content:
 532 |             - type: formatted
 533 |               markup: underline
 534 |               content:
 535 |                 - type: text
 536 |                   text: Inserted
 537 |                       
 538 | # Fixed-width text
 539 |   - case: fixed width markup
 540 |     input: <code>Fixed width text</code>
 541 |     out:
 542 |       type: document
 543 |       content:
 544 |         - type: paragraph
 545 |           content:
 546 |             - type: formatted
 547 |               markup: code
 548 |               content:
 549 |                 - type: text
 550 |                   text: Fixed width text
 551 |                       
 552 | # Blockquote
 553 |   - case: blockquote markup
 554 |     input: <blockquote>Blockquote</blockquote>
 555 |     out:
 556 |       type: document
 557 |       content:
 558 |         - type: paragraph
 559 |           content:
 560 |           - type: formatted
 561 |             markup: blockquote
 562 |             content:
 563 |               - type: text
 564 |                 text: Blockquote
 565 |                       
 566 | # Pre-formatted Text
 567 |   - case: pre formatted text
 568 |     input: |
 569 |       <pre>Text is '''preformatted''' and 
 570 |       ''markups'' '''''cannot''''' be done</pre>
 571 |     out:
 572 |       type: document
 573 |       content:
 574 |         - type: paragraph
 575 |           content:
 576 |             - type: formatted
 577 |               markup: preformatted
 578 |               content:
 579 |                 - type: text
 580 |                   text: "Text is '''preformatted''' and \n''markups'' '''''cannot''''' be done"
 581 |                       
 582 | # A simple list of one item.
 583 |   - case: list one item
 584 |     input: "* item"
 585 |     out:
 586 |       type: document
 587 |       content:
 588 |         - type: list
 589 |           content:
 590 |             - type: listitem
 591 |               kind: unordered
 592 |               depth: 1
 593 |               content:
 594 |                 - type: text
 595 |                   text: item
 596 | 
 597 | # An ordered list and an unordered list of one item each
 598 |   - case: two simple lists
 599 |     input: |
 600 |         * item 1
 601 |         
 602 |         ## item 2
 603 |     out:
 604 |       type: document
 605 |       content:
 606 |         - type: list
 607 |           content:
 608 |             - type: listitem
 609 |               kind: unordered
 610 |               depth: 1
 611 |               content:
 612 |                 - type: text
 613 |                   text: item 1
 614 |         - type: list
 615 |           content:
 616 |             - type: listitem
 617 |               kind: ordered
 618 |               depth: 2
 619 |               content:
 620 |                 - type: text
 621 |                   text: item 2
 622 | 
 623 | 
 624 | # Star, Fence, Semicolon should be considered as text in an inline context.
 625 |   - case: list symbols inline context
 626 |     input: |
 627 |         * item 1 #;* bla
 628 |         abc # def *
 629 |     out:
 630 |       type: document
 631 |       content:
 632 |         - type: list
 633 |           content:
 634 |             - type: listitem
 635 |               kind: unordered
 636 |               depth: 1
 637 |               content:
 638 |                 - type: text
 639 |                   text: "item 1 #;* bla"
 640 |         - type: paragraph
 641 |           content:
 642 |             - type: text
 643 |               text: "abc # def *"
 644 | 
 645 | # A list with multiple different item types and a paragraph
 646 |   - case: list diverse items
 647 |     input: |
 648 |         : item 1
 649 |         ; item 11
 650 |         *** item 2
 651 |         ## item 3
 652 |         paragraph
 653 |     out:
 654 |       type: document
 655 |       content:
 656 |         - type: list
 657 |           content:
 658 |             - type: listitem
 659 |               kind: definition
 660 |               depth: 1
 661 |               content:
 662 |                 - type: text
 663 |                   text: item 1
 664 |             - type: listitem
 665 |               kind: definitionterm
 666 |               depth: 1
 667 |               content:
 668 |                 - type: text
 669 |                   text: item 11
 670 |                 - type: list
 671 |                   content:
 672 |                     - type: listitem
 673 |                       depth: 2
 674 |                       kind: unordered 
 675 |                       content:
 676 |                         - type: list
 677 |                           content:
 678 |                             - type: listitem
 679 |                               depth: 3
 680 |                               kind: unordered 
 681 |                               content:
 682 |                                 - type: text
 683 |                                   text: item 2
 684 |                     - type: listitem
 685 |                       depth: 2
 686 |                       kind: ordered 
 687 |                       content:
 688 |                         - type: text
 689 |                           text: item 3
 690 |         - type: paragraph
 691 |           content:
 692 |             - type: text
 693 |               text: paragraph
 694 | 
 695 | # Even inside of templates, lists must start on a new line
 696 |   - case: list in template
 697 |     input: |
 698 |         {{test|
 699 |         * item 1
 700 |         * item 2}}
 701 |     out:
 702 |       type: document
 703 |       content:
 704 |         - type: template
 705 |           name:
 706 |             - type: text
 707 |               text: test
 708 |           content:
 709 |             - type: templateargument
 710 |               name: "1"
 711 |               value:
 712 |                 - type: list
 713 |                   content:
 714 |                     - type: listitem
 715 |                       kind: unordered
 716 |                       depth: 1
 717 |                       content:
 718 |                         - type: text
 719 |                           text: item 1
 720 |                     - type: listitem
 721 |                       kind: unordered
 722 |                       depth: 1
 723 |                       content:
 724 |                         - type: text
 725 |                           text: item 2
 726 | 
 727 | # Lists cannot be started mid-line.
 728 |   - case: list mod line
 729 |     input: |
 730 |         {{test| this * is <a> * a </a> test}}
 731 |         [[this ** '' * as''<u> * as</u> well]]
 732 |     out:
 733 |       type: document
 734 |       content:
 735 |         - type: template
 736 |           name:
 737 |             - type: text
 738 |               text: test
 739 |           content:
 740 |             - type: templateargument
 741 |               name: "1"
 742 |               value:
 743 |                 - type: text
 744 |                   text: "this * is "
 745 |                 - type: htmltag
 746 |                   name: a
 747 |                   attributes: []
 748 |                   content: 
 749 |                     - type: text
 750 |                       text: " * a "
 751 |                 - type: text
 752 |                   text: " test"
 753 |         - type: internalreference
 754 |           target:
 755 |             - type: text
 756 |               text: "this ** "
 757 |             - type: formatted
 758 |               markup: italic
 759 |               content:
 760 |                 - type: text
 761 |                   text: " * as"
 762 |             - type: formatted
 763 |               markup: underline
 764 |               content:
 765 |                 - type: text
 766 |                   text: " * as"
 767 |             - type: text
 768 |               text: " well"
 769 |           options: []
 770 |           caption: []
 771 |                
 772 | # A very simple template
 773 |   - case: simple template
 774 |     input: "{{name}}"
 775 |     out:
 776 |       type: document
 777 |       content:
 778 |         - type: template
 779 |           name:
 780 |             - type: text
 781 |               text: name
 782 |           content: []
 783 | 
 784 | # A sequence of block templates
 785 |   - case: block template sequence
 786 |     input: |
 787 |         {{name}}
 788 | 
 789 |         {{name}}
 790 |     out:
 791 |       type: document
 792 |       content:
 793 |         - type: template
 794 |           name:
 795 |             - type: text
 796 |               text: name
 797 |           content: []
 798 |         - type: template
 799 |           name:
 800 |             - type: text
 801 |               text: name
 802 |           content: []
 803 | 
 804 | # A sequence of inline templates
 805 |   - case: inline template sequence
 806 |     input: |
 807 |         bla {{name}} and a {{name}}
 808 |     out:
 809 |       type: document
 810 |       content:
 811 |         - type: paragraph
 812 |           content:
 813 |             - type: text
 814 |               text: "bla "
 815 |             - type: template
 816 |               name:
 817 |                 - type: text
 818 |                   text: name
 819 |               content: []
 820 |             - type: text
 821 |               text: " and a "
 822 |             - type: template
 823 |               name:
 824 |                 - type: text
 825 |                   text: name
 826 |               content: []
 827 | 
 828 | 
 829 | # A template with list-like name
 830 |   - case: template listlike name
 831 |     input: |
 832 |         {{:name
 833 |         
 834 |         }}
 835 |     out:
 836 |       type: document
 837 |       content:
 838 |         - type: template
 839 |           name:
 840 |             - type: text
 841 |               text: ":name"
 842 |           content: []
 843 | 
 844 | 
 845 | # A sequence of unnamed template arguments
 846 |   - case: anonymous attribute sequence
 847 |     input: |
 848 |         {{templatename
 849 |         |attribute2
 850 |         |atträöüß3
 851 |         |attribute4
 852 |         }}
 853 |     out:
 854 |       type: document
 855 |       content:
 856 |         - type: template
 857 |           name:
 858 |             - type: text
 859 |               text: templatename
 860 |           content:
 861 |             - type: templateargument
 862 |               name: "1"
 863 |               value:
 864 |                 - type: paragraph
 865 |                   content:
 866 |                     - type: text
 867 |                       text: attribute2
 868 |             - type: templateargument
 869 |               name: "2"
 870 |               value:
 871 |                 - type: paragraph
 872 |                   content:
 873 |                     - type: text
 874 |                       text: atträöüß3
 875 |             - type: templateargument
 876 |               name: "3"
 877 |               value:
 878 |                 - type: paragraph
 879 |                   content:
 880 |                     - type: text
 881 |                       text: attribute4
 882 | 
 883 | # A named argument
 884 |   - case: named argument
 885 |     input: "{{name|caption=üäö test}}"
 886 |     out:
 887 |       type: document
 888 |       content:
 889 |         - type: template
 890 |           name:
 891 |             - type: text
 892 |               text: name
 893 |           content:
 894 |             - type: templateargument
 895 |               name: caption
 896 |               value:
 897 |                 - type: text
 898 |                   text: üäö test
 899 | 
 900 | # Multiple named template arguments
 901 |   - case: multiple named template arguments
 902 |     input: "{{templatename|äöütem=2|item3=3|item4=4}}"
 903 |     out:
 904 |       type: document
 905 |       content:
 906 |         - type: template
 907 |           name:
 908 |             - type: text
 909 |               text: templatename
 910 |           content:
 911 |             - type: templateargument
 912 |               name: äöütem
 913 |               value:
 914 |                 - type: text
 915 |                   text: "2"
 916 |             - type: templateargument
 917 |               name: item3
 918 |               value:
 919 |                 - type: text
 920 |                   text: "3"
 921 |             - type: templateargument
 922 |               name: item4
 923 |               value:
 924 |                 - type: text
 925 |                   text: "4"
 926 | 
 927 | # Mixed named and unnamed arguments
 928 |   - case: mixed template arguments
 929 |     input: "{{template1|item2=2|item3=3|item4}}"
 930 |     out:
 931 |       type: document
 932 |       content:
 933 |         - type: template
 934 |           name:
 935 |             - type: text
 936 |               text: template1
 937 |           content:
 938 |             - type: templateargument
 939 |               name: item2
 940 |               value:
 941 |                 - type: text
 942 |                   text: "2"
 943 |             - type: templateargument
 944 |               name: item3
 945 |               value:
 946 |                 - type: text
 947 |                   text: "3"
 948 |             - type: templateargument
 949 |               name: "1"
 950 |               value:
 951 |                 - type: text
 952 |                   text: "item4"
 953 | 
 954 | # Nested templates
 955 |   - case: nested templates
 956 |     input: "{{Thankyou in {{preferred language}}|signature=Me}}"
 957 |     out:
 958 |       type: document
 959 |       content:
 960 |         - type: template
 961 |           name:
 962 |             - type: text
 963 |               text: "Thankyou in "
 964 |             - type: template
 965 |               name: 
 966 |                 - type: text
 967 |                   text: "preferred language"
 968 |               content: []
 969 |           content:
 970 |             - type: templateargument
 971 |               name: signature
 972 |               value:
 973 |                 - type: text
 974 |                   text: Me
 975 | 
 976 | # A simple internal reference
 977 |   - case: simple internal ref
 978 |     input: "[[File:Abc]]"
 979 |     out:
 980 |       type: document
 981 |       content:
 982 |         - type: internalreference
 983 |           target: 
 984 |             - type: text
 985 |               text: File:Abc
 986 |           options: []
 987 |           caption: []
 988 | 
 989 | # An empty internal reference
 990 |   - case: empty internal ref
 991 |     input: "[[]]"
 992 |     out:
 993 |       type: document
 994 |       content:
 995 |         - type: internalreference
 996 |           target: []
 997 |           options: []
 998 |           caption: []
 999 | 
1000 | # A simple internal reference with caption
1001 |   - case: internal ref with caption
1002 |     input: "[[File:Abc|this is a caption]]"
1003 |     out:
1004 |       type: document
1005 |       content:
1006 |         - type: internalreference
1007 |           target: 
1008 |             - type: text
1009 |               text: File:Abc
1010 |           options: []
1011 |           caption: 
1012 |             - type: text
1013 |               text: this is a caption
1014 | 
1015 | # A simple internal reference with options
1016 |   - case: internal ref with options
1017 |     input: "[[File:Abc|opt1=value1|opt2=123|this is a caption with '''bold and |special|''' markup]]"
1018 |     out:
1019 |       type: document
1020 |       content:
1021 |         - type: internalreference
1022 |           target: 
1023 |             - type: text
1024 |               text: "File:Abc"
1025 |           options: 
1026 |             - - type: text
1027 |                 text: "opt1=value1"
1028 |             - - type: text
1029 |                 text: "opt2=123"
1030 |           caption: 
1031 |             - type: text
1032 |               text: "this is a caption with "
1033 |             - type: formatted
1034 |               markup: bold
1035 |               content:
1036 |                 - type: text
1037 |                   text: "bold and |special|"
1038 |             - type: text
1039 |               text: " markup"                     
1040 | 
1041 | # Simple table with one cell
1042 |   - case: single cell table
1043 |     input: |
1044 |         {|
1045 |         | attributevalue = "test" | test
1046 |         |}
1047 |     out:
1048 |       type: document
1049 |       content:
1050 |         - type: table
1051 |           attributes: []
1052 |           caption_attributes: []
1053 |           caption: []
1054 |           rows:
1055 |             - type: tablerow
1056 |               attributes: []
1057 |               cells:
1058 |                 - type: tablecell
1059 |                   attributes: 
1060 |                     - key: attributevalue
1061 |                       value: test
1062 |                   header: false
1063 |                   content:
1064 |                     - type: paragraph
1065 |                       content: 
1066 |                         - type: text
1067 |                           text: test
1068 |  
1069 |  # Simple table with caption
1070 |   - case: table caption
1071 |     input: |
1072 |         {|
1073 |         |+ caption_attribute=value | this is a ''caption''
1074 |         | attributevalue = "test" | test
1075 |         |}
1076 |     out:
1077 |       type: document
1078 |       content:
1079 |         - type: table
1080 |           attributes: []
1081 |           caption_attributes:
1082 |             - key: caption_attribute
1083 |               value: value
1084 |           caption:
1085 |             - type: paragraph
1086 |               content:
1087 |                 - type: text
1088 |                   text: "this is a "
1089 |                 - type: formatted
1090 |                   markup: italic
1091 |                   content:
1092 |                     - type: text
1093 |                       text: "caption"
1094 |           rows:
1095 |             - type: tablerow
1096 |               attributes: []
1097 |               cells:
1098 |                 - type: tablecell
1099 |                   attributes:
1100 |                     - key: attributevalue 
1101 |                       value: test
1102 |                   header: false
1103 |                   content:
1104 |                     - type: paragraph
1105 |                       content: 
1106 |                         - type: text
1107 |                           text: test
1108 | 
1109 |  # simple table in template (mediawiki can't do this)
1110 |   - case: table in template
1111 |     input: |
1112 |         {{test|bla=
1113 |         {|
1114 |         |+ caption_attribute=value | this is a ''caption''
1115 |         | attributevalue = "test" | test
1116 |         |}
1117 |         }}
1118 |     out:
1119 |       type: document
1120 |       content:
1121 |         - type: template
1122 |           name:
1123 |             - type: text
1124 |               text: test
1125 |           content:
1126 |             - type: templateargument
1127 |               name: bla
1128 |               value:
1129 |               - type: table
1130 |                 attributes: []
1131 |                 caption_attributes:
1132 |                   - key: caption_attribute
1133 |                     value: value
1134 |                 caption:
1135 |                   - type: paragraph
1136 |                     content:
1137 |                       - type: text
1138 |                         text: "this is a "
1139 |                       - type: formatted
1140 |                         markup: italic
1141 |                         content:
1142 |                           - type: text
1143 |                             text: "caption"
1144 |                 rows:
1145 |                   - type: tablerow
1146 |                     attributes: []
1147 |                     cells:
1148 |                       - type: tablecell
1149 |                         attributes:
1150 |                           - key: attributevalue 
1151 |                             value: test
1152 |                         header: false
1153 |                         content:
1154 |                           - type: paragraph
1155 |                             content: 
1156 |                               - type: text
1157 |                                 text: test
1158 | 
1159 |  # simple table in template (with hack used in real mediawiki)
1160 |   - case: table in template mediawiki hack
1161 |     input: |
1162 |         {{test|bla=
1163 |         {{(!}}
1164 |         {{!+}} caption_attribute=value {{!}} this is a ''caption''
1165 |         {{!}} attributevalue = "test" {{!}} test
1166 |         {{!)}}
1167 |         }}
1168 |     out:
1169 |       type: document
1170 |       content:
1171 |         - type: template
1172 |           name:
1173 |             - type: text
1174 |               text: test
1175 |           content:
1176 |             - type: templateargument
1177 |               name: bla
1178 |               value:
1179 |               - type: table
1180 |                 attributes: []
1181 |                 caption_attributes:
1182 |                   - key: caption_attribute
1183 |                     value: value
1184 |                 caption:
1185 |                   - type: paragraph
1186 |                     content:
1187 |                       - type: text
1188 |                         text: "this is a "
1189 |                       - type: formatted
1190 |                         markup: italic
1191 |                         content:
1192 |                           - type: text
1193 |                             text: "caption"
1194 |                 rows:
1195 |                   - type: tablerow
1196 |                     attributes: []
1197 |                     cells:
1198 |                       - type: tablecell
1199 |                         attributes:
1200 |                           - key: attributevalue 
1201 |                             value: test
1202 |                         header: false
1203 |                         content:
1204 |                           - type: paragraph
1205 |                             content: 
1206 |                               - type: text
1207 |                                 text: test
1208 | 
1209 |  # Table with multiple rows
1210 |   - case: multi row table
1211 |     input: |
1212 |         {| class="wikitable"
1213 |         |+ caption
1214 |         |-
1215 |         | attributevalue = "test" | test
1216 |         |-
1217 |         | test ''2''
1218 |         |-style="font-style: italic; color: green;"
1219 |         | test3
1220 |         | attr4=val | test4
1221 |         |}
1222 |     out:
1223 |       type: document
1224 |       content:
1225 |         - type: table
1226 |           attributes: 
1227 |             - key: class
1228 |               value: wikitable
1229 |           caption_attributes: []
1230 |           caption:
1231 |             - type: paragraph
1232 |               content:
1233 |                 - type: text
1234 |                   text: caption
1235 |           rows:
1236 |             - type: tablerow
1237 |               attributes: []
1238 |               cells:
1239 |                 - type: tablecell
1240 |                   attributes: 
1241 |                     - key: attributevalue
1242 |                       value: test
1243 |                   header: false
1244 |                   content:
1245 |                     - type: paragraph
1246 |                       content: 
1247 |                         - type: text
1248 |                           text: test
1249 |             - type: tablerow
1250 |               attributes: []
1251 |               cells:
1252 |                 - type: tablecell
1253 |                   attributes: []
1254 |                   header: false
1255 |                   content:
1256 |                     - type: paragraph
1257 |                       content: 
1258 |                         - type: text
1259 |                           text: "test "
1260 |                         - type: formatted
1261 |                           markup: italic
1262 |                           content:
1263 |                             - type: text
1264 |                               text: "2"
1265 |             - type: tablerow
1266 |               attributes: 
1267 |                 - key: style
1268 |                   value: "font-style: italic; color: green;"
1269 |               cells:
1270 |                 - type: tablecell
1271 |                   attributes: []
1272 |                   header: false
1273 |                   content:
1274 |                     - type: paragraph
1275 |                       content: 
1276 |                         - type: text
1277 |                           text: "test3"
1278 |                 - type: tablecell
1279 |                   attributes:
1280 |                     - key: attr4
1281 |                       value: val
1282 |                   header: false
1283 |                   content:
1284 |                     - type: paragraph
1285 |                       content: 
1286 |                         - type: text
1287 |                           text: "test4"
1288 |           
1289 | # Table with multiple cells on one line
1290 |   - case: table inline cells
1291 |     input: |
1292 |         {|
1293 |         | attributevalue = "test" | test || cell 2 || || attribute=3 | cell 3
1294 |         |}
1295 |     out:
1296 |       type: document
1297 |       content:
1298 |         - type: table
1299 |           attributes: []
1300 |           caption_attributes: []
1301 |           caption: []
1302 |           rows:
1303 |             - type: tablerow
1304 |               attributes: []
1305 |               cells:
1306 |                 - type: tablecell
1307 |                   attributes: 
1308 |                     - key: attributevalue
1309 |                       value: test
1310 |                   header: false
1311 |                   content:
1312 |                     - type: text
1313 |                       text: "test "
1314 |                 - type: tablecell
1315 |                   attributes: []
1316 |                   header: false
1317 |                   content:
1318 |                     - type: text
1319 |                       text: "cell 2 "
1320 |                 - type: tablecell
1321 |                   attributes: []
1322 |                   header: false
1323 |                   content: []
1324 |                 - type: tablecell
1325 |                   attributes: 
1326 |                     - key: attribute
1327 |                       value: "3"
1328 |                   header: false
1329 |                   content:
1330 |                     - type: paragraph
1331 |                       content:
1332 |                         - type: text
1333 |                           text: cell 3
1334 |           
1335 | # Table with header cells
1336 |   - case: table header cells
1337 |     input: |
1338 |         {|
1339 |         !  Orange !! attribute="test" | Apple
1340 |         |-
1341 |         |  Bread  || Pie !! ''hey!!''
1342 |         ! footer
1343 |         |}
1344 |     out:
1345 |       type: document
1346 |       content:
1347 |         - type: table
1348 |           attributes: []
1349 |           caption_attributes: []
1350 |           caption: []
1351 |           rows:
1352 |             - type: tablerow
1353 |               attributes: []
1354 |               cells:
1355 |                 - type: tablecell
1356 |                   header: true
1357 |                   attributes: []
1358 |                   content:
1359 |                     - type: text
1360 |                       text: "Orange "
1361 |                 - type: tablecell
1362 |                   header: true
1363 |                   attributes: 
1364 |                     - key: attribute
1365 |                       value: test
1366 |                   content:
1367 |                     - type: paragraph
1368 |                       content:
1369 |                         - type: text
1370 |                           text: Apple
1371 |             - type: tablerow
1372 |               attributes: []
1373 |               cells:
1374 |                 - type: tablecell
1375 |                   header: false
1376 |                   attributes: []
1377 |                   content:
1378 |                     - type: text
1379 |                       text: "Bread "
1380 |                 - type: tablecell
1381 |                   header: false
1382 |                   attributes: []
1383 |                   content:
1384 |                     - type: text
1385 |                       text: "Pie "
1386 |                 - type: tablecell
1387 |                   header: true
1388 |                   attributes: []
1389 |                   content:
1390 |                     - type: paragraph
1391 |                       content:
1392 |                         - type: formatted
1393 |                           markup: italic
1394 |                           content:
1395 |                             - type: text
1396 |                               text: "hey!!"
1397 |                 - type: tablecell
1398 |                   header: true
1399 |                   attributes: []
1400 |                   content:
1401 |                     - type: paragraph
1402 |                       content:
1403 |                         - type: text
1404 |                           text: footer
1405 | 
1406 | # a simple gallery tag
1407 |   - case: simple gallery
1408 |     input: |
1409 |         <gallery>
1410 |         File:Abc
1411 |         </gallery>
1412 |     out:
1413 |       type: document
1414 |       content:
1415 |         - type: gallery
1416 |           attributes: []
1417 |           content:
1418 |             - type: internalreference
1419 |               target: 
1420 |                 - type: text
1421 |                   text: File:Abc
1422 |               options: []
1423 |               caption: []
1424 | 
1425 | # an empty gallery
1426 |   - case: empty gallery
1427 |     input: |
1428 |         <gallery>
1429 |         </gallery>
1430 |     out:
1431 |       type: document
1432 |       content:
1433 |         - type: gallery
1434 |           attributes: []
1435 |           content: []
1436 | 
1437 | # empty gallery with whitespace
1438 |   - case: empty whitespace gallery
1439 |     input: "<gallery>  \n \n\n\n    \n\t\n </gallery>"
1440 |     out:
1441 |       type: document
1442 |       content:
1443 |         - type: gallery
1444 |           attributes: []
1445 |           content: []
1446 | 
1447 | # simple gallery with whitespace
1448 |   - case: simple whitespace gallery
1449 |     input: "<gallery>  \n \nFile:ABC \n\n    \n\t\n </gallery>"
1450 |     out:
1451 |       type: document
1452 |       content:
1453 |         - type: gallery
1454 |           attributes: []
1455 |           content: 
1456 |             - type: internalreference
1457 |               target: 
1458 |                 - type: text
1459 |                   text: "File:ABC "
1460 |               caption: []
1461 |               options: []
1462 | 
1463 | # a gallery with figure captions
1464 |   - case: caption gallery
1465 |     input: |
1466 |         <gallery>
1467 |         File:Abc|this is a figure [[caption]]
1468 |         File:This is a new file
1469 |         </gallery>
1470 |     out:
1471 |       type: document
1472 |       content:
1473 |         - type: gallery
1474 |           attributes: []
1475 |           content:
1476 |             - type: internalreference
1477 |               target: 
1478 |                 - type: text
1479 |                   text: File:Abc
1480 |               options: []
1481 |               caption:
1482 |                 - type: text
1483 |                   text: "this is a figure "
1484 |                 - type: internalreference
1485 |                   target: 
1486 |                     - type: text
1487 |                       text: caption
1488 |                   caption: []
1489 |                   options: []
1490 |             - type: internalreference
1491 |               target:
1492 |                 - type: text
1493 |                   text: File:This is a new file
1494 |               caption: []
1495 |               options: []
1496 | 
1497 | # template with a heading as content
1498 |   - case: template with heading content
1499 |     input: |
1500 |       {{noprint|
1501 |       == caption
1502 |       content
1503 |       }}
1504 |     out:
1505 |       type: document
1506 |       content:
1507 |         - type: template
1508 |           name:
1509 |             - type: text
1510 |               text: noprint
1511 |           content:
1512 |             - type: templateargument
1513 |               name: "1"
1514 |               value:
1515 |                 - type: heading
1516 |                   depth: 2
1517 |                   caption: 
1518 |                     - type: text
1519 |                       text: caption
1520 |                   content:
1521 |                     - type: paragraph
1522 |                       content:
1523 |                         - type: text
1524 |                           text: content
1525 |                             
1526 | 


--------------------------------------------------------------------------------
/linter_notes.md:
--------------------------------------------------------------------------------
1 |  - non-incrementing list item depth
2 |  - template content should only be content attributes
3 | 


--------------------------------------------------------------------------------
/src/ast.rs:
--------------------------------------------------------------------------------
  1 | //! Data structures describing the parsed document.
  2 | 
  3 | #[cfg(feature = "no_position")]
  4 | use serde::{Serialize, SerializeMap, Serializer};
  5 | use serde_derive::{Deserialize, Serialize};
  6 | 
  7 | /**
  8 |  * Element types used in the abstract syntax tree (AST).
  9 |  *
 10 |  * Each variant wraps the struct of the same name. Each element must keep track of its
 11 |  * position in the original input document. After parsing, the document tree can be serialized by serde.
 12 |  */
 13 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 14 | #[serde(tag = "type", rename_all = "lowercase", deny_unknown_fields)] // the variant is identified by a `type` key holding its lowercased name (e.g. `templateargument`); unknown keys are rejected when deserializing
 15 | pub enum Element {
 16 |     Document(Document),
 17 |     Heading(Heading),
 18 |     Text(Text),
 19 |     Formatted(Formatted),
 20 |     Paragraph(Paragraph),
 21 |     Template(Template),
 22 |     TemplateArgument(TemplateArgument),
 23 |     InternalReference(InternalReference),
 24 |     ExternalReference(ExternalReference),
 25 |     ListItem(ListItem),
 26 |     List(List),
 27 |     Table(Table),
 28 |     TableRow(TableRow),
 29 |     TableCell(TableCell),
 30 |     Comment(Comment),
 31 |     HtmlTag(HtmlTag),
 32 |     Gallery(Gallery),
 33 |     Error(Error),
 34 | }
 35 | 
 36 | /// The document root.
 37 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 38 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 39 | pub struct Document {
 40 |     #[serde(default)]
 41 |     pub position: Span,
 42 |     pub content: Vec<Element>,
 43 | }
 44 | 
 45 | /// Headings make a hierarchical document structure.
 46 | /// Headings of higher depths have other headings as parents.
 47 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 48 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 49 | pub struct Heading {
 50 |     #[serde(default)]
 51 |     pub position: Span,
 52 |     pub depth: usize,
 53 |     pub caption: Vec<Element>,
 54 |     pub content: Vec<Element>,
 55 | }
 56 | 
 57 | /// Simple text.
 58 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 59 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 60 | pub struct Text {
 61 |     #[serde(default)]
 62 |     pub position: Span,
 63 |     pub text: String,
 64 | }
 65 | 
 66 | /// A formatting wrapper, usually around text.
 67 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 68 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 69 | pub struct Formatted {
 70 |     #[serde(default)]
 71 |     pub position: Span,
 72 |     pub markup: MarkupType,
 73 |     pub content: Vec<Element>,
 74 | }
 75 | 
 76 | /// Paragraphs are separated by newlines in the input document.
 77 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 78 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 79 | pub struct Paragraph {
 80 |     #[serde(default)]
 81 |     pub position: Span,
 82 |     pub content: Vec<Element>,
 83 | }
 84 | 
 85 | /// A mediawiki template.
 86 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 87 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 88 | pub struct Template {
 89 |     #[serde(default)]
 90 |     pub position: Span,
 91 |     pub name: Vec<Element>,
 92 |     pub content: Vec<Element>,
 93 | }
 94 | 
 95 | /// Argument of a mediawiki template.
 96 | /// Empty name indicate anonymous arguments.
 97 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
 98 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
 99 | pub struct TemplateArgument {
100 |     #[serde(default)]
101 |     pub position: Span,
102 |     pub name: String,
103 |     pub value: Vec<Element>,
104 | }
105 | 
106 | /// A reference to internal data, such as embedded files
107 | /// or other articles.
108 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
109 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
110 | pub struct InternalReference {
111 |     #[serde(default)]
112 |     pub position: Span,
113 |     pub target: Vec<Element>,
114 |     pub options: Vec<Vec<Element>>,
115 |     pub caption: Vec<Element>,
116 | }
117 | 
118 | /// External reference, usually hyperlinks.
119 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
120 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
121 | pub struct ExternalReference {
122 |     #[serde(default)]
123 |     pub position: Span,
124 |     pub target: String,
125 |     pub caption: Vec<Element>,
126 | }
127 | 
128 | /// List item of a certain `ListItemKind`.
129 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
130 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
131 | pub struct ListItem {
132 |     #[serde(default)]
133 |     pub position: Span,
134 |     pub depth: usize,
135 |     pub kind: ListItemKind,
136 |     pub content: Vec<Element>,
137 | }
138 | 
139 | /// List of items. The `ListItemKind` of its children
140 | /// can be heterogenous.
141 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
142 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
143 | pub struct List {
144 |     #[serde(default)]
145 |     pub position: Span,
146 |     pub content: Vec<Element>,
147 | }
148 | 
149 | /// A mediawiki table. `attributes` represent html
150 | /// attributes assigned to the table.
151 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
152 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
153 | pub struct Table {
154 |     #[serde(default)]
155 |     pub position: Span,
156 |     pub attributes: Vec<TagAttribute>,
157 |     pub caption: Vec<Element>,
158 |     pub caption_attributes: Vec<TagAttribute>,
159 |     pub rows: Vec<Element>,
160 | }
161 | 
162 | /// A table row. `attributes` represent html
163 | /// attributes assigned to the table.
164 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
165 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
166 | pub struct TableRow {
167 |     #[serde(default)]
168 |     pub position: Span,
169 |     pub attributes: Vec<TagAttribute>,
170 |     pub cells: Vec<Element>,
171 | }
172 | 
173 | /// A single table cell. `attributes` represent html
174 | /// attributes assigned to the table. `header` is true
175 | /// if this cell is marked as a header cell.
176 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
177 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
178 | pub struct TableCell {
179 |     #[serde(default)]
180 |     pub position: Span,
181 |     pub header: bool,
182 |     pub attributes: Vec<TagAttribute>,
183 |     pub content: Vec<Element>,
184 | }
185 | 
186 | /// Comments in the input document.
187 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
188 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
189 | pub struct Comment {
190 |     #[serde(default)]
191 |     pub position: Span,
192 |     pub text: String,
193 | }
194 | 
195 | /// Html tags not encoding formatting elements.
196 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
197 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
198 | pub struct HtmlTag {
199 |     #[serde(default)]
200 |     pub position: Span,
201 |     pub name: String,
202 |     pub attributes: Vec<TagAttribute>,
203 |     pub content: Vec<Element>,
204 | }
205 | 
206 | /// Gallery of images (or interal references in general).
207 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
208 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
209 | pub struct Gallery {
210 |     #[serde(default)]
211 |     pub position: Span,
212 |     pub attributes: Vec<TagAttribute>,
213 |     pub content: Vec<Element>,
214 | }
215 | 
216 | /// Indicates an erroneous part of the document tree.
217 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
218 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
219 | pub struct Error {
220 |     #[serde(default)]
221 |     pub position: Span,
222 |     pub message: String,
223 | }
224 | 
225 | /// Types of markup a section of text may have.
226 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
227 | #[serde(rename_all = "lowercase")]
228 | pub enum MarkupType {
229 |     NoWiki,
230 |     Bold,
231 |     Italic,
232 |     Math,
233 |     StrikeThrough,
234 |     Underline,
235 |     Code,
236 |     Blockquote,
237 |     Preformatted,
238 | }
239 | 
240 | /// Types of markup a section of text may have.
241 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)]
242 | #[serde(rename_all = "lowercase")]
243 | pub enum ListItemKind {
244 |     Unordered,
245 |     Definition,
246 |     DefinitionTerm,
247 |     Ordered,
248 | }
249 | 
250 | /**
251 |  * Represents a position in the source document.
252 |  *
253 |  * The `PartialEq` implementation allows for a "any" position (all zero), which is
254 |  * equal to any other position. This is used to reduce clutter in tests, where
255 |  * a default Position ("{}") can be used where the actual representation is irrelevant.
256 |  */
257 | #[derive(Debug, Serialize, Deserialize, Clone)]
258 | #[serde(
259 |     rename_all = "lowercase",
260 |     default = "Position::any_position",
261 |     deny_unknown_fields
262 | )]
263 | pub struct Position {
264 |     pub offset: usize,
265 |     pub line: usize,
266 |     pub col: usize,
267 | }
268 | 
269 | /// Holds position information (start and end) for one element
270 | #[derive(Debug, Deserialize, PartialEq, Clone)]
271 | #[cfg_attr(not(feature = "no_position"), derive(Serialize))]
272 | #[serde(rename_all = "lowercase", default = "Span::any", deny_unknown_fields)]
273 | pub struct Span {
274 |     pub start: Position,
275 |     pub end: Position,
276 | }
277 | 
278 | /// Represents a pair of html tag attribute and value.
279 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
280 | #[serde(rename_all = "lowercase", deny_unknown_fields)]
281 | pub struct TagAttribute {
282 |     #[serde(default)]
283 |     pub position: Span,
284 |     pub key: String,
285 |     pub value: String,
286 | }
287 | 
288 | /// Position of a source line of code.
289 | #[derive(Debug, Serialize, Deserialize, PartialEq)]
290 | pub struct SourceLine<'input> {
291 |     pub start: usize,
292 |     pub content: &'input str,
293 |     pub end: usize,
294 | }
295 | 
296 | impl<'input> SourceLine<'input> {
297 |     /// checks if `pos` is at a line start
298 |     pub fn starts_line(pos: usize, slocs: &[SourceLine]) -> bool {
299 |         for sloc in slocs {
300 |             if sloc.start == pos {
301 |                 return true;
302 |             }
303 |         }
304 |         false
305 |     }
306 | }
307 | 
308 | impl MarkupType {
309 |     /// Match an HTML tag name to it's markup type.
310 |     pub fn by_tag_name(tag: &str) -> MarkupType {
311 |         match &tag.to_lowercase()[..] {
312 |             "math" => MarkupType::Math,
313 |             "del" | "s" => MarkupType::StrikeThrough,
314 |             "nowiki" => MarkupType::NoWiki,
315 |             "u" | "ins" => MarkupType::Underline,
316 |             "code" => MarkupType::Code,
317 |             "blockquote" => MarkupType::Blockquote,
318 |             "pre" => MarkupType::Preformatted,
319 |             _ => panic!("markup type lookup not implemented for {}!", tag),
320 |         }
321 |     }
322 | }
323 | 
324 | impl Element {
325 |     /// returns the source code position of an element.
326 |     pub fn get_position(&self) -> &Span {
327 |         match *self {
328 |             Element::Document(ref e) => &e.position,
329 |             Element::Heading(ref e) => &e.position,
330 |             Element::Text(ref e) => &e.position,
331 |             Element::Formatted(ref e) => &e.position,
332 |             Element::Paragraph(ref e) => &e.position,
333 |             Element::Template(ref e) => &e.position,
334 |             Element::TemplateArgument(ref e) => &e.position,
335 |             Element::InternalReference(ref e) => &e.position,
336 |             Element::ExternalReference(ref e) => &e.position,
337 |             Element::List(ref e) => &e.position,
338 |             Element::ListItem(ref e) => &e.position,
339 |             Element::Table(ref e) => &e.position,
340 |             Element::TableRow(ref e) => &e.position,
341 |             Element::TableCell(ref e) => &e.position,
342 |             Element::Comment(ref e) => &e.position,
343 |             Element::HtmlTag(ref e) => &e.position,
344 |             Element::Gallery(ref e) => &e.position,
345 |             Element::Error(ref e) => &e.position,
346 |         }
347 |     }
348 | 
349 |     /// returns a mutable reference the source code position of an element.
350 |     pub fn get_position_mut(&mut self) -> &mut Span {
351 |         match *self {
352 |             Element::Document(ref mut e) => &mut e.position,
353 |             Element::Heading(ref mut e) => &mut e.position,
354 |             Element::Text(ref mut e) => &mut e.position,
355 |             Element::Formatted(ref mut e) => &mut e.position,
356 |             Element::Paragraph(ref mut e) => &mut e.position,
357 |             Element::Template(ref mut e) => &mut e.position,
358 |             Element::TemplateArgument(ref mut e) => &mut e.position,
359 |             Element::InternalReference(ref mut e) => &mut e.position,
360 |             Element::ExternalReference(ref mut e) => &mut e.position,
361 |             Element::List(ref mut e) => &mut e.position,
362 |             Element::ListItem(ref mut e) => &mut e.position,
363 |             Element::Table(ref mut e) => &mut e.position,
364 |             Element::TableRow(ref mut e) => &mut e.position,
365 |             Element::TableCell(ref mut e) => &mut e.position,
366 |             Element::Comment(ref mut e) => &mut e.position,
367 |             Element::HtmlTag(ref mut e) => &mut e.position,
368 |             Element::Gallery(ref mut e) => &mut e.position,
369 |             Element::Error(ref mut e) => &mut e.position,
370 |         }
371 |     }
372 | 
373 |     /// returns the variant name of an element.
374 |     pub fn get_variant_name(&self) -> &str {
375 |         match *self {
376 |             Element::Document(_) => "Document",
377 |             Element::Heading(_) => "Heading",
378 |             Element::Text(_) => "Text",
379 |             Element::Formatted(_) => "Formatted",
380 |             Element::Paragraph(_) => "Paragraph",
381 |             Element::Template(_) => "Template",
382 |             Element::TemplateArgument(_) => "TemplateArgument",
383 |             Element::InternalReference(_) => "InternalReference",
384 |             Element::ExternalReference(_) => "ExternalReference",
385 |             Element::List(_) => "List",
386 |             Element::ListItem(_) => "ListItem",
387 |             Element::Table(_) => "Table",
388 |             Element::TableRow(_) => "TableRow",
389 |             Element::TableCell(_) => "TableCell",
390 |             Element::Comment(_) => "Comment",
391 |             Element::HtmlTag(_) => "HtmlTag",
392 |             Element::Gallery(_) => "Gallery",
393 |             Element::Error(_) => "Error",
394 |         }
395 |     }
396 | }
397 | 
398 | impl Position {
399 |     pub fn new(offset: usize, slocs: &[SourceLine]) -> Self {
400 |         for (i, sloc) in slocs.iter().enumerate() {
401 |             if offset >= sloc.start && offset < sloc.end {
402 |                 return Position {
403 |                     offset,
404 |                     line: i + 1,
405 |                     col: sloc.content[0..offset - sloc.start].chars().count() + 1,
406 |                 };
407 |             }
408 |         }
409 |         Position {
410 |             offset,
411 |             line: slocs.len() + 1,
412 |             col: 0,
413 |         }
414 |     }
415 | 
416 |     pub fn any_position() -> Self {
417 |         Position {
418 |             offset: 0,
419 |             line: 0,
420 |             col: 0,
421 |         }
422 |     }
423 | }
424 | 
425 | impl Span {
426 |     pub fn any() -> Self {
427 |         Span {
428 |             start: Position::any_position(),
429 |             end: Position::any_position(),
430 |         }
431 |     }
432 | 
433 |     pub fn new(posl: usize, posr: usize, source_lines: &[SourceLine]) -> Self {
434 |         Span {
435 |             start: Position::new(posl, source_lines),
436 |             end: Position::new(posr, source_lines),
437 |         }
438 |     }
439 | }
440 | 
441 | impl Default for Span {
442 |     fn default() -> Self {
443 |         Self::any()
444 |     }
445 | }
446 | 
/// With the `no_position` feature enabled, a `Span` is serialized as an
/// empty map: its start and end positions are omitted from the output.
#[cfg(feature = "no_position")]
impl Serialize for Span {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The map is always empty, so state its exact length: some serializers
        // only support maps whose length is known up front.
        serializer.serialize_map(Some(0))?.end()
    }
}
457 | 
458 | impl PartialEq for Position {
459 |     fn eq(&self, other: &Position) -> bool {
460 |         // comparing with "any" position is always true
461 |         if (other.offset == 0 && other.line == 0 && other.col == 0)
462 |             || (self.offset == 0 && self.line == 0 && self.col == 0)
463 |         {
464 |             return true;
465 |         }
466 | 
467 |         self.offset == other.offset && self.line == other.line && self.col == other.col
468 |     }
469 | }
470 | 
471 | impl TagAttribute {
472 |     pub fn new(position: Span, key: String, value: String) -> Self {
473 |         TagAttribute {
474 |             position,
475 |             key,
476 |             value,
477 |         }
478 |     }
479 | }
480 | 


--------------------------------------------------------------------------------
/src/default_transformations.rs:
--------------------------------------------------------------------------------
  1 | use crate::ast::*;
  2 | use crate::error::TransformationError;
  3 | use crate::transformations::*;
  4 | use crate::util;
  5 | use std::usize;
  6 | 
  7 | /// Settings for general transformations.
  8 | pub struct GeneralSettings {}
  9 | 
 10 | /// Moves flat headings into a hierarchical structure based on their depth.
 11 | pub fn fold_headings_transformation(mut root: Element, settings: &GeneralSettings) -> TResult {
 12 |     // append following deeper headings than current_depth in content to the result list.
 13 |     fn move_deeper_headings<'a>(
 14 |         trans: &TFuncInplace<&'a GeneralSettings>,
 15 |         root_content: &mut Vec<Element>,
 16 |         settings: &'a GeneralSettings,
 17 |     ) -> TListResult {
 18 |         let mut result = vec![];
 19 |         let mut current_heading_index = 0;
 20 | 
 21 |         // current maximum depth level, every deeper heading will be moved
 22 |         let mut current_depth = usize::MAX;
 23 | 
 24 |         for child in root_content.drain(..) {
 25 |             if let Element::Heading(cur_heading) = child {
 26 |                 if cur_heading.depth > current_depth {
 27 |                     let last = result.get_mut(current_heading_index);
 28 |                     if let Some(&mut Element::Heading(ref mut e)) = last {
 29 |                         e.content.push(Element::Heading(cur_heading));
 30 |                     }
 31 |                 } else {
 32 |                     // pick a new reference heading if the new one
 33 |                     // is equally deep or more shallow
 34 |                     current_heading_index = result.len();
 35 |                     current_depth = cur_heading.depth;
 36 |                     result.push(Element::Heading(cur_heading));
 37 |                 }
 38 |             } else {
 39 |                 if current_depth < usize::MAX {
 40 |                     return Err(TransformationError {
 41 |                         cause: "a non-heading element was found after a heading. \
 42 |                                 This should not happen."
 43 |                             .to_string(),
 44 |                         position: child.get_position().clone(),
 45 |                         transformation_name: String::from("fold_headings_transformation"),
 46 |                         tree: child.clone(),
 47 |                     });
 48 |                 }
 49 |                 result.push(child);
 50 |             }
 51 |         }
 52 | 
 53 |         // recurse transformation
 54 |         result = apply_func_drain(trans, &mut result, settings)?;
 55 |         Ok(result)
 56 |     };
 57 |     root = recurse_inplace_template(
 58 |         &fold_headings_transformation,
 59 |         root,
 60 |         settings,
 61 |         &move_deeper_headings,
 62 |     )?;
 63 |     Ok(root)
 64 | }
 65 | 
 66 | /// Moves list items of higher depth into separate sub-lists.
 67 | /// If a list is started with a deeper item than one, this transformation still applies,
 68 | /// although this should later be a linter error.
 69 | pub fn fold_lists_transformation(mut root: Element, settings: &GeneralSettings) -> TResult {
 70 |     // move list items which are deeper than the current level into new sub-lists.
 71 |     fn move_deeper_items<'a>(
 72 |         trans: &TFuncInplace<&'a GeneralSettings>,
 73 |         root_content: &mut Vec<Element>,
 74 |         settings: &'a GeneralSettings,
 75 |     ) -> TListResult {
 76 |         // the currently least deep list item, every deeper
 77 |         // list item will be moved to a new sublist
 78 |         let mut lowest_depth = usize::MAX;
 79 |         for child in &root_content[..] {
 80 |             if let Element::ListItem(ref e) = *child {
 81 |                 if e.depth < lowest_depth {
 82 |                     lowest_depth = e.depth;
 83 |                 }
 84 |             } else {
 85 |                 return Err(TransformationError {
 86 |                     cause: String::from("A list should not contain non-listitems."),
 87 |                     transformation_name: String::from("fold_lists_transformation"),
 88 |                     position: child.get_position().clone(),
 89 |                     tree: child.clone(),
 90 |                 });
 91 |             }
 92 |         }
 93 | 
 94 |         let mut result = vec![];
 95 |         // create a new sublist when encountering a lower item
 96 |         let mut create_sublist = true;
 97 | 
 98 |         for child in root_content.drain(..) {
 99 |             if let Element::ListItem(cur_item) = child {
100 |                 if cur_item.depth > lowest_depth {
101 |                     // this error is returned if the sublist to append to was not found
102 |                     let build_found_error = |origin: &ListItem| TransformationError {
103 |                         cause: "sublist was not instantiated properly.".into(),
104 |                         transformation_name: "fold_lists_transformation".into(),
105 |                         position: origin.position.clone(),
106 |                         tree: Element::ListItem(origin.clone()),
107 |                     };
108 | 
109 |                     if create_sublist {
110 |                         // create a new sublist
111 |                         create_sublist = false;
112 | 
113 |                         if result.is_empty() {
114 |                             result.push(Element::ListItem(ListItem {
115 |                                 position: cur_item.position.clone(),
116 |                                 depth: lowest_depth,
117 |                                 kind: cur_item.kind,
118 |                                 content: vec![],
119 |                             }));
120 |                         }
121 |                         if let Some(&mut Element::ListItem(ref mut last)) = result.last_mut() {
122 |                             last.content.push(Element::List(List {
123 |                                 position: cur_item.position.clone(),
124 |                                 content: vec![],
125 |                             }));
126 |                         } else {
127 |                             return Err(build_found_error(&cur_item));
128 |                         }
129 |                     }
130 | 
131 |                     if let Some(&mut Element::ListItem(ref mut item)) = result.last_mut() {
132 |                         if let Some(&mut Element::List(ref mut l)) = item.content.last_mut() {
133 |                             l.content.push(Element::ListItem(cur_item));
134 |                         } else {
135 |                             return Err(build_found_error(&cur_item));
136 |                         }
137 |                     } else {
138 |                         return Err(build_found_error(&cur_item));
139 |                     }
140 |                 } else {
141 |                     result.push(Element::ListItem(cur_item));
142 |                     create_sublist = true;
143 |                 }
144 |             } else {
145 |                 result.push(child);
146 |             };
147 |         }
148 |         result = apply_func_drain(trans, &mut result, settings)?;
149 |         Ok(result)
150 |     };
151 | 
152 |     if let Element::List { .. } = root {
153 |         root = recurse_inplace_template(
154 |             &fold_lists_transformation,
155 |             root,
156 |             settings,
157 |             &move_deeper_items,
158 |         )?;
159 |     } else {
160 |         root = recurse_inplace(&fold_lists_transformation, root, settings)?;
161 |     };
162 |     Ok(root)
163 | }
164 | 
165 | /// Transform whitespace-only paragraphs to empty paragraphs.
166 | pub fn whitespace_paragraphs_to_empty(mut root: Element, settings: &GeneralSettings) -> TResult {
167 |     if let Element::Paragraph(ref mut par) = root {
168 |         let mut is_only_whitespace = true;
169 |         for child in &par.content[..] {
170 |             if let Element::Text(ref text) = *child {
171 |                 if !util::is_whitespace(&text.text) {
172 |                     is_only_whitespace = false;
173 |                     break;
174 |                 }
175 |             } else {
176 |                 is_only_whitespace = false;
177 |                 break;
178 |             }
179 |         }
180 |         if is_only_whitespace {
181 |             par.content.drain(..);
182 |         }
183 |     };
184 |     root = recurse_inplace(&whitespace_paragraphs_to_empty, root, settings)?;
185 |     Ok(root)
186 | }
187 | 
188 | /// Reduce consecutive paragraphs and absorb trailing text into one,
189 | /// if not separated by a blank paragraph.
190 | pub fn collapse_paragraphs(
191 |     mut root: Element,
192 |     settings: &GeneralSettings,
193 | ) -> Result<Element, TransformationError> {
194 |     fn squash_empty_paragraphs<'a>(
195 |         trans: &TFuncInplace<&'a GeneralSettings>,
196 |         root_content: &mut Vec<Element>,
197 |         settings: &'a GeneralSettings,
198 |     ) -> TListResult {
199 |         let mut result = vec![];
200 |         let mut last_empty = false;
201 | 
202 |         for mut child in root_content.drain(..) {
203 |             if let Element::Paragraph(ref mut par) = child {
204 |                 if par.content.is_empty() {
205 |                     last_empty = true;
206 |                     continue;
207 |                 }
208 | 
209 |                 // if the last paragraph was not empty, append to it.
210 |                 if !last_empty {
211 |                     if let Some(&mut Element::Paragraph(ref mut last)) = result.last_mut() {
212 |                         // Add a space on line break
213 |                         last.content.push(Element::Text(Text {
214 |                             text: " ".into(),
215 |                             position: last.position.clone(),
216 |                         }));
217 |                         last.content.append(&mut par.content);
218 |                         last.position.end = par.position.end.clone();
219 |                         continue;
220 |                     }
221 |                 }
222 |             };
223 | 
224 |             result.push(child);
225 |             last_empty = false;
226 |         }
227 |         result = apply_func_drain(trans, &mut result, settings)?;
228 |         Ok(result)
229 |     }
230 |     root = recurse_inplace_template(
231 |         &collapse_paragraphs,
232 |         root,
233 |         settings,
234 |         &squash_empty_paragraphs,
235 |     )?;
236 |     Ok(root)
237 | }
238 | 
239 | /// Collapse consecutive text tags into one, removing duplicate whitespace.
240 | pub fn collapse_consecutive_text(
241 |     mut root: Element,
242 |     settings: &GeneralSettings,
243 | ) -> Result<Element, TransformationError> {
244 |     fn squash_text<'a>(
245 |         trans: &TFuncInplace<&'a GeneralSettings>,
246 |         root_content: &mut Vec<Element>,
247 |         settings: &'a GeneralSettings,
248 |     ) -> TListResult {
249 |         let mut result = vec![];
250 | 
251 |         for mut child in root_content.drain(..) {
252 |             if let Element::Text(ref mut text) = child {
253 |                 if let Some(&mut Element::Text(ref mut last)) = result.last_mut() {
254 |                     if util::is_whitespace(&text.text) {
255 |                         last.text.push(' ');
256 |                     } else {
257 |                         last.text.push_str(&text.text);
258 |                     }
259 |                     last.position.end = text.position.end.clone();
260 |                     continue;
261 |                 }
262 |             };
263 |             result.push(child);
264 |         }
265 |         result = apply_func_drain(trans, &mut result, settings)?;
266 |         Ok(result)
267 |     }
268 |     root = recurse_inplace_template(&collapse_consecutive_text, root, settings, &squash_text)?;
269 |     Ok(root)
270 | }
271 | 
272 | /// Enumerate anonymous template arguments as "1", "2", ...
273 | pub fn enumerate_anon_args(mut root: Element, settings: &GeneralSettings) -> TResult {
274 |     if let Element::Template(ref mut template) = root {
275 |         let mut counter = 1;
276 |         for child in &mut template.content {
277 |             if let Element::TemplateArgument(ref mut arg) = *child {
278 |                 if arg.name.trim().is_empty() {
279 |                     arg.name.clear();
280 |                     arg.name.push_str(&counter.to_string());
281 |                     counter += 1;
282 |                 }
283 |             }
284 |         }
285 |     };
286 |     recurse_inplace(&enumerate_anon_args, root, settings)
287 | }
288 | 
// URI prefixes accepted as targets of external references.
// taken from https://github.com/portstrom/parse_wiki_text/blob/master/src/default.rs
const PROTOCOLS: [&'static str; 28] = [
    "//",
    "bitcoin:",
    "ftp://",
    "ftps://",
    "geo:",
    "git://",
    "gopher://",
    "http://",
    "https://",
    "irc://",
    "ircs://",
    "magnet:",
    "mailto:",
    "mms://",
    "news:",
    "nntp://",
    "redis://",
    "sftp://",
    "sip:",
    "sips:",
    "sms:",
    "ssh://",
    "svn://",
    "tel:",
    "telnet://",
    "urn:",
    "worldwind://",
    "xmpp:",
];
320 | 
321 | /// only keep external references with actual urls
322 | pub fn validate_external_refs(mut root: Element, settings: &GeneralSettings) -> TResult {
323 |     fn validate_erefs_vec<'a>(
324 |         trans: &TFuncInplace<&'a GeneralSettings>,
325 |         root_content: &mut Vec<Element>,
326 |         settings: &'a GeneralSettings,
327 |     ) -> TListResult {
328 |         let mut result = vec![];
329 | 
330 |         for mut child in root_content.drain(..) {
331 |             if let Element::ExternalReference(ref mut eref) = child {
332 |                 let is_uri = PROTOCOLS.iter().any(|p| eref.target.trim().starts_with(p));
333 |                 if is_uri {
334 |                     eref.target = eref.target.trim().to_string();
335 |                     result.push(child);
336 |                 } else {
337 |                     result.push(Element::Text(Text {
338 |                         position: Span {
339 |                             start: eref.position.start.clone(),
340 |                             end: eref
341 |                                 .caption
342 |                                 .iter()
343 |                                 .next()
344 |                                 .map(|c| c.get_position().start.clone())
345 |                                 .unwrap_or(eref.position.end.clone()),
346 |                         },
347 |                         text: format!("[{}", eref.target),
348 |                     }));
349 |                     result.append(&mut eref.caption);
350 |                     result.push(Element::Text(Text {
351 |                         position: Span {
352 |                             start: {
353 |                                 let mut s = eref.position.end.clone();
354 |                                 s.col -= 1;
355 |                                 s.offset -= 1;
356 |                                 s
357 |                             },
358 |                             end: eref.position.end.clone(),
359 |                         },
360 |                         text: "]".to_string(),
361 |                     }));
362 |                 }
363 |             } else {
364 |                 result.push(child);
365 |             }
366 |         }
367 |         result = apply_func_drain(trans, &mut result, settings)?;
368 |         Ok(result)
369 |     }
370 |     root = recurse_inplace_template(&validate_external_refs, root, settings, &validate_erefs_vec)?;
371 |     Ok(root)
372 | }
373 | 


--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
  1 | //! Error structures
  2 | 
  3 | use crate::ast::{Element, Position, Span};
  4 | use crate::grammar;
  5 | use crate::util::{get_source_lines, is_whitespace, shorten_str};
  6 | use colored::*;
  7 | use serde_derive::{Deserialize, Serialize};
  8 | use std::error;
  9 | use std::fmt;
 10 | 
/// The number of lines to display as error context.
///
/// `ParseError::from` keeps up to this many source lines before and after
/// the erroneous line.
const ERROR_CONTEXT_LINES: usize = 5;
 13 | 
/// Generic error type for high-level errors of this library.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase", deny_unknown_fields)]
pub enum MWError {
    /// The input could not be parsed by the grammar.
    ParseError(ParseError),
    /// A syntax tree transformation failed.
    TransformationError(TransformationError),
}
 21 | 
/// The parser error with source code context.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase", deny_unknown_fields)]
pub struct ParseError {
    /// Position of the error, built from the offset reported by the grammar.
    pub position: Position,
    /// The tokens the parser expected at the error position.
    pub expected: Vec<String>,
    /// The source lines shown around the error (see `context_start` / `context_end`).
    pub context: Vec<String>,
    /// Zero-based index of the first source line contained in `context`.
    pub context_start: usize,
    /// Zero-based index of the last source line contained in `context` (inclusive).
    pub context_end: usize,
}
 32 | 
/// Error structure for syntax tree transformations.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase", deny_unknown_fields)]
pub struct TransformationError {
    /// Human-readable reason for the failure; also returned by `description()`.
    pub cause: String,
    /// Source span printed in the error message.
    pub position: Span,
    /// Name of the transformation that failed, as printed in the error message.
    pub transformation_name: String,
    /// NOTE(review): presumably the element the transformation failed on —
    /// confirm against the code constructing this error.
    pub tree: Element,
}
 42 | 
 43 | impl ParseError {
 44 |     pub fn from(err: &grammar::ParseError, input: &str) -> Self {
 45 |         let source_lines = get_source_lines(input);
 46 |         let line_count = source_lines.len();
 47 | 
 48 |         let line = if err.line <= line_count {
 49 |             err.line
 50 |         } else {
 51 |             source_lines.len()
 52 |         } - 1;
 53 | 
 54 |         let start = if line < ERROR_CONTEXT_LINES {
 55 |             0
 56 |         } else {
 57 |             line - ERROR_CONTEXT_LINES
 58 |         };
 59 | 
 60 |         let end = if line + ERROR_CONTEXT_LINES >= line_count {
 61 |             line_count - 1
 62 |         } else {
 63 |             line + ERROR_CONTEXT_LINES
 64 |         };
 65 | 
 66 |         let mut token_str = vec![];
 67 |         for token in &err.expected {
 68 |             token_str.push(String::from(*token));
 69 |         }
 70 | 
 71 |         let mut context = vec![];
 72 |         for sloc in source_lines[start..=end].iter() {
 73 |             context.push(String::from(sloc.content));
 74 |         }
 75 | 
 76 |         ParseError {
 77 |             position: Position::new(err.offset, &source_lines),
 78 |             context,
 79 |             expected: token_str,
 80 |             context_start: start,
 81 |             context_end: end,
 82 |         }
 83 |     }
 84 | }
 85 | 
impl error::Error for ParseError {
    /// A fixed, generic description; the details are available through the
    /// `Display` implementation.
    fn description(&self) -> &str {
        "Could not continue to parse, because no rules could be matched."
    }
}
 91 | 
 92 | impl fmt::Display for ParseError {
 93 |     fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
 94 |         let error_message = format!(
 95 |             "ERROR in line {} at column {}: Could not continue to parse, expected one of: ",
 96 |             self.position.line, self.position.col
 97 |         )
 98 |         .red()
 99 |         .bold();
100 | 
101 |         let mut token_str = vec![];
102 |         for token in &self.expected {
103 |             if is_whitespace(token) {
104 |                 token_str.push(format!("{:?}", token));
105 |             } else {
106 |                 token_str.push(token.to_string());
107 |             }
108 |         }
109 | 
110 |         write!(f, "{}", error_message)?;
111 |         writeln!(f, "{}", token_str.join(", ").blue().bold())?;
112 | 
113 |         for (i, content) in self.context.iter().enumerate() {
114 |             let lineno = format!("{} |", self.context_start + i + 1);
115 |             let lineno_col;
116 | 
117 |             let formatted_content;
118 |             // the erroneous line
119 |             if self.context_start + i + 1 == self.position.line {
120 |                 formatted_content = content.red();
121 |                 lineno_col = lineno.red().bold();
122 |             } else {
123 |                 formatted_content = shorten_str(content).normal();
124 |                 lineno_col = lineno.blue().bold()
125 |             }
126 | 
127 |             writeln!(f, "{} {}", lineno_col, formatted_content)?;
128 |         }
129 | 
130 |         Ok(())
131 |     }
132 | }
133 | 
impl error::Error for TransformationError {
    /// The description is the `cause` string stored in the error.
    fn description(&self) -> &str {
        &self.cause
    }
}
139 | 
140 | impl fmt::Display for TransformationError {
141 |     fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
142 |         let message = format!(
143 |             "ERROR applying transformation \"{}\" to Elemtn at {}:{} to {}:{}: {}",
144 |             self.transformation_name,
145 |             self.position.start.line,
146 |             self.position.start.col,
147 |             self.position.end.line,
148 |             self.position.end.col,
149 |             self.cause
150 |         );
151 |         writeln!(f, "{}", message.red().bold())
152 |     }
153 | }
154 | 
155 | impl error::Error for MWError {
156 |     fn description(&self) -> &str {
157 |         match *self {
158 |             MWError::ParseError(ref e) => e.description(),
159 |             MWError::TransformationError(ref e) => e.description(),
160 |         }
161 |     }
162 | }
163 | 
164 | impl fmt::Display for MWError {
165 |     fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
166 |         match *self {
167 |             MWError::ParseError(ref e) => write!(f, "{}", e),
168 |             MWError::TransformationError(ref e) => write!(f, "{}", e),
169 |         }
170 |     }
171 | }
172 | 


--------------------------------------------------------------------------------
/src/grammar.rs:
--------------------------------------------------------------------------------
1 | include!(concat!(env!("OUT_DIR"), "/grammar.rs"));
2 | 


--------------------------------------------------------------------------------
/src/grammar.rustpeg:
--------------------------------------------------------------------------------
  1 | use crate::ast::*;
  2 | use crate::util::combine;
  3 | 
  4 | #![arguments(source_lines: &[SourceLine])]
  5 | 
// The main document entry point.
// A document consists of any number of top-level paragraphs followed by any
// number of headings and must reach the end of the input. Paragraphs and
// headings are concatenated (in this order) to form the document content.
pub document -> Element 
    = posl:#position top:paragraph* content:heading* EOF posr:#position 
{
    let mut res = top;
    let mut content = content;
    res.append(&mut content);

    Element::Document(Document {
        position: Span::new(posl, posr, source_lines),
        content: res, 
    })
}
 19 | 
// Formatted text within a heading caption; plain text is built from `heading_char`.
head_fmt -> Element
    = FormattedTextTemplate<Text<heading_char>>

// A heading is a caption paragraph followed by content paragraphs.
// The number of leading '=' characters becomes the heading depth. The '='
// characters after the caption are consumed, but their count is not compared
// to the depth.
heading -> Element 
    = posl:#position d:$('='+) _ cap:head_fmt* _ '='* _ (nl / EOF) pars:paragraph* posr:#position
{
    Element::Heading(Heading {
        position: Span::new(posl, posr, source_lines),
        depth: d.len(),
        caption: cap,
        content: pars,
    })
}
 34 | 
// A paragraph is a block element: some or no text followed by a newline (or EOF).
// The fmt_rule parameter is only applied to plain top-level text. All nested formatting
// uses the standard formatted rule. This keeps formatted text or html tags from being
// ripped apart.
// The alternatives are tried in order: lists, tables and galleries first, then a
// template, internal reference or html comment standing alone on the rest of the
// line, and finally a generic paragraph (which must pass PAR_START_GUARD).
ParagraphTemplate<fmt_rule> 
    = list
    / table
    / gallery
    / (t:template _ (nl / EOF) {t})
    / (i:internal_ref _ (nl / EOF) {i})
    / (c:html_comment _ (nl / EOF) {c})
    / posl:#position PAR_START_GUARD text:fmt_rule* _ (nl / EOF) posr:#position 
{   
    Element::Paragraph(Paragraph {
        position: Span::new(posl, posr, source_lines),
        content: text,
    })
} 

// The standard paragraph, using the standard `formatted` rule for its text.
paragraph -> Element
    = ParagraphTemplate<formatted>
 57 | 
 58 | 
// === Template parsing ===

// Formatted text / paragraphs inside templates: plain text is built from
// `template_char`, which excludes the pipe character.
template_fmt -> Element
    = FormattedTextTemplate<Text<template_char>>
template_par -> Element
    = ParagraphTemplate<template_fmt>

// mediawiki templates have a name followed by a sequence of arguments.
// Arguments are introduced by '|'. Input starting with one of the MAGIC_WORDS
// (table syntax, including its "{{...}}" spellings) is never parsed as a template.
template -> Element 
    = posl:#position !(MAGIC_WORDS) "{{" ws n:(template_fmt)* ws 
      attrs:('|' t:template_arg {t})* "}}" posr:#position
{    
    Element::Template(Template {
        position: Span::new(posl, posr, source_lines),
        name: n,
        content: attrs
    })
}

// A template argument with an optional "name =" prefix. Arguments without a
// name get the default (empty) name. The value is made of headings, then
// paragraphs, then formatted text, merged by `combine`
// (NOTE(review): presumably a concatenation in this order — see util::combine).
template_arg -> Element
    = posl:#position ws name:(n:template_arg_name ws '=' {n})? ws
      value:(h:heading* p:template_par* f:template_fmt* {(h, (p, f))}) posr:#position 
{
    Element::TemplateArgument(TemplateArgument {
        position: Span::new(posl, posr, source_lines),
        name: name.unwrap_or_default(),
        value: combine((value.0, combine(value.1)))
    })
} 
 87 | 
 88 | 
// === mediawiki lists ===

// A list is a sequence of at least one list item, separated by newlines
// (or EOF), optionally followed by one more newline.
list -> Element
    = posl:#position items:(li:list_item ++ (nl / EOF) {li}) nl? posr:#position
{
    Element::List(List {
        position: Span::new(posl, posr, source_lines),
        content: items,
    })
}

// A list item starts with a run of list prefix characters ('*', '#', ':', ';').
// The last prefix character determines the item kind, the length of the
// prefix is stored as the item depth.
list_item -> Element
    = posl:#position s:$([*#:;]+) _ content:formatted* _ posr:#position
{
    let kind = match s.chars().last() {
        Some('*') => ListItemKind::Unordered,
        Some('#') => ListItemKind::Ordered,
        Some(':') => ListItemKind::Definition,
        Some(';') => ListItemKind::DefinitionTerm,
        // cannot be reached as long as the character class above and the
        // match arms agree
        _ => panic!("undefined list start: {:?} \
                    this is an implementation error!", s.chars().last())
    };
    Element::ListItem(ListItem {
        position: Span::new(posl, posr, source_lines),
        depth: s.len(),
        kind,
        content,
    })
}
117 | 
118 | 
// === mediawiki tables === 

// Table tokens. Each token has a plain spelling and a "{{...}}" spelling.
table_start = "{|" / "{{(!}}"
table_end = "|}" / "{{!)}}"
table_caption_sep = "|+" / "{{!+}}"
table_row_sep = "|-" / "{{!-}}"
table_pipe = '|' / "{{!}}"
// Cell separators; the matched text is returned so that `table_cell` can
// tell header cells from normal cells.
cell_sep -> &'input str
    = $("||") / $("!!") / $('|') / $('!') / $("{{!}}") / $("{{!!}}")

// A table: optional attributes, optional caption, cells and rows.
// Cells that appear before the first row separator are wrapped into an
// implicit first row without attributes.
table -> Element
    = posl:#position table_start attr:table_attrs? ws caption:table_caption? 
        first_cells:table_cell* rows:table_row* table_end posr:#position 
{ 
    let (cap_attrs, cap_pars) = caption.unwrap_or_default();
    let mut rows = rows;
    if first_cells.len() > 0 {
        // NOTE(review): the implicit row is created with Span::new(0, 0, ..),
        // not with the source location of its cells.
        rows.insert(0, Element::TableRow(TableRow {
            position: Span::new(0, 0, source_lines),
            cells: first_cells,
            attributes: vec![],
        }));
    }

    Element::Table(Table {
        position: Span::new(posl, posr, source_lines),
        rows,
        attributes: attr.unwrap_or_default(),
        caption: cap_pars,
        caption_attributes: cap_attrs,
    })
}

// Whitespace-separated html attributes (possibly none).
table_attrs -> Vec<TagAttribute>
    = _ attr:(html_attr ** (whitespace+)) _ {attr}
// Formatted text / paragraphs inside tables; they stop at cell separators.
table_fmt -> Element
    = !(cell_sep) FormattedTextTemplate<Text<table_char>>
table_par -> Element
    = ParagraphTemplate<table_fmt>

// The table caption: optional attributes terminated by a pipe, followed by
// the caption content. Returns (attributes, content).
table_caption -> (Vec<TagAttribute>, Vec<Element>) 
    = table_caption_sep _ attr:(t:table_attrs table_pipe {t})? _ 
    pars:(p:table_par* f:table_fmt* {combine((p, f))})
{
    (attr.unwrap_or_default(), pars)
}


// A row separator line; yields the attributes of the row it starts.
row_sep -> Vec<TagAttribute>
    = table_row_sep attr:table_attrs nl {attr}

// A table row: a row separator followed by the cells of the row.
table_row -> Element
    = posl:#position !(table_end) sep:row_sep c:table_cell* posr:#position 
{    
    Element::TableRow(TableRow {
        position: Span::new(posl, posr, source_lines),
        cells: c,
        attributes: sep,
    })
}

// A table cell: separator, optional attributes terminated by a single pipe,
// then the cell content. A cell is a header cell if its separator starts with '!'.
table_cell -> Element
    = posl:#position !(table_end / row_sep) sep:cell_sep 
      attr:(a:table_attrs table_pipe !(table_pipe) {a})?
      _ content:(p:table_par* f:table_fmt* {combine((p, f))}) posr:#position 
{
    Element::TableCell(TableCell {
        position: Span::new(posl, posr, source_lines),
        content,
        attributes: attr.unwrap_or_default(),
        header: sep.starts_with('!'),
    })
}
192 | 
// === References ===

// internal references, may have pipe-separated options
iref_fmt -> Element
    = FormattedTextTemplate<Text<template_char>>
iref_par -> Element
    = ParagraphTemplate<iref_fmt>

// An internal reference "[[target|option|...|caption]]".
// Of the pipe-separated parts following the target, the last one becomes the
// caption and all parts before it become options.
internal_ref -> Element
    = posl:#position "[[" _ tar:iref_fmt* _ "|"? _ t:(pars:iref_par* _ fmts:iref_fmt* {(pars, fmts)}) ++ (_ '|' _) "]]" posr:#position
{    
    let mut t = t;
    let mut t: Vec<Vec<Element>> = t.drain(..).map(combine).collect();
    Element::InternalReference(InternalReference {
        position: Span::new(posl, posr, source_lines),
        target: tar,
        caption: t.pop().unwrap_or_default(),
        options: t, 
    })
}

// external references (hyperlink) with only url and optional caption
// The stored target is the url including the whitespace that follows it.
external_ref -> Element
    = posl:#position '[' u:url ws:_ cap:formatted* ']' posr:#position 
{
    Element::ExternalReference(ExternalReference {
        position: Span::new(posl, posr, source_lines),
        target: format!("{}{}", u, ws),
        caption: cap
    })
}
224 | 
// === Galleries ===

// Gallery entries are separated by one or more newlines with optional
// surrounding whitespace.
gallery_sep = (_ nl _)+

// One gallery entry, represented as an internal reference. The entry is split
// at '|': the first part is the target, the last remaining part (if any) is
// the caption, everything in between are options.
gallery_file -> Element 
    = flp:#position content:(f:iref_fmt+ {f}) ++ '|' frp:#position
{
    let mut content = content;
    Element::InternalReference(InternalReference {
        position: Span::new(flp, frp, source_lines),
        // `++` yields at least one part, so there is always a first element
        target: content.remove(0),
        caption: content.pop().unwrap_or_default(),
        options: content,
    })
}

// A gallery tag with its attributes and the contained gallery entries.
gallery -> Element 
    = posl:#position attr:TagOpen<"gallery"i> 
        ws files:(gallery_file ** gallery_sep) ws 
      TagClose<"gallery"i> posr:#position 
{
    Element::Gallery(Gallery {
        position: Span::new(posl, posr, source_lines),
        attributes: attr.1,
        content: files,
    })
}
252 | 
// === Inline markup ===

// Quote-formatted text cannot start with a single quote, unless it is matched
// by the "included" rule passed as parameter.
QuoteFormattedTemplate<included> = text:((!('\'') t:formatted {t}) / included) {text}

// Quote formatting cannot be nested into itself: inside strong text only
// emphasized text may start with a quote and vice versa.
strong_formatted -> Element
    = QuoteFormattedTemplate<emph>
emph_formatted -> Element
    = QuoteFormattedTemplate<strong>
// Paragraphs inside quote formatting; lists, tables and galleries are excluded.
strong_par -> Element
    = !(list / table / gallery) e:ParagraphTemplate<strong_formatted> {e}
emph_par -> Element
    = !(list / table / gallery) e:ParagraphTemplate<emph_formatted> {e}

// Bold text, enclosed in `strong_lit`.
strong -> Element
    = posl:#position strong_lit
        inner:(strong_par / strong_formatted)+ 
      strong_lit posr:#position 
{
    Element::Formatted(Formatted {
        position: Span::new(posl, posr, source_lines),
        content: inner,
        markup: MarkupType::Bold
    })
}

// Italic text, enclosed in `emph_lit`.
emph -> Element
    = posl:#position emph_lit
        inner:(emph_par / emph_formatted)+ 
      emph_lit posr:#position 
{ 
    Element::Formatted(Formatted {
        position: Span::new(posl, posr, source_lines),
        content: inner,
        markup: MarkupType::Italic
    })
}

// html markup
// Each rule matches one (case-insensitive) tag name via MarkupTag. The raw
// text tags (math, nowiki, code, pre) use their own text rules, the others
// contain paragraphs followed by formatted text.
math -> Element
    = inner:MarkupTag<"math"i, math_text*> {inner}
strike_through -> Element
    = inner:MarkupTag<"del"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
    / inner:MarkupTag<"s"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
underline -> Element
    = inner:MarkupTag<"ins"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
    / inner:MarkupTag<"u"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
nowiki -> Element
    = inner:MarkupTag<"nowiki"i, nowiki_text*> {inner}
code -> Element
    = inner:MarkupTag<"code"i, code_text*> {inner}
blockquote -> Element
    = inner:MarkupTag<"blockquote"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
pre_formatted -> Element
    = inner:MarkupTag<"pre"i, preformatted_text*> {inner}
309 | 
310 | 
// Template for formatted text with a specific rule for plain text.
// The alternatives are tried in the listed order; the context-specific
// plain text rule comes first, the generic html tag and plain whitespace last.
FormattedTextTemplate<text_rule>
    = fmt:(
    text_rule
    / strong
    / emph 
    / template
    / internal_ref
    / external_ref

    / html_comment 
    / math
    / nowiki
    / strike_through
    / underline
    / code
    / blockquote
    / pre_formatted

    / any_tag 
    / whitespace_elem
    ) {fmt}

// Standard text element for most contexts
formatted -> Element
    = f:FormattedTextTemplate<normal_text> {f}
337 | 
338 | 
// === embedded html ===

// A single `key = value` attribute; the value is either quoted text or an
// unquoted tag-safe literal.
html_attr -> TagAttribute
    = posl:#position key:tag_name _ '=' _ value:(quoted_text / tag_safe_literal) posr:#position 
{ 
    TagAttribute::new(Span::new(posl, posr, source_lines), key, value)
}

// The inside of a tag: the name followed by attributes. Yields (name, attributes).
TagInner<name>
    = n:name _ attrs:(a:html_attr _ {a})* {(n, attrs)}
// An opening tag; yields the result of TagInner.
TagOpen<name> 
    = #quiet<'<' _ inner:TagInner<name> _ '>' {inner}> / #expected("opening html tag")
// A closing tag, either for the given name or the nameless form "</>".
TagClose<name> 
    = #quiet<('<' _ '/' _ TagInner<name> _ '>') / '<' _ '/' _ '>'> / #expected("closing html tag")

// a generic html tag (self-closing or with inner elements) 
// Yields (name, attributes, content); self-closing tags have empty content.
HtmlTag<name, inner>
    = (tag:TagOpen<name> i:inner TagClose<name> {(tag.0, tag.1, i)}) 
    / ("<" _ tag:TagInner<name> _ "/" _ ">" {(tag.0, tag.1, vec![])})

// Any opening / closing tag, with an optional tag name; the result is discarded.
any_open 
    = TagOpen<tag_name?> {()}
any_close
    = TagClose<tag_name?> {()}

// matches any valid html tag (except builtins like "gallery") 
// with inner Text / Paragraph / Heading, creating a HtmlTag Element.
any_tag -> Element 
    = posl:#position 
        t:HtmlTag<(!HTML_BLOCK_ELEMENTS n:tag_name {n}), p:paragraph* f:formatted* h:heading* {combine((p, combine((f, h))))}> 
      posr:#position 
{ 
    Element::HtmlTag(HtmlTag {
        position: Span::new(posl, posr, source_lines),
        name: t.0,
        attributes: t.1,
        content: t.2
    })
}

// macro for simple formatting markup tags. Matches markup type by tag name (see ast.rs)
// The attributes of the tag (tag_info.1) are not stored in the result.
MarkupTag<name, inner>
    = posl:#position tag_info:HtmlTag<$(name), inner> posr:#position 
{ 
    Element::Formatted(Formatted {
        position: Span::new(posl, posr, source_lines),
        content: tag_info.2,
        markup: MarkupType::by_tag_name(tag_info.0),
    })
}
389 | 
390 | 
// html comments may contain any text.
html_comment_start = "<!--"
html_comment_end = "-->"

// A comment: everything up to (not including) the first comment end marker
// becomes the comment text; an empty comment yields an empty text.
html_comment -> Element
    = posl:#position html_comment_start 
        s:CharString<(!(html_comment_end) c:$. {c})>? 
    html_comment_end posr:#position 
{ 
    Element::Comment(Comment {
        position: Span::new(posl, posr, source_lines),
        text: s.unwrap_or_default(),
    })
}
405 | 
// === primitive terminals ===

// quote markup for italic (two quotes) and bold (three quotes) text
emph_lit = "''"
strong_lit = "'''"
// only a line feed counts as newline
nl = '\n'
// end of input: succeeds when no further character is available
EOF = #quiet<!.> / #expected("EOF")
412 | 
413 | 
// === text primitives ===

// A Text element made of one or more characters matched by C.
Text<C> 
    = posl:#position s:CharString<C> posr:#position 
{ 
    Element::Text(Text {
        position: Span::new(posl, posr, source_lines),
        text: s
    })
}

// One or more matches of C, collected into a single string.
CharString<C>
    = chars:C+ { chars.iter().map(|s| s.to_owned()).collect() }

// Non-empty text enclosed by ClosingChar on both sides; yields the text
// without the enclosing characters.
EnclosedLiteral<ClosingChar>
    = ClosingChar text:CharString<!(ClosingChar) $.> ClosingChar { text }
430 | 
431 | 
// === various text types ===
// Most rules here are wrapped in #quiet<..> / #expected(..), so that a
// failure is reported with the readable name given in #expected.

normal_text -> Element
    = #quiet<Text<normal_char>> / #expected("normal text")
math_text -> Element
    = #quiet<Text<math_char>> / #expected("LaTeX source code")
template_arg_name -> String
    = #quiet<CharString<template_arg_char>> / #expected("template attribute name")
// raw text of nowiki / code / pre tags: anything up to the matching closing tag
nowiki_text -> Element
    = #quiet<Text<!TagClose<"nowiki"i> $.>> / #expected("any text")
code_text -> Element
    = #quiet<Text<!TagClose<"code"i> $. >> / #expected("any text")
preformatted_text -> Element
    = #quiet<Text<!TagClose<"pre"i> $. >> / #expected ("any text")
url -> String
    = #quiet<CharString<url_char>> / #expected("a word of text (e.g. url)")
tag_safe_literal -> String
    = #quiet<CharString<tag_char>> / #expected("tag attribute value")
// text in double or single quotes, returned without the quotes
quoted_text -> String 
    = #quiet<EnclosedLiteral<'\"'> / EnclosedLiteral<'\''>> / #expected("quoted text")
tag_name -> String 
    = #quiet<CharString<tag_char>> / #expected("tag / attribute name")

// `_` matches spaces and tabs only, `ws` additionally matches line breaks.
// Both may match nothing and return the matched text.
_ -> &'input str = #quiet<w:$([ \t]*) {w}> / #expected("whitespace")
ws -> &'input str = #quiet<w:$([\n\r \t]*) {w}> / #expected("whitespace (including newlines)")
// a run of spaces / tabs as Text element
whitespace_elem -> Element
    = Text<whitespace>
459 | 
// === character classes ===
// These characters are allowed within certain contexts, 
// excluded characters have special meaning and break texts

// any character that does not start a closing math tag
math_char -> &'input str = !TagClose<"math"i> $.
// any character except whitespace, braces, brackets and the start of
// italic / bold markup, of a html tag or of a html comment
normal_char -> &'input str 
    = !([\n\r \t{}\[\]] / emph_lit /
        any_open / any_close / any_tag / html_comment_start) $.

// normal characters, but stopping before a run of '=' that ends the line
heading_char -> &'input str 
    = !('='+ _ (nl / EOF)) c:normal_char {c}
// normal characters, but stopping before a cell separator
table_char -> &'input str 
    = !(cell_sep) c:normal_char {c}
// normal characters except the pipe
template_char -> &'input str 
    = !'|' c:normal_char {c}
// normal characters except those listed in the negative lookahead
template_arg_char -> &'input str 
    = ![|<>=!*#:;/] c:normal_char {c} 
// a single space or tab
whitespace -> &'input str
    = $(' ') / $('\t')
// anything but '<', '>', '/', space and '='
tag_char -> &'input str
    = $([^<>/ =])
// anything but space and ']'
url_char -> &'input str
    = $([^ \]])

// a paragraph may not start with these symbols as they indicate other elements
// (it may not start at the end of input either)
PAR_START_GUARD = !([=!|;#:*] / EOF)

// tags which should be parsed as block elements, rather than html tags.
HTML_BLOCK_ELEMENTS = ("gallery"i)

// magic words which cannot be interpreted as templates
MAGIC_WORDS = table_start / table_end / table_caption_sep / 
            table_row_sep / table_pipe / cell_sep
493 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | mod ast;
 2 | mod error;
 3 | #[allow(
 4 |     clippy::unused_unit,
 5 |     clippy::unit_arg,
 6 |     clippy::cyclomatic_complexity,
 7 |     clippy::len_zero,
 8 |     clippy::single_match,
 9 |     clippy::naive_bytecount,
10 |     clippy::suspicious_else_formatting
11 | )]
12 | mod grammar;
13 | #[cfg(test)]
14 | mod tests;
15 | mod traversion;
16 | mod util;
17 | 
18 | // public exports
19 | pub use self::ast::*;
20 | pub use self::error::*;
21 | pub use self::traversion::Traversion;
22 | 
23 | pub mod transformations;
24 | 
25 | mod default_transformations;
26 | use self::default_transformations::*;
27 | 
28 | /// Parse the input document to generate a document tree.
29 | /// After parsing, some transformations are applied to the result.
30 | pub fn parse(input: &str) -> Result<Element, MWError> {
31 |     let source_lines = util::get_source_lines(input);
32 | 
33 |     #[cfg(feature = "ptime")]
34 |     let starttime = time::precise_time_ns();
35 | 
36 |     let result = match grammar::document(input, &source_lines) {
37 |         Err(e) => Err(error::MWError::ParseError(error::ParseError::from(
38 |             &e, input,
39 |         ))),
40 |         Ok(r) => Ok(r),
41 |     }?;
42 | 
43 |     #[cfg(feature = "ptime")]
44 |     let parsedtime = time::precise_time_ns();
45 | 
46 |     let settings = GeneralSettings {};
47 |     let trans_result = apply_transformations(result, &settings);
48 | 
49 |     #[cfg(feature = "ptime")]
50 |     {
51 |         eprintln!(
52 |             "Parse Timer: Parsing took {} ms.",
53 |             ((parsedtime - starttime) as f64) / 1.0e6
54 |         );
55 |         eprintln!(
56 |             "Parse Timer: Transformation took {} ms.",
57 |             ((time::precise_time_ns() - parsedtime) as f64) / 1.0e6
58 |         );
59 |     }
60 | 
61 |     trans_result.map_err(error::MWError::TransformationError)
62 | }
63 | 
64 | fn apply_transformations(
65 |     mut root: Element,
66 |     settings: &GeneralSettings,
67 | ) -> transformations::TResult {
68 |     root = validate_external_refs(root, settings)?;
69 |     root = fold_headings_transformation(root, settings)?;
70 |     root = fold_lists_transformation(root, settings)?;
71 |     root = whitespace_paragraphs_to_empty(root, settings)?;
72 |     root = collapse_paragraphs(root, settings)?;
73 |     root = collapse_consecutive_text(root, settings)?;
74 |     root = enumerate_anon_args(root, settings)?;
75 |     Ok(root)
76 | }
77 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
 1 | //! This program takes Media Wiki source code and produces a yaml syntax tree.
 2 | //!
 3 | //! It aims to provide fast offline processing with debug information
 4 | //! (element position) included. The resulting tree represents the input
 5 | //! document on a syntactic level. Please refer to the `mediawiki_parser`
 6 | //! documentation for a description of possible elements of the abstract
 7 | //! syntax tree.
 8 | 
 9 | use mediawiki_parser;
10 | use serde_json;
11 | use serde_yaml;
12 | use std::fs;
13 | use std::io;
14 | use std::io::prelude::*;
15 | use std::io::BufReader;
16 | use std::path::PathBuf;
17 | use std::process;
18 | use structopt::StructOpt;
19 | 
20 | #[derive(Debug, StructOpt)]
21 | /// This program takes MediaWiki source code and produces
22 | /// a yaml syntax tree on stdout.
23 | struct Args {
24 |     /// Path to the input file.
25 |     /// If none is provided, stdin is used.
26 |     #[structopt(short = "i", long = "input", parse(from_os_str))]
27 |     pub input_file: Option<PathBuf>,
28 | 
29 |     /// Ouput the result as JSON
30 |     #[structopt(short = "j", long = "json")]
31 |     pub use_json: bool,
32 | }
33 | 
/// Read the complete contents of an `io::Read` into a string.
///
/// # Panics
///
/// Panics if reading fails or if the data is not valid UTF-8.
fn read_from_reader(reader: &mut dyn io::Read) -> String {
    let mut content = String::new();
    // `read_to_string` reads until EOF, no additional buffering layer is needed.
    reader
        .read_to_string(&mut content)
        .expect("Could not read from file!");
    content
}
43 | 
44 | /// Read a file from disk and store to string.
45 | fn read_file(filename: &PathBuf) -> String {
46 |     let file = fs::File::open(filename).expect("Could not open file!");
47 |     let mut reader = BufReader::new(file);
48 |     read_from_reader(&mut reader)
49 | }
50 | 
51 | /// Read a file from stdin from to string.
52 | fn read_stdin() -> String {
53 |     read_from_reader(&mut io::stdin())
54 | }
55 | 
56 | fn main() {
57 |     let args = Args::from_args();
58 |     let input = if let Some(path) = args.input_file {
59 |         read_file(&path)
60 |     } else {
61 |         read_stdin()
62 |     };
63 | 
64 |     let result = mediawiki_parser::parse(&input);
65 |     match result {
66 |         Ok(r) => {
67 |             if args.use_json {
68 |                 serde_json::to_writer(io::stdout(), &r).expect("could not serialize json!");
69 |             } else {
70 |                 serde_yaml::to_writer(io::stdout(), &r).expect("could not serialize yaml!");
71 |             };
72 |             println!();
73 |         }
74 |         Err(e) => {
75 |             eprintln!("{}", e);
76 |             if args.use_json {
77 |                 serde_json::to_writer(io::stdout(), &e).expect("could not serialize json!");
78 |             } else {
79 |                 serde_yaml::to_writer(io::stdout(), &e).expect("could not serialize yaml!");
80 |             };
81 |             println!();
82 |             process::exit(1);
83 |         }
84 |     };
85 | }
86 | 


--------------------------------------------------------------------------------
/src/tests/mod.rs:
--------------------------------------------------------------------------------
// Test module whose content is included from `tests_generated.rs` in the
// cargo build output directory (OUT_DIR).
// NOTE(review): the file is presumably written by build.rs — confirm there.
#[cfg(test)]
pub mod generated {
    include!(concat!(env!("OUT_DIR"), "/tests_generated.rs"));
}
5 | 


--------------------------------------------------------------------------------
/src/transformations.rs:
--------------------------------------------------------------------------------
  1 | //! Functions and types for source tree transformations.
  2 | 
  3 | use crate::ast::*;
  4 | use crate::error::TransformationError;
  5 | 
  6 | /// Transformation result type
  7 | pub type TResult = Result<Element, TransformationError>;
  8 | 
  9 | /// Result type for a list of transformed elements.
 10 | pub type TListResult = Result<Vec<Element>, TransformationError>;
 11 | 
 12 | /// Signature of an in-place transformation function
 13 | pub type TFuncInplace<S> = Fn(Element, S) -> TResult;
 14 | 
 15 | /// Signature of a cloning transformation function
 16 | pub type TFunc<S> = Fn(&Element, &[&Element], S) -> TResult;
 17 | 
 18 | /// Apply a given transformation function to a list of elements, without mutating the original.
 19 | pub fn apply_func_clone<S: Copy>(
 20 |     func: &TFunc<S>,
 21 |     content: &[Element],
 22 |     path: &[&Element],
 23 |     settings: S,
 24 | ) -> TListResult {
 25 |     let mut result = vec![];
 26 |     for child in content {
 27 |         result.push(func(child, path, settings)?);
 28 |     }
 29 |     Ok(result)
 30 | }
 31 | 
 32 | /// Apply a given transformation to every item in a list, consuming this list.
 33 | pub fn apply_func_drain<S: Copy>(
 34 |     func: &TFuncInplace<S>,
 35 |     content: &mut Vec<Element>,
 36 |     settings: S,
 37 | ) -> TListResult {
 38 |     let mut result = vec![];
 39 |     for child in content.drain(..) {
 40 |         result.push(func(child, settings)?);
 41 |     }
 42 |     Ok(result)
 43 | }
 44 | 
/// Recursively apply a transformation function `func` to all children of element `root`.
///
/// This is `recurse_inplace_template` with `apply_func_drain` as the list
/// handler: every child list of `root` is drained, each child is passed to
/// `func`, and the results are stored back into the list.
///
/// `root` itself is not passed to `func`, and this function does not descend
/// further on its own; deeper levels are only reached if `func` calls back
/// into this function for the children it receives.
pub fn recurse_inplace<S: Copy>(func: &TFuncInplace<S>, root: Element, settings: S) -> TResult {
    recurse_inplace_template(func, root, settings, &apply_func_drain)
}
 49 | 
 50 | /// Recursively apply  a function `content_func` to the children list of a node.
 51 | pub fn recurse_inplace_template<S: Copy>(
 52 |     func: &TFuncInplace<S>,
 53 |     mut root: Element,
 54 |     settings: S,
 55 |     content_func: &Fn(&TFuncInplace<S>, &mut Vec<Element>, S) -> TListResult,
 56 | ) -> TResult {
 57 |     match root {
 58 |         Element::Document(ref mut e) => {
 59 |             let mut temp = content_func(func, &mut e.content, settings)?;
 60 |             e.content.append(&mut temp);
 61 |         }
 62 |         Element::Formatted(ref mut e) => {
 63 |             let mut temp = content_func(func, &mut e.content, settings)?;
 64 |             e.content.append(&mut temp);
 65 |         }
 66 |         Element::Paragraph(ref mut e) => {
 67 |             let mut temp = content_func(func, &mut e.content, settings)?;
 68 |             e.content.append(&mut temp);
 69 |         }
 70 |         Element::ListItem(ref mut e) => {
 71 |             let mut temp = content_func(func, &mut e.content, settings)?;
 72 |             e.content.append(&mut temp);
 73 |         }
 74 |         Element::List(ref mut e) => {
 75 |             let mut temp = content_func(func, &mut e.content, settings)?;
 76 |             e.content.append(&mut temp);
 77 |         }
 78 |         Element::TableCell(ref mut e) => {
 79 |             let mut temp = content_func(func, &mut e.content, settings)?;
 80 |             e.content.append(&mut temp);
 81 |         }
 82 |         Element::HtmlTag(ref mut e) => {
 83 |             let mut temp = content_func(func, &mut e.content, settings)?;
 84 |             e.content.append(&mut temp);
 85 |         }
 86 |         Element::Gallery(ref mut e) => {
 87 |             let mut temp = content_func(func, &mut e.content, settings)?;
 88 |             e.content.append(&mut temp);
 89 |         }
 90 |         Element::Heading(ref mut e) => {
 91 |             let mut content = content_func(func, &mut e.content, settings)?;
 92 |             let mut caption = content_func(func, &mut e.caption, settings)?;
 93 |             e.caption.append(&mut caption);
 94 |             e.content.append(&mut content);
 95 |         }
 96 |         Element::Template(ref mut e) => {
 97 |             let mut name = content_func(func, &mut e.name, settings)?;
 98 |             let mut content = content_func(func, &mut e.content, settings)?;
 99 |             e.name.append(&mut name);
100 |             e.content.append(&mut content);
101 |         }
102 |         Element::TemplateArgument(ref mut e) => {
103 |             let mut value = content_func(func, &mut e.value, settings)?;
104 |             e.value.append(&mut value);
105 |         }
106 |         Element::InternalReference(ref mut e) => {
107 |             let mut target = content_func(func, &mut e.target, settings)?;
108 |             let mut caption = content_func(func, &mut e.caption, settings)?;
109 | 
110 |             let mut new_options = vec![];
111 |             for mut option in e.options.drain(..) {
112 |                 new_options.push(content_func(func, &mut option, settings)?);
113 |             }
114 | 
115 |             e.target.append(&mut target);
116 |             e.options.append(&mut new_options);
117 |             e.caption.append(&mut caption);
118 |         }
119 |         Element::ExternalReference(ref mut e) => {
120 |             let mut caption = content_func(func, &mut e.caption, settings)?;
121 |             e.caption.append(&mut caption);
122 |         }
123 |         Element::Table(ref mut e) => {
124 |             let mut caption = content_func(func, &mut e.caption, settings)?;
125 |             let mut rows = content_func(func, &mut e.rows, settings)?;
126 |             e.caption.append(&mut caption);
127 |             e.rows.append(&mut rows);
128 |         }
129 |         Element::TableRow(ref mut e) => {
130 |             let mut cells = content_func(func, &mut e.cells, settings)?;
131 |             e.cells.append(&mut cells);
132 |         }
133 |         Element::Text(_) | Element::Comment(_) | Element::Error(_) => (),
134 |     };
135 |     Ok(root)
136 | }
137 | 
/// Recursively apply a transformation function `func` to all children of element `root`, cloning the input.
///
/// This is `recurse_clone_template` with `apply_func_clone` as the list
/// handler: a new element of the same kind as `root` is built, whose child
/// lists hold the results of calling `func` on each original child.
///
/// `root` itself is not passed to `func`, and this function does not descend
/// further on its own; deeper levels are only reached if `func` calls back
/// into this function for the children it receives.
pub fn recurse_clone<S: Copy>(
    func: &TFunc<S>,
    root: &Element,
    path: &[&Element],
    settings: S,
) -> TResult {
    recurse_clone_template(func, root, path, settings, &apply_func_clone)
}
147 | 
148 | /// Recursively apply  a function `content_func` to the children list of a node, cloning the input.
149 | pub fn recurse_clone_template<S: Copy>(
150 |     func: &TFunc<S>,
151 |     root: &Element,
152 |     path: &[&Element],
153 |     settings: S,
154 |     content_func: &Fn(&TFunc<S>, &[Element], &[&Element], S) -> TListResult,
155 | ) -> TResult {
156 |     let mut path = path.to_owned();
157 |     path.push(root);
158 |     let new = match *root {
159 |         Element::Document(ref e) => Element::Document(Document {
160 |             position: e.position.clone(),
161 |             content: content_func(func, &e.content, &path, settings)?,
162 |         }),
163 |         Element::Heading(ref e) => Element::Heading(Heading {
164 |             position: e.position.clone(),
165 |             depth: e.depth,
166 |             caption: content_func(func, &e.caption, &path, settings)?,
167 |             content: content_func(func, &e.content, &path, settings)?,
168 |         }),
169 |         Element::Formatted(ref e) => Element::Formatted(Formatted {
170 |             position: e.position.clone(),
171 |             markup: e.markup,
172 |             content: content_func(func, &e.content, &path, settings)?,
173 |         }),
174 |         Element::Paragraph(ref e) => Element::Paragraph(Paragraph {
175 |             position: e.position.clone(),
176 |             content: content_func(func, &e.content, &path, settings)?,
177 |         }),
178 |         Element::Template(ref e) => Element::Template(Template {
179 |             position: e.position.clone(),
180 |             name: content_func(func, &e.name, &path, settings)?,
181 |             content: content_func(func, &e.content, &path, settings)?,
182 |         }),
183 |         Element::TemplateArgument(ref e) => Element::TemplateArgument(TemplateArgument {
184 |             position: e.position.clone(),
185 |             name: e.name.clone(),
186 |             value: content_func(func, &e.value, &path, settings)?,
187 |         }),
188 |         Element::InternalReference(ref e) => {
189 |             let mut new_options = vec![];
190 |             for option in &e.options {
191 |                 new_options.push(content_func(func, &option, &path, settings)?);
192 |             }
193 | 
194 |             Element::InternalReference(InternalReference {
195 |                 position: e.position.clone(),
196 |                 target: content_func(func, &e.target, &path, settings)?,
197 |                 options: new_options,
198 |                 caption: content_func(func, &e.caption, &path, settings)?,
199 |             })
200 |         }
201 |         Element::ExternalReference(ref e) => Element::ExternalReference(ExternalReference {
202 |             position: e.position.clone(),
203 |             target: e.target.clone(),
204 |             caption: content_func(func, &e.caption, &path, settings)?,
205 |         }),
206 |         Element::ListItem(ref e) => Element::ListItem(ListItem {
207 |             position: e.position.clone(),
208 |             depth: e.depth,
209 |             kind: e.kind,
210 |             content: content_func(func, &e.content, &path, settings)?,
211 |         }),
212 |         Element::List(ref e) => Element::List(List {
213 |             position: e.position.clone(),
214 |             content: content_func(func, &e.content, &path, settings)?,
215 |         }),
216 |         Element::Table(ref e) => Element::Table(Table {
217 |             position: e.position.clone(),
218 |             attributes: e.attributes.clone(),
219 |             caption: content_func(func, &e.caption, &path, settings)?,
220 |             caption_attributes: e.caption_attributes.clone(),
221 |             rows: content_func(func, &e.rows, &path, settings)?,
222 |         }),
223 |         Element::TableRow(ref e) => Element::TableRow(TableRow {
224 |             position: e.position.clone(),
225 |             attributes: e.attributes.clone(),
226 |             cells: content_func(func, &e.cells, &path, settings)?,
227 |         }),
228 |         Element::TableCell(ref e) => Element::TableCell(TableCell {
229 |             position: e.position.clone(),
230 |             header: e.header,
231 |             attributes: e.attributes.clone(),
232 |             content: content_func(func, &e.content, &path, settings)?,
233 |         }),
234 |         Element::Comment(ref e) => Element::Comment(e.clone()),
235 |         Element::Text(ref e) => Element::Text(e.clone()),
236 |         Element::Error(ref e) => Element::Error(e.clone()),
237 |         Element::HtmlTag(ref e) => Element::HtmlTag(HtmlTag {
238 |             position: e.position.clone(),
239 |             name: e.name.clone(),
240 |             attributes: e.attributes.clone(),
241 |             content: content_func(func, &e.content, &path, settings)?,
242 |         }),
243 |         Element::Gallery(ref e) => Element::Gallery(Gallery {
244 |             position: e.position.clone(),
245 |             attributes: e.attributes.clone(),
246 |             content: content_func(func, &e.content, &path, settings)?,
247 |         }),
248 |     };
249 |     path.pop();
250 |     Ok(new)
251 | }
252 | 


--------------------------------------------------------------------------------
/src/traversion.rs:
--------------------------------------------------------------------------------
 1 | //! Helper trait for operations reading from the document tree.
 2 | 
 3 | use super::ast::Element;
 4 | use std::io;
 5 | 
 6 | /// Implements a traversion over a tree of `Element`.
 7 | ///
 8 | /// All fields of the traversion struct can be mutated,
 9 | /// external settings cannot.
10 | pub trait Traversion<'a, S: Copy + ?Sized> {
11 |     /// push to the traversion path.
12 |     fn path_push(&mut self, elem: &'a Element);
13 |     /// pop from the traversion path.
14 |     fn path_pop(&mut self) -> Option<&'a Element>;
15 |     /// get the traversion path.
16 |     fn get_path(&self) -> &Vec<&'a Element>;
17 |     /// template method for handling single nodes.
18 |     /// if the result is `false`, handling is complete and
19 |     /// children of this node are not considered,
20 |     /// otherwise `work()` is recursively called for all children.
21 |     fn work(&mut self, _root: &'a Element, _settings: S, _out: &mut io::Write) -> io::Result<bool> {
22 |         Ok(true)
23 |     }
24 | 
25 |     /// template method for handling a vector of nodes.
26 |     /// if the result is `false`, handling is complete and
27 |     /// children of the vector's elements are not considered,
28 |     /// otherwise `work()` is recursively called for all children.
29 |     fn work_vec(
30 |         &mut self,
31 |         _root: &'a [Element],
32 |         _settings: S,
33 |         _out: &mut io::Write,
34 |     ) -> io::Result<bool> {
35 |         Ok(true)
36 |     }
37 | 
38 |     /// run this traversion for a vector of elements.
39 |     fn run_vec(
40 |         &mut self,
41 |         content: &'a [Element],
42 |         settings: S,
43 |         out: &mut io::Write,
44 |     ) -> io::Result<()> {
45 |         if !self.work_vec(content, settings, out)? {
46 |             return Ok(());
47 |         }
48 |         for elem in &content[..] {
49 |             self.run(elem, settings, out)?;
50 |         }
51 |         Ok(())
52 |     }
53 |     /// run this traversion for an element.
54 |     fn run(&mut self, root: &'a Element, settings: S, out: &mut io::Write) -> io::Result<()> {
55 |         self.path_push(root);
56 | 
57 |         // break if work function breaks recursion.
58 |         if !self.work(root, settings, out)? {
59 |             return Ok(());
60 |         }
61 |         match *root {
62 |             Element::Document(ref e) => self.run_vec(&e.content, settings, out)?,
63 |             Element::Formatted(ref e) => self.run_vec(&e.content, settings, out)?,
64 |             Element::Paragraph(ref e) => self.run_vec(&e.content, settings, out)?,
65 |             Element::ListItem(ref e) => self.run_vec(&e.content, settings, out)?,
66 |             Element::List(ref e) => self.run_vec(&e.content, settings, out)?,
67 |             Element::TableCell(ref e) => self.run_vec(&e.content, settings, out)?,
68 |             Element::HtmlTag(ref e) => self.run_vec(&e.content, settings, out)?,
69 |             Element::Gallery(ref e) => self.run_vec(&e.content, settings, out)?,
70 |             Element::Heading(ref e) => {
71 |                 self.run_vec(&e.caption, settings, out)?;
72 |                 self.run_vec(&e.content, settings, out)?;
73 |             }
74 |             Element::Template(ref e) => {
75 |                 self.run_vec(&e.name, settings, out)?;
76 |                 self.run_vec(&e.content, settings, out)?;
77 |             }
78 |             Element::TemplateArgument(ref e) => self.run_vec(&e.value, settings, out)?,
79 |             Element::InternalReference(ref e) => {
80 |                 self.run_vec(&e.target, settings, out)?;
81 |                 for option in &e.options {
82 |                     self.run_vec(option, settings, out)?;
83 |                 }
84 |                 self.run_vec(&e.caption, settings, out)?;
85 |             }
86 |             Element::ExternalReference(ref e) => self.run_vec(&e.caption, settings, out)?,
87 |             Element::Table(ref e) => {
88 |                 self.run_vec(&e.caption, settings, out)?;
89 |                 self.run_vec(&e.rows, settings, out)?;
90 |             }
91 |             Element::TableRow(ref e) => self.run_vec(&e.cells, settings, out)?,
92 |             Element::Text(_) | Element::Comment(_) | Element::Error(_) => (),
93 |         }
94 |         self.path_pop();
95 |         Ok(())
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
 1 | //! Utility functions and types
 2 | 
 3 | use crate::ast;
 4 | 
/// The terminal width.
///
/// Fixed at 80; `shorten_str` abbreviates strings that reach this length.
const TERMINAL_WIDTH: usize = 80;
 7 | 
 8 | pub fn combine<T>(t: (Vec<T>, Vec<T>)) -> Vec<T> {
 9 |     let (mut t1, mut t2) = t;
10 |     t1.append(&mut t2);
11 |     t1
12 | }
13 | 
14 | /// Compiles a list of start and end positions of the input source lines.
15 | ///
16 | /// This representation is used to calculate line and column position from the input offset.
17 | pub fn get_source_lines(source: &str) -> Vec<ast::SourceLine> {
18 |     let mut pos = 0;
19 |     let mut result = Vec::new();
20 | 
21 |     for line in source.split('\n') {
22 |         result.push(ast::SourceLine {
23 |             start: pos,
24 |             content: line,
25 |             end: pos + line.len() + 1,
26 |         });
27 |         pos += line.len() + 1;
28 |     }
29 |     result
30 | }
31 | 
/// Returns `true` when `input` contains nothing but whitespace characters.
///
/// The empty string counts as whitespace.
pub fn is_whitespace(input: &str) -> bool {
    // Trimming removes leading and trailing whitespace; nothing is left
    // exactly when every character was whitespace.
    input.trim().is_empty()
}
36 | 
37 | /// Shorten a string to fit into `TERMINAL_WIDTH`.
38 | pub fn shorten_str(input: &str) -> String {
39 |     let input_len = input.chars().count();
40 | 
41 |     if input.len() < TERMINAL_WIDTH {
42 |         return String::from(input);
43 |     }
44 | 
45 |     let filler = " .. ";
46 |     let mut result = String::new();
47 |     let half_text_size = (TERMINAL_WIDTH - filler.chars().count()) / 2;
48 | 
49 |     for (char_count, c) in input.chars().enumerate() {
50 |         if char_count < half_text_size {
51 |             result.push(c);
52 |         }
53 |         if char_count == half_text_size {
54 |             result.push_str(filler);
55 |         }
56 |         if char_count >= input_len - half_text_size {
57 |             result.push(c);
58 |         }
59 |     }
60 |     result
61 | }
62 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `is_whitespace` against strings that consist only of
    /// whitespace and strings that contain at least one other character.
    #[test]
    fn test_is_whitespace() {
        let blank = ["", "   ", "\t", "\n", "\t\t\t", "\n\t "];
        let not_blank = ["a", "    a", "\t\\", "   \nä\t\t\t "];

        blank
            .iter()
            .for_each(|arg| assert!(is_whitespace(arg), "is_whitespace({:?})", arg));

        not_blank
            .iter()
            .for_each(|arg| assert!(!is_whitespace(arg), "!is_whitespace({:?})", arg));
    }
}
78 | 


--------------------------------------------------------------------------------