├── .gitignore
├── CHANGELOG.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── benches
    ├── bench_wikipedia.rs
    └── wikipedia-2020-12-21.html
├── examples
    ├── get_all_href
    │   ├── index.html
    │   └── main.rs
    └── simple_parser
    │   └── main.rs
├── src
    ├── dom
    │   ├── element.rs
    │   ├── formatting.rs
    │   ├── mod.rs
    │   ├── node.rs
    │   └── span.rs
    ├── error.rs
    ├── grammar
    │   ├── mod.rs
    │   └── rules.pest
    └── lib.rs
└── tests
    ├── bin.rs
    ├── comments.rs
    ├── document.rs
    ├── document_empty.rs
    ├── document_fragment.rs
    ├── element.rs
    ├── element_attributes.rs
    ├── node_iter.rs
    ├── output.rs
    ├── snapshots
        ├── comments__it_can_parse_document_with_just_comments.snap
        ├── comments__it_can_parse_document_with_just_one_comment.snap
        ├── document__it_can_parse_document_with_comments.snap
        ├── document__it_can_parse_minimal_document.snap
        ├── document_empty__it_can_parse_empty_document.snap
        ├── document_fragment__it_can_parse_single_div_as_fragment.snap
        ├── document_fragment__it_can_parse_single_text_as_fragment.snap
        ├── document_fragment__it_can_parse_text_comment_element_as_fragment.snap
        ├── element__it_can_clone_node.snap
        ├── element__it_can_deal_with_weird_whitespaces.snap
        ├── element__it_can_parse_broken_html.snap
        ├── element__it_can_parse_deeply_nested.snap
        ├── element__it_can_parse_multiple_elements.snap
        ├── element__it_can_parse_multiple_open_elements.snap
        ├── element__it_can_parse_nested_elements.snap
        ├── element__it_can_parse_nested_elements_mixed_children.snap
        ├── element__it_can_parse_one_element.snap
        ├── element__it_can_parse_one_element_mixed_case.snap
        ├── element__it_can_parse_one_element_mixed_case_numbers.snap
        ├── element__it_can_parse_one_element_mixed_case_numbers_symbols.snap
        ├── element__it_can_parse_one_element_upper_case.snap
        ├── element__it_can_parse_script_with_content.snap
        ├── element__it_can_parse_style_with_content.snap
        ├── element__it_errors_when_multiple_nested_elements_dont_match.snap
        ├── element__it_skips_dangling_elements.snap
        ├── element_attributes__it_can_parse_attribute_key_mixed_case_symbols.snap
        ├── element_attributes__it_can_parse_attribute_multiple_values_double_quote.snap
        ├── element_attributes__it_can_parse_attribute_multiple_values_single_quote.snap
        ├── element_attributes__it_can_parse_attribute_with_empty_value.snap
        ├── element_attributes__it_can_parse_classes.snap
        ├── element_attributes__it_can_parse_double_quote.snap
        ├── element_attributes__it_can_parse_id.snap
        ├── element_attributes__it_can_parse_multiple_attributes_double_quote.snap
        ├── element_attributes__it_can_parse_multiple_attributes_no_quote.snap
        ├── element_attributes__it_can_parse_multiple_attributes_single_quote.snap
        ├── element_attributes__it_can_parse_multiple_attributes_where_whitespace_does_not_matter_for_keys.snap
        ├── element_attributes__it_can_parse_no_quote.snap
        ├── element_attributes__it_can_parse_single_quote.snap
        ├── element_attributes__it_keeps_spaces_for_non_classes.snap
        ├── output__it_can_output_complex_html_as_json.snap
        ├── source_span__it_can_generate_source_span.snap
        ├── svg__it_can_parse_svg.snap
        ├── text__it_can_parse_document_with_just_text.snap
        ├── text__it_can_parse_document_with_multiple_text_elements.snap
        ├── text__it_can_parse_document_with_text_and_line_breaks.snap
        ├── text__it_can_parse_text_in_paragraph_with_weird_formatting.snap
        └── text__it_can_parse_text_with_chevron.snap
    ├── source_span.rs
    ├── svg.rs
    ├── text.rs
    └── websites.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | *.temp
3 | 
4 | # IDE
5 | .idea/
6 | *.iml


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## 0.7.0
 4 | 
 5 | - Updated all dependencies
 6 | - Replaced structopt with clap
 7 | - Add source_span to Element; by [bennyboer](https://github.com/bennyboer)
 8 | - Improve whitespace; by [bennyboer](https://github.com/bennyboer)
 9 | - Fix typo in docs; by [c-git](https://github.com/c-git)
10 | 
11 | ## Older versions
12 | 
13 | - See commit history
14 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
   1 | # This file is automatically @generated by Cargo.
   2 | # It is not intended for manual editing.
   3 | version = 3
   4 | 
   5 | [[package]]
   6 | name = "anes"
   7 | version = "0.1.6"
   8 | source = "registry+https://github.com/rust-lang/crates.io-index"
   9 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
  10 | 
  11 | [[package]]
  12 | name = "anstream"
  13 | version = "0.3.2"
  14 | source = "registry+https://github.com/rust-lang/crates.io-index"
  15 | checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
  16 | dependencies = [
  17 |  "anstyle",
  18 |  "anstyle-parse",
  19 |  "anstyle-query",
  20 |  "anstyle-wincon",
  21 |  "colorchoice",
  22 |  "is-terminal",
  23 |  "utf8parse",
  24 | ]
  25 | 
  26 | [[package]]
  27 | name = "anstyle"
  28 | version = "1.0.0"
  29 | source = "registry+https://github.com/rust-lang/crates.io-index"
  30 | checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d"
  31 | 
  32 | [[package]]
  33 | name = "anstyle-parse"
  34 | version = "0.2.0"
  35 | source = "registry+https://github.com/rust-lang/crates.io-index"
  36 | checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee"
  37 | dependencies = [
  38 |  "utf8parse",
  39 | ]
  40 | 
  41 | [[package]]
  42 | name = "anstyle-query"
  43 | version = "1.0.0"
  44 | source = "registry+https://github.com/rust-lang/crates.io-index"
  45 | checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
  46 | dependencies = [
  47 |  "windows-sys 0.48.0",
  48 | ]
  49 | 
  50 | [[package]]
  51 | name = "anstyle-wincon"
  52 | version = "1.0.1"
  53 | source = "registry+https://github.com/rust-lang/crates.io-index"
  54 | checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
  55 | dependencies = [
  56 |  "anstyle",
  57 |  "windows-sys 0.48.0",
  58 | ]
  59 | 
  60 | [[package]]
  61 | name = "atty"
  62 | version = "0.2.14"
  63 | source = "registry+https://github.com/rust-lang/crates.io-index"
  64 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
  65 | dependencies = [
  66 |  "hermit-abi 0.1.19",
  67 |  "libc",
  68 |  "winapi",
  69 | ]
  70 | 
  71 | [[package]]
  72 | name = "autocfg"
  73 | version = "1.1.0"
  74 | source = "registry+https://github.com/rust-lang/crates.io-index"
  75 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
  76 | 
  77 | [[package]]
  78 | name = "base64"
  79 | version = "0.21.0"
  80 | source = "registry+https://github.com/rust-lang/crates.io-index"
  81 | checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
  82 | 
  83 | [[package]]
  84 | name = "bitflags"
  85 | version = "1.3.2"
  86 | source = "registry+https://github.com/rust-lang/crates.io-index"
  87 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
  88 | 
  89 | [[package]]
  90 | name = "block-buffer"
  91 | version = "0.10.4"
  92 | source = "registry+https://github.com/rust-lang/crates.io-index"
  93 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
  94 | dependencies = [
  95 |  "generic-array",
  96 | ]
  97 | 
  98 | [[package]]
  99 | name = "bumpalo"
 100 | version = "3.12.2"
 101 | source = "registry+https://github.com/rust-lang/crates.io-index"
 102 | checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
 103 | 
 104 | [[package]]
 105 | name = "bytes"
 106 | version = "1.4.0"
 107 | source = "registry+https://github.com/rust-lang/crates.io-index"
 108 | checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
 109 | 
 110 | [[package]]
 111 | name = "cast"
 112 | version = "0.3.0"
 113 | source = "registry+https://github.com/rust-lang/crates.io-index"
 114 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 115 | 
 116 | [[package]]
 117 | name = "cc"
 118 | version = "1.0.79"
 119 | source = "registry+https://github.com/rust-lang/crates.io-index"
 120 | checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
 121 | 
 122 | [[package]]
 123 | name = "cfg-if"
 124 | version = "1.0.0"
 125 | source = "registry+https://github.com/rust-lang/crates.io-index"
 126 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 127 | 
 128 | [[package]]
 129 | name = "ciborium"
 130 | version = "0.2.1"
 131 | source = "registry+https://github.com/rust-lang/crates.io-index"
 132 | checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
 133 | dependencies = [
 134 |  "ciborium-io",
 135 |  "ciborium-ll",
 136 |  "serde",
 137 | ]
 138 | 
 139 | [[package]]
 140 | name = "ciborium-io"
 141 | version = "0.2.1"
 142 | source = "registry+https://github.com/rust-lang/crates.io-index"
 143 | checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
 144 | 
 145 | [[package]]
 146 | name = "ciborium-ll"
 147 | version = "0.2.1"
 148 | source = "registry+https://github.com/rust-lang/crates.io-index"
 149 | checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
 150 | dependencies = [
 151 |  "ciborium-io",
 152 |  "half",
 153 | ]
 154 | 
 155 | [[package]]
 156 | name = "clap"
 157 | version = "3.2.25"
 158 | source = "registry+https://github.com/rust-lang/crates.io-index"
 159 | checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
 160 | dependencies = [
 161 |  "bitflags",
 162 |  "clap_lex 0.2.4",
 163 |  "indexmap",
 164 |  "textwrap",
 165 | ]
 166 | 
 167 | [[package]]
 168 | name = "clap"
 169 | version = "4.2.7"
 170 | source = "registry+https://github.com/rust-lang/crates.io-index"
 171 | checksum = "34d21f9bf1b425d2968943631ec91202fe5e837264063503708b83013f8fc938"
 172 | dependencies = [
 173 |  "clap_builder",
 174 |  "clap_derive",
 175 |  "once_cell",
 176 | ]
 177 | 
 178 | [[package]]
 179 | name = "clap_builder"
 180 | version = "4.2.7"
 181 | source = "registry+https://github.com/rust-lang/crates.io-index"
 182 | checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd"
 183 | dependencies = [
 184 |  "anstream",
 185 |  "anstyle",
 186 |  "bitflags",
 187 |  "clap_lex 0.4.1",
 188 |  "strsim",
 189 | ]
 190 | 
 191 | [[package]]
 192 | name = "clap_derive"
 193 | version = "4.2.0"
 194 | source = "registry+https://github.com/rust-lang/crates.io-index"
 195 | checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4"
 196 | dependencies = [
 197 |  "heck",
 198 |  "proc-macro2",
 199 |  "quote",
 200 |  "syn",
 201 | ]
 202 | 
 203 | [[package]]
 204 | name = "clap_lex"
 205 | version = "0.2.4"
 206 | source = "registry+https://github.com/rust-lang/crates.io-index"
 207 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
 208 | dependencies = [
 209 |  "os_str_bytes",
 210 | ]
 211 | 
 212 | [[package]]
 213 | name = "clap_lex"
 214 | version = "0.4.1"
 215 | source = "registry+https://github.com/rust-lang/crates.io-index"
 216 | checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1"
 217 | 
 218 | [[package]]
 219 | name = "colorchoice"
 220 | version = "1.0.0"
 221 | source = "registry+https://github.com/rust-lang/crates.io-index"
 222 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
 223 | 
 224 | [[package]]
 225 | name = "console"
 226 | version = "0.15.5"
 227 | source = "registry+https://github.com/rust-lang/crates.io-index"
 228 | checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60"
 229 | dependencies = [
 230 |  "encode_unicode",
 231 |  "lazy_static",
 232 |  "libc",
 233 |  "windows-sys 0.42.0",
 234 | ]
 235 | 
 236 | [[package]]
 237 | name = "core-foundation"
 238 | version = "0.9.3"
 239 | source = "registry+https://github.com/rust-lang/crates.io-index"
 240 | checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146"
 241 | dependencies = [
 242 |  "core-foundation-sys",
 243 |  "libc",
 244 | ]
 245 | 
 246 | [[package]]
 247 | name = "core-foundation-sys"
 248 | version = "0.8.4"
 249 | source = "registry+https://github.com/rust-lang/crates.io-index"
 250 | checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
 251 | 
 252 | [[package]]
 253 | name = "cpufeatures"
 254 | version = "0.2.7"
 255 | source = "registry+https://github.com/rust-lang/crates.io-index"
 256 | checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58"
 257 | dependencies = [
 258 |  "libc",
 259 | ]
 260 | 
 261 | [[package]]
 262 | name = "criterion"
 263 | version = "0.4.0"
 264 | source = "registry+https://github.com/rust-lang/crates.io-index"
 265 | checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
 266 | dependencies = [
 267 |  "anes",
 268 |  "atty",
 269 |  "cast",
 270 |  "ciborium",
 271 |  "clap 3.2.25",
 272 |  "criterion-plot",
 273 |  "itertools",
 274 |  "lazy_static",
 275 |  "num-traits",
 276 |  "oorandom",
 277 |  "plotters",
 278 |  "rayon",
 279 |  "regex",
 280 |  "serde",
 281 |  "serde_derive",
 282 |  "serde_json",
 283 |  "tinytemplate",
 284 |  "walkdir",
 285 | ]
 286 | 
 287 | [[package]]
 288 | name = "criterion-plot"
 289 | version = "0.5.0"
 290 | source = "registry+https://github.com/rust-lang/crates.io-index"
 291 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
 292 | dependencies = [
 293 |  "cast",
 294 |  "itertools",
 295 | ]
 296 | 
 297 | [[package]]
 298 | name = "crossbeam-channel"
 299 | version = "0.5.8"
 300 | source = "registry+https://github.com/rust-lang/crates.io-index"
 301 | checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
 302 | dependencies = [
 303 |  "cfg-if",
 304 |  "crossbeam-utils",
 305 | ]
 306 | 
 307 | [[package]]
 308 | name = "crossbeam-deque"
 309 | version = "0.8.3"
 310 | source = "registry+https://github.com/rust-lang/crates.io-index"
 311 | checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
 312 | dependencies = [
 313 |  "cfg-if",
 314 |  "crossbeam-epoch",
 315 |  "crossbeam-utils",
 316 | ]
 317 | 
 318 | [[package]]
 319 | name = "crossbeam-epoch"
 320 | version = "0.9.14"
 321 | source = "registry+https://github.com/rust-lang/crates.io-index"
 322 | checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
 323 | dependencies = [
 324 |  "autocfg",
 325 |  "cfg-if",
 326 |  "crossbeam-utils",
 327 |  "memoffset",
 328 |  "scopeguard",
 329 | ]
 330 | 
 331 | [[package]]
 332 | name = "crossbeam-utils"
 333 | version = "0.8.15"
 334 | source = "registry+https://github.com/rust-lang/crates.io-index"
 335 | checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b"
 336 | dependencies = [
 337 |  "cfg-if",
 338 | ]
 339 | 
 340 | [[package]]
 341 | name = "crypto-common"
 342 | version = "0.1.6"
 343 | source = "registry+https://github.com/rust-lang/crates.io-index"
 344 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
 345 | dependencies = [
 346 |  "generic-array",
 347 |  "typenum",
 348 | ]
 349 | 
 350 | [[package]]
 351 | name = "digest"
 352 | version = "0.10.6"
 353 | source = "registry+https://github.com/rust-lang/crates.io-index"
 354 | checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f"
 355 | dependencies = [
 356 |  "block-buffer",
 357 |  "crypto-common",
 358 | ]
 359 | 
 360 | [[package]]
 361 | name = "doc-comment"
 362 | version = "0.3.3"
 363 | source = "registry+https://github.com/rust-lang/crates.io-index"
 364 | checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
 365 | 
 366 | [[package]]
 367 | name = "either"
 368 | version = "1.8.1"
 369 | source = "registry+https://github.com/rust-lang/crates.io-index"
 370 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
 371 | 
 372 | [[package]]
 373 | name = "encode_unicode"
 374 | version = "0.3.6"
 375 | source = "registry+https://github.com/rust-lang/crates.io-index"
 376 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 377 | 
 378 | [[package]]
 379 | name = "encoding_rs"
 380 | version = "0.8.32"
 381 | source = "registry+https://github.com/rust-lang/crates.io-index"
 382 | checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
 383 | dependencies = [
 384 |  "cfg-if",
 385 | ]
 386 | 
 387 | [[package]]
 388 | name = "errno"
 389 | version = "0.3.1"
 390 | source = "registry+https://github.com/rust-lang/crates.io-index"
 391 | checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
 392 | dependencies = [
 393 |  "errno-dragonfly",
 394 |  "libc",
 395 |  "windows-sys 0.48.0",
 396 | ]
 397 | 
 398 | [[package]]
 399 | name = "errno-dragonfly"
 400 | version = "0.1.2"
 401 | source = "registry+https://github.com/rust-lang/crates.io-index"
 402 | checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
 403 | dependencies = [
 404 |  "cc",
 405 |  "libc",
 406 | ]
 407 | 
 408 | [[package]]
 409 | name = "fastrand"
 410 | version = "1.9.0"
 411 | source = "registry+https://github.com/rust-lang/crates.io-index"
 412 | checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be"
 413 | dependencies = [
 414 |  "instant",
 415 | ]
 416 | 
 417 | [[package]]
 418 | name = "fnv"
 419 | version = "1.0.7"
 420 | source = "registry+https://github.com/rust-lang/crates.io-index"
 421 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 422 | 
 423 | [[package]]
 424 | name = "foreign-types"
 425 | version = "0.3.2"
 426 | source = "registry+https://github.com/rust-lang/crates.io-index"
 427 | checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
 428 | dependencies = [
 429 |  "foreign-types-shared",
 430 | ]
 431 | 
 432 | [[package]]
 433 | name = "foreign-types-shared"
 434 | version = "0.1.1"
 435 | source = "registry+https://github.com/rust-lang/crates.io-index"
 436 | checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
 437 | 
 438 | [[package]]
 439 | name = "form_urlencoded"
 440 | version = "1.1.0"
 441 | source = "registry+https://github.com/rust-lang/crates.io-index"
 442 | checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
 443 | dependencies = [
 444 |  "percent-encoding",
 445 | ]
 446 | 
 447 | [[package]]
 448 | name = "futures-channel"
 449 | version = "0.3.28"
 450 | source = "registry+https://github.com/rust-lang/crates.io-index"
 451 | checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2"
 452 | dependencies = [
 453 |  "futures-core",
 454 | ]
 455 | 
 456 | [[package]]
 457 | name = "futures-core"
 458 | version = "0.3.28"
 459 | source = "registry+https://github.com/rust-lang/crates.io-index"
 460 | checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
 461 | 
 462 | [[package]]
 463 | name = "futures-io"
 464 | version = "0.3.28"
 465 | source = "registry+https://github.com/rust-lang/crates.io-index"
 466 | checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
 467 | 
 468 | [[package]]
 469 | name = "futures-sink"
 470 | version = "0.3.28"
 471 | source = "registry+https://github.com/rust-lang/crates.io-index"
 472 | checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
 473 | 
 474 | [[package]]
 475 | name = "futures-task"
 476 | version = "0.3.28"
 477 | source = "registry+https://github.com/rust-lang/crates.io-index"
 478 | checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
 479 | 
 480 | [[package]]
 481 | name = "futures-util"
 482 | version = "0.3.28"
 483 | source = "registry+https://github.com/rust-lang/crates.io-index"
 484 | checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
 485 | dependencies = [
 486 |  "futures-core",
 487 |  "futures-io",
 488 |  "futures-task",
 489 |  "memchr",
 490 |  "pin-project-lite",
 491 |  "pin-utils",
 492 |  "slab",
 493 | ]
 494 | 
 495 | [[package]]
 496 | name = "generic-array"
 497 | version = "0.14.7"
 498 | source = "registry+https://github.com/rust-lang/crates.io-index"
 499 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
 500 | dependencies = [
 501 |  "typenum",
 502 |  "version_check",
 503 | ]
 504 | 
 505 | [[package]]
 506 | name = "h2"
 507 | version = "0.3.18"
 508 | source = "registry+https://github.com/rust-lang/crates.io-index"
 509 | checksum = "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21"
 510 | dependencies = [
 511 |  "bytes",
 512 |  "fnv",
 513 |  "futures-core",
 514 |  "futures-sink",
 515 |  "futures-util",
 516 |  "http",
 517 |  "indexmap",
 518 |  "slab",
 519 |  "tokio",
 520 |  "tokio-util",
 521 |  "tracing",
 522 | ]
 523 | 
 524 | [[package]]
 525 | name = "half"
 526 | version = "1.8.2"
 527 | source = "registry+https://github.com/rust-lang/crates.io-index"
 528 | checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
 529 | 
 530 | [[package]]
 531 | name = "hashbrown"
 532 | version = "0.12.3"
 533 | source = "registry+https://github.com/rust-lang/crates.io-index"
 534 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 535 | 
 536 | [[package]]
 537 | name = "heck"
 538 | version = "0.4.1"
 539 | source = "registry+https://github.com/rust-lang/crates.io-index"
 540 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 541 | 
 542 | [[package]]
 543 | name = "hermit-abi"
 544 | version = "0.1.19"
 545 | source = "registry+https://github.com/rust-lang/crates.io-index"
 546 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
 547 | dependencies = [
 548 |  "libc",
 549 | ]
 550 | 
 551 | [[package]]
 552 | name = "hermit-abi"
 553 | version = "0.2.6"
 554 | source = "registry+https://github.com/rust-lang/crates.io-index"
 555 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
 556 | dependencies = [
 557 |  "libc",
 558 | ]
 559 | 
 560 | [[package]]
 561 | name = "hermit-abi"
 562 | version = "0.3.1"
 563 | source = "registry+https://github.com/rust-lang/crates.io-index"
 564 | checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
 565 | 
 566 | [[package]]
 567 | name = "html_parser"
 568 | version = "0.7.0"
 569 | dependencies = [
 570 |  "clap 4.2.7",
 571 |  "criterion",
 572 |  "doc-comment",
 573 |  "indoc",
 574 |  "insta",
 575 |  "pest",
 576 |  "pest_derive",
 577 |  "reqwest",
 578 |  "serde",
 579 |  "serde_derive",
 580 |  "serde_json",
 581 |  "tempfile",
 582 |  "thiserror",
 583 | ]
 584 | 
 585 | [[package]]
 586 | name = "http"
 587 | version = "0.2.9"
 588 | source = "registry+https://github.com/rust-lang/crates.io-index"
 589 | checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
 590 | dependencies = [
 591 |  "bytes",
 592 |  "fnv",
 593 |  "itoa",
 594 | ]
 595 | 
 596 | [[package]]
 597 | name = "http-body"
 598 | version = "0.4.5"
 599 | source = "registry+https://github.com/rust-lang/crates.io-index"
 600 | checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
 601 | dependencies = [
 602 |  "bytes",
 603 |  "http",
 604 |  "pin-project-lite",
 605 | ]
 606 | 
 607 | [[package]]
 608 | name = "httparse"
 609 | version = "1.8.0"
 610 | source = "registry+https://github.com/rust-lang/crates.io-index"
 611 | checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
 612 | 
 613 | [[package]]
 614 | name = "httpdate"
 615 | version = "1.0.2"
 616 | source = "registry+https://github.com/rust-lang/crates.io-index"
 617 | checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
 618 | 
 619 | [[package]]
 620 | name = "hyper"
 621 | version = "0.14.26"
 622 | source = "registry+https://github.com/rust-lang/crates.io-index"
 623 | checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4"
 624 | dependencies = [
 625 |  "bytes",
 626 |  "futures-channel",
 627 |  "futures-core",
 628 |  "futures-util",
 629 |  "h2",
 630 |  "http",
 631 |  "http-body",
 632 |  "httparse",
 633 |  "httpdate",
 634 |  "itoa",
 635 |  "pin-project-lite",
 636 |  "socket2",
 637 |  "tokio",
 638 |  "tower-service",
 639 |  "tracing",
 640 |  "want",
 641 | ]
 642 | 
 643 | [[package]]
 644 | name = "hyper-rustls"
 645 | version = "0.23.2"
 646 | source = "registry+https://github.com/rust-lang/crates.io-index"
 647 | checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c"
 648 | dependencies = [
 649 |  "http",
 650 |  "hyper",
 651 |  "rustls",
 652 |  "tokio",
 653 |  "tokio-rustls",
 654 | ]
 655 | 
 656 | [[package]]
 657 | name = "hyper-tls"
 658 | version = "0.5.0"
 659 | source = "registry+https://github.com/rust-lang/crates.io-index"
 660 | checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
 661 | dependencies = [
 662 |  "bytes",
 663 |  "hyper",
 664 |  "native-tls",
 665 |  "tokio",
 666 |  "tokio-native-tls",
 667 | ]
 668 | 
 669 | [[package]]
 670 | name = "idna"
 671 | version = "0.3.0"
 672 | source = "registry+https://github.com/rust-lang/crates.io-index"
 673 | checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
 674 | dependencies = [
 675 |  "unicode-bidi",
 676 |  "unicode-normalization",
 677 | ]
 678 | 
 679 | [[package]]
 680 | name = "indexmap"
 681 | version = "1.9.3"
 682 | source = "registry+https://github.com/rust-lang/crates.io-index"
 683 | checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 684 | dependencies = [
 685 |  "autocfg",
 686 |  "hashbrown",
 687 | ]
 688 | 
 689 | [[package]]
 690 | name = "indoc"
 691 | version = "2.0.1"
 692 | source = "registry+https://github.com/rust-lang/crates.io-index"
 693 | checksum = "9f2cb48b81b1dc9f39676bf99f5499babfec7cd8fe14307f7b3d747208fb5690"
 694 | 
 695 | [[package]]
 696 | name = "insta"
 697 | version = "1.29.0"
 698 | source = "registry+https://github.com/rust-lang/crates.io-index"
 699 | checksum = "9a28d25139df397cbca21408bb742cf6837e04cdbebf1b07b760caf971d6a972"
 700 | dependencies = [
 701 |  "console",
 702 |  "lazy_static",
 703 |  "linked-hash-map",
 704 |  "serde",
 705 |  "similar",
 706 |  "yaml-rust",
 707 | ]
 708 | 
 709 | [[package]]
 710 | name = "instant"
 711 | version = "0.1.12"
 712 | source = "registry+https://github.com/rust-lang/crates.io-index"
 713 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
 714 | dependencies = [
 715 |  "cfg-if",
 716 | ]
 717 | 
 718 | [[package]]
 719 | name = "io-lifetimes"
 720 | version = "1.0.10"
 721 | source = "registry+https://github.com/rust-lang/crates.io-index"
 722 | checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220"
 723 | dependencies = [
 724 |  "hermit-abi 0.3.1",
 725 |  "libc",
 726 |  "windows-sys 0.48.0",
 727 | ]
 728 | 
 729 | [[package]]
 730 | name = "ipnet"
 731 | version = "2.7.2"
 732 | source = "registry+https://github.com/rust-lang/crates.io-index"
 733 | checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f"
 734 | 
 735 | [[package]]
 736 | name = "is-terminal"
 737 | version = "0.4.7"
 738 | source = "registry+https://github.com/rust-lang/crates.io-index"
 739 | checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
 740 | dependencies = [
 741 |  "hermit-abi 0.3.1",
 742 |  "io-lifetimes",
 743 |  "rustix",
 744 |  "windows-sys 0.48.0",
 745 | ]
 746 | 
 747 | [[package]]
 748 | name = "itertools"
 749 | version = "0.10.5"
 750 | source = "registry+https://github.com/rust-lang/crates.io-index"
 751 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
 752 | dependencies = [
 753 |  "either",
 754 | ]
 755 | 
 756 | [[package]]
 757 | name = "itoa"
 758 | version = "1.0.6"
 759 | source = "registry+https://github.com/rust-lang/crates.io-index"
 760 | checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
 761 | 
 762 | [[package]]
 763 | name = "js-sys"
 764 | version = "0.3.62"
 765 | source = "registry+https://github.com/rust-lang/crates.io-index"
 766 | checksum = "68c16e1bfd491478ab155fd8b4896b86f9ede344949b641e61501e07c2b8b4d5"
 767 | dependencies = [
 768 |  "wasm-bindgen",
 769 | ]
 770 | 
 771 | [[package]]
 772 | name = "lazy_static"
 773 | version = "1.4.0"
 774 | source = "registry+https://github.com/rust-lang/crates.io-index"
 775 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 776 | 
 777 | [[package]]
 778 | name = "libc"
 779 | version = "0.2.144"
 780 | source = "registry+https://github.com/rust-lang/crates.io-index"
 781 | checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
 782 | 
 783 | [[package]]
 784 | name = "linked-hash-map"
 785 | version = "0.5.6"
 786 | source = "registry+https://github.com/rust-lang/crates.io-index"
 787 | checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
 788 | 
 789 | [[package]]
 790 | name = "linux-raw-sys"
 791 | version = "0.3.7"
 792 | source = "registry+https://github.com/rust-lang/crates.io-index"
 793 | checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
 794 | 
 795 | [[package]]
 796 | name = "log"
 797 | version = "0.4.17"
 798 | source = "registry+https://github.com/rust-lang/crates.io-index"
 799 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
 800 | dependencies = [
 801 |  "cfg-if",
 802 | ]
 803 | 
 804 | [[package]]
 805 | name = "memchr"
 806 | version = "2.5.0"
 807 | source = "registry+https://github.com/rust-lang/crates.io-index"
 808 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
 809 | 
 810 | [[package]]
 811 | name = "memoffset"
 812 | version = "0.8.0"
 813 | source = "registry+https://github.com/rust-lang/crates.io-index"
 814 | checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
 815 | dependencies = [
 816 |  "autocfg",
 817 | ]
 818 | 
 819 | [[package]]
 820 | name = "mime"
 821 | version = "0.3.17"
 822 | source = "registry+https://github.com/rust-lang/crates.io-index"
 823 | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 824 | 
 825 | [[package]]
 826 | name = "mio"
 827 | version = "0.8.6"
 828 | source = "registry+https://github.com/rust-lang/crates.io-index"
 829 | checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9"
 830 | dependencies = [
 831 |  "libc",
 832 |  "log",
 833 |  "wasi",
 834 |  "windows-sys 0.45.0",
 835 | ]
 836 | 
 837 | [[package]]
 838 | name = "native-tls"
 839 | version = "0.2.11"
 840 | source = "registry+https://github.com/rust-lang/crates.io-index"
 841 | checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
 842 | dependencies = [
 843 |  "lazy_static",
 844 |  "libc",
 845 |  "log",
 846 |  "openssl",
 847 |  "openssl-probe",
 848 |  "openssl-sys",
 849 |  "schannel",
 850 |  "security-framework",
 851 |  "security-framework-sys",
 852 |  "tempfile",
 853 | ]
 854 | 
 855 | [[package]]
 856 | name = "num-traits"
 857 | version = "0.2.15"
 858 | source = "registry+https://github.com/rust-lang/crates.io-index"
 859 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
 860 | dependencies = [
 861 |  "autocfg",
 862 | ]
 863 | 
 864 | [[package]]
 865 | name = "num_cpus"
 866 | version = "1.15.0"
 867 | source = "registry+https://github.com/rust-lang/crates.io-index"
 868 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
 869 | dependencies = [
 870 |  "hermit-abi 0.2.6",
 871 |  "libc",
 872 | ]
 873 | 
 874 | [[package]]
 875 | name = "once_cell"
 876 | version = "1.17.1"
 877 | source = "registry+https://github.com/rust-lang/crates.io-index"
 878 | checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
 879 | 
 880 | [[package]]
 881 | name = "oorandom"
 882 | version = "11.1.3"
 883 | source = "registry+https://github.com/rust-lang/crates.io-index"
 884 | checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
 885 | 
 886 | [[package]]
 887 | name = "openssl"
 888 | version = "0.10.52"
 889 | source = "registry+https://github.com/rust-lang/crates.io-index"
 890 | checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
 891 | dependencies = [
 892 |  "bitflags",
 893 |  "cfg-if",
 894 |  "foreign-types",
 895 |  "libc",
 896 |  "once_cell",
 897 |  "openssl-macros",
 898 |  "openssl-sys",
 899 | ]
 900 | 
 901 | [[package]]
 902 | name = "openssl-macros"
 903 | version = "0.1.1"
 904 | source = "registry+https://github.com/rust-lang/crates.io-index"
 905 | checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 906 | dependencies = [
 907 |  "proc-macro2",
 908 |  "quote",
 909 |  "syn",
 910 | ]
 911 | 
 912 | [[package]]
 913 | name = "openssl-probe"
 914 | version = "0.1.5"
 915 | source = "registry+https://github.com/rust-lang/crates.io-index"
 916 | checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 917 | 
 918 | [[package]]
 919 | name = "openssl-sys"
 920 | version = "0.9.87"
 921 | source = "registry+https://github.com/rust-lang/crates.io-index"
 922 | checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
 923 | dependencies = [
 924 |  "cc",
 925 |  "libc",
 926 |  "pkg-config",
 927 |  "vcpkg",
 928 | ]
 929 | 
 930 | [[package]]
 931 | name = "os_str_bytes"
 932 | version = "6.5.0"
 933 | source = "registry+https://github.com/rust-lang/crates.io-index"
 934 | checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267"
 935 | 
 936 | [[package]]
 937 | name = "percent-encoding"
 938 | version = "2.2.0"
 939 | source = "registry+https://github.com/rust-lang/crates.io-index"
 940 | checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
 941 | 
 942 | [[package]]
 943 | name = "pest"
 944 | version = "2.6.0"
 945 | source = "registry+https://github.com/rust-lang/crates.io-index"
 946 | checksum = "e68e84bfb01f0507134eac1e9b410a12ba379d064eab48c50ba4ce329a527b70"
 947 | dependencies = [
 948 |  "thiserror",
 949 |  "ucd-trie",
 950 | ]
 951 | 
 952 | [[package]]
 953 | name = "pest_derive"
 954 | version = "2.6.0"
 955 | source = "registry+https://github.com/rust-lang/crates.io-index"
 956 | checksum = "6b79d4c71c865a25a4322296122e3924d30bc8ee0834c8bfc8b95f7f054afbfb"
 957 | dependencies = [
 958 |  "pest",
 959 |  "pest_generator",
 960 | ]
 961 | 
 962 | [[package]]
 963 | name = "pest_generator"
 964 | version = "2.6.0"
 965 | source = "registry+https://github.com/rust-lang/crates.io-index"
 966 | checksum = "6c435bf1076437b851ebc8edc3a18442796b30f1728ffea6262d59bbe28b077e"
 967 | dependencies = [
 968 |  "pest",
 969 |  "pest_meta",
 970 |  "proc-macro2",
 971 |  "quote",
 972 |  "syn",
 973 | ]
 974 | 
 975 | [[package]]
 976 | name = "pest_meta"
 977 | version = "2.6.0"
 978 | source = "registry+https://github.com/rust-lang/crates.io-index"
 979 | checksum = "745a452f8eb71e39ffd8ee32b3c5f51d03845f99786fa9b68db6ff509c505411"
 980 | dependencies = [
 981 |  "once_cell",
 982 |  "pest",
 983 |  "sha2",
 984 | ]
 985 | 
 986 | [[package]]
 987 | name = "pin-project-lite"
 988 | version = "0.2.9"
 989 | source = "registry+https://github.com/rust-lang/crates.io-index"
 990 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
 991 | 
 992 | [[package]]
 993 | name = "pin-utils"
 994 | version = "0.1.0"
 995 | source = "registry+https://github.com/rust-lang/crates.io-index"
 996 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 997 | 
 998 | [[package]]
 999 | name = "pkg-config"
1000 | version = "0.3.27"
1001 | source = "registry+https://github.com/rust-lang/crates.io-index"
1002 | checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
1003 | 
1004 | [[package]]
1005 | name = "plotters"
1006 | version = "0.3.4"
1007 | source = "registry+https://github.com/rust-lang/crates.io-index"
1008 | checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97"
1009 | dependencies = [
1010 |  "num-traits",
1011 |  "plotters-backend",
1012 |  "plotters-svg",
1013 |  "wasm-bindgen",
1014 |  "web-sys",
1015 | ]
1016 | 
1017 | [[package]]
1018 | name = "plotters-backend"
1019 | version = "0.3.4"
1020 | source = "registry+https://github.com/rust-lang/crates.io-index"
1021 | checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142"
1022 | 
1023 | [[package]]
1024 | name = "plotters-svg"
1025 | version = "0.3.3"
1026 | source = "registry+https://github.com/rust-lang/crates.io-index"
1027 | checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f"
1028 | dependencies = [
1029 |  "plotters-backend",
1030 | ]
1031 | 
1032 | [[package]]
1033 | name = "proc-macro2"
1034 | version = "1.0.56"
1035 | source = "registry+https://github.com/rust-lang/crates.io-index"
1036 | checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435"
1037 | dependencies = [
1038 |  "unicode-ident",
1039 | ]
1040 | 
1041 | [[package]]
1042 | name = "quote"
1043 | version = "1.0.27"
1044 | source = "registry+https://github.com/rust-lang/crates.io-index"
1045 | checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
1046 | dependencies = [
1047 |  "proc-macro2",
1048 | ]
1049 | 
1050 | [[package]]
1051 | name = "rayon"
1052 | version = "1.7.0"
1053 | source = "registry+https://github.com/rust-lang/crates.io-index"
1054 | checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
1055 | dependencies = [
1056 |  "either",
1057 |  "rayon-core",
1058 | ]
1059 | 
1060 | [[package]]
1061 | name = "rayon-core"
1062 | version = "1.11.0"
1063 | source = "registry+https://github.com/rust-lang/crates.io-index"
1064 | checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
1065 | dependencies = [
1066 |  "crossbeam-channel",
1067 |  "crossbeam-deque",
1068 |  "crossbeam-utils",
1069 |  "num_cpus",
1070 | ]
1071 | 
1072 | [[package]]
1073 | name = "redox_syscall"
1074 | version = "0.3.5"
1075 | source = "registry+https://github.com/rust-lang/crates.io-index"
1076 | checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
1077 | dependencies = [
1078 |  "bitflags",
1079 | ]
1080 | 
1081 | [[package]]
1082 | name = "regex"
1083 | version = "1.8.1"
1084 | source = "registry+https://github.com/rust-lang/crates.io-index"
1085 | checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370"
1086 | dependencies = [
1087 |  "regex-syntax",
1088 | ]
1089 | 
1090 | [[package]]
1091 | name = "regex-syntax"
1092 | version = "0.7.1"
1093 | source = "registry+https://github.com/rust-lang/crates.io-index"
1094 | checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
1095 | 
1096 | [[package]]
1097 | name = "reqwest"
1098 | version = "0.11.17"
1099 | source = "registry+https://github.com/rust-lang/crates.io-index"
1100 | checksum = "13293b639a097af28fc8a90f22add145a9c954e49d77da06263d58cf44d5fb91"
1101 | dependencies = [
1102 |  "base64",
1103 |  "bytes",
1104 |  "encoding_rs",
1105 |  "futures-core",
1106 |  "futures-util",
1107 |  "h2",
1108 |  "http",
1109 |  "http-body",
1110 |  "hyper",
1111 |  "hyper-rustls",
1112 |  "hyper-tls",
1113 |  "ipnet",
1114 |  "js-sys",
1115 |  "log",
1116 |  "mime",
1117 |  "native-tls",
1118 |  "once_cell",
1119 |  "percent-encoding",
1120 |  "pin-project-lite",
1121 |  "rustls",
1122 |  "rustls-pemfile",
1123 |  "serde",
1124 |  "serde_json",
1125 |  "serde_urlencoded",
1126 |  "tokio",
1127 |  "tokio-native-tls",
1128 |  "tokio-rustls",
1129 |  "tower-service",
1130 |  "url",
1131 |  "wasm-bindgen",
1132 |  "wasm-bindgen-futures",
1133 |  "web-sys",
1134 |  "webpki-roots",
1135 |  "winreg",
1136 | ]
1137 | 
1138 | [[package]]
1139 | name = "ring"
1140 | version = "0.16.20"
1141 | source = "registry+https://github.com/rust-lang/crates.io-index"
1142 | checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
1143 | dependencies = [
1144 |  "cc",
1145 |  "libc",
1146 |  "once_cell",
1147 |  "spin",
1148 |  "untrusted",
1149 |  "web-sys",
1150 |  "winapi",
1151 | ]
1152 | 
1153 | [[package]]
1154 | name = "rustix"
1155 | version = "0.37.19"
1156 | source = "registry+https://github.com/rust-lang/crates.io-index"
1157 | checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
1158 | dependencies = [
1159 |  "bitflags",
1160 |  "errno",
1161 |  "io-lifetimes",
1162 |  "libc",
1163 |  "linux-raw-sys",
1164 |  "windows-sys 0.48.0",
1165 | ]
1166 | 
1167 | [[package]]
1168 | name = "rustls"
1169 | version = "0.20.8"
1170 | source = "registry+https://github.com/rust-lang/crates.io-index"
1171 | checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f"
1172 | dependencies = [
1173 |  "log",
1174 |  "ring",
1175 |  "sct",
1176 |  "webpki",
1177 | ]
1178 | 
1179 | [[package]]
1180 | name = "rustls-pemfile"
1181 | version = "1.0.2"
1182 | source = "registry+https://github.com/rust-lang/crates.io-index"
1183 | checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
1184 | dependencies = [
1185 |  "base64",
1186 | ]
1187 | 
1188 | [[package]]
1189 | name = "ryu"
1190 | version = "1.0.13"
1191 | source = "registry+https://github.com/rust-lang/crates.io-index"
1192 | checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
1193 | 
1194 | [[package]]
1195 | name = "same-file"
1196 | version = "1.0.6"
1197 | source = "registry+https://github.com/rust-lang/crates.io-index"
1198 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
1199 | dependencies = [
1200 |  "winapi-util",
1201 | ]
1202 | 
1203 | [[package]]
1204 | name = "schannel"
1205 | version = "0.1.21"
1206 | source = "registry+https://github.com/rust-lang/crates.io-index"
1207 | checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
1208 | dependencies = [
1209 |  "windows-sys 0.42.0",
1210 | ]
1211 | 
1212 | [[package]]
1213 | name = "scopeguard"
1214 | version = "1.1.0"
1215 | source = "registry+https://github.com/rust-lang/crates.io-index"
1216 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
1217 | 
1218 | [[package]]
1219 | name = "sct"
1220 | version = "0.7.0"
1221 | source = "registry+https://github.com/rust-lang/crates.io-index"
1222 | checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
1223 | dependencies = [
1224 |  "ring",
1225 |  "untrusted",
1226 | ]
1227 | 
1228 | [[package]]
1229 | name = "security-framework"
1230 | version = "2.8.2"
1231 | source = "registry+https://github.com/rust-lang/crates.io-index"
1232 | checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254"
1233 | dependencies = [
1234 |  "bitflags",
1235 |  "core-foundation",
1236 |  "core-foundation-sys",
1237 |  "libc",
1238 |  "security-framework-sys",
1239 | ]
1240 | 
1241 | [[package]]
1242 | name = "security-framework-sys"
1243 | version = "2.8.0"
1244 | source = "registry+https://github.com/rust-lang/crates.io-index"
1245 | checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4"
1246 | dependencies = [
1247 |  "core-foundation-sys",
1248 |  "libc",
1249 | ]
1250 | 
1251 | [[package]]
1252 | name = "serde"
1253 | version = "1.0.163"
1254 | source = "registry+https://github.com/rust-lang/crates.io-index"
1255 | checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
1256 | dependencies = [
1257 |  "serde_derive",
1258 | ]
1259 | 
1260 | [[package]]
1261 | name = "serde_derive"
1262 | version = "1.0.163"
1263 | source = "registry+https://github.com/rust-lang/crates.io-index"
1264 | checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e"
1265 | dependencies = [
1266 |  "proc-macro2",
1267 |  "quote",
1268 |  "syn",
1269 | ]
1270 | 
1271 | [[package]]
1272 | name = "serde_json"
1273 | version = "1.0.96"
1274 | source = "registry+https://github.com/rust-lang/crates.io-index"
1275 | checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
1276 | dependencies = [
1277 |  "itoa",
1278 |  "ryu",
1279 |  "serde",
1280 | ]
1281 | 
1282 | [[package]]
1283 | name = "serde_urlencoded"
1284 | version = "0.7.1"
1285 | source = "registry+https://github.com/rust-lang/crates.io-index"
1286 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
1287 | dependencies = [
1288 |  "form_urlencoded",
1289 |  "itoa",
1290 |  "ryu",
1291 |  "serde",
1292 | ]
1293 | 
1294 | [[package]]
1295 | name = "sha2"
1296 | version = "0.10.6"
1297 | source = "registry+https://github.com/rust-lang/crates.io-index"
1298 | checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
1299 | dependencies = [
1300 |  "cfg-if",
1301 |  "cpufeatures",
1302 |  "digest",
1303 | ]
1304 | 
1305 | [[package]]
1306 | name = "similar"
1307 | version = "2.2.1"
1308 | source = "registry+https://github.com/rust-lang/crates.io-index"
1309 | checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf"
1310 | 
1311 | [[package]]
1312 | name = "slab"
1313 | version = "0.4.8"
1314 | source = "registry+https://github.com/rust-lang/crates.io-index"
1315 | checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d"
1316 | dependencies = [
1317 |  "autocfg",
1318 | ]
1319 | 
1320 | [[package]]
1321 | name = "socket2"
1322 | version = "0.4.9"
1323 | source = "registry+https://github.com/rust-lang/crates.io-index"
1324 | checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
1325 | dependencies = [
1326 |  "libc",
1327 |  "winapi",
1328 | ]
1329 | 
1330 | [[package]]
1331 | name = "spin"
1332 | version = "0.5.2"
1333 | source = "registry+https://github.com/rust-lang/crates.io-index"
1334 | checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
1335 | 
1336 | [[package]]
1337 | name = "strsim"
1338 | version = "0.10.0"
1339 | source = "registry+https://github.com/rust-lang/crates.io-index"
1340 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
1341 | 
1342 | [[package]]
1343 | name = "syn"
1344 | version = "2.0.15"
1345 | source = "registry+https://github.com/rust-lang/crates.io-index"
1346 | checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822"
1347 | dependencies = [
1348 |  "proc-macro2",
1349 |  "quote",
1350 |  "unicode-ident",
1351 | ]
1352 | 
1353 | [[package]]
1354 | name = "tempfile"
1355 | version = "3.5.0"
1356 | source = "registry+https://github.com/rust-lang/crates.io-index"
1357 | checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998"
1358 | dependencies = [
1359 |  "cfg-if",
1360 |  "fastrand",
1361 |  "redox_syscall",
1362 |  "rustix",
1363 |  "windows-sys 0.45.0",
1364 | ]
1365 | 
1366 | [[package]]
1367 | name = "textwrap"
1368 | version = "0.16.0"
1369 | source = "registry+https://github.com/rust-lang/crates.io-index"
1370 | checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
1371 | 
1372 | [[package]]
1373 | name = "thiserror"
1374 | version = "1.0.40"
1375 | source = "registry+https://github.com/rust-lang/crates.io-index"
1376 | checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
1377 | dependencies = [
1378 |  "thiserror-impl",
1379 | ]
1380 | 
1381 | [[package]]
1382 | name = "thiserror-impl"
1383 | version = "1.0.40"
1384 | source = "registry+https://github.com/rust-lang/crates.io-index"
1385 | checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
1386 | dependencies = [
1387 |  "proc-macro2",
1388 |  "quote",
1389 |  "syn",
1390 | ]
1391 | 
1392 | [[package]]
1393 | name = "tinytemplate"
1394 | version = "1.2.1"
1395 | source = "registry+https://github.com/rust-lang/crates.io-index"
1396 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
1397 | dependencies = [
1398 |  "serde",
1399 |  "serde_json",
1400 | ]
1401 | 
1402 | [[package]]
1403 | name = "tinyvec"
1404 | version = "1.6.0"
1405 | source = "registry+https://github.com/rust-lang/crates.io-index"
1406 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
1407 | dependencies = [
1408 |  "tinyvec_macros",
1409 | ]
1410 | 
1411 | [[package]]
1412 | name = "tinyvec_macros"
1413 | version = "0.1.1"
1414 | source = "registry+https://github.com/rust-lang/crates.io-index"
1415 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
1416 | 
1417 | [[package]]
1418 | name = "tokio"
1419 | version = "1.28.1"
1420 | source = "registry+https://github.com/rust-lang/crates.io-index"
1421 | checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105"
1422 | dependencies = [
1423 |  "autocfg",
1424 |  "bytes",
1425 |  "libc",
1426 |  "mio",
1427 |  "num_cpus",
1428 |  "pin-project-lite",
1429 |  "socket2",
1430 |  "windows-sys 0.48.0",
1431 | ]
1432 | 
1433 | [[package]]
1434 | name = "tokio-native-tls"
1435 | version = "0.3.1"
1436 | source = "registry+https://github.com/rust-lang/crates.io-index"
1437 | checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
1438 | dependencies = [
1439 |  "native-tls",
1440 |  "tokio",
1441 | ]
1442 | 
1443 | [[package]]
1444 | name = "tokio-rustls"
1445 | version = "0.23.4"
1446 | source = "registry+https://github.com/rust-lang/crates.io-index"
1447 | checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
1448 | dependencies = [
1449 |  "rustls",
1450 |  "tokio",
1451 |  "webpki",
1452 | ]
1453 | 
1454 | [[package]]
1455 | name = "tokio-util"
1456 | version = "0.7.8"
1457 | source = "registry+https://github.com/rust-lang/crates.io-index"
1458 | checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d"
1459 | dependencies = [
1460 |  "bytes",
1461 |  "futures-core",
1462 |  "futures-sink",
1463 |  "pin-project-lite",
1464 |  "tokio",
1465 |  "tracing",
1466 | ]
1467 | 
1468 | [[package]]
1469 | name = "tower-service"
1470 | version = "0.3.2"
1471 | source = "registry+https://github.com/rust-lang/crates.io-index"
1472 | checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
1473 | 
1474 | [[package]]
1475 | name = "tracing"
1476 | version = "0.1.37"
1477 | source = "registry+https://github.com/rust-lang/crates.io-index"
1478 | checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
1479 | dependencies = [
1480 |  "cfg-if",
1481 |  "pin-project-lite",
1482 |  "tracing-core",
1483 | ]
1484 | 
1485 | [[package]]
1486 | name = "tracing-core"
1487 | version = "0.1.30"
1488 | source = "registry+https://github.com/rust-lang/crates.io-index"
1489 | checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
1490 | dependencies = [
1491 |  "once_cell",
1492 | ]
1493 | 
1494 | [[package]]
1495 | name = "try-lock"
1496 | version = "0.2.4"
1497 | source = "registry+https://github.com/rust-lang/crates.io-index"
1498 | checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
1499 | 
1500 | [[package]]
1501 | name = "typenum"
1502 | version = "1.16.0"
1503 | source = "registry+https://github.com/rust-lang/crates.io-index"
1504 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
1505 | 
1506 | [[package]]
1507 | name = "ucd-trie"
1508 | version = "0.1.5"
1509 | source = "registry+https://github.com/rust-lang/crates.io-index"
1510 | checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81"
1511 | 
1512 | [[package]]
1513 | name = "unicode-bidi"
1514 | version = "0.3.13"
1515 | source = "registry+https://github.com/rust-lang/crates.io-index"
1516 | checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
1517 | 
1518 | [[package]]
1519 | name = "unicode-ident"
1520 | version = "1.0.8"
1521 | source = "registry+https://github.com/rust-lang/crates.io-index"
1522 | checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
1523 | 
1524 | [[package]]
1525 | name = "unicode-normalization"
1526 | version = "0.1.22"
1527 | source = "registry+https://github.com/rust-lang/crates.io-index"
1528 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
1529 | dependencies = [
1530 |  "tinyvec",
1531 | ]
1532 | 
1533 | [[package]]
1534 | name = "untrusted"
1535 | version = "0.7.1"
1536 | source = "registry+https://github.com/rust-lang/crates.io-index"
1537 | checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
1538 | 
1539 | [[package]]
1540 | name = "url"
1541 | version = "2.3.1"
1542 | source = "registry+https://github.com/rust-lang/crates.io-index"
1543 | checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643"
1544 | dependencies = [
1545 |  "form_urlencoded",
1546 |  "idna",
1547 |  "percent-encoding",
1548 | ]
1549 | 
1550 | [[package]]
1551 | name = "utf8parse"
1552 | version = "0.2.1"
1553 | source = "registry+https://github.com/rust-lang/crates.io-index"
1554 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
1555 | 
1556 | [[package]]
1557 | name = "vcpkg"
1558 | version = "0.2.15"
1559 | source = "registry+https://github.com/rust-lang/crates.io-index"
1560 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
1561 | 
1562 | [[package]]
1563 | name = "version_check"
1564 | version = "0.9.4"
1565 | source = "registry+https://github.com/rust-lang/crates.io-index"
1566 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
1567 | 
1568 | [[package]]
1569 | name = "walkdir"
1570 | version = "2.3.3"
1571 | source = "registry+https://github.com/rust-lang/crates.io-index"
1572 | checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
1573 | dependencies = [
1574 |  "same-file",
1575 |  "winapi-util",
1576 | ]
1577 | 
1578 | [[package]]
1579 | name = "want"
1580 | version = "0.3.0"
1581 | source = "registry+https://github.com/rust-lang/crates.io-index"
1582 | checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
1583 | dependencies = [
1584 |  "log",
1585 |  "try-lock",
1586 | ]
1587 | 
1588 | [[package]]
1589 | name = "wasi"
1590 | version = "0.11.0+wasi-snapshot-preview1"
1591 | source = "registry+https://github.com/rust-lang/crates.io-index"
1592 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
1593 | 
1594 | [[package]]
1595 | name = "wasm-bindgen"
1596 | version = "0.2.85"
1597 | source = "registry+https://github.com/rust-lang/crates.io-index"
1598 | checksum = "5b6cb788c4e39112fbe1822277ef6fb3c55cd86b95cb3d3c4c1c9597e4ac74b4"
1599 | dependencies = [
1600 |  "cfg-if",
1601 |  "wasm-bindgen-macro",
1602 | ]
1603 | 
1604 | [[package]]
1605 | name = "wasm-bindgen-backend"
1606 | version = "0.2.85"
1607 | source = "registry+https://github.com/rust-lang/crates.io-index"
1608 | checksum = "35e522ed4105a9d626d885b35d62501b30d9666283a5c8be12c14a8bdafe7822"
1609 | dependencies = [
1610 |  "bumpalo",
1611 |  "log",
1612 |  "once_cell",
1613 |  "proc-macro2",
1614 |  "quote",
1615 |  "syn",
1616 |  "wasm-bindgen-shared",
1617 | ]
1618 | 
1619 | [[package]]
1620 | name = "wasm-bindgen-futures"
1621 | version = "0.4.35"
1622 | source = "registry+https://github.com/rust-lang/crates.io-index"
1623 | checksum = "083abe15c5d88556b77bdf7aef403625be9e327ad37c62c4e4129af740168163"
1624 | dependencies = [
1625 |  "cfg-if",
1626 |  "js-sys",
1627 |  "wasm-bindgen",
1628 |  "web-sys",
1629 | ]
1630 | 
1631 | [[package]]
1632 | name = "wasm-bindgen-macro"
1633 | version = "0.2.85"
1634 | source = "registry+https://github.com/rust-lang/crates.io-index"
1635 | checksum = "358a79a0cb89d21db8120cbfb91392335913e4890665b1a7981d9e956903b434"
1636 | dependencies = [
1637 |  "quote",
1638 |  "wasm-bindgen-macro-support",
1639 | ]
1640 | 
1641 | [[package]]
1642 | name = "wasm-bindgen-macro-support"
1643 | version = "0.2.85"
1644 | source = "registry+https://github.com/rust-lang/crates.io-index"
1645 | checksum = "4783ce29f09b9d93134d41297aded3a712b7b979e9c6f28c32cb88c973a94869"
1646 | dependencies = [
1647 |  "proc-macro2",
1648 |  "quote",
1649 |  "syn",
1650 |  "wasm-bindgen-backend",
1651 |  "wasm-bindgen-shared",
1652 | ]
1653 | 
1654 | [[package]]
1655 | name = "wasm-bindgen-shared"
1656 | version = "0.2.85"
1657 | source = "registry+https://github.com/rust-lang/crates.io-index"
1658 | checksum = "a901d592cafaa4d711bc324edfaff879ac700b19c3dfd60058d2b445be2691eb"
1659 | 
1660 | [[package]]
1661 | name = "web-sys"
1662 | version = "0.3.62"
1663 | source = "registry+https://github.com/rust-lang/crates.io-index"
1664 | checksum = "16b5f940c7edfdc6d12126d98c9ef4d1b3d470011c47c76a6581df47ad9ba721"
1665 | dependencies = [
1666 |  "js-sys",
1667 |  "wasm-bindgen",
1668 | ]
1669 | 
1670 | [[package]]
1671 | name = "webpki"
1672 | version = "0.22.0"
1673 | source = "registry+https://github.com/rust-lang/crates.io-index"
1674 | checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd"
1675 | dependencies = [
1676 |  "ring",
1677 |  "untrusted",
1678 | ]
1679 | 
1680 | [[package]]
1681 | name = "webpki-roots"
1682 | version = "0.22.6"
1683 | source = "registry+https://github.com/rust-lang/crates.io-index"
1684 | checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87"
1685 | dependencies = [
1686 |  "webpki",
1687 | ]
1688 | 
1689 | [[package]]
1690 | name = "winapi"
1691 | version = "0.3.9"
1692 | source = "registry+https://github.com/rust-lang/crates.io-index"
1693 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
1694 | dependencies = [
1695 |  "winapi-i686-pc-windows-gnu",
1696 |  "winapi-x86_64-pc-windows-gnu",
1697 | ]
1698 | 
1699 | [[package]]
1700 | name = "winapi-i686-pc-windows-gnu"
1701 | version = "0.4.0"
1702 | source = "registry+https://github.com/rust-lang/crates.io-index"
1703 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
1704 | 
1705 | [[package]]
1706 | name = "winapi-util"
1707 | version = "0.1.5"
1708 | source = "registry+https://github.com/rust-lang/crates.io-index"
1709 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
1710 | dependencies = [
1711 |  "winapi",
1712 | ]
1713 | 
1714 | [[package]]
1715 | name = "winapi-x86_64-pc-windows-gnu"
1716 | version = "0.4.0"
1717 | source = "registry+https://github.com/rust-lang/crates.io-index"
1718 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
1719 | 
1720 | [[package]]
1721 | name = "windows-sys"
1722 | version = "0.42.0"
1723 | source = "registry+https://github.com/rust-lang/crates.io-index"
1724 | checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
1725 | dependencies = [
1726 |  "windows_aarch64_gnullvm 0.42.2",
1727 |  "windows_aarch64_msvc 0.42.2",
1728 |  "windows_i686_gnu 0.42.2",
1729 |  "windows_i686_msvc 0.42.2",
1730 |  "windows_x86_64_gnu 0.42.2",
1731 |  "windows_x86_64_gnullvm 0.42.2",
1732 |  "windows_x86_64_msvc 0.42.2",
1733 | ]
1734 | 
1735 | [[package]]
1736 | name = "windows-sys"
1737 | version = "0.45.0"
1738 | source = "registry+https://github.com/rust-lang/crates.io-index"
1739 | checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
1740 | dependencies = [
1741 |  "windows-targets 0.42.2",
1742 | ]
1743 | 
1744 | [[package]]
1745 | name = "windows-sys"
1746 | version = "0.48.0"
1747 | source = "registry+https://github.com/rust-lang/crates.io-index"
1748 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
1749 | dependencies = [
1750 |  "windows-targets 0.48.0",
1751 | ]
1752 | 
1753 | [[package]]
1754 | name = "windows-targets"
1755 | version = "0.42.2"
1756 | source = "registry+https://github.com/rust-lang/crates.io-index"
1757 | checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
1758 | dependencies = [
1759 |  "windows_aarch64_gnullvm 0.42.2",
1760 |  "windows_aarch64_msvc 0.42.2",
1761 |  "windows_i686_gnu 0.42.2",
1762 |  "windows_i686_msvc 0.42.2",
1763 |  "windows_x86_64_gnu 0.42.2",
1764 |  "windows_x86_64_gnullvm 0.42.2",
1765 |  "windows_x86_64_msvc 0.42.2",
1766 | ]
1767 | 
1768 | [[package]]
1769 | name = "windows-targets"
1770 | version = "0.48.0"
1771 | source = "registry+https://github.com/rust-lang/crates.io-index"
1772 | checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
1773 | dependencies = [
1774 |  "windows_aarch64_gnullvm 0.48.0",
1775 |  "windows_aarch64_msvc 0.48.0",
1776 |  "windows_i686_gnu 0.48.0",
1777 |  "windows_i686_msvc 0.48.0",
1778 |  "windows_x86_64_gnu 0.48.0",
1779 |  "windows_x86_64_gnullvm 0.48.0",
1780 |  "windows_x86_64_msvc 0.48.0",
1781 | ]
1782 | 
1783 | [[package]]
1784 | name = "windows_aarch64_gnullvm"
1785 | version = "0.42.2"
1786 | source = "registry+https://github.com/rust-lang/crates.io-index"
1787 | checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
1788 | 
1789 | [[package]]
1790 | name = "windows_aarch64_gnullvm"
1791 | version = "0.48.0"
1792 | source = "registry+https://github.com/rust-lang/crates.io-index"
1793 | checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
1794 | 
1795 | [[package]]
1796 | name = "windows_aarch64_msvc"
1797 | version = "0.42.2"
1798 | source = "registry+https://github.com/rust-lang/crates.io-index"
1799 | checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
1800 | 
1801 | [[package]]
1802 | name = "windows_aarch64_msvc"
1803 | version = "0.48.0"
1804 | source = "registry+https://github.com/rust-lang/crates.io-index"
1805 | checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
1806 | 
1807 | [[package]]
1808 | name = "windows_i686_gnu"
1809 | version = "0.42.2"
1810 | source = "registry+https://github.com/rust-lang/crates.io-index"
1811 | checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
1812 | 
1813 | [[package]]
1814 | name = "windows_i686_gnu"
1815 | version = "0.48.0"
1816 | source = "registry+https://github.com/rust-lang/crates.io-index"
1817 | checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
1818 | 
1819 | [[package]]
1820 | name = "windows_i686_msvc"
1821 | version = "0.42.2"
1822 | source = "registry+https://github.com/rust-lang/crates.io-index"
1823 | checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
1824 | 
1825 | [[package]]
1826 | name = "windows_i686_msvc"
1827 | version = "0.48.0"
1828 | source = "registry+https://github.com/rust-lang/crates.io-index"
1829 | checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
1830 | 
1831 | [[package]]
1832 | name = "windows_x86_64_gnu"
1833 | version = "0.42.2"
1834 | source = "registry+https://github.com/rust-lang/crates.io-index"
1835 | checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
1836 | 
1837 | [[package]]
1838 | name = "windows_x86_64_gnu"
1839 | version = "0.48.0"
1840 | source = "registry+https://github.com/rust-lang/crates.io-index"
1841 | checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
1842 | 
1843 | [[package]]
1844 | name = "windows_x86_64_gnullvm"
1845 | version = "0.42.2"
1846 | source = "registry+https://github.com/rust-lang/crates.io-index"
1847 | checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
1848 | 
1849 | [[package]]
1850 | name = "windows_x86_64_gnullvm"
1851 | version = "0.48.0"
1852 | source = "registry+https://github.com/rust-lang/crates.io-index"
1853 | checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
1854 | 
1855 | [[package]]
1856 | name = "windows_x86_64_msvc"
1857 | version = "0.42.2"
1858 | source = "registry+https://github.com/rust-lang/crates.io-index"
1859 | checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
1860 | 
1861 | [[package]]
1862 | name = "windows_x86_64_msvc"
1863 | version = "0.48.0"
1864 | source = "registry+https://github.com/rust-lang/crates.io-index"
1865 | checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
1866 | 
1867 | [[package]]
1868 | name = "winreg"
1869 | version = "0.10.1"
1870 | source = "registry+https://github.com/rust-lang/crates.io-index"
1871 | checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
1872 | dependencies = [
1873 |  "winapi",
1874 | ]
1875 | 
1876 | [[package]]
1877 | name = "yaml-rust"
1878 | version = "0.4.5"
1879 | source = "registry+https://github.com/rust-lang/crates.io-index"
1880 | checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
1881 | dependencies = [
1882 |  "linked-hash-map",
1883 | ]
1884 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "html_parser"
 3 | version = "0.7.0"
 4 | authors = ["Mathias Iversen <work@mathiasiversen.com>"]
 5 | edition = "2018"
 6 | repository = "https://github.com/mathiversen/html-parser"
 7 | license = "MIT"
 8 | description = "A simple and general purpose html/xhtml parser"
 9 | keywords = ["html", "parser", "json", "pest", "dom"]
10 | categories = ["parsing", "web-programming"]
11 | readme = "README.md"
12 | 
13 | [dependencies]
14 | pest = "2.5.7"
15 | pest_derive = "2.5.7"
16 | thiserror = "1.0.40"
17 | serde = { version = "1.0.159", features = ["derive"] }
18 | serde_derive = "1.0.159"
19 | serde_json = "1.0.95"
20 | doc-comment = "0.3.3"
21 | 
22 | [dev-dependencies]
23 | indoc = "2.0.1"
24 | insta = { version = "1.29.0", features = ["json"]}
25 | tempfile = "3.5.0"
26 | criterion = "0.4.0"
27 | reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"] }
28 | clap = { version = "4.2.1", features = ["derive"] }
29 | 
30 | [[example]]
31 | name = "get_all_href"
32 | path = "examples/get_all_href/main.rs"
33 | 
34 | [[example]]
35 | name = "simple_parser"
36 | path = "examples/simple_parser/main.rs"
37 | 
38 | [[bench]]
39 | name = "bench_wikipedia"
40 | harness = false
41 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Mathias Iversen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Html parser
 2 | 
 3 | A simple and general purpose html/xhtml parser lib/bin, using [Pest](https://pest.rs/).
 4 | 
 5 | ## Features
 6 | 
 7 | - Parse html & xhtml (not xml processing instructions)
 8 | - Parse html-documents
 9 | - Parse html-fragments
10 | - Parse empty documents
11 | - Parse with the same api for both documents and fragments
12 | - Parse custom, non-standard, elements; `<cat/>`, `<Cat/>` and `<C4-t/>`
13 | - Removes comments
14 | - Removes dangling elements
15 | - Iterate over all nodes in the dom tree
16 | 
17 | ## What is it not
18 | 
19 | - It's not a high-performance browser-grade parser
20 | - It's not suitable for html validation
21 | - It's not a parser that includes element selection or dom manipulation
22 | 
23 | If your requirements match any of the above, then you're most likely looking for one of the crates below:
24 | 
25 | - [html5ever](https://crates.io/crates/html5ever)
26 | - [kuchiki](https://crates.io/crates/kuchiki)
27 | - [scraper](https://crates.io/crates/scraper)
28 | - or other crates using the `html5ever` parser
29 | 
30 | ## Examples bin
31 | 
32 | Parse html file
33 | 
34 | ```shell
35 | html_parser index.html
36 | 
37 | ```
38 | 
39 | Parse stdin with pretty output
40 | 
41 | ```shell
42 | curl <website> | html_parser -p
43 | ```
44 | 
45 | ## Examples lib
46 | 
47 | Parse html document
48 | 
49 | ```rust
50 |     use html_parser::Dom;
51 | 
52 |     fn main() {
53 |         let html = r#"
54 |             <!doctype html>
55 |             <html lang="en">
56 |                 <head>
57 |                     <meta charset="utf-8">
58 |                     <title>Html parser</title>
59 |                 </head>
60 |                 <body>
61 |                     <h1 id="a" class="b c">Hello world</h1>
62 |                     </h1> <!-- comments & dangling elements are ignored -->
63 |                 </body>
64 |             </html>"#;
65 | 
66 |         assert!(Dom::parse(html).is_ok());
67 |     }
68 | ```
69 | 
70 | Parse html fragment
71 | 
72 | ```rust
73 |     use html_parser::Dom;
74 | 
75 |     fn main() {
76 |         let html = "<div id=cat />";
77 |         assert!(Dom::parse(html).is_ok());
78 |     }
79 | ```
80 | 
81 | Print to json
82 | 
83 | ```rust
84 |     use html_parser::{Dom, Result};
85 | 
86 |     fn main() -> Result<()> {
87 |         let html = "<div id=cat />";
88 |         let json = Dom::parse(html)?.to_json_pretty()?;
89 |         println!("{}", json);
90 |         Ok(())
91 |     }
92 | ```
93 | 


--------------------------------------------------------------------------------
/benches/bench_wikipedia.rs:
--------------------------------------------------------------------------------
 1 | use criterion::{criterion_group, criterion_main, Criterion};
 2 | use html_parser::Dom;
 3 | 
 4 | static HTML: &'static str = include_str!("./wikipedia-2020-12-21.html");
 5 | 
 6 | /// Criterion benchmark that measures a full `Dom::parse` of the bundled Wikipedia snapshot.
 7 | fn wikipedia(c: &mut Criterion) {
 8 |     c.bench_function("wikipedia", |bencher| {
 9 |         bencher.iter(|| Dom::parse(HTML).unwrap())
10 |     });
11 | }
 9 | 
10 | criterion_group!(benches, wikipedia);
11 | criterion_main!(benches);
12 | 


--------------------------------------------------------------------------------
/examples/get_all_href/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |     <title>Can you get all the links?</title>
 7 | </head>
 8 | <body>
 9 |     <main>
10 |         <ul>
11 |             <li><a href="www.volvo.com">Volvo</a></li>
12 |             <li><a href="www.spotify.com">Spotify</a></li>
13 |             <li><a href="www.ikea.com">IKEA</a></li>
14 |             <li><a href="www.hm.com">HM</a></li>
15 |         </ul>
16 |     </main>
17 | </body>
18 | </html>


--------------------------------------------------------------------------------
/examples/get_all_href/main.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Node, Result};
 2 | 
 3 | // This example illustrates how to use the library to get all of the anchor-hrefs from a document.
 4 | 
 5 | /// Prints the `href` of every `<a>` element found in the bundled `index.html`.
 6 | ///
 7 | /// Returns an error when the document can not be parsed.
 8 | fn main() -> Result<()> {
 9 |     let html = include_str!("./index.html");
10 |     let dom = Dom::parse(html)?;
11 | 
12 |     // Walk every root node instead of unwrapping the first one: an empty dom no longer panics
13 |     // and a leading comment can not hide the rest of the tree.
14 |     let hrefs = dom
15 |         .children
16 |         .iter()
17 |         .flat_map(|root| root.into_iter())
18 |         .filter_map(|node| match node {
19 |             // `get` instead of indexing, so an anchor without a href is skipped rather than
20 |             // causing a panic. An href without a value (`None`) is skipped as well.
21 |             Node::Element(element) if element.name == "a" => {
22 |                 element.attributes.get("href").and_then(|href| href.clone())
23 |             }
24 |             _ => None,
25 |         });
26 | 
27 |     println!("\nThe following links were found:");
28 |     for (index, href) in hrefs.enumerate() {
29 |         println!("{}: {}", index + 1, href)
30 |     }
31 | 
32 |     Ok(())
33 | }
22 | 


--------------------------------------------------------------------------------
/examples/simple_parser/main.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use html_parser::{Dom, Result};
 3 | use std::{
 4 |     fs::File,
 5 |     io::{self, Read},
 6 |     path::PathBuf,
 7 | };
 8 | 
 9 | #[derive(Debug, Parser)] // NOTE(review): the `///` comments below presumably feed clap's help
10 | /// A simple and general purpose html/xhtml parser.
11 | struct Opt {
12 |     #[arg(short, long)]
13 |     /// Pretty-print the output.
14 |     pretty_print: bool, // when set, main() prints to_json_pretty() instead of to_json()
15 | 
16 |     #[arg(short, long)]
17 |     /// Debug the parser, this will print errors to the console.
18 |     debug: bool, // when set, main() prints every entry of dom.errors before the json
19 | 
20 |     /// Path to the file, or stdin (piped content).
21 |     ///
22 |     /// This argument can either be a path to the html-file that you would like to parse or the
23 |     /// result of stdin. Note: Content over stdin needs to be finite, for now, as it is collected
24 |     /// into a string and then processed by the parser.
25 |     input: Option<PathBuf>, // None makes main() read the html from stdin
26 | }
27 | 
28 | /// Reads html from the given path (or from stdin), parses it and prints the dom as json.
29 | ///
30 | /// Returns an error when the input can not be read, parsed or serialized.
31 | fn main() -> Result<()> {
32 |     let opt = Opt::parse();
33 | 
34 |     let mut content = String::with_capacity(100_000);
35 | 
36 |     match opt.input {
37 |         // If input is provided then use that as a path
38 |         Some(path) => {
39 |             File::open(path)?.read_to_string(&mut content)?;
40 |         }
41 |         // Else read from stdin, this enables piping
42 |         // ex: `cat index.html | html_parser`
43 |         None => {
44 |             io::stdin().lock().read_to_string(&mut content)?;
45 |         }
46 |     }
47 | 
48 |     let dom = Dom::parse(&content)?;
49 | 
50 |     // Parser errors are only shown on request
51 |     if opt.debug {
52 |         dom.errors
53 |             .iter()
54 |             .for_each(|error| println!("# {}", error));
55 |     }
56 | 
57 |     let json = if opt.pretty_print {
58 |         dom.to_json_pretty()?
59 |     } else {
60 |         dom.to_json()?
61 |     };
62 |     println!("{}", json);
63 | 
64 |     Ok(())
65 | }
62 | 


--------------------------------------------------------------------------------
/src/dom/element.rs:
--------------------------------------------------------------------------------
 1 | use super::node::Node;
 2 | use super::span::SourceSpan;
 3 | use serde::{Serialize, Serializer};
 4 | use std::collections::{BTreeMap, HashMap};
 5 | use std::default::Default;
 6 | use std::result::Result;
 7 | 
 8 | /// Normal: `<div></div>` or Void: `<meta/>` and `<meta>`
 9 | #[derive(Debug, Clone, Serialize, PartialEq)]
10 | #[serde(rename_all = "camelCase")]
11 | // TODO: Align with: https://html.spec.whatwg.org/multipage/syntax.html#elements-2
12 | pub enum ElementVariant {
13 |     /// A normal element can have children, ex: `<div></div>`.
14 |     Normal,
15 |     /// A void element can't have children, ex: `<meta />` and `<meta>`.
16 |     Void,
17 | }
18 | 
19 | pub type Attributes = HashMap<String, Option<String>>; // key -> value, None if no value was given
20 | 
21 | /// Most of the parsed html nodes are elements, except for text and comments
22 | #[derive(Debug, Clone, Serialize, PartialEq)]
23 | #[serde(rename_all = "camelCase")]
24 | pub struct Element {
25 |     /// The value of the element's `id` attribute (omitted from the output when `None`)
26 |     #[serde(skip_serializing_if = "Option::is_none")]
27 |     pub id: Option<String>,
28 | 
29 |     /// The name / tag of the element
30 |     pub name: String,
31 | 
32 |     /// The element variant, if it is of type void or not
33 |     pub variant: ElementVariant,
34 | 
35 |     /// All of the element's attributes, except id and class (serialized with sorted keys)
36 |     #[serde(skip_serializing_if = "HashMap::is_empty")]
37 |     #[serde(serialize_with = "ordered_map")]
38 |     pub attributes: Attributes,
39 | 
40 |     /// All of the element's classes (omitted from the output when empty)
41 |     #[serde(skip_serializing_if = "Vec::is_empty")]
42 |     pub classes: Vec<String>,
43 | 
44 |     /// All of the element's child nodes (omitted from the output when empty)
45 |     #[serde(skip_serializing_if = "Vec::is_empty")]
46 |     pub children: Vec<Node>,
47 | 
48 |     /// Span of the element in the parsed source (skipped by serde)
49 |     #[serde(skip)]
50 |     pub source_span: SourceSpan
51 | }
52 | 
53 | /// The default element is an unnamed `Void` element without id, classes, attributes or children.
54 | impl Default for Element {
55 |     fn default() -> Self {
56 |         Element {
57 |             id: None,
58 |             name: String::new(),
59 |             variant: ElementVariant::Void,
60 |             attributes: Attributes::new(),
61 |             classes: Vec::new(),
62 |             children: Vec::new(),
63 |             source_span: SourceSpan::default(),
64 |         }
65 |     }
66 | }
66 | 
67 | /// Serializes the attributes through a `BTreeMap`, so that the keys are written in sorted
68 | /// order instead of the arbitrary iteration order of the `HashMap`.
69 | fn ordered_map<S: Serializer>(value: &Attributes, serializer: S) -> Result<S::Ok, S::Error> {
70 |     value
71 |         .iter()
72 |         .collect::<BTreeMap<_, _>>()
73 |         .serialize(serializer)
74 | }
71 | 


--------------------------------------------------------------------------------
/src/dom/formatting.rs:
--------------------------------------------------------------------------------
 1 | use crate::error::Error;
 2 | use crate::Result;
 3 | use crate::Rule;
 4 | use pest::error::Error as PestError;
 5 | 
 6 | /// This function abstracts the formatting of errors away from the core logic inside parser,
 7 | /// so that the file is easier to read.
 8 | pub fn error_msg(error: PestError<Rule>) -> Result<super::Dom> {
 9 |     let message = error.renamed_rules(|rule| match *rule {
10 |         Rule::EOI => "end of input".to_string(),
11 |         Rule::doctype => "doctype element".to_string(),
12 |         Rule::node_text => "text node".to_string(),
13 |         Rule::node_element => "element node".to_string(),
14 |         Rule::el_void => "void element".to_string(),
15 |         Rule::el_void_xml => "void element with xml ending (/>)".to_string(),
16 |         Rule::el_process_instruct => "xml processing instruction".to_string(),
17 |         Rule::el_raw_text => "element with raw text (style or script)".to_string(),
18 |         Rule::el_normal => "normal element".to_string(),
19 |         Rule::el_dangling => "".to_string(),
20 |         Rule::attr => "attribute (key=\"value\")".to_string(),
21 |         Rule::attr_key => "attribute key".to_string(),
22 |         Rule::attr_value => "attribute value".to_string(),
23 |         Rule::el_name => "element name".to_string(),
24 |         Rule::el_void_name_html => "void element name".to_string(),
25 |         // TODO: Continue with this
26 |         x => format!("{:?} ", x),
27 |     });
28 |     Err(Error::Parsing(message.to_string()))
29 | }
30 | 


--------------------------------------------------------------------------------
/src/dom/mod.rs:
--------------------------------------------------------------------------------
  1 | use crate::Result;
  2 | use pest::{iterators::Pair, iterators::Pairs, Parser};
  3 | use serde::Serialize;
  4 | use std::default::Default;
  5 | 
  6 | use crate::error::Error;
  7 | use crate::grammar::Grammar;
  8 | use crate::Rule;
  9 | 
 10 | pub mod element;
 11 | pub mod formatting;
 12 | pub mod node;
 13 | pub mod span;
 14 | 
 15 | use crate::dom::span::SourceSpan;
 16 | use element::{Element, ElementVariant};
 17 | use node::Node;
 18 | 
 19 | /// The kind of tree that a parse produced: Document, DocumentFragment or Empty
 20 | #[derive(Debug, Clone, PartialEq, Serialize)]
 21 | #[serde(rename_all = "camelCase")]
 22 | pub enum DomVariant {
 23 |     /// This means that the parsed html had the representation of an html document. The doctype is optional but a document should only have one root node with the name of html.
 24 |     /// Example:
 25 |     /// ```text
 26 |     /// <!doctype html>
 27 |     /// <html>
 28 |     ///     <head></head>
 29 |     ///     <body>
 30 |     ///         <h1>Hello world</h1>
 31 |     ///     </body>
 32 |     /// </html>
 33 |     /// ```
 34 |     Document,
 35 |     /// A document fragment means that the parsed html did not have the representation of a document. A fragment can have multiple root children of any name except html, body or head.
 36 |     /// Example:
 37 |     /// ```text
 38 |     /// <h1>Hello world</h1>
 39 |     /// ```
 40 |     DocumentFragment,
 41 |     /// An empty dom means that parsing produced no nodes at all, ex: the input was empty
 42 |     Empty,
 43 | }
 44 | 
 45 | /// **The main struct** & the result of the parsed html
 46 | #[derive(Debug, Clone, Serialize, PartialEq)]
 47 | #[serde(rename_all = "camelCase")]
 48 | pub struct Dom {
 49 |     /// The type of the tree that was parsed
 50 |     pub tree_type: DomVariant,
 51 | 
 52 |     /// All of the root children in the tree (omitted from the output when empty)
 53 |     #[serde(skip_serializing_if = "Vec::is_empty")]
 54 |     pub children: Vec<Node>,
 55 | 
 56 |     /// A collection of all errors during parsing (never part of the serialized output)
 57 |     #[serde(skip_serializing)]
 58 |     pub errors: Vec<String>,
 59 | }
 60 | 
 61 | /// The default dom is `DomVariant::Empty`, without any children or errors.
 62 | impl Default for Dom {
 63 |     fn default() -> Self {
 64 |         Dom {
 65 |             tree_type: DomVariant::Empty,
 66 |             children: Vec::new(),
 67 |             errors: Vec::new(),
 68 |         }
 69 |     }
 70 | }
 70 | 
 71 | impl Dom {
 72 |     /// Parses `input` with the html grammar and builds the dom from the resulting pairs.
 73 |     ///
 74 |     /// # Errors
 75 |     ///
 76 |     /// Returns the formatted grammar error when the input is rejected by the grammar, or the
 77 |     /// error produced while building and validating the tree.
 78 |     pub fn parse(input: &str) -> Result<Self> {
 79 |         match Grammar::parse(Rule::html, input) {
 80 |             Ok(pairs) => Self::build_dom(pairs),
 81 |             Err(error) => formatting::error_msg(error),
 82 |         }
 83 |     }
 79 | 
 80 |     /// Serializes the dom to a json string, converting a serialization failure into this
 81 |     /// crate's error type.
 82 |     pub fn to_json(&self) -> Result<String> {
 83 |         serde_json::to_string(self).map_err(Error::from)
 84 |     }
 83 | 
 84 |     /// Serializes the dom to a pretty-printed json string, converting a serialization failure
 85 |     /// into this crate's error type.
 86 |     pub fn to_json_pretty(&self) -> Result<String> {
 87 |         serde_json::to_string_pretty(self).map_err(Error::from)
 88 |     }
 87 | 
 88 |     /// Builds the dom from the top-level pairs produced by the grammar and validates the result.
 89 |     ///
 90 |     /// The first pass collects the root nodes and picks a preliminary tree type; the second pass
 91 |     /// checks the root nodes against that type and may still change it.
 92 |     ///
 93 |     /// # Errors
 94 |     ///
 95 |     /// Returns `Error::Parsing` when a document has more than one `<html>` element, or when a
 96 |     /// fragment has a top-level `<head>` or `<body>`, or an `<html>` element that is repeated or
 97 |     /// comes after another element. Tag names are compared case-insensitively. Errors raised
 98 |     /// while building a single root element are collected in `Dom::errors` instead.
 99 |     fn build_dom(pairs: Pairs<Rule>) -> Result<Self> {
100 |         let mut dom = Self::default();
101 | 
102 |         // NOTE: The logic is roughly as follows:
103 |         // 1) A document containing nothing but comments is DomVariant::Empty even though it will have
104 |         //    children in this first pass.  We fix this in the next section.  This allows us to use
105 |         //    DomVariant::Empty to indicate "we haven't decided the type yet".
106 |         // 2) If the type is DomVariant::Empty _so far_, then it can be changed to DomVariant::Document
107 |         //    or DomVariant::DocumentFragment.  DomVariant is only selected in this stage if we see a
108 |         //    DOCTYPE tag.  Comments do not change the type.
109 |         // 3) If the type is non-empty, we don't re-set the type.  We do look for conflicts between
110 |         //    the type and the tokens in the next stage.
111 |         for pair in pairs {
112 |             match pair.as_rule() {
113 |                 // A <!DOCTYPE> tag means a full-fledged document.  Note that because of the way
114 |                 // the grammar is written, we will only get this token if the <!DOCTYPE> occurs
115 |                 // before any other tag; otherwise it will be parsed as a custom tag.
116 |                 Rule::doctype => {
117 |                     if dom.tree_type == DomVariant::Empty {
118 |                         dom.tree_type = DomVariant::Document;
119 |                     }
120 |                 }
121 | 
122 |                 // If we see an element, build the sub-tree and add it as a child.  If we don't
123 |                 // have a document type yet (i.e. "empty"), select DocumentFragment
124 |                 Rule::node_element => match Self::build_node_element(pair, &mut dom) {
125 |                     Ok(Some(node)) => {
126 |                         if dom.tree_type == DomVariant::Empty {
127 |                             dom.tree_type = DomVariant::DocumentFragment;
128 |                         }
129 |                         dom.children.push(node);
130 |                     }
131 |                     // No element was produced (it had no name), so there is nothing to add
132 |                     Ok(None) => (),
133 |                     Err(error) => dom.errors.push(error.to_string()),
134 |                 },
135 | 
136 |                 // Similar to an element, we add it as a child and select DocumentFragment if we
137 |                 // don't already have a document type.
138 |                 Rule::node_text => {
139 |                     if dom.tree_type == DomVariant::Empty {
140 |                         dom.tree_type = DomVariant::DocumentFragment;
141 |                     }
142 |                     let text = pair.as_str();
143 |                     if !text.trim().is_empty() {
144 |                         dom.children.push(Node::Text(text.to_string()));
145 |                     }
146 |                 }
147 | 
148 |                 // Store comments as a child, but it doesn't affect the document type selection
149 |                 // until the next phase (validation).
150 |                 Rule::node_comment => {
151 |                     dom.children
152 |                         .push(Node::Comment(pair.into_inner().as_str().to_string()));
153 |                 }
154 | 
155 |                 // Ignore 'end of input', which then allows the catch-all unreachable!() arm to
156 |                 // function properly.
157 |                 Rule::EOI => (),
158 | 
159 |                 // This should be unreachable, due to the way the grammar is written
160 |                 _ => unreachable!("[build dom] unknown rule: {:?}", pair.as_rule()),
161 |             };
162 |         }
163 | 
164 |         // Implement some checks on the generated dom's data and initial type.  The type may be
165 |         // modified in this section.
166 |         match dom.tree_type {
167 |             // A DomVariant::Empty can only have comments. Anything else is an error.
168 |             DomVariant::Empty => {
169 |                 for node in &dom.children {
170 |                     if let Node::Comment(_) = node {
171 |                         // An "empty" document, but it has comments - this is where we cleanup the
172 |                         // earlier assumption that a document with only comments is "empty".
173 |                         // Really, it is a "fragment".
174 |                         dom.tree_type = DomVariant::DocumentFragment
175 |                     } else {
176 |                         // Anything else (i.e. Text() or Element() ) can't happen at the top level;
177 |                         // if we had seen one, we would have set the document type above
178 |                         unreachable!("[build dom] empty document with an Element {:?}", node)
179 |                     }
180 |                 }
181 |             }
182 | 
183 |             // A DomVariant::Document can only have comments and an <HTML> node at the top level.
184 |             // Only one <HTML> tag is permitted.
185 |             DomVariant::Document => {
186 |                 let html_count = dom
187 |                     .children
188 |                     .iter()
189 |                     .filter(|node| match node {
190 |                         Node::Element(el) => el.name.to_lowercase() == "html",
191 |                         _ => false,
192 |                     })
193 |                     .count();
194 | 
195 |                 if html_count > 1 {
196 |                     return Err(Error::Parsing(
197 |                         "Document with multiple HTML tags".to_string(),
198 |                     ));
199 |                 }
200 |             }
201 | 
202 |             // A DomVariant::DocumentFragment should not have <HEAD>, or <BODY> tags at the
203 |             // top-level.  If we find an <HTML> tag, then we consider this a Document instead (if
204 |             // it comes before any other elements, and if there is only one <HTML> tag).
205 |             DomVariant::DocumentFragment => {
206 |                 let mut seen_html = false;
207 |                 let mut seen_elements = false;
208 | 
209 |                 for node in &dom.children {
210 |                     let el = match node {
211 |                         Node::Element(el) => el,
212 |                         // Comment() and Text() nodes are permitted at the top-level of a
213 |                         // DocumentFragment
214 |                         _ => continue,
215 |                     };
216 | 
217 |                     // Lowercase the name once, so that <HEAD> and <Body> are rejected exactly
218 |                     // like <head> and <body>, in line with the case-insensitive <HTML> check.
219 |                     match el.name.to_lowercase().as_str() {
220 |                         // <HTML> Nodes - one (before any other elements) is okay
221 |                         "html" => {
222 |                             if seen_html || seen_elements {
223 |                                 return Err(Error::Parsing(format!(
224 |                                     "A document fragment should not include {}",
225 |                                     el.name
226 |                                 )));
227 |                             }
228 | 
229 |                             // A fragment with just an <HTML> tag is a document
230 |                             dom.tree_type = DomVariant::Document;
231 |                             seen_html = true;
232 |                         }
233 |                         // Nodes other than <HTML> - reject <HEAD> and <BODY>
234 |                         "head" | "body" => {
235 |                             return Err(Error::Parsing(format!(
236 |                                 "A document fragment should not include {}",
237 |                                 el.name
238 |                             )));
239 |                         }
240 |                         _ => seen_elements = true,
241 |                     }
242 |                 }
243 |             }
244 |         }
245 | 
246 |         // The result is the validated tree
247 |         Ok(dom)
248 |     }
234 | 
235 |     /// Builds an element node, including all of its descendants, from a grammar pair.
236 |     ///
237 |     /// Returns `Ok(None)` when the pair did not yield an element name. Failures of child
238 |     /// elements and attributes are pushed to `dom.errors` and do not abort the element.
239 |     ///
240 |     /// # Errors
241 |     ///
242 |     /// Returns `Error::Parsing` when the pair contains a rule that is not handled here.
243 |     fn build_node_element(pair: Pair<Rule>, dom: &mut Dom) -> Result<Option<Node>> {
244 |         let span = pair.as_span();
245 |         let (start_line, start_column) = span.start_pos().line_col();
246 |         let (end_line, end_column) = span.end_pos().line_col();
247 | 
248 |         let mut element = Element {
249 |             source_span: SourceSpan::new(
250 |                 span.as_str().to_string(),
251 |                 start_line,
252 |                 end_line,
253 |                 start_column,
254 |                 end_column,
255 |             ),
256 |             ..Element::default()
257 |         };
258 | 
259 |         for inner in pair.into_inner() {
260 |             match inner.as_rule() {
261 |                 // Nested elements are built recursively
262 |                 Rule::node_element | Rule::el_raw_text => {
263 |                     match Self::build_node_element(inner, dom) {
264 |                         Ok(Some(child)) => element.children.push(child),
265 |                         Ok(None) => (),
266 |                         Err(error) => dom.errors.push(error.to_string()),
267 |                     }
268 |                 }
269 |                 // Whitespace-only text is dropped
270 |                 Rule::node_text | Rule::el_raw_text_content => {
271 |                     let text = inner.as_str();
272 |                     if !text.trim().is_empty() {
273 |                         element.children.push(Node::Text(text.to_string()));
274 |                     }
275 |                 }
276 |                 Rule::node_comment => {
277 |                     let comment = inner.into_inner().as_str().to_string();
278 |                     element.children.push(Node::Comment(comment));
279 |                 }
280 |                 // TODO: To enable some kind of validation we should probably align this with
281 |                 // https://html.spec.whatwg.org/multipage/syntax.html#elements-2
282 |                 // Also see element variants
283 |                 Rule::el_name | Rule::el_void_name | Rule::el_raw_text_name => {
284 |                     element.name = inner.as_str().to_string();
285 |                 }
286 |                 // `id` and `class` get their own fields, everything else goes into the map
287 |                 Rule::attr => match Self::build_attribute(inner.into_inner()) {
288 |                     Ok((key, value)) => match key.as_str() {
289 |                         "id" => element.id = value,
290 |                         "class" => {
291 |                             if let Some(classes) = value {
292 |                                 element
293 |                                     .classes
294 |                                     .extend(classes.split_whitespace().map(String::from));
295 |                             }
296 |                         }
297 |                         _ => {
298 |                             element.attributes.insert(key, value);
299 |                         }
300 |                     },
301 |                     Err(error) => dom.errors.push(error.to_string()),
302 |                 },
303 |                 // An end tag marks the element as normal and ends the processing of the pair
304 |                 Rule::el_normal_end | Rule::el_raw_text_end => {
305 |                     element.variant = ElementVariant::Normal;
306 |                     break;
307 |                 }
308 |                 Rule::el_dangling | Rule::EOI => (),
309 |                 rule => {
310 |                     return Err(Error::Parsing(format!(
311 |                         "Failed to create element at rule: {:?}",
312 |                         rule
313 |                     )))
314 |                 }
315 |             }
316 |         }
317 | 
318 |         // Without a name there is no element to hand back
319 |         if element.name.is_empty() {
320 |             Ok(None)
321 |         } else {
322 |             Ok(Some(Node::Element(element)))
323 |         }
324 |     }
327 | 
328 |     /// Builds the `(key, value)` tuple of a single attribute from its inner pairs.
329 |     ///
330 |     /// The key and a non-quoted value are trimmed, a quoted value is taken as it is. The value
331 |     /// stays `None` when the pairs contain no value.
332 |     ///
333 |     /// # Errors
334 |     ///
335 |     /// Returns `Error::Parsing` when a pair has a rule that is not part of an attribute.
336 |     ///
337 |     /// # Panics
338 |     ///
339 |     /// Panics when a quoted attribute has no inner pair.
340 |     fn build_attribute(pairs: Pairs<Rule>) -> Result<(String, Option<String>)> {
341 |         let mut key = String::new();
342 |         let mut value = None;
343 | 
344 |         for pair in pairs {
345 |             match pair.as_rule() {
346 |                 Rule::attr_key => key = pair.as_str().trim().to_string(),
347 |                 Rule::attr_non_quoted => value = Some(pair.as_str().trim().to_string()),
348 |                 Rule::attr_quoted => {
349 |                     let inner_pair = pair.into_inner().next().expect("attribute value");
350 | 
351 |                     match inner_pair.as_rule() {
352 |                         Rule::attr_value => value = Some(inner_pair.as_str().to_string()),
353 |                         rule => {
354 |                             return Err(Error::Parsing(format!(
355 |                                 "Failed to parse attr value: {:?}",
356 |                                 rule
357 |                             )))
358 |                         }
359 |                     }
360 |                 }
361 |                 rule => {
362 |                     return Err(Error::Parsing(format!(
363 |                         "Failed to parse attr: {:?}",
364 |                         rule
365 |                     )))
366 |                 }
367 |             }
368 |         }
369 | 
370 |         Ok((key, value))
371 |     }
365 | }
366 | 


--------------------------------------------------------------------------------
/src/dom/node.rs:
--------------------------------------------------------------------------------
  1 | use super::element::Element;
  2 | use serde::Serialize;
  3 | 
/// A single node of the parsed DOM tree.
#[derive(Debug, Clone, Serialize, PartialEq)]
// `untagged`: the variant's content is serialized directly, without a wrapping variant name.
#[serde(untagged)]
pub enum Node {
    /// Text content.
    Text(String),
    /// An element; elements are the only nodes that hold child nodes.
    Element(Element),
    /// The content of a comment.
    Comment(String),
}
 11 | 
 12 | impl Node {
 13 |     pub fn text(&self) -> Option<&str> {
 14 |         match self {
 15 |             Node::Text(t) => Some(t.as_str()),
 16 |             _ => None,
 17 |         }
 18 |     }
 19 | 
 20 |     pub fn element(&self) -> Option<&Element> {
 21 |         match self {
 22 |             Node::Element(e) => Some(e),
 23 |             _ => None,
 24 |         }
 25 |     }
 26 | 
 27 |     pub fn comment(&self) -> Option<&str> {
 28 |         match self {
 29 |             Node::Comment(t) => Some(t.as_str()),
 30 |             _ => None,
 31 |         }
 32 |     }
 33 | }
 34 | 
 35 | impl<'a> IntoIterator for &'a Node {
 36 |     type Item = &'a Node;
 37 |     type IntoIter = NodeIntoIterator<'a>;
 38 | 
 39 |     fn into_iter(self) -> Self::IntoIter {
 40 |         NodeIntoIterator {
 41 |             node: self,
 42 |             index: vec![],
 43 |         }
 44 |     }
 45 | }
 46 | 
/// Iterator state for walking the nodes below a starting [`Node`].
pub struct NodeIntoIterator<'a> {
    // The node visited most recently (initially the node the iteration started from).
    node: &'a Node,
    // We add/remove to this vec each time we go down/up the node tree.
    // Each entry pairs a parent node with the index of its child that is currently being visited.
    index: Vec<(usize, &'a Node)>,
}
 52 | 
 53 | impl<'a> Iterator for NodeIntoIterator<'a> {
 54 |     type Item = &'a Node;
 55 | 
 56 |     fn next(&mut self) -> Option<Self::Item> {
 57 |         // Get first child
 58 |         let child = match self.node {
 59 |             Node::Element(ref e) => e.children.get(0),
 60 |             _ => None,
 61 |         };
 62 | 
 63 |         let result = match child {
 64 |             // If element has child, return child
 65 |             Some(child) => {
 66 |                 self.index.push((0, self.node));
 67 |                 self.node = child;
 68 |                 Some(child)
 69 |             }
 70 |             // If element doesn't have a child, but is a child of another node
 71 |             None if self.index.len() > 0 => {
 72 |                 let mut has_finished = false;
 73 |                 let mut next_node = None;
 74 | 
 75 |                 while !has_finished {
 76 |                     // Try to get the next sibling of the parent node
 77 |                     if let Some((sibling_index, parent)) = self.index.pop() {
 78 |                         let next_sibling = sibling_index + 1;
 79 |                         let sibling = if let Node::Element(ref e) = parent {
 80 |                             e.children.get(next_sibling)
 81 |                         } else {
 82 |                             None
 83 |                         };
 84 |                         if sibling.is_some() {
 85 |                             has_finished = true;
 86 |                             self.index.push((next_sibling, parent));
 87 |                             next_node = sibling;
 88 |                         } else {
 89 |                             continue;
 90 |                         }
 91 |                     // Break of there are no more parents
 92 |                     } else {
 93 |                         has_finished = true;
 94 |                     }
 95 |                 }
 96 | 
 97 |                 if let Some(next_node) = next_node {
 98 |                     self.node = next_node;
 99 |                 }
100 | 
101 |                 next_node
102 |             }
103 |             _ => None,
104 |         };
105 | 
106 |         result
107 |     }
108 | }
109 | 
#[cfg(test)]
mod tests {
    use super::*;

    /// Each accessor returns `Some` only for its own variant and `None` for the others.
    #[test]
    fn node_utillity_functions() {
        let text_node = Node::Text(String::from("test"));
        assert_eq!(text_node.text(), Some("test"));
        assert_eq!(text_node.element(), None);
        assert_eq!(text_node.comment(), None);

        let expected_element = Element::default();
        let element_node = Node::Element(Element::default());
        assert_eq!(element_node.text(), None);
        assert_eq!(element_node.element(), Some(&expected_element));
        assert_eq!(element_node.comment(), None);

        let comment_node = Node::Comment(String::from("test"));
        assert_eq!(comment_node.text(), None);
        assert_eq!(comment_node.element(), None);
        assert_eq!(comment_node.comment(), Some("test"));
    }
}


--------------------------------------------------------------------------------
/src/dom/span.rs:
--------------------------------------------------------------------------------
 1 | use serde::{Serialize};
 2 | 
/// Span of the information in the parsed source.
#[derive(Debug, Default, Clone, Serialize, PartialEq)]
// Field names are serialized in camelCase (e.g. `start_line` becomes `startLine`).
#[serde(rename_all = "camelCase")]
pub struct SourceSpan {
    // Presumably the source text covered by the span; confirm against the code that builds spans.
    pub text: String,
    // NOTE(review): whether lines/columns are 0- or 1-based and whether the end is inclusive
    // is not visible in this file; check the call sites before relying on either.
    pub start_line: usize,
    pub end_line: usize,
    pub start_column: usize,
    pub end_column: usize,
}
13 | 
14 | impl SourceSpan {
15 |     pub fn new(
16 |         text: String,
17 |         start_line: usize,
18 |         end_line: usize,
19 |         start_column: usize,
20 |         end_column: usize,
21 |     ) -> Self {
22 |         Self {
23 |             text,
24 |             start_line,
25 |             end_line,
26 |             start_column,
27 |             end_column,
28 |         }
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
 1 | use thiserror::Error as ThisError;
 2 | 
/// Errors returned by this crate. Every variant displays as its inner value (`"{0}"`).
#[derive(ThisError, Debug)]
pub enum Error {
    /// A parsing failure, carrying its message.
    #[error("{0}")]
    Parsing(String),
    /// A command-line failure, carrying its message.
    // NOTE(review): not constructed in the visible source; presumably used by the bin/example.
    #[error("{0}")]
    Cli(String),
    /// An I/O failure; `#[from]` lets `?` convert a `std::io::Error` into this variant.
    #[error("{0}")]
    IO(#[from] std::io::Error),
    /// A JSON failure; `#[from]` lets `?` convert a `serde_json::Error` into this variant.
    #[error("{0}")]
    Serde(#[from] serde_json::Error),
}
14 | 
/// Shorthand for `std::result::Result` with this crate's [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
16 | 


--------------------------------------------------------------------------------
/src/grammar/mod.rs:
--------------------------------------------------------------------------------
1 | use pest_derive::Parser;
2 | 
/// Parser type whose implementation is derived by `pest_derive` from `grammar/rules.pest`.
#[derive(Parser)]
#[grammar = "grammar/rules.pest"]
pub struct Grammar;
6 | 


--------------------------------------------------------------------------------
/src/grammar/rules.pest:
--------------------------------------------------------------------------------
  1 | //
  2 | // HTML
  3 | //
  4 | html = _{
  5 |     SOI
  6 |     ~ node_comment*
  7 |     ~ doctype?
  8 |     ~ node*
  9 |     ~ EOI
 10 | }
 11 | 
 12 | //
 13 | // DOCTYPE
 14 | //
 15 | doctype = { WSP* ~ chevron_left_bang ~ ^"doctype" ~ WSP* ~ attr* ~ WSP* ~ chevron_right_normal}
 16 | 
 17 | //
 18 | // NODES
 19 | //
 20 | node = _{ node_comment | node_element | node_text }
 21 | node_comment = { WSP* ~ (comment_if | comment_normal) ~ WSP* }
 22 | node_text = { (!(node_element | comment_tag_start | chevron_left_bang) ~ ANY)+ }
 23 | node_element = { el_void | el_void_xml | el_process_instruct | el_raw_text | el_normal | el_dangling }
 24 | 
 25 | //
 26 | // COMMENTS
 27 | //
 28 | comment_normal = _{ comment_tag_start ~ comment_body ~ comment_tag_end }
 29 | comment_body = { (!comment_tag_end ~ ANY)* }
 30 | comment_tag_start = _{ chevron_left_bang ~ "--" ~ WSP* }
 31 | comment_tag_end = _{ WSP* ~ "--" ~ chevron_right_normal }
 32 | 
 33 | // Compatibility with old IE browsers... This is not necessary for newer browsers
 34 | comment_if = _{ comment_if_start ~ comment_if_body ~ comment_if_end }
 35 | comment_if_body = { (!comment_if_end ~ ANY)* }
 36 | comment_if_start = _{ comment_tag_start ~ "[" ~ ^"if" }
 37 | comment_if_end = _{ chevron_left_bang ~ "[" ~ ^"endif" ~ "]" ~ comment_tag_end }
 38 | 
 39 | //
 40 | // ATTRIBUTES
 41 | //
 42 | attr = { attr_key ~ (equal ~ WSP* ~ (attr_non_quoted | attr_quoted ))? }
 43 | attr_quoted =  ${PUSH(quote) ~ attr_value ~ POP }
 44 | attr_non_quoted = @{ !quote ~ (!(WSP | chevron_right) ~ ANY)* }
 45 | attr_key = { WSP* ~ ASCII_ALPHA ~ text_chars* ~ WSP* }
 46 | attr_value = { WSP* ~ (!PEEK ~ ANY)* ~ WSP* }
 47 | 
 48 | //
 49 | // ELEMENTS
 50 | //
 51 | el_name = @{ ASCII_ALPHA ~ text_chars* }
 52 | 
 53 | // Void element aka self-closing element
 54 | // Ex: <hr>
 55 | el_void_name_html = @{
 56 |     ^"area"
 57 |     | ^"base"
 58 |     | ^"br"
 59 |     | ^"col"
 60 |     | ^"command"
 61 |     | ^"embed"
 62 |     | ^"hr"
 63 |     | ^"img"
 64 |     | ^"input"
 65 |     | ^"keygen"
 66 |     | ^"link"
 67 |     | ^"meta"
 68 |     | ^"param"
 69 |     | ^"source"
 70 |     | ^"track"
 71 |     | ^"wbr"
 72 |     | ^"meta"
 73 | }
 74 | // NOTE: This should not have to be a rule, but people don't know what void elements are...
 75 | el_void_name_svg = @{
 76 |     ^"path"
 77 |     | ^"polygon"
 78 |     | ^"rect"
 79 |     | ^"circle"
 80 | }
 81 | el_void_name = @{ el_void_name_html | el_void_name_svg }
 82 | el_void = _{ chevron_left_normal ~ WSP* ~ el_void_name ~ WSP* ~ attr* ~ WSP* ~ (chevron_right_normal | chevron_right_closed) }
 83 | el_void_xml = _{ chevron_left_normal ~ WSP* ~ el_name ~ WSP* ~ attr* ~ WSP* ~ chevron_right_closed }
 84 | 
 85 | // Open elements are default element that can take children 
 86 | // and have both a start tag and an end tag
 87 | // Ex: <html lang="en"></html>
 88 | el_normal = _{ el_normal_start ~ (!el_normal_end ~ node)* ~ el_normal_end }
 89 | el_normal_start = _{ chevron_left_normal ~ WSP* ~ PUSH(el_name) ~ WSP* ~ attr* ~ WSP* ~ chevron_right_normal}
 90 | el_normal_end = { chevron_left_closed ~ WSP* ~ POP ~ WSP* ~ chevron_right_normal}
 91 | 
 92 | // Raw text elements are elements with text/script content that
 93 | // might interfere with the normal html syntax
 94 | el_raw_text_name = {
 95 |     ^"style"
 96 |     | ^"script"
 97 |     | ^"title"
 98 |     | ^"textarea"
 99 | }
100 | el_raw_text_content = { (!el_raw_text_end ~ ANY)* }
101 | el_raw_text = _{ el_raw_text_start ~ el_raw_text_content ~ el_raw_text_end }
102 | el_raw_text_start = _{ chevron_left_normal ~ WSP* ~ PUSH(el_raw_text_name) ~ WSP* ~ attr* ~ WSP* ~ chevron_right_normal ~ WSP*}
103 | el_raw_text_end = { WSP* ~ chevron_left_closed ~ WSP* ~ POP ~ WSP* ~ chevron_right_normal}
104 | 
105 | // XML processing instruction
106 | // Ex: <?xml version="1.0" ?>
107 | el_process_instruct = { chevron_left_question ~ WSP* ~ el_name? ~ WSP* ~ attr* ~ WSP* ~ chevron_right_question }
108 | 
109 | // Catch dangling elements
110 | // Ex: <div/></div>
111 | el_dangling = { chevron_left_closed ~ WSP* ~ el_name ~ WSP* ~ chevron_right_normal}
112 | 
113 | //
114 | // SYMBOLS / CHARACTERS
115 | //
116 | text_chars = _{'a'..'z' | 'A'..'Z' | "_" | "-" | ":" |'0'..'9'}
117 | 
118 | chevron_left_normal = _{ "<" }
119 | chevron_left_closed = _{ "</" }
120 | chevron_left_bang = _{ "<!" }
121 | chevron_left_question = _{ "<?" }
122 | 
123 | chevron_right_normal = _{ ">" }
124 | chevron_right_closed = _{ "/>" }
125 | chevron_right_question = _{ "?>" }
126 | chevron_right = _{
127 |     chevron_right_normal
128 |     | chevron_right_closed
129 |     | chevron_right_question
130 | }
131 | 
132 | equal = _{ "=" }
133 | quote_dubble = _{ "\"" }
134 | quote_single = _{ "'" }
135 | quote = _{ quote_dubble | quote_single }
136 | WSP = _{ " " | "\t" | "\r" | "\n" }
137 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! [![github]](https://github.com/mathiversen/html-parser)
  2 | //!
  3 | //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
  4 | //!
  5 | //! # Html parser
  6 | //!
  7 | //! A simple and general purpose html/xhtml parser lib/bin, using [Pest](https://pest.rs/).
  8 | //!
  9 | //! ## Features
 10 | //! - Parse html & xhtml (not xml processing instructions)
 11 | //! - Parse html-documents
 12 | //! - Parse html-fragments
 13 | //! - Parse empty documents
 14 | //! - Parse with the same api for both documents and fragments
 15 | //! - Parse custom, non-standard, elements; `<cat/>`, `<Cat/>` and `<C4-t/>`
 16 | //! - Removes comments
 17 | //! - Removes dangling elements
 18 | //! - Iterate over all nodes in the dom tree
 19 | //!
 20 | //! ## What is it not
 21 | //!
 22 | //! - It's not a high-performance browser-grade parser
 23 | //! - It's not suitable for html validation
 24 | //! - It's not a parser that includes element selection or dom manipulation
 25 | //!
 26 | //! If your requirements match any of the above, then you're most likely looking for one of the crates below:
 27 | //!
 28 | //! - [html5ever](https://crates.io/crates/html5ever)
 29 | //! - [kuchiki](https://crates.io/crates/kuchiki)
 30 | //! - [scraper](https://crates.io/crates/scraper)
 31 | //! - or other crates using the `html5ever` parser
 32 | //!
 33 | //! ## Examples bin
 34 | //!
 35 | //! Parse html file
 36 | //!
 37 | //! ```shell
 38 | //! html_parser index.html
 39 | //!
 40 | //! ```
 41 | //!
 42 | //! Parse stdin with pretty output
 43 | //!
 44 | //! ```shell
 45 | //! curl <website> | html_parser -p
 46 | //! ```
 47 | //!
 48 | //! ## Examples lib
 49 | //!
 50 | //! Parse html document
 51 | //!
 52 | //! ```rust
 53 | //!     use html_parser::Dom;
 54 | //!
 55 | //!     fn main() {
 56 | //!         let html = r#"
 57 | //!             <!doctype html>
 58 | //!             <html lang="en">
 59 | //!                 <head>
 60 | //!                     <meta charset="utf-8">
 61 | //!                     <title>Html parser</title>
 62 | //!                 </head>
 63 | //!                 <body>
 64 | //!                     <h1 id="a" class="b c">Hello world</h1>
 65 | //!                     </h1> <!-- comments & dangling elements are ignored -->
 66 | //!                 </body>
 67 | //!             </html>"#;
 68 | //!
 69 | //!         assert!(Dom::parse(html).is_ok());
 70 | //!     }
 71 | //! ```
 72 | //!
 73 | //! Parse html fragment
 74 | //!
 75 | //! ```rust
 76 | //!     use html_parser::Dom;
 77 | //!
 78 | //!     fn main() {
 79 | //!         let html = "<div id=cat />";
 80 | //!         assert!(Dom::parse(html).is_ok());
 81 | //!     }
 82 | //! ```
 83 | //!
 84 | //! Print to json
 85 | //!
 86 | //! ```rust
 87 | //!     use html_parser::{Dom, Result};
 88 | //!
 89 | //!     fn main() -> Result<()> {
 90 | //!         let html = "<div id=cat />";
 91 | //!         let json = Dom::parse(html)?.to_json_pretty()?;
 92 | //!         println!("{}", json);
 93 | //!         Ok(())
 94 | //!     }
 95 | //! ```
 96 | 
 97 | #![allow(clippy::needless_doctest_main)]
 98 | 
 99 | mod dom;
100 | mod error;
101 | mod grammar;
102 | 
103 | use grammar::Rule;
104 | 
105 | pub use crate::dom::element::{Element, ElementVariant};
106 | pub use crate::dom::node::Node;
107 | pub use crate::dom::Dom;
108 | pub use crate::dom::DomVariant;
109 | pub use crate::error::Error;
110 | pub use crate::error::Result;
111 | 


--------------------------------------------------------------------------------
/tests/bin.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::Result;
 2 | use indoc::indoc;
 3 | use std::io::Write;
 4 | use std::process::Command;
 5 | use tempfile::NamedTempFile;
 6 | 
// Writes a document that starts with an XML processing instruction to a temp file, runs
// the `simple_parser` example on it with `-d`, and checks that stdout starts with the
// parsing error. The example must already be built at the hard-coded relative path.
#[test]
fn it_prints_out_processing_error() -> Result<()> {
    let html = indoc!(
        r#"
            <?xml version="1.0" ?>
            <div />
        "#
    );

    let mut file = NamedTempFile::new()?;
    file.write_all(html.as_bytes())?;

    let output = Command::new("./target/debug/examples/simple_parser")
        .arg("-d")
        .arg(file.path())
        .output()
        .unwrap();

    let stdout = String::from_utf8(output.stdout).unwrap();

    assert!(stdout.starts_with("# Failed to create element at rule: el_process_instruct"));
    Ok(())
}
30 | 


--------------------------------------------------------------------------------
/tests/comments.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use insta::assert_json_snapshot;
 3 | 
// Parses a document that is a single comment containing punctuation and snapshots the result.
#[test]
fn it_can_parse_document_with_just_one_comment() -> Result<()> {
    let html = "<!-- hello !\"#/()= -->";
    let ast = Dom::parse(html)?;
    assert_json_snapshot!(ast);
    Ok(())
}
// Parses three adjacent comments with no other content and snapshots the result.
#[test]
fn it_can_parse_document_with_just_comments() -> Result<()> {
    let html = "<!--x--><!--y--><!--z-->";
    let ast = Dom::parse(html)?;
    assert_json_snapshot!(ast);
    Ok(())
}
18 | 


--------------------------------------------------------------------------------
/tests/document.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use indoc::indoc;
 3 | use insta::assert_json_snapshot;
 4 | 
// Parses a doctype followed by an empty `<html>` element and snapshots the result.
#[test]
fn it_can_parse_minimal_document() -> Result<()> {
    let html = "<!DOCTYPE html><html></html>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Parses a document with comments before the doctype, after it, inside `<html>` and
// after `</html>`, and snapshots the result.
#[test]
fn it_can_parse_document_with_comments() -> Result<()> {
    let html = indoc!(
        r#"
        <!-- comment -->
        <!-- comment -->
        <!DOCTYPE html>
        <!-- comment -->
        <!-- comment -->
        <html>
        <!-- comment -->
        </html>
        <!-- comment -->
        <!-- comment -->
    "#
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A doctype followed by two root `<html>` elements must make `Dom::parse` return an error.
#[test]
fn it_error_when_doctype_and_multiple_html() {
    let html = "<!DOCTYPE html><html></html><html></html>";
    assert!(Dom::parse(html).is_err());
}
37 | 


--------------------------------------------------------------------------------
/tests/document_empty.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use insta::assert_json_snapshot;
 3 | 
// Parsing the empty string must succeed; the resulting DOM is snapshotted.
#[test]
fn it_can_parse_empty_document() -> Result<()> {
    let html = "";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
11 | 


--------------------------------------------------------------------------------
/tests/document_fragment.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use insta::assert_json_snapshot;
 3 | 
// Parses a lone self-closed `<div/>` and snapshots the result.
#[test]
fn it_can_parse_single_div_as_fragment() -> Result<()> {
    let html = "<div/>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Parses input that is plain text only and snapshots the result.
#[test]
fn it_can_parse_single_text_as_fragment() -> Result<()> {
    let html = "hello";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Parses text, a comment and an element placed back to back, and snapshots the result.
#[test]
fn it_can_parse_text_comment_element_as_fragment() -> Result<()> {
    let html = "hello<!--world?--><div/>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A `<body>` element next to a root-level `<div>` must make `Dom::parse` return an error.
#[test]
fn it_error_when_body_is_used_in_fragment_root() {
    let html = "<div></div><body></body>";
    assert!(Dom::parse(html).is_err());
}
// A `<head>` element next to a root-level `<div>` must make `Dom::parse` return an error.
#[test]
fn it_error_when_head_is_used_in_fragment_root() {
    let html = "<div></div><head></head>";
    assert!(Dom::parse(html).is_err());
}
// An `<html>` element next to a root-level `<div>` must make `Dom::parse` return an error.
#[test]
fn it_error_when_html_is_used_in_fragment_root() {
    let html = "<div></div><html></html>";
    assert!(Dom::parse(html).is_err());
}
40 | 


--------------------------------------------------------------------------------
/tests/element.rs:
--------------------------------------------------------------------------------
  1 | use html_parser::{Dom, Result};
  2 | use indoc::indoc;
  3 | use insta::assert_json_snapshot;
  4 | 
// Parses a single lower-case element with start and end tag and snapshots the result.
#[test]
fn it_can_parse_one_element() -> Result<()> {
    let html = "<html></html>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Same as the single-element case, but with an upper-case tag name.
#[test]
fn it_can_parse_one_element_upper_case() -> Result<()> {
    let html = "<HTML></HTML>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Same as the single-element case, but with a mixed-case tag name.
#[test]
fn it_can_parse_one_element_mixed_case() -> Result<()> {
    let html = "<Html></Html>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Tag name mixing upper/lower case letters with a digit.
#[test]
fn it_can_parse_one_element_mixed_case_numbers() -> Result<()> {
    let html = "<Header1></Header1>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Tag name mixing letters, a digit, an underscore and a hyphen.
#[test]
fn it_can_parse_one_element_mixed_case_numbers_symbols() -> Result<()> {
    let html = "<Head_Er-1></Head_Er-1>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Two self-closed sibling elements at the root.
#[test]
fn it_can_parse_multiple_elements() -> Result<()> {
    let html = "<div/><div/>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Two sibling elements at the root, each with explicit start and end tags.
#[test]
fn it_can_parse_multiple_open_elements() -> Result<()> {
    let html = "<div></div><div></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A self-closed `<div />` nested inside a `<div>`, written across several lines.
#[test]
fn it_can_parse_nested_elements() -> Result<()> {
    let html = indoc!(
        r"
        <div>
            <div />
        </div>
    "
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A `<div>` whose children are a comment, a self-closed element, text and a nested element.
#[test]
fn it_can_parse_nested_elements_mixed_children() -> Result<()> {
    let html = indoc!(
        r"
        <div>
            <!--comment-->
            <div/>
            Hello
            <div>
                World
            </div>
        </div>
    "
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Eight levels of nested `<div class='1'>` with a comment and text at the innermost level.
#[test]
fn it_can_parse_deeply_nested() -> Result<()> {
    let html = indoc!(
        r#"
            <div class='1'>
                <div class='1'>
                    <div class='1'>
                        <div class='1'>
                            <div class='1'>
                                <div class='1'>
                                    <div class='1'>
                                        <div class='1'>
                                            <!--this is deep-->
                                            hello world
                                        </div>
                                    </div>
                                </div>
                            </div> 
                        </div>
                    </div>
                </div>
            </div>
        "#
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A `<script>` element whose JavaScript body contains `>`, braces, quotes and an escaped
// quote; parsing must succeed and the result is snapshotted.
#[test]
fn it_can_parse_script_with_content() -> Result<()> {
    let html = indoc!(
        r#"
            <script>
                const person_creator = ({ name, symtoms }) => {
                    let person = {}
                    person.name = name
                    person.symtoms = {}
                    for (symtom of symtoms) {
                        person.symtoms[symtom] = true
                    }
                    return person
                }
                
                const main = () => {
                    let name = 'mathias'
                    let symtoms = ['Dunning-Kruger', 'ACDC', 'Slacker']
                
                    setTimeout(() => {
                        let person = person_creator({ name, symtoms })
                        if (person.symtoms.hasOwnProperty('Dunning-Kruger')) {
                            console.log('yeah buddy, that\'s right')
                        }
                    }, 1337)
                }
                
                main()
            </script>
        "#
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A `<style>` element whose CSS body contains `--` custom properties and braces;
// parsing must succeed and the result is snapshotted.
#[test]
fn it_can_parse_style_with_content() -> Result<()> {
    let html = indoc!(
        r#"
            <style>
                :root {
                    --background-color: black;
                    --text-color: white;
                }
                body {
                    background: var(--background-color);
                    color: var(--text-color);
                }
            </style>
        "#
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A stray `</div>` between two well-formed `<div>` elements; parsing must still succeed.
#[test]
fn it_skips_dangling_elements() -> Result<()> {
    let html = indoc!(
        "
        <div id='123'></div>
        </div>
        <div id='321'></div>
    "
    );
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// A `<div>` closed by a mismatched `</span>` followed by a well-formed `<div>`;
// parsing must succeed and the result is snapshotted.
#[test]
fn it_can_parse_broken_html() -> Result<()> {
    let html = "<div></span><div></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// NOTE: despite the name, this test requires parsing to SUCCEED (`?` would fail the test
// on an error) for nested divs with one mismatched closing tag, and snapshots the result.
#[test]
fn it_errors_when_multiple_nested_elements_dont_match() -> Result<()> {
    let html = "<div><div><div><div></div></div_error></div></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Clones the first root-level child of the parsed DOM and snapshots the clone.
#[test]
fn it_can_clone_node() {
    let html = indoc!(
        "
        <div>one</div>
        <div>two</div>
    "
    );
    let dom = Dom::parse(html).unwrap();
    let one = dom.children[0].clone();
    assert_json_snapshot!(one);
}
// Clones a whole parsed DOM and checks that the clone compares equal to the original.
#[test]
fn it_can_clone_dom() {
    let html = indoc!(
        "
        <html>
            <head>
                <title>Title</title>
            </head>
            <body>
                <h1>Hello world</h1>
            </body>
        </html>
    "
    );
    let dom = Dom::parse(html).unwrap();
    let dom_clone = dom.clone();
    assert_eq!(dom, dom_clone);
}
225 | 
// Feeds tags with whitespace in various positions; the whole input must parse without
// an error, and the snapshot records how each variant is interpreted.
#[test]
fn it_can_deal_with_weird_whitespaces() {
    let html = indoc!(
        "
        <!-- Normal case -->
        <div> Text </div>

        <!-- Whitespaces in opening tag to the left -->
        < div> Text </div>

        <!-- Whitespaces in opening tag to the right -->
        <div > Text </div>

        <!-- Whitespaces in closing tag to the left (should not work) -->
        <div> Text < /div>

        <!-- Whitespaces in closing tag to the right -->
        <div> Text </div >

        <!-- Whitespaces everywhere (should not work) -->
        < div > Text < / div >
        "
    );
    let dom = Dom::parse(html).unwrap();
    assert_json_snapshot!(dom);
}
252 | 


--------------------------------------------------------------------------------
/tests/element_attributes.rs:
--------------------------------------------------------------------------------
  1 | use html_parser::{Dom, Result};
  2 | use insta::assert_json_snapshot;
  3 | 
// `id` attribute with a double-quoted value.
#[test]
fn it_can_parse_double_quote() -> Result<()> {
    let html = "<div id=\"one\"></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// `id` attribute with a single-quoted value.
#[test]
fn it_can_parse_single_quote() -> Result<()> {
    let html = "<div id='one'></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// `id` attribute with an unquoted value.
#[test]
fn it_can_parse_no_quote() -> Result<()> {
    let html = "<div id=one></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Attribute key containing a hyphen (`data-cat`).
#[test]
fn it_can_parse_attribute_key_mixed_case_symbols() -> Result<()> {
    let html = "<div data-cat='morris'></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Three attributes on one element: two single-quoted values and one unquoted value.
#[test]
fn it_can_parse_multiple_attributes_single_quote() -> Result<()> {
    let html = "<div cat='mjau' dog='woff' ape=oh></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Attributes with irregular whitespace around keys and `=`, including padding inside a
// quoted value and no space between a closing quote and the next key.
#[test]
fn it_can_parse_multiple_attributes_where_whitespace_does_not_matter_for_keys() -> Result<()> {
    let html = "<div    cat   =  \"mjau\" dog ='  woff  'ape = oh ></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Three attributes on one element, all with double-quoted values.
#[test]
fn it_can_parse_multiple_attributes_double_quote() -> Result<()> {
    let html = "<div cat=\"mjau\" dog=\"woff\" ape=\"oh\"></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
// Three attributes on one element, all with unquoted values.
#[test]
fn it_can_parse_multiple_attributes_no_quote() -> Result<()> {
    let html = "<div cat=mjau dog=woff ape=oh></div>";
    let dom = Dom::parse(html)?;
    assert_json_snapshot!(dom);
    Ok(())
}
 60 | /// A single-quoted value containing a space (`'mjau mjau'`) on a self-closed `<div />`.
 61 | #[test]
 62 | fn it_can_parse_attribute_multiple_values_single_quote() -> Result<()> {
 63 |     let dom = Dom::parse("<div cat='mjau mjau' />")?;
 64 |     assert_json_snapshot!(dom);
 65 |     Ok(())
 66 | }
 67 | /// A double-quoted value containing a space (`"mjau mjau"`) on a self-closed `<div />`.
 68 | #[test]
 69 | fn it_can_parse_attribute_multiple_values_double_quote() -> Result<()> {
 70 |     let dom = Dom::parse("<div cat=\"mjau mjau\" />")?;
 71 |     assert_json_snapshot!(dom);
 72 |     Ok(())
 73 | }
 74 | /// A bare attribute with no `=value` part (`hidden`) on a self-closed `<img/>`.
 75 | #[test]
 76 | fn it_can_parse_attribute_with_empty_value() -> Result<()> {
 77 |     let dom = Dom::parse("<img hidden/>")?;
 78 |     assert_json_snapshot!(dom);
 79 |     Ok(())
 80 | }
 81 | 
 82 | /// An unquoted `id` immediately followed by `/>` with no separating space (`<img id=a/>`).
 83 | #[test]
 84 | fn it_can_parse_id() -> Result<()> {
 85 |     let dom = Dom::parse("<img id=a/>")?;
 86 |     assert_json_snapshot!(dom);
 87 |     Ok(())
 88 | }
 89 | /// A `class` attribute listing three space-separated names (`'a b c'`).
 90 | #[test]
 91 | fn it_can_parse_classes() -> Result<()> {
 92 |     let dom = Dom::parse("<img class='a b c'/>")?;
 93 |     assert_json_snapshot!(dom);
 94 |     Ok(())
 95 | }
 96 | /// A non-`class` attribute whose value has a leading space, a run of spaces, `\n` and `\t`.
 97 | #[test]
 98 | fn it_keeps_spaces_for_non_classes() -> Result<()> {
 99 |     let dom = Dom::parse("<img attr=' a b     \n\t'/>")?;
100 |     assert_json_snapshot!(dom);
101 |     Ok(())
102 | }
103 | 


--------------------------------------------------------------------------------
/tests/node_iter.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Node, Result};
 2 | use indoc::indoc;
 3 | 
 4 | /// Walks the node iterator of the document's first child and counts `<li>` elements.
 5 | ///
 6 | /// The fixture contains exactly three `<li>` tags, so the count must be 3.
 7 | #[test]
 8 | fn it_can_iter_1() -> Result<()> {
 9 |     let html = indoc! {"
10 |         <html>
11 |             <head>
12 |                 <title>title</title>
13 |             </head>
14 |             <body>
15 |                 <ul>
16 |                     <li></li>
17 |                     <li></li>
18 |                     <li></li>
19 |                 </ul>
20 |             </body>
21 |         </html>
22 |     "};
23 |     // `indoc!` already evaluates to a `&str`, so it is passed through without re-borrowing.
24 |     let dom = Dom::parse(html)?;
25 |     let root = dom.children.first().expect("parsed document should have a root node");
26 |     // Only element nodes named `li` are counted; every other node is ignored.
27 |     let num_li = root
28 |         .into_iter()
29 |         .filter(|node| matches!(node, Node::Element(e) if e.name == "li"))
30 |         .count();
31 |     assert_eq!(num_li, 3);
32 |     Ok(())
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/output.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use indoc::indoc;
 3 | use insta::assert_json_snapshot;
 4 | 
 5 | #[test] // Serialising a parsed self-closing `<div/>` with `to_json` must yield `Ok`.
 6 | fn it_can_output_json() -> Result<()> {
 7 |     assert!(Dom::parse("<div/>")?.to_json().is_ok()); // parse errors return early through `?`
 8 |     Ok(())
 9 | }
10 | 
11 | #[test] // Serialising a parsed self-closing `<div/>` with `to_json_pretty` must yield `Ok`.
12 | fn it_can_output_json_pretty() -> Result<()> {
13 |     assert!(Dom::parse("<div/>")?.to_json_pretty().is_ok()); // parse errors return early through `?`
14 |     Ok(())
15 | }
16 | 
17 | /// Snapshot test: an `<html lang="sv">` document with `<head>`, `<body>` and non-ASCII text.
18 | #[test]
19 | fn it_can_output_complex_html_as_json() -> Result<()> {
20 |     let dom = Dom::parse(indoc!(
21 |         "<html lang=\"sv\">
22 |         <head>
23 |             <title>Här kan man va</title>
24 |         </head>
25 |             <body>
26 |                 <h1>Tjena världen!</h1>
27 |                 <p>Tänkte bara informera om att Sverige är bättre än Finland i ishockey.</p>
28 |             </body>
29 |         </html>"
30 |     ))?;
31 |     assert_json_snapshot!(dom);
32 |     Ok(())
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/snapshots/comments__it_can_parse_document_with_just_comments.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/comments.rs
 3 | expression: ast
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "x",
 9 |     "y",
10 |     "z"
11 |   ]
12 | }
13 | 


--------------------------------------------------------------------------------
/tests/snapshots/comments__it_can_parse_document_with_just_one_comment.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/comments.rs
 3 | expression: ast
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello !\"#/()="
 9 |   ]
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/snapshots/document__it_can_parse_document_with_comments.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/document.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "document",
 7 |   "children": [
 8 |     "comment",
 9 |     "comment",
10 |     "comment",
11 |     "comment",
12 |     {
13 |       "name": "html",
14 |       "variant": "normal",
15 |       "children": [
16 |         "comment"
17 |       ]
18 |     },
19 |     "comment",
20 |     "comment"
21 |   ]
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/snapshots/document__it_can_parse_minimal_document.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/document.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "document",
 7 |   "children": [
 8 |     {
 9 |       "name": "html",
10 |       "variant": "normal"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/document_empty__it_can_parse_empty_document.snap:
--------------------------------------------------------------------------------
1 | ---
2 | source: tests/document_empty.rs
3 | expression: dom
4 | ---
5 | {
6 |   "treeType": "empty"
7 | }
8 | 


--------------------------------------------------------------------------------
/tests/snapshots/document_fragment__it_can_parse_single_div_as_fragment.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/document_fragment.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "void"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/document_fragment__it_can_parse_single_text_as_fragment.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/document_fragment.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello"
 9 |   ]
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/snapshots/document_fragment__it_can_parse_text_comment_element_as_fragment.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/document_fragment.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello",
 9 |     "world?",
10 |     {
11 |       "name": "div",
12 |       "variant": "void"
13 |     }
14 |   ]
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_clone_node.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: one
 4 | ---
 5 | {
 6 |   "name": "div",
 7 |   "variant": "normal",
 8 |   "children": [
 9 |     "one"
10 |   ]
11 | }
12 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_deal_with_weird_whitespaces.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "Normal case",
 9 |     {
10 |       "name": "div",
11 |       "variant": "normal",
12 |       "children": [
13 |         " Text "
14 |       ]
15 |     },
16 |     "Whitespaces in opening tag to the left",
17 |     {
18 |       "name": "div",
19 |       "variant": "normal",
20 |       "children": [
21 |         " Text "
22 |       ]
23 |     },
24 |     "Whitespaces in opening tag to the right",
25 |     {
26 |       "name": "div",
27 |       "variant": "normal",
28 |       "children": [
29 |         " Text "
30 |       ]
31 |     },
32 |     "Whitespaces in closing tag to the left (should not work)",
33 |     "<div> Text < /div>\n\n",
34 |     "Whitespaces in closing tag to the right",
35 |     {
36 |       "name": "div",
37 |       "variant": "normal",
38 |       "children": [
39 |         " Text "
40 |       ]
41 |     },
42 |     "Whitespaces everywhere (should not work)",
43 |     "< div > Text < / div >\n"
44 |   ]
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_broken_html.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "<div>",
 9 |     {
10 |       "name": "div",
11 |       "variant": "normal"
12 |     }
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_deeply_nested.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "classes": [
12 |         "1"
13 |       ],
14 |       "children": [
15 |         {
16 |           "name": "div",
17 |           "variant": "normal",
18 |           "classes": [
19 |             "1"
20 |           ],
21 |           "children": [
22 |             {
23 |               "name": "div",
24 |               "variant": "normal",
25 |               "classes": [
26 |                 "1"
27 |               ],
28 |               "children": [
29 |                 {
30 |                   "name": "div",
31 |                   "variant": "normal",
32 |                   "classes": [
33 |                     "1"
34 |                   ],
35 |                   "children": [
36 |                     {
37 |                       "name": "div",
38 |                       "variant": "normal",
39 |                       "classes": [
40 |                         "1"
41 |                       ],
42 |                       "children": [
43 |                         {
44 |                           "name": "div",
45 |                           "variant": "normal",
46 |                           "classes": [
47 |                             "1"
48 |                           ],
49 |                           "children": [
50 |                             {
51 |                               "name": "div",
52 |                               "variant": "normal",
53 |                               "classes": [
54 |                                 "1"
55 |                               ],
56 |                               "children": [
57 |                                 {
58 |                                   "name": "div",
59 |                                   "variant": "normal",
60 |                                   "classes": [
61 |                                     "1"
62 |                                   ],
63 |                                   "children": [
64 |                                     "this is deep",
65 |                                     "hello world\n                            "
66 |                                   ]
67 |                                 }
68 |                               ]
69 |                             }
70 |                           ]
71 |                         }
72 |                       ]
73 |                     }
74 |                   ]
75 |                 }
76 |               ]
77 |             }
78 |           ]
79 |         }
80 |       ]
81 |     }
82 |   ]
83 | }
84 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_multiple_elements.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "void"
11 |     },
12 |     {
13 |       "name": "div",
14 |       "variant": "void"
15 |     }
16 |   ]
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_multiple_open_elements.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal"
11 |     },
12 |     {
13 |       "name": "div",
14 |       "variant": "normal"
15 |     }
16 |   ]
17 | }
18 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_nested_elements.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "children": [
12 |         {
13 |           "name": "div",
14 |           "variant": "void"
15 |         }
16 |       ]
17 |     }
18 |   ]
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_nested_elements_mixed_children.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "children": [
12 |         "comment",
13 |         {
14 |           "name": "div",
15 |           "variant": "void"
16 |         },
17 |         "\n    Hello\n    ",
18 |         {
19 |           "name": "div",
20 |           "variant": "normal",
21 |           "children": [
22 |             "\n        World\n    "
23 |           ]
24 |         }
25 |       ]
26 |     }
27 |   ]
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_one_element.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "document",
 7 |   "children": [
 8 |     {
 9 |       "name": "html",
10 |       "variant": "normal"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_one_element_mixed_case.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "document",
 7 |   "children": [
 8 |     {
 9 |       "name": "Html",
10 |       "variant": "normal"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_one_element_mixed_case_numbers.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "Header1",
10 |       "variant": "normal"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_one_element_mixed_case_numbers_symbols.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "Head_Er-1",
10 |       "variant": "normal"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_one_element_upper_case.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "document",
 7 |   "children": [
 8 |     {
 9 |       "name": "HTML",
10 |       "variant": "normal"
11 |     }
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_script_with_content.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "script",
10 |       "variant": "normal",
11 |       "children": [
12 |         "const person_creator = ({ name, symtoms }) => {\n        let person = {}\n        person.name = name\n        person.symtoms = {}\n        for (symtom of symtoms) {\n            person.symtoms[symtom] = true\n        }\n        return person\n    }\n    \n    const main = () => {\n        let name = 'mathias'\n        let symtoms = ['Dunning-Kruger', 'ACDC', 'Slacker']\n    \n        setTimeout(() => {\n            let person = person_creator({ name, symtoms })\n            if (person.symtoms.hasOwnProperty('Dunning-Kruger')) {\n                console.log('yeah buddy, that\\'s right')\n            }\n        }, 1337)\n    }\n    \n    main()"
13 |       ]
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_can_parse_style_with_content.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "style",
10 |       "variant": "normal",
11 |       "children": [
12 |         ":root {\n        --background-color: black;\n        --text-color: white;\n    }\n    body {\n        background: var(--background-color);\n        color: var(--text-color);\n    }"
13 |       ]
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_errors_when_multiple_nested_elements_dont_match.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "<div>",
 9 |     {
10 |       "name": "div",
11 |       "variant": "normal",
12 |       "children": [
13 |         {
14 |           "name": "div",
15 |           "variant": "normal",
16 |           "children": [
17 |             {
18 |               "name": "div",
19 |               "variant": "normal"
20 |             }
21 |           ]
22 |         }
23 |       ]
24 |     }
25 |   ]
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/snapshots/element__it_skips_dangling_elements.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "id": "123",
10 |       "name": "div",
11 |       "variant": "normal"
12 |     },
13 |     {
14 |       "id": "321",
15 |       "name": "div",
16 |       "variant": "normal"
17 |     }
18 |   ]
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_attribute_key_mixed_case_symbols.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "data-cat": "morris"
13 |       }
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_attribute_multiple_values_double_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "void",
11 |       "attributes": {
12 |         "cat": "mjau mjau"
13 |       }
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_attribute_multiple_values_single_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "void",
11 |       "attributes": {
12 |         "cat": "mjau mjau"
13 |       }
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_attribute_with_empty_value.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "img",
10 |       "variant": "void",
11 |       "attributes": {
12 |         "hidden": null
13 |       }
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_classes.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "img",
10 |       "variant": "void",
11 |       "classes": [
12 |         "a",
13 |         "b",
14 |         "c"
15 |       ]
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_double_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "id": "one",
10 |       "name": "div",
11 |       "variant": "normal"
12 |     }
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_id.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "id": "a",
10 |       "name": "img",
11 |       "variant": "void"
12 |     }
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_multiple_attributes_double_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "ape": "oh",
13 |         "cat": "mjau",
14 |         "dog": "woff"
15 |       }
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_multiple_attributes_no_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "ape": "oh",
13 |         "cat": "mjau",
14 |         "dog": "woff"
15 |       }
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_multiple_attributes_single_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "ape": "oh",
13 |         "cat": "mjau",
14 |         "dog": "woff"
15 |       }
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_multiple_attributes_where_whitespace_does_not_matter_for_keys.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "div",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "ape": "oh",
13 |         "cat": "mjau",
14 |         "dog": "  woff  "
15 |       }
16 |     }
17 |   ]
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_no_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "id": "one",
10 |       "name": "div",
11 |       "variant": "normal"
12 |     }
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_can_parse_single_quote.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "id": "one",
10 |       "name": "div",
11 |       "variant": "normal"
12 |     }
13 |   ]
14 | }
15 | 


--------------------------------------------------------------------------------
/tests/snapshots/element_attributes__it_keeps_spaces_for_non_classes.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/element_attributes.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "img",
10 |       "variant": "void",
11 |       "attributes": {
12 |         "attr": " a b     \n\t"
13 |       }
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/snapshots/output__it_can_output_complex_html_as_json.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/output.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "document",
 7 |   "children": [
 8 |     {
 9 |       "name": "html",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "lang": "sv"
13 |       },
14 |       "children": [
15 |         {
16 |           "name": "head",
17 |           "variant": "normal",
18 |           "children": [
19 |             {
20 |               "name": "title",
21 |               "variant": "normal",
22 |               "children": [
23 |                 "Här kan man va"
24 |               ]
25 |             }
26 |           ]
27 |         },
28 |         {
29 |           "name": "body",
30 |           "variant": "normal",
31 |           "children": [
32 |             {
33 |               "name": "h1",
34 |               "variant": "normal",
35 |               "children": [
36 |                 "Tjena världen!"
37 |               ]
38 |             },
39 |             {
40 |               "name": "p",
41 |               "variant": "normal",
42 |               "children": [
43 |                 "Tänkte bara informera om att Sverige är bättre än Finland i ishockey."
44 |               ]
45 |             }
46 |           ]
47 |         }
48 |       ]
49 |     }
50 |   ]
51 | }
52 | 


--------------------------------------------------------------------------------
/tests/snapshots/source_span__it_can_generate_source_span.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/source_span.rs
 3 | expression: dom
 4 | ---
 5 | Dom {
 6 |     tree_type: DocumentFragment,
 7 |     children: [
 8 |         Element(
 9 |             Element {
10 |                 id: None,
11 |                 name: "template",
12 |                 variant: Normal,
13 |                 attributes: {},
14 |                 classes: [],
15 |                 children: [
16 |                     Element(
17 |                         Element {
18 |                             id: None,
19 |                             name: "h1",
20 |                             variant: Normal,
21 |                             attributes: {},
22 |                             classes: [],
23 |                             children: [
24 |                                 Text(
25 |                                     "Header",
26 |                                 ),
27 |                             ],
28 |                             source_span: SourceSpan {
29 |                                 text: "<h1>Header</h1>",
30 |                                 start_line: 2,
31 |                                 end_line: 2,
32 |                                 start_column: 5,
33 |                                 end_column: 20,
34 |                             },
35 |                         },
36 |                     ),
37 |                     Element(
38 |                         Element {
39 |                             id: None,
40 |                             name: "p",
41 |                             variant: Normal,
42 |                             attributes: {},
43 |                             classes: [],
44 |                             children: [
45 |                                 Text(
46 |                                     "Paragraph",
47 |                                 ),
48 |                             ],
49 |                             source_span: SourceSpan {
50 |                                 text: "<p>Paragraph</p>",
51 |                                 start_line: 3,
52 |                                 end_line: 3,
53 |                                 start_column: 5,
54 |                                 end_column: 21,
55 |                             },
56 |                         },
57 |                     ),
58 |                 ],
59 |                 source_span: SourceSpan {
60 |                     text: "<template>\n    <h1>Header</h1>\n    <p>Paragraph</p>\n</template>",
61 |                     start_line: 1,
62 |                     end_line: 4,
63 |                     start_column: 1,
64 |                     end_column: 12,
65 |                 },
66 |             },
67 |         ),
68 |     ],
69 |     errors: [],
70 | }
71 | 


--------------------------------------------------------------------------------
/tests/snapshots/svg__it_can_parse_svg.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/svg.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "svg",
10 |       "variant": "normal",
11 |       "attributes": {
12 |         "xmlns": "http://www.w3.org/2000/svg",
13 |         "xmlns:xlink": "http://www.w3.org/1999/xlink"
14 |       },
15 |       "children": [
16 |         {
17 |           "name": "rect",
18 |           "variant": "void",
19 |           "attributes": {
20 |             "height": "100",
21 |             "style": "stroke:#ff0000; fill: #0000ff",
22 |             "width": "100",
23 |             "x": "10",
24 |             "y": "10"
25 |           }
26 |         }
27 |       ]
28 |     }
29 |   ]
30 | }
31 | 


--------------------------------------------------------------------------------
/tests/snapshots/text__it_can_parse_document_with_just_text.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/text.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello world"
 9 |   ]
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/snapshots/text__it_can_parse_document_with_multiple_text_elements.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/text.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello world\nhere's another line for you!\n",
 9 |     {
10 |       "name": "div",
11 |       "variant": "void"
12 |     },
13 |     "\nThe end\n"
14 |   ]
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/snapshots/text__it_can_parse_document_with_text_and_line_breaks.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/text.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello world\nhere's another line for you!\nThe end\n"
 9 |   ]
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/snapshots/text__it_can_parse_text_in_paragraph_with_weird_formatting.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/text.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     {
 9 |       "name": "p",
10 |       "variant": "normal",
11 |       "children": [
12 |         "\n    This is a ",
13 |         {
14 |           "name": "b",
15 |           "variant": "normal",
16 |           "children": [
17 |             "para"
18 |           ]
19 |         },
20 |         "gra",
21 |         {
22 |           "name": "b",
23 |           "variant": "normal",
24 |           "children": [
25 |             "ph"
26 |           ]
27 |         },
28 |         " with some",
29 |         {
30 |           "name": "i",
31 |           "variant": "normal",
32 |           "children": [
33 |             " weird "
34 |           ]
35 |         },
36 |         " formatting.\n"
37 |       ]
38 |     }
39 |   ]
40 | }
41 | 


--------------------------------------------------------------------------------
/tests/snapshots/text__it_can_parse_text_with_chevron.snap:
--------------------------------------------------------------------------------
 1 | ---
 2 | source: tests/text.rs
 3 | expression: dom
 4 | ---
 5 | {
 6 |   "treeType": "documentFragment",
 7 |   "children": [
 8 |     "hello <> world"
 9 |   ]
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/source_span.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use indoc::indoc;
 3 | use insta::assert_debug_snapshot;
 4 | 
 5 | #[test]
 6 | fn it_can_generate_source_span() -> Result<()> {
 7 |     let html = indoc! {"
 8 |             <template>
 9 |                 <h1>Header</h1>
10 |                 <p>Paragraph</p>
11 |             </template>
12 |         "};
13 |     let dom = Dom::parse(html)?;
14 |     assert_debug_snapshot!(dom);
15 |     Ok(())
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/svg.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use indoc::indoc;
 3 | use insta::assert_json_snapshot;
 4 | 
 5 | #[test]
 6 | fn it_can_parse_svg() -> Result<()> {
 7 |     let html = indoc!(
 8 |         r#"
 9 |         <svg  xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
10 |             <rect x="10" y="10" height="100" width="100" style="stroke:#ff0000; fill: #0000ff"/>
11 |         </svg>
12 |     "#
13 |     );
14 |     let dom = Dom::parse(html)?;
15 |     assert_json_snapshot!(dom);
16 |     Ok(())
17 | }
18 | 
// Smoke test: a larger SVG with nested animation elements, mixed-case tag
// names (`animateMotion`, `animateTransform`) and trailing text must parse
// without error. Only success is asserted; the resulting tree is not inspected.
#[test]
fn it_can_parse_complex_svg() {
    let svg = indoc!(
        r#"
        <svg width="600" height="600">
            <rect id="rec" x="300" y="100" width="300" height="100" style="fill:lime"> 
            <animate attributeName="x" attributeType="XML" begin="0s" dur="6s" fill="freeze" from="300" to="0" /> 
            <animate attributeName="y" attributeType="XML" begin="0s" dur="6s" fill="freeze" from="100" to="0" /> 
            <animate attributeName="width" attributeType="XML" begin="0s" dur="6s" fill="freeze" from="300" to="800" /> 
            <animate attributeName="height" attributeType="XML" begin="0s" dur="6s" fill="freeze" from="100" to="300" /> 
            <animate attributeName="fill" attributeType="CSS" from="lime" to="red" begin="2s" dur="4s" fill="freeze" />
            </rect>
            <g transform="translate(100,100)"> 
            <text id="TextElement" x="0" y="0" style="font-family:Verdana;font-size:24; visibility:hidden"> It's SVG!
                <set attributeName="visibility" attributeType="CSS" to="visible" begin="1s" dur="5s" fill="freeze" />
                <animateMotion path="M 0 0 L 100 100" begin="1s" dur="5s" fill="freeze" />
                <animate attributeName="fill" attributeType="CSS" from="red" to="blue" begin="1s" dur="5s" fill="freeze" /> 
                <animateTransform attributeName="transform" attributeType="XML" type="rotate" from="-30" to="0" begin="1s" dur="5s" fill="freeze" /> 
                <animateTransform attributeName="transform" attributeType="XML" type="scale" from="1" to="3" additive="sum" begin="1s" dur="5s" fill="freeze" /> 
            </text> 
            </g>
            Sorry, your browser does not support inline SVG.
        </svg>
    "#
    );
    // `svg` is already a `&str`, so it is passed as-is (no extra borrow),
    // matching how the other tests in this file call `Dom::parse`.
    assert!(Dom::parse(svg).is_ok());
}
46 | 


--------------------------------------------------------------------------------
/tests/text.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::{Dom, Result};
 2 | use indoc::indoc;
 3 | use insta::assert_json_snapshot;
 4 | 
 5 | #[test]
 6 | fn it_can_parse_document_with_just_text() -> Result<()> {
 7 |     let html = "hello world";
 8 |     let dom = Dom::parse(html)?;
 9 |     assert_json_snapshot!(dom);
10 |     Ok(())
11 | }
12 | 
// Several lines of plain text separated by line breaks: the JSON form of the
// parsed DOM is compared against the stored snapshot, where the whole input
// appears as a single text child.
#[test]
fn it_can_parse_document_with_text_and_line_breaks() -> Result<()> {
    let text = indoc! {
        r"
        hello world
        here's another line for you!
        The end
    "
    };

    // The binding stays named `dom`: the snapshot records the asserted expression.
    let dom = Dom::parse(text)?;
    assert_json_snapshot!(dom);

    Ok(())
}
26 | 
// Text interrupted by a self-closing `<div/>`: the JSON form of the parsed DOM
// is compared against the stored snapshot, where the text before and after the
// element appear as two separate text children.
#[test]
fn it_can_parse_document_with_multiple_text_elements() -> Result<()> {
    let text = indoc! {
        r"
        hello world
        here's another line for you!
        <div/>
        The end
    "
    };

    // The binding stays named `dom`: the snapshot records the asserted expression.
    let dom = Dom::parse(text)?;
    assert_json_snapshot!(dom);

    Ok(())
}
41 | 
// A bare `<>` inside text: the JSON form of the parsed DOM is compared against
// the stored snapshot, where the chevrons stay part of a single text child.
#[test]
fn it_can_parse_text_with_chevron() -> Result<()> {
    // Single-line input has no indentation to strip, so a plain literal is
    // the same string the original `indoc!` call produced.
    let text = "hello <> world";

    // The binding stays named `dom`: the snapshot records the asserted expression.
    let dom = Dom::parse(text)?;
    assert_json_snapshot!(dom);

    Ok(())
}
49 | 
// Inline elements butted directly against text (`<b>para</b>gra<b>ph</b>`) and
// whitespace inside `<i> weird </i>`: the JSON form of the parsed DOM is
// compared against the stored snapshot.
#[test]
fn it_can_parse_text_in_paragraph_with_weird_formatting() -> Result<()> {
    let paragraph = indoc! {r"
        <p>
            This is a <b>para</b>gra<b>ph</b> with some<i> weird </i> formatting.
        </p>
    "};

    // The binding stays named `dom`: the snapshot records the asserted expression.
    let dom = Dom::parse(paragraph)?;
    assert_json_snapshot!(dom);

    Ok(())
}
61 | 


--------------------------------------------------------------------------------
/tests/websites.rs:
--------------------------------------------------------------------------------
 1 | use html_parser::Dom;
 2 | use indoc::indoc;
 3 | 
 4 | #[test]
 5 | fn it_can_parse_simple() {
 6 |     let html = indoc!(
 7 |         r#"
 8 |             <!DOCTYPE html>
 9 |             <html lang="en">
10 |                 <head>
11 |                     <meta charset="UTF-8">
12 |                     <meta name="viewport" content="width=device-width, initial-scale=1.0">
13 |                     <title>Document</title>
14 |                     <style>
15 |                         body {
16 |                             background: black;
17 |                         }
18 |                 
19 |                         h1 {
20 |                             color: white;
21 |                         }
22 |                     </style>
23 |                 </head>
24 |                 <body>
25 |                     <h1>Hello world</h1>
26 |                     <!-- There should be more text here -->
27 |                     <script>
28 |                         const title = document.querySelector("h1")
29 |                         title.innerText = "Hello from script"
30 |                     </script>
31 |                 </body>
32 |             </html>        
33 |         "#
34 |     );
35 |     assert!(Dom::parse(html).is_ok());
36 | }
37 | 
// Live-site smoke test: downloads the Spotify landing page and checks that it
// parses without error. Only success is asserted; the tree is not inspected.
//
// Marked `#[ignore]` like the other live-site tests in this file, because it
// needs network access and depends on third-party content, neither of which a
// default `cargo test` run should rely on. Run it explicitly with
// `cargo test -- --ignored`.
#[ignore]
#[test]
fn it_can_parse_spotify() {
    let resp = reqwest::blocking::get("https://www.spotify.com/se")
        .unwrap()
        .text()
        .unwrap();
    assert!(Dom::parse(&resp).is_ok());
}
46 | 
// Live-site smoke test (ignored by default): downloads the Facebook front page
// and checks that it parses without error.
#[ignore]
#[test]
fn it_can_parse_facebook() {
    let response = reqwest::blocking::get("https://www.facebook.com/").unwrap();
    let body = response.text().unwrap();

    assert!(Dom::parse(&body).is_ok());
}
56 | 
// Live-site smoke test (ignored by default): downloads the Amazon front page
// and checks that it parses without error.
#[ignore]
#[test]
fn it_can_parse_amazon() {
    let response = reqwest::blocking::get("https://www.amazon.com/").unwrap();
    let body = response.text().unwrap();

    assert!(Dom::parse(&body).is_ok());
}
66 | 
// Live-site smoke test (ignored by default): downloads the Apple front page
// and checks that it parses without error.
#[ignore]
#[test]
fn it_can_parse_apple() {
    let response = reqwest::blocking::get("https://www.apple.com/").unwrap();
    let body = response.text().unwrap();

    assert!(Dom::parse(&body).is_ok());
}
76 | 
// Live-site smoke test (ignored by default): downloads the New York Times
// front page and checks that it parses without error.
#[ignore]
#[test]
fn it_can_parse_nytimes() {
    let response = reqwest::blocking::get("https://www.nytimes.com/").unwrap();
    let body = response.text().unwrap();

    assert!(Dom::parse(&body).is_ok());
}
86 | 
// Live-site smoke test (ignored by default): downloads the English Wikipedia
// main page and checks that it parses without error.
#[ignore]
#[test]
fn it_can_parse_wikipedia() {
    let response = reqwest::blocking::get("https://en.wikipedia.org/wiki/Main_Page").unwrap();
    let body = response.text().unwrap();

    assert!(Dom::parse(&body).is_ok());
}
96 | 


--------------------------------------------------------------------------------