├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── shell.nix ├── src ├── document │ └── mod.rs └── lib.rs └── tests ├── fixtures └── docs_rs.html └── integration_tests.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: monthly 7 | time: "10:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - run: make rust-setup test 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.1.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 10 | 11 | [[package]] 12 | name = "bitflags" 13 | version = "1.3.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 16 | 17 | [[package]] 18 | name = "cfg-if" 19 | version = "1.0.0" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 22 | 23 | [[package]] 24 | name = "crabquery" 25 | version = "0.1.8" 26 | dependencies = [ 27 | "html5ever", 28 | "markup5ever", 29 | "markup5ever_arcdom", 30 | ] 31 | 32 | [[package]] 33 | name = "futf" 34 | version = "0.1.5" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" 37 | dependencies = [ 38 | "mac", 39 | "new_debug_unreachable", 40 | ] 41 | 42 | [[package]] 43 | name = "getrandom" 44 | version = "0.2.6" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" 47 | dependencies = [ 48 | "cfg-if", 49 | "libc", 50 | "wasi", 51 | ] 52 | 53 | [[package]] 54 | name = "html5ever" 55 | version = "0.26.0" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" 58 | dependencies = [ 59 | "log", 60 | "mac", 61 | "markup5ever", 62 | "proc-macro2", 63 | "quote", 64 | "syn", 65 | ] 66 | 67 | [[package]] 68 | name = "libc" 69 | version = "0.2.121" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" 72 | 73 | [[package]] 74 
| name = "lock_api" 75 | version = "0.4.7" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" 78 | dependencies = [ 79 | "autocfg", 80 | "scopeguard", 81 | ] 82 | 83 | [[package]] 84 | name = "log" 85 | version = "0.4.16" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" 88 | dependencies = [ 89 | "cfg-if", 90 | ] 91 | 92 | [[package]] 93 | name = "mac" 94 | version = "0.1.1" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" 97 | 98 | [[package]] 99 | name = "markup5ever" 100 | version = "0.11.0" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" 103 | dependencies = [ 104 | "log", 105 | "phf", 106 | "phf_codegen", 107 | "string_cache", 108 | "string_cache_codegen", 109 | "tendril", 110 | ] 111 | 112 | [[package]] 113 | name = "markup5ever_arcdom" 114 | version = "0.1.2" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "e64a3a9ca36c5a6e0352120148e6b9ea7f4030ddca750dd581cec5ecf45e8f0c" 117 | dependencies = [ 118 | "html5ever", 119 | "markup5ever", 120 | "tendril", 121 | "xml5ever", 122 | ] 123 | 124 | [[package]] 125 | name = "new_debug_unreachable" 126 | version = "1.0.4" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" 129 | 130 | [[package]] 131 | name = "once_cell" 132 | version = "1.10.0" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" 135 | 136 | [[package]] 137 | name = "parking_lot" 138 | version = "0.12.0" 
139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" 141 | dependencies = [ 142 | "lock_api", 143 | "parking_lot_core", 144 | ] 145 | 146 | [[package]] 147 | name = "parking_lot_core" 148 | version = "0.9.2" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" 151 | dependencies = [ 152 | "cfg-if", 153 | "libc", 154 | "redox_syscall", 155 | "smallvec", 156 | "windows-sys", 157 | ] 158 | 159 | [[package]] 160 | name = "phf" 161 | version = "0.10.1" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" 164 | dependencies = [ 165 | "phf_shared", 166 | ] 167 | 168 | [[package]] 169 | name = "phf_codegen" 170 | version = "0.10.0" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" 173 | dependencies = [ 174 | "phf_generator", 175 | "phf_shared", 176 | ] 177 | 178 | [[package]] 179 | name = "phf_generator" 180 | version = "0.10.0" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" 183 | dependencies = [ 184 | "phf_shared", 185 | "rand", 186 | ] 187 | 188 | [[package]] 189 | name = "phf_shared" 190 | version = "0.10.0" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" 193 | dependencies = [ 194 | "siphasher", 195 | ] 196 | 197 | [[package]] 198 | name = "ppv-lite86" 199 | version = "0.2.16" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" 202 | 203 | [[package]] 
204 | name = "precomputed-hash" 205 | version = "0.1.1" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" 208 | 209 | [[package]] 210 | name = "proc-macro2" 211 | version = "1.0.36" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" 214 | dependencies = [ 215 | "unicode-xid", 216 | ] 217 | 218 | [[package]] 219 | name = "quote" 220 | version = "1.0.17" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" 223 | dependencies = [ 224 | "proc-macro2", 225 | ] 226 | 227 | [[package]] 228 | name = "rand" 229 | version = "0.8.5" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 232 | dependencies = [ 233 | "libc", 234 | "rand_chacha", 235 | "rand_core", 236 | ] 237 | 238 | [[package]] 239 | name = "rand_chacha" 240 | version = "0.3.1" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 243 | dependencies = [ 244 | "ppv-lite86", 245 | "rand_core", 246 | ] 247 | 248 | [[package]] 249 | name = "rand_core" 250 | version = "0.6.3" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" 253 | dependencies = [ 254 | "getrandom", 255 | ] 256 | 257 | [[package]] 258 | name = "redox_syscall" 259 | version = "0.2.13" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 262 | dependencies = [ 263 | "bitflags", 264 | ] 265 | 266 | [[package]] 267 | name = "scopeguard" 268 | 
version = "1.1.0" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 271 | 272 | [[package]] 273 | name = "serde" 274 | version = "1.0.136" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" 277 | 278 | [[package]] 279 | name = "siphasher" 280 | version = "0.3.10" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" 283 | 284 | [[package]] 285 | name = "smallvec" 286 | version = "1.8.0" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 289 | 290 | [[package]] 291 | name = "string_cache" 292 | version = "0.8.4" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" 295 | dependencies = [ 296 | "new_debug_unreachable", 297 | "once_cell", 298 | "parking_lot", 299 | "phf_shared", 300 | "precomputed-hash", 301 | "serde", 302 | ] 303 | 304 | [[package]] 305 | name = "string_cache_codegen" 306 | version = "0.5.2" 307 | source = "registry+https://github.com/rust-lang/crates.io-index" 308 | checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" 309 | dependencies = [ 310 | "phf_generator", 311 | "phf_shared", 312 | "proc-macro2", 313 | "quote", 314 | ] 315 | 316 | [[package]] 317 | name = "syn" 318 | version = "1.0.90" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "704df27628939572cd88d33f171cd6f896f4eaca85252c6e0a72d8d8287ee86f" 321 | dependencies = [ 322 | "proc-macro2", 323 | "quote", 324 | "unicode-xid", 325 | ] 326 | 327 | [[package]] 328 | name = "tendril" 329 | version = "0.4.3" 330 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" 332 | dependencies = [ 333 | "futf", 334 | "mac", 335 | "utf-8", 336 | ] 337 | 338 | [[package]] 339 | name = "unicode-xid" 340 | version = "0.2.2" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" 343 | 344 | [[package]] 345 | name = "utf-8" 346 | version = "0.7.6" 347 | source = "registry+https://github.com/rust-lang/crates.io-index" 348 | checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" 349 | 350 | [[package]] 351 | name = "wasi" 352 | version = "0.10.2+wasi-snapshot-preview1" 353 | source = "registry+https://github.com/rust-lang/crates.io-index" 354 | checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" 355 | 356 | [[package]] 357 | name = "windows-sys" 358 | version = "0.34.0" 359 | source = "registry+https://github.com/rust-lang/crates.io-index" 360 | checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" 361 | dependencies = [ 362 | "windows_aarch64_msvc", 363 | "windows_i686_gnu", 364 | "windows_i686_msvc", 365 | "windows_x86_64_gnu", 366 | "windows_x86_64_msvc", 367 | ] 368 | 369 | [[package]] 370 | name = "windows_aarch64_msvc" 371 | version = "0.34.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" 374 | 375 | [[package]] 376 | name = "windows_i686_gnu" 377 | version = "0.34.0" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" 380 | 381 | [[package]] 382 | name = "windows_i686_msvc" 383 | version = "0.34.0" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = 
"9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" 386 | 387 | [[package]] 388 | name = "windows_x86_64_gnu" 389 | version = "0.34.0" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" 392 | 393 | [[package]] 394 | name = "windows_x86_64_msvc" 395 | version = "0.34.0" 396 | source = "registry+https://github.com/rust-lang/crates.io-index" 397 | checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" 398 | 399 | [[package]] 400 | name = "xml5ever" 401 | version = "0.17.0" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" 404 | dependencies = [ 405 | "log", 406 | "mac", 407 | "markup5ever", 408 | ] 409 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crabquery" 3 | version = "0.1.9" 4 | authors = ["Max Gonzih "] 5 | edition = "2018" 6 | description = "JQuery like HTML query library" 7 | homepage = "https://github.com/Gonzih/crabquery" 8 | repository = "https://github.com/Gonzih/crabquery" 9 | documentation = "https://docs.rs/crabquery" 10 | readme = "README.md" 11 | keywords = ["html", "dom", "css" ,"jquery", "scraper"] 12 | license = "MIT" 13 | 14 | [badges] 15 | # github = { repository = "https://github.com/Gonzih/rquery", branch = "master" } 16 | 17 | [dependencies] 18 | html5ever = "0.26" 19 | markup5ever = "0.11" 20 | markup5ever_arcdom = "0.1" 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © 2020 Max Gonzih 5 | 6 | Permission is hereby granted, free of charge, to any 
person 7 | obtaining a copy of this software and associated documentation 8 | files (the “Software”), to deal in the Software without 9 | restriction, including without limitation the rights to use, 10 | copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the 12 | Software is furnished to do so, subject to the following 13 | conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 | OTHER DEALINGS IN THE SOFTWARE. 
26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BACKTRACE ?= 0 2 | CARGO = cargo --color always 3 | CARGO_ARGS = $(if $(RELEASE),--release) $(if $(STATIC_BINARY), --target=x86_64-unknown-linux-musl) 4 | 5 | .PHONY: test-nix 6 | test-nix: 7 | nix-shell shell.nix --run 'make test' 8 | 9 | .PHONY: build-nix 10 | build-nix: 11 | nix-shell shell.nix --run 'make build' 12 | 13 | .PHONY: build 14 | build: 15 | $(CARGO) build $(CARGO_ARGS) 16 | 17 | .PHONY: test 18 | test: 19 | $(CARGO) test $(CARGO_ARGS) 20 | 21 | .PHONY: shell 22 | shell: 23 | nix-shell shell.nix 24 | 25 | rust-setup: 26 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 27 | rustup default stable 28 | 29 | publish: 30 | cargo publish 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CrabQuery - like JQuery, but for Crabs 2 | 3 | [![CI][ci-badge]][ci-url] 4 | [![Crates.io][crates-badge]][crates-url] 5 | [![docs.rs][docs-badge]][docs-url] 6 | [![MIT licensed][mit-badge]][mit-url] 7 | 8 | [ci-badge]: https://github.com/Gonzih/crabquery/workflows/CI/badge.svg 9 | [ci-url]: https://github.com/Gonzih/crabquery/actions 10 | [crates-badge]: https://img.shields.io/crates/v/crabquery.svg 11 | [crates-url]: https://crates.io/crates/crabquery 12 | [docs-badge]: https://docs.rs/crabquery/badge.svg 13 | [docs-url]: https://docs.rs/crabquery 14 | [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg 15 | [mit-url]: LICENSE 16 | 17 | Small and simple library to query HTML markup for your web scraping needs. 18 | 19 | Based on servo libraries. 20 | Supports more complicated CSS selectors than other similar libraries. 
21 | 22 | ## Examples 23 | 24 | ```rust 25 | use crabquery::Document; 26 | 27 | let doc = Document::from( 28 | "
29 | 30 | text hi there 31 | 32 |
", 33 | ); 34 | 35 | let sel = doc.select("div.container > a.button.link[id=\"linkmain\"]"); 36 | let el = sel.first().unwrap(); 37 | 38 | assert_eq!(el.attr("id").unwrap(), "linkmain"); 39 | 40 | let sel = doc.select("div > a > span"); 41 | let el = sel.first().unwrap(); 42 | 43 | assert_eq!(el.text().unwrap(), "text hi there"); 44 | ``` 45 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import {}; 3 | in pkgs.stdenv.mkDerivation rec { 4 | name = "rquery"; 5 | buildInputs = with pkgs; [ 6 | stdenv 7 | glib 8 | pkgconfig 9 | rustup 10 | cargo 11 | curl 12 | ]; 13 | } 14 | -------------------------------------------------------------------------------- /src/document/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module provides functionality for parsing and working with DomTree 2 | //! 3 | //! Supported selectors are: 4 | //! * tag based `span` or `a` 5 | //! * class based `.button` 6 | //! * id based `#mainbutton` 7 | //! * direct child `>` 8 | //! * attribute selectors `[href]`, `[href="specific-value"]`, `[href*="contains-str"]`, 9 | //! `[href^="begins-with"]`, `[href$="ends-with"]` 10 | //! * all combinations of above like `div.container > form#feedback input.button` 11 | //! 
12 | use html5ever::driver::ParseOpts; 13 | use html5ever::parse_document; 14 | use html5ever::tendril::TendrilSink; 15 | use html5ever::tree_builder::TreeBuilderOpts; 16 | use markup5ever::{Attribute, QualName}; 17 | use markup5ever_arcdom::{ArcDom, Handle, NodeData}; 18 | use std::cell::Ref; 19 | use std::collections::HashMap; 20 | use std::default::Default; 21 | use std::sync::Arc; 22 | 23 | pub struct Document { 24 | //{{{ 25 | doc: ArcDom, 26 | } 27 | 28 | fn default_parse_opts() -> ParseOpts { 29 | ParseOpts { 30 | tree_builder: TreeBuilderOpts { 31 | drop_doctype: true, 32 | ..Default::default() 33 | }, 34 | ..Default::default() 35 | } 36 | } 37 | 38 | impl From<&str> for Document { 39 | /// Create document from a string slice 40 | fn from(input: &str) -> Self { 41 | let doc = parse_document(ArcDom::default(), default_parse_opts()) 42 | .from_utf8() 43 | .read_from(&mut input.as_bytes()) 44 | .expect("could not parse html input"); 45 | 46 | Self { doc } 47 | } 48 | } 49 | 50 | impl From for Document { 51 | /// Create document from String 52 | fn from(input: String) -> Self { 53 | Self::from(input.as_str()) 54 | } 55 | } 56 | 57 | impl Document { 58 | /// Select elements using given css selector 59 | /// 60 | /// # Example 61 | /// ``` 62 | /// use crabquery::Document; 63 | /// 64 | /// let doc = Document::from("hi there"); 65 | /// let sel = doc.select("span"); 66 | /// let el = sel.first().unwrap(); 67 | /// 68 | /// assert_eq!(el.text().unwrap(), "hi there"); 69 | /// ``` 70 | pub fn select(&self, selector: &str) -> Vec { 71 | let sel = Selector::from(selector); 72 | sel.find(self.doc.document.children.borrow()) 73 | } 74 | } //}}} 75 | 76 | #[derive(Debug, PartialEq, Clone)] 77 | enum AttributeSpec { 78 | //{{{ 79 | /// Implementation of [attribute] selector 80 | Present, 81 | /// Implementation of [attribute="value"] selector 82 | Exact(String), 83 | // Implementation of [attribute~="value"] selector 84 | // ContainsWord(String, String), 85 | // 
Implementation of [attribute|="value"] selector 86 | // StartsWord(String, String), 87 | /// Implementation of [attribute^="value"] selector 88 | Starts(String), 89 | /// Implementation of [attribute$="value"] selector 90 | Ends(String), 91 | /// Implementation of [attribute*="value"] selector 92 | Contains(String), 93 | } 94 | 95 | impl AttributeSpec { 96 | fn matches(&self, other: String) -> bool { 97 | use AttributeSpec::*; 98 | 99 | match self { 100 | Present => true, 101 | Exact(v) => &other == v, 102 | Starts(v) => other.starts_with(v), 103 | Ends(v) => other.ends_with(v), 104 | Contains(v) => other.contains(v), 105 | } 106 | } 107 | } //}}} 108 | 109 | #[derive(Debug, PartialEq, Clone)] 110 | struct Matcher { 111 | //{{{ 112 | tag: Vec, 113 | class: Vec, 114 | id: Vec, 115 | attribute: HashMap, 116 | direct_match: bool, 117 | } 118 | 119 | impl From for Matcher { 120 | fn from(input: String) -> Self { 121 | Self::from(input.as_str()) 122 | } 123 | } 124 | 125 | impl From<&str> for Matcher { 126 | fn from(input: &str) -> Self { 127 | let mut segments = vec![]; 128 | let mut buf = "".to_string(); 129 | 130 | for c in input.chars() { 131 | match c { 132 | '>' => { 133 | return Self { 134 | tag: vec![], 135 | class: vec![], 136 | id: vec![], 137 | attribute: HashMap::new(), 138 | direct_match: true, 139 | }; 140 | } 141 | '#' | '.' 
| '[' => { 142 | segments.push(buf); 143 | buf = "".to_string(); 144 | } 145 | ']' => { 146 | segments.push(buf); 147 | buf = "".to_string(); 148 | continue; 149 | } 150 | _ => {} 151 | }; 152 | 153 | buf.push(c); 154 | } 155 | segments.push(buf); 156 | 157 | let mut res = Self { 158 | tag: vec![], 159 | class: vec![], 160 | id: vec![], 161 | attribute: HashMap::new(), 162 | direct_match: false, 163 | }; 164 | 165 | for segment in segments { 166 | match segment.chars().next() { 167 | Some('#') => res.id.push(segment[1..].to_string()), 168 | Some('.') => res.class.push(segment[1..].to_string()), 169 | Some('[') => res.add_data_attribute(segment[1..].to_string()), 170 | None => {} 171 | _ => res.tag.push(segment), 172 | } 173 | } 174 | 175 | res 176 | } 177 | } 178 | 179 | impl Matcher { 180 | fn add_data_attribute(&mut self, spec: String) { 181 | use AttributeSpec::*; 182 | 183 | let parts = spec.split('=').collect::>(); 184 | 185 | if parts.len() == 1 { 186 | let k = parts[0]; 187 | self.attribute.insert(k.to_string(), Present); 188 | return; 189 | } 190 | 191 | let v = parts[1].trim_matches('"').to_string(); 192 | let k = parts[0]; 193 | let k = k[..k.len() - 1].to_string(); 194 | 195 | match parts[0].chars().last() { 196 | Some('^') => { 197 | self.attribute.insert(k, Starts(v)); 198 | } 199 | Some('$') => { 200 | self.attribute.insert(k, Ends(v)); 201 | } 202 | Some('*') => { 203 | self.attribute.insert(k, Contains(v)); 204 | } 205 | Some(_) => { 206 | let k = parts[0].to_string(); 207 | self.attribute.insert(k, Exact(v)); 208 | } 209 | None => { 210 | panic!("Colud not parse attribute spec \"{}\"", spec); 211 | } 212 | } 213 | } 214 | 215 | fn matches(&self, name: &QualName, attrs: Ref<'_, Vec>) -> bool { 216 | let mut id_match = self.id.is_empty(); 217 | if let Some(el_id) = get_attr(&attrs, "id") { 218 | let el_ids: Vec<_> = el_id.split_whitespace().collect(); 219 | id_match = self.id.iter().all(|id| el_ids.iter().any(|eid| eid == id)) 220 | } 221 | 222 | let 
mut class_match = self.class.is_empty(); 223 | if let Some(el_class) = get_attr(&attrs, "class") { 224 | let el_classes: Vec<_> = el_class.split_whitespace().collect(); 225 | 226 | class_match = self 227 | .class 228 | .iter() 229 | .all(|class| el_classes.iter().any(|eclass| eclass == class)) 230 | } 231 | 232 | let mut attr_match = true; 233 | for (k, v) in &self.attribute { 234 | if let Some(value) = get_attr(&attrs, k.as_str()) { 235 | if !v.matches(value) { 236 | attr_match = false; 237 | break; 238 | } 239 | } else { 240 | attr_match = false; 241 | break; 242 | } 243 | } 244 | 245 | let name = name.local.to_string(); 246 | let tag_match = self.tag.is_empty() || self.tag.iter().any(|tag| &name == tag); 247 | // println!( 248 | // "for: {:?} \n {:?} \n {:?} \n tag_match: {}, id_match: {}, class_match: {}, attr_match: {} \n", 249 | // &self, name, attrs, 250 | // tag_match, id_match, class_match, attr_match 251 | // ); 252 | 253 | tag_match && id_match && class_match && attr_match 254 | } 255 | } 256 | //}}} 257 | 258 | #[derive(Debug, PartialEq)] 259 | struct Selector { 260 | //{{{ 261 | matchers: Vec, 262 | } 263 | 264 | impl From<&str> for Selector { 265 | fn from(input: &str) -> Self { 266 | let matchers: Vec<_> = input.split_whitespace().map(Matcher::from).collect(); 267 | 268 | Selector { matchers } 269 | } 270 | } 271 | 272 | fn get_attr(attrs: &Ref<'_, Vec>, name: &str) -> Option { 273 | attrs 274 | .iter() 275 | .filter(|attr| &attr.name.local == name) 276 | .take(1) 277 | .map(|attr| attr.value.to_string()) 278 | .collect::>() 279 | .pop() 280 | } 281 | 282 | impl Selector { 283 | fn find_nodes( 284 | &self, 285 | matcher: &Matcher, 286 | elements: Vec, 287 | direct_match: bool, 288 | ) -> Vec { 289 | let mut acc = vec![]; 290 | 291 | for el in elements.iter() { 292 | if !direct_match { 293 | let children: Vec<_> = el.children.borrow().iter().map(Arc::clone).collect(); 294 | acc.append(&mut self.find_nodes(matcher, children, false)); 295 | } 296 | 297 
| match el.data { 298 | NodeData::Element { 299 | ref name, 300 | ref attrs, 301 | .. 302 | } if matcher.matches(name, attrs.borrow()) => { 303 | acc.push(Arc::clone(&el)); 304 | } 305 | _ => {} 306 | }; 307 | } 308 | 309 | acc 310 | } 311 | 312 | fn find(&self, elements: Ref<'_, Vec>) -> Vec { 313 | let mut elements: Vec<_> = elements.iter().map(Arc::clone).collect(); 314 | let mut direct_match = false; 315 | 316 | for matcher in &self.matchers { 317 | if matcher.direct_match { 318 | direct_match = true; 319 | elements = elements 320 | .iter() 321 | .flat_map(|el| { 322 | el.children 323 | .borrow() 324 | .iter() 325 | .map(Arc::clone) 326 | .collect::>() 327 | }) 328 | .collect(); 329 | continue; 330 | } 331 | elements = self.find_nodes(matcher, elements, direct_match); 332 | direct_match = false; 333 | } 334 | 335 | elements.iter().map(Element::from).collect() 336 | } 337 | } //}}} 338 | 339 | pub struct Element { 340 | //{{{ 341 | handle: Handle, 342 | } 343 | 344 | impl From for Element { 345 | fn from(e: Handle) -> Self { 346 | Self::from(&e) 347 | } 348 | } 349 | 350 | impl From<&Handle> for Element { 351 | fn from(e: &Handle) -> Self { 352 | Element { 353 | handle: Arc::clone(e), 354 | } 355 | } 356 | } 357 | 358 | impl Element { 359 | /// Get value of an attribute 360 | /// 361 | /// # Arguments 362 | /// * `name` - attribute name 363 | /// 364 | /// # Example 365 | /// ``` 366 | /// use crabquery::Document; 367 | /// 368 | /// let doc = Document::from("hi there"); 369 | /// let sel = doc.select("a"); 370 | /// let el = sel.first().unwrap(); 371 | /// 372 | /// assert_eq!(el.attr("class").unwrap(), "link"); 373 | /// ``` 374 | pub fn attr(&self, name: &str) -> Option { 375 | match self.handle.data { 376 | NodeData::Element { ref attrs, .. 
} => get_attr(&attrs.borrow(), name), 377 | _ => None, 378 | } 379 | } 380 | 381 | /// Get tag value 382 | /// 383 | /// # Example 384 | /// ``` 385 | /// use crabquery::Document; 386 | /// 387 | /// let doc = Document::from("hi there"); 388 | /// let sel = doc.select("a"); 389 | /// let el = sel.first().unwrap(); 390 | /// 391 | /// assert_eq!(el.tag().unwrap(), "a"); 392 | /// ``` 393 | pub fn tag(&self) -> Option { 394 | match self.handle.data { 395 | NodeData::Element { ref name, .. } => Some(name.local.to_string()), 396 | _ => None, 397 | } 398 | } 399 | 400 | /// Get text 401 | /// 402 | /// # Example 403 | /// ``` 404 | /// use crabquery::Document; 405 | /// 406 | /// let doc = Document::from("hi there"); 407 | /// let sel = doc.select("a"); 408 | /// let el = sel.first().unwrap(); 409 | /// 410 | /// assert_eq!(el.text().unwrap(), "hi there"); 411 | /// ``` 412 | pub fn text(&self) -> Option { 413 | let mut res = "".to_string(); 414 | let children = self.handle.children.borrow(); 415 | 416 | for child in children.iter() { 417 | if let NodeData::Text { ref contents } = child.data { 418 | res.push_str(&contents.borrow().to_string().as_str()); 419 | } 420 | } 421 | 422 | Some(res) 423 | } 424 | 425 | /// Get children elements 426 | /// 427 | /// # Example 428 | /// ``` 429 | /// use crabquery::Document; 430 | /// 431 | /// let doc = Document::from("hi there"); 432 | /// let sel = doc.select("a"); 433 | /// let el = sel.first().unwrap(); 434 | /// 435 | /// assert_eq!(el.children().first().unwrap().text().unwrap(), "hi there"); 436 | /// ``` 437 | pub fn children(&self) -> Vec { 438 | self.handle 439 | .children 440 | .borrow() 441 | .iter() 442 | .filter(|n| { 443 | if let NodeData::Element { .. 
} = n.data { 444 | true 445 | } else { 446 | false 447 | } 448 | }) 449 | .map(Element::from) 450 | .collect::>() 451 | } 452 | 453 | /// Get parent element 454 | /// 455 | /// # Example 456 | /// ``` 457 | /// use crabquery::Document; 458 | /// 459 | /// let doc = Document::from("hi there"); 460 | /// let sel = doc.select("span"); 461 | /// let el = sel.first().unwrap(); 462 | /// 463 | /// assert_eq!(el.parent().unwrap().tag().unwrap(), "a"); 464 | /// ``` 465 | pub fn parent(&self) -> Option { 466 | if let Some(parent) = self.handle.parent.take() { 467 | let wrapper = parent.upgrade().map(Element::from); 468 | self.handle.parent.set(Some(parent)); 469 | 470 | return wrapper; 471 | } 472 | 473 | None 474 | } 475 | 476 | /// Select child elements using given css selector 477 | /// 478 | /// # Example 479 | /// ``` 480 | /// use crabquery::Document; 481 | /// 482 | /// let doc = Document::from("hi there"); 483 | /// let sel = doc.select("span"); 484 | /// let el = sel.first().unwrap(); 485 | /// let sel = el.select("a"); 486 | /// let a = sel.first().unwrap(); 487 | /// 488 | /// assert_eq!(a.attr("class").unwrap(), "link"); 489 | /// ``` 490 | pub fn select(&self, selector: &str) -> Vec { 491 | let sel = Selector::from(selector); 492 | sel.find(self.handle.children.borrow()) 493 | } 494 | } //}}} 495 | 496 | #[cfg(test)] 497 | mod tests { 498 | use super::*; 499 | 500 | // Matcher tests{{{ 501 | #[test] 502 | fn test_matcher_tag() { 503 | let m = Matcher::from("a"); 504 | assert_eq!(m.tag, vec!["a".to_string()],); 505 | } 506 | 507 | #[test] 508 | fn test_matcher_complex() { 509 | let m = Matcher::from("a.link.another_class#idofel.klass"); 510 | assert_eq!(m.tag, vec!["a".to_string()]); 511 | assert_eq!( 512 | m.class, 513 | vec![ 514 | "link".to_string(), 515 | "another_class".to_string(), 516 | "klass".to_string() 517 | ] 518 | ); 519 | assert_eq!(m.id, vec!["idofel".to_string()]); 520 | } 521 | 522 | #[test] 523 | fn test_matcher_direct_match() { 524 | let m = 
Matcher::from(">"); 525 | assert_eq!(m.direct_match, true); 526 | } 527 | 528 | #[test] 529 | fn test_matcher_data_attribute_present() { 530 | let m = Matcher::from("a[target]"); 531 | let mut attr = HashMap::new(); 532 | attr.insert("target".to_string(), AttributeSpec::Present); 533 | assert_eq!(m.attribute, attr); 534 | } 535 | 536 | #[test] 537 | fn test_matcher_data_attribute_exact() { 538 | let m = Matcher::from("a[target=\"_blank\"]"); 539 | let mut attr = HashMap::new(); 540 | attr.insert( 541 | "target".to_string(), 542 | AttributeSpec::Exact("_blank".to_string()), 543 | ); 544 | assert_eq!(m.attribute, attr); 545 | } 546 | 547 | #[test] 548 | fn test_matcher_data_attribute_starts() { 549 | let m = Matcher::from("a[target^=\"_blank\"]"); 550 | let mut attr = HashMap::new(); 551 | attr.insert( 552 | "target".to_string(), 553 | AttributeSpec::Starts("_blank".to_string()), 554 | ); 555 | assert_eq!(m.attribute, attr); 556 | } 557 | 558 | #[test] 559 | fn test_matcher_data_attribute_ends() { 560 | let m = Matcher::from("a[target$=\"_blank\"]"); 561 | let mut attr = HashMap::new(); 562 | attr.insert( 563 | "target".to_string(), 564 | AttributeSpec::Ends("_blank".to_string()), 565 | ); 566 | assert_eq!(m.attribute, attr); 567 | } 568 | 569 | #[test] 570 | fn test_matcher_data_attribute_contains() { 571 | let m = Matcher::from("a[target*=\"_blank\"]"); 572 | let mut attr = HashMap::new(); 573 | attr.insert( 574 | "target".to_string(), 575 | AttributeSpec::Contains("_blank".to_string()), 576 | ); 577 | assert_eq!(m.attribute, attr); 578 | } 579 | 580 | //}}} 581 | 582 | // // Selector tests{{{ 583 | // #[test] 584 | // fn test_selector_parse_simple() { 585 | // let sel = Selector::from("a"); 586 | // assert_eq!( 587 | // sel, 588 | // Selector { 589 | // css: "a".to_string(), 590 | // next: None 591 | // } 592 | // ); 593 | // } 594 | 595 | // #[test] 596 | // fn test_selector_parse_simple_with_class() { 597 | // let sel = Selector::from("a.link"); 598 | // 
assert_eq!( 599 | // sel, 600 | // Selector { 601 | // css: "a.link".to_string(), 602 | // next: None 603 | // } 604 | // ); 605 | // } //}}} 606 | 607 | // Element tests{{{ 608 | #[test] 609 | fn test_el_tag() { 610 | let doc = Document::from("hi there"); 611 | let sel = doc.select("a"); 612 | let el = sel.first().unwrap(); 613 | assert_eq!(el.tag(), Some("a".to_string())); 614 | } 615 | 616 | #[test] 617 | fn test_el_attr_class() { 618 | let doc = Document::from("hi there"); 619 | let sel = doc.select("a"); 620 | let el = sel.first().unwrap(); 621 | assert_eq!(el.attr("class"), Some("link".to_string())); 622 | } 623 | 624 | #[test] 625 | fn test_el_attr_id() { 626 | let doc = Document::from("hi there"); 627 | let sel = doc.select("a"); 628 | let el = sel.first().unwrap(); 629 | assert_eq!(el.attr("id"), Some("linkilink".to_string())); 630 | } 631 | 632 | #[test] 633 | fn test_el_attr_double_id() { 634 | let doc = Document::from("hi there"); 635 | let sel = doc.select("a#linkone#linkmain"); 636 | let el = sel.first().unwrap(); 637 | assert_eq!(el.attr("class"), Some("link".to_string())); 638 | } 639 | 640 | #[test] 641 | fn test_el_attr_double_class() { 642 | let doc = Document::from("hi there"); 643 | let sel = doc.select("a.link.button"); 644 | let el = sel.first().unwrap(); 645 | assert_eq!(el.attr("id"), Some("linkmain".to_string())); 646 | } 647 | 648 | #[test] 649 | fn test_el_attr_double_class_reverse_order() { 650 | let doc = Document::from("hi there"); 651 | let sel = doc.select("a.button.link"); 652 | let el = sel.first().unwrap(); 653 | assert_eq!(el.attr("id"), Some("linkmain".to_string())); 654 | } 655 | 656 | #[test] 657 | fn test_el_nested_selection() { 658 | let doc = Document::from( 659 | "", 660 | ); 661 | let sel = doc.select("div.container a.button.link"); 662 | let el = sel.first().unwrap(); 663 | assert_eq!(el.attr("id"), Some("linkmain".to_string())); 664 | } 665 | 666 | #[test] 667 | fn test_el_nested_selection_with_el_in_between() { 668 | 
let doc = Document::from( 669 | "", 670 | ); 671 | let sel = doc.select("div.container a.button.link"); 672 | let el = sel.first().unwrap(); 673 | assert_eq!(el.attr("id"), Some("linkmain".to_string())); 674 | } 675 | 676 | #[test] 677 | fn test_el_double_nested_selection() { 678 | let doc = Document::from( 679 | "", 680 | ); 681 | let sel = doc.select("div.container a.button.link"); 682 | let el = sel.first().unwrap(); 683 | assert_eq!(el.attr("id"), Some("linkmain".to_string())); 684 | } 685 | 686 | #[test] 687 | fn test_el_double_nested_direct_child_no_match() { 688 | let doc = Document::from( 689 | "", 690 | ); 691 | let sel = doc.select("div.container > a.button.link"); 692 | let el = sel.first(); 693 | assert!(el.is_none()); 694 | } 695 | 696 | #[test] 697 | fn test_el_double_nested_direct_child_match() { 698 | let doc = Document::from( 699 | "", 704 | ); 705 | let sel = doc.select("div.container > a.button.link"); 706 | let el = sel.first(); 707 | assert!(el.is_some()); 708 | } 709 | 710 | #[test] 711 | fn test_simple_multiple_a() { 712 | let doc = Document::from( 713 | "
714 | 715 | text hi there 716 | 717 | text hi there two 718 |
", 719 | ); 720 | let sel = doc.select("a"); 721 | assert_eq!(sel.len(), 2); 722 | } 723 | 724 | #[test] 725 | fn test_simple_multiple_a_in_div() { 726 | let doc = Document::from( 727 | " 732 |
733 | text hi there 734 | two 735 | 736 |
737 | ", 738 | ); 739 | let sel = doc.select("div a"); 740 | assert_eq!(sel.len(), 2); 741 | } 742 | 743 | #[test] 744 | fn test_simple_attribute_present() { 745 | let doc = Document::from( 746 | "
747 | text hi there 748 | two 749 | 750 |
", 751 | ); 752 | let sel = doc.select("div > span > a[data-counter]"); 753 | assert_eq!(sel.len(), 1); 754 | } 755 | 756 | #[test] 757 | fn test_simple_attribute_starts() { 758 | let doc = Document::from( 759 | "
760 | text hi there 761 | two 762 | 763 |
", 764 | ); 765 | let sel = doc.select("div > span > a[data-counter^=\"blob\"]"); 766 | assert_eq!(sel.len(), 1); 767 | } 768 | 769 | #[test] 770 | fn test_simple_attribute_ends() { 771 | let doc = Document::from( 772 | "
773 | text hi there 774 | two 775 | 776 |
", 777 | ); 778 | let sel = doc.select("div > span > a[data-counter$=\"ovo\"]"); 779 | assert_eq!(sel.len(), 1); 780 | } 781 | 782 | #[test] 783 | fn test_simple_attribute_contains() { 784 | let doc = Document::from( 785 | "
786 | text hi there 787 | two 788 | 789 |
", 790 | ); 791 | let sel = doc.select("div > span > a[data-counter*=\"obo\"]"); 792 | assert_eq!(sel.len(), 1); 793 | } 794 | 795 | #[test] 796 | fn test_simple_text() { 797 | let doc = Document::from("text hi there"); 798 | let sel = doc.select("span"); 799 | let el = sel.first().unwrap(); 800 | assert_eq!(el.text().unwrap(), "text hi there".to_string()); 801 | } 802 | 803 | #[test] 804 | fn test_el_children() { 805 | let doc = Document::from( 806 | "
807 | one 808 | two 809 | three 810 |
", 811 | ); 812 | let sel = doc.select("div"); 813 | let el = sel.first().unwrap(); 814 | assert_eq!(el.children().len(), 3); 815 | assert_eq!(el.children().first().unwrap().text().unwrap(), "one"); 816 | } 817 | 818 | #[test] 819 | fn test_el_parent() { 820 | let doc = Document::from( 821 | "
822 | one 823 |
", 824 | ); 825 | let sel = doc.select("span"); 826 | let el = sel.first().unwrap(); 827 | assert!(el.parent().is_some()); 828 | assert_eq!(el.parent().unwrap().tag().unwrap(), "div"); 829 | } 830 | 831 | #[test] 832 | fn test_attribute_selection_multiple_els() { 833 | let doc = Document::from( 834 | " 835 | 836 | 837 | ", 838 | ); 839 | let sel = doc.select("meta[property=\"og:title\"]"); 840 | assert_eq!(sel.len(), 1); 841 | } 842 | 843 | //}}} 844 | } 845 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Library for quick and easy DOM search based on CSS queries for your scraping needs. 2 | //! 3 | //! Supported selectors are: 4 | //! * tag based `span` or `a` 5 | //! * class based `.button` 6 | //! * id based `#mainbutton` 7 | //! * direct child `>` 8 | //! * attribute selectors `[href]`, `[href="specific-value"]`, `[href*="contains-str"]`, 9 | //! `[href^="begins-with"]`, `[href$="ends-with"]` 10 | //! * all combinations of the above like `div.container > form#feedback input.button` 11 | #![crate_name = "crabquery"] 12 | 13 | mod document; 14 | 15 | pub use document::*; 16 | -------------------------------------------------------------------------------- /tests/fixtures/docs_rs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | Docs.rs 18 | 19 | 20 | 21 | 50 | 51 | 52 | 53 | 54 | 55 | 63 | 64 | 65 |
66 |

Docs.rs

67 | 68 |
69 |
70 |
71 | 72 | 73 |
74 |
75 | 100 |
101 | 102 | 103 |
104 |
105 | 106 |
107 | Recent Releases 108 |
109 | 110 | 111 | 295 | 296 |
297 |
298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | -------------------------------------------------------------------------------- /tests/integration_tests.rs: -------------------------------------------------------------------------------- 1 | extern crate crabquery; 2 | 3 | use crabquery::*; 4 | 5 | #[test] 6 | fn test_docs_rs_index() { 7 | let document = Document::from(include_str!("fixtures/docs_rs.html")); 8 | 9 | let els = document.select("div.pure-u-sm-4-24"); 10 | assert_eq!(els.len(), 15); 11 | 12 | let els = document.select(".pure-u-sm-4-24"); 13 | assert_eq!(els.len(), 15); 14 | 15 | let els = document.select("meta[name=\"generator\"]"); 16 | assert_eq!(els.len(), 1); 17 | 18 | let els = document.select("a"); 19 | assert!(els.len() > 20); 20 | 21 | let els = document.select("a[href]"); 22 | assert!(els.len() > 20); 23 | } 24 | --------------------------------------------------------------------------------