├── .circleci └── config.yml ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── jekyll-gh-pages.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── tables.rs ├── examples ├── html2term.rs └── html2text.rs ├── html2text-web-demo ├── .cargo │ └── config.toml ├── .gitignore ├── Cargo.toml ├── Trunk.toml ├── index.html └── src │ └── lib.rs ├── pages ├── .gitignore ├── _config.yml ├── _includes │ └── head.html ├── assets │ ├── demo-main.js │ └── demo.css └── index.markdown ├── rust.yml └── src ├── ansi_colours.rs ├── css.rs ├── css ├── parser.rs └── types.rs ├── lib.rs ├── macros.rs ├── markup5ever_rcdom.rs ├── render ├── mod.rs └── text_renderer.rs └── tests.rs /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | win: circleci/windows@2.2.0 5 | 6 | jobs: 7 | build-stable: 8 | docker: 9 | - image: cimg/rust:1.80.1 10 | steps: 11 | - checkout 12 | - run: cargo --version 13 | - run: cargo build 14 | - run: cargo test 15 | - run: 16 | name: Install tools 17 | command: | 18 | rustup component add rustfmt clippy 19 | - run: 20 | name: Check formatting 21 | command: | 22 | cargo fmt --all -- --check --color=auto 23 | - run: 24 | name: Clippy 25 | command: | 26 | cargo clippy 27 | build-css: 28 | docker: 29 | - image: cimg/rust:1.80.1 30 | steps: 31 | - checkout 32 | - run: cargo --version 33 | - run: cargo build --features=css 34 | - run: cargo test --features=css 35 | build-1-72: 36 | docker: 37 | - image: cimg/rust:1.72 38 | steps: 39 | - checkout 40 | - run: cargo --version 41 | - run: cargo build --features=css 42 | - run: cargo test --features=css 43 | build-windows: 44 | executor: 45 | name: win/default 46 | size: medium 47 | shell: bash.exe 48 | environment: 49 | PATHk 50 | steps: 51 | - checkout 52 | - run: 53 | name: Install Rust 54 | command: | 55 | curl 
https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe --output rustup-init.exe 56 | ./rustup-init.exe -y 57 | - run: 58 | name: Update PATH and cargo config 59 | command: | 60 | echo "[net]" >> $USERPROFILE/.cargo/config 61 | echo "git-fetch-with-cli = true" >> $USERPROFILE/.cargo/config 62 | echo 'export PATH=$USERPROFILE/.cargo/bin:$PATH' >> $BASH_ENV 63 | - run: 64 | name: Build 65 | command: | 66 | cargo build 67 | - run: 68 | name: Tests 69 | command: | 70 | cargo test 71 | 72 | workflows: 73 | version: 2 74 | build: 75 | jobs: 76 | - "build-stable" 77 | - "build-css" 78 | - "build-1-72" 79 | - "build-windows" 80 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "cargo" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | day: "friday" 8 | rebase-strategy: "disabled" 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | test-action: 14 | name: Check semver compatibility 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout sources 18 | uses: actions/checkout@v2 19 | 20 | - name: Install stable toolchain 21 | uses: actions-rs/toolchain@v1 22 | with: 23 | toolchain: stable 24 | profile: minimal 25 | override: true 26 | 27 | - name: Check semver 28 | uses: obi1kenobi/cargo-semver-checks-action@v2 29 | with: 30 | version-tag-prefix: '' 31 | -------------------------------------------------------------------------------- /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow 
for building and deploying a Jekyll site to GitHub Pages 2 | name: Build and deploy demo site 3 | 4 | on: 5 | # Allows you to run this workflow manually from the Actions tab 6 | workflow_dispatch: 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 15 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 16 | concurrency: 17 | group: "pages" 18 | cancel-in-progress: false 19 | 20 | jobs: 21 | # Build job: 22 | build: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Checkout 26 | uses: actions/checkout@v4 27 | - name: Setup Pages 28 | uses: actions/configure-pages@v5 29 | - name: Install trunk 30 | run: cargo install trunk --version=0.21.13 31 | - name: Install WASM rust target 32 | run: rustup target add wasm32-unknown-unknown 33 | - name: Build WASM module 34 | run: trunk build 35 | working-directory: ./html2text-web-demo 36 | - name: Copy WASM assets 37 | run: cp html2text-web-demo/dist/html2text-web-demo{.js,_bg.wasm} ./pages/assets/ 38 | - name: Build with Jekyll 39 | uses: actions/jekyll-build-pages@v1 40 | with: 41 | source: ./pages 42 | destination: ./_site 43 | - name: Upload artifact 44 | uses: actions/upload-pages-artifact@v3 45 | 46 | # Deployment job 47 | deploy: 48 | environment: 49 | name: github-pages 50 | url: ${{ steps.deployment.outputs.page_url }} 51 | runs-on: ubuntu-latest 52 | needs: build 53 | steps: 54 | - name: Deploy to GitHub Pages 55 | id: deployment 56 | uses: actions/deploy-pages@v4 57 | 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- 
/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Possible log types: 4 | 5 | - `[added]` for new features. 6 | - `[changed]` for changes in existing functionality. 7 | - `[deprecated]` for once-stable features removed in upcoming releases. 8 | - `[removed]` for deprecated features removed in this release. 9 | - `[fixed]` for any bug fixes. 10 | - `[security]` to invite users to upgrade in case of vulnerabilities. 11 | 12 | ### Latest 13 | 14 | ### 0.15.1 15 | 16 | - [added] CSS: Support basic attribute selectors (`div[attr="bar"]`). 17 | - [changed] Various improvements to syntax highlighting: 18 | - It uses the priority of the `x-syntax` rule. 19 | - Now supported on non-`
` elements.
 20 |   - No longer strips contained tags when highlighting
 21 |   - Compatible with `display: x-raw-dom` extension (e.g. to colour the HTML)
 22 | - [fixed] With `pad_block_width` enabled, do a better job of padding blocks.
 23 |   In particular, the padding gets the block's background colour (when CSS etc.
 24 |   are being used).
 25 | 
 26 | ### 0.15.0
 27 | 
 28 | - [added] Syntax highlighting support for `<pre>` blocks
 29 |   (`Config::register_highlighter` and CSS `x-syntax: foo`)
 30 | - [changed] CSS extensions (until now `display: x-raw-dom`, and only if the
 31 |   `css_ext` Cargo feature is enabled) are now only available in agent and user CSS.
 32 |   This is a breaking change, but is not likely to affect many users.
 33 | 
 34 | ### 0.14.4
 35 | 
 36 | - [added] `RcDom::serialize`, and expose a few more of the `RcDom` types.
 37 | - [added] Online [demo page](https://jugglerchris.github.io/rust-html2text/)
 38 | 
 39 | ### 0.14.3
 40 | 
 41 | - [changed] Updated dependencies, including html5ever 0.31.
 42 | 
 43 | ### 0.14.2
 44 | 
 45 | - [fixed] An issue with multiple versions of markup5ever being included.
 46 |   (thanks anna-hope)
 47 | 
 48 | ### 0.14.1
 49 | 
 50 | - [fixed] An issue with `FragmentStart`s being lost (thanks toiletbril)
 51 | - [fixed] An infinite loop if tabs inside `<pre>` wrapped past the width
 52 |   (thanks nshp)
 53 | 
 54 | ### 0.14.0
 55 | 
 56 | - [changed] Various small refactors (thanks sftse)
 57 | - [changed] `Config::rich()` no longer includes decorations around `<em>` etc. -
 58 |   use `Config::rich().do_decorate()` to get the old behaviour.
 59 | - [fixed] Remove unnecessary empty lines at the start of lists (thanks russellbanks)
 60 | - [added] New CSS support: `::before`/`::after` and `content: "string"`, which is now
 61 |   used for simple decorations.  With CSS enabled, this allows for customising
 62 |   the display of `<em>foo</em>` without writing a decorator.
 63 | - [added] Add support for `
` and `
` (thanks noahbaculi) 64 | - [changed] Link footnotes are now configurable independently of the decorator, and on 65 | by default for `config::plain()` but can be enabled or disabled with 66 | `config.link_footnotes(true/false)`. The footnote references (e.g. `[1]`) are added 67 | in the main renderer, and the actual footnotes are written in a default implementation 68 | of `TextDecorator::finalise()` so can be customised. 69 | 70 | ### 0.13.6 71 | 72 | - [fixed] Fixed issue parsing CSS rules with known rules but unknown values, 73 | which caused parsing to stop instead of just skipping the unkown rule. 74 | 75 | ### 0.13.5 76 | 77 | - [added] CSS support for `:nth-child()` (not yet with the `of foo`). 78 | - [added] Non-standard `display: x-raw-dom` for debugging (with `css_ext` 79 | feature flag). 80 | - [fixed] An issue which could (apparently rarely) miss out some output depending on wrapping 81 | - [fixed] CSS parsing stopped when it hit an at-rule. 82 | - [added] Add `--show-css` option to `html2text` example for debugging what rules were parsed. 83 | - [added] Add poor-man's inspect mode to `html2term` - `I` to enable/disable, and arrows to navigate 84 | around the DOM. Implemented using `:nth-child` and `x-raw-dom`. 85 | 86 | ### 0.13.4 87 | 88 | - [fixed] Fix a debug assertion from a double-counted length increment 89 | (thanks JadedBlueeyes). 90 | 91 | ### 0.13.3 92 | 93 | - [fixed] Handle some obsolete `bgcolor=...` attributes. 94 | - [added] html2text example has `--show-render` to help debugging render issues. 95 | - [changed] Some error handling and other tidyups (thanks sftse) 96 | 97 | ### 0.13.2 98 | 99 | - [fixed] Fixed errors when building with Rust 1.72. 100 | 101 | ### 0.13.1 102 | 103 | - [added] html2text now has --show-dom 104 | - [fixed] Support background CSS property (for colour) 105 | - [fixed] Some edge cases with CSS styles on whitespace 106 | 107 | ### 0.13.0 108 | 109 | - [added] Support CSS white-space: pre-wrap (and normal, pre). 
110 | 111 | ### 0.13.0-alpha.2 112 | 113 | - [changed] Updated html5ever and markup5ever crate versions. This has meant 114 | updating the MSRV, which is now set to 1.72. 115 | - [fixed] Add `Config::no_link_wrapping()` (thanks JadeBlueEyes) 116 | - [fixed] Fix panic with empty table inside a list (thanks sftse) 117 | - [changed] Top level convenience functions (`from_read` etc.) now return 118 | `Result<..>` instead of panicking (thanks sftse) 119 | - [fixed] Fix panic with very large HTML `colspan` (thanks pycui) 120 | - [changed] CSS updates: 121 | - Separate user agent, author, and user CSS layers 122 | - Improve the style precedence between layers and implement specificity. 123 | 124 | ### 0.13.0-alpha.1 125 | 126 | - [fixed] Table rows with colours would disappear. (thanks tkapias) 127 | 128 | ### 0.13.0-alpha.0 129 | 130 | - [changed] Replaced LightningCSS with a smaller CSS parser. There is a chance 131 | that some CSS edge cases which no longer work; if so this would be a bug. 132 | - [removed] Some previously `pub` items and methods which are either internal 133 | implementation details or considered redundant have been removed or made 134 | private (thanks sftse). Please open an issue for anything removed that was 135 | being used. 136 | 137 | Of note, `RenderTree::render_plain()` and `RenderTree::render_rich()` have 138 | been removed. Replace code like: 139 | 140 | ```rust 141 | let text = html2text::parse(html)? 142 | .render_plain(80)? 143 | .into_string()?; 144 | ``` 145 | with: 146 | ```rust 147 | let text = html2text::config::plain() 148 | .render_to_string(html2text::parse(html)?)? 149 | ``` 150 | - [changed] Some names moved out of `text_renderer` module, so some `use` statements 151 | may need updating. 152 | - [changed] Replace some `unwrap()` with improved patterns (thanks sftse). 
153 | - [changed] Updated some dependencies 154 | 155 | ### 0.12.5 156 | 157 | - [changed] Updated some dependencies 158 | - [added] The `html2text` example now has `--ignore-css-colour`, which ignores CSS 159 | colour information but still uses `display: none`, for example. 160 | - [added] The `html2text` example now has `--only-css` option, to not use 161 | default colours when CSS colours are being used. 162 | - [fixed] Make the dummy `dashmap` depenency optional so it's not included 163 | unnecessarily when CSS isn't enabled (thanks xmakro) 164 | 165 | ### 0.12.4 166 | 167 | - [changed] Update the previous `max-height: 0` to also look at `height: 0` and require 168 | `overflow: hidden` as well. 169 | This helps with a hack some e-mail senders use for e-mail previews. (thanks tkapias) 170 | 171 | ### 0.12.3 172 | 173 | - [changed] Treat `max-height: 0` as if it's `display: none` when CSS is enabled. 174 | This helps with a hack some e-mail senders use for e-mail previews. (thanks tkapias) 175 | 176 | ### 0.12.2 177 | 178 | - [changed] Bump version of lightningcss dependency to fix build failures. 179 | 180 | ### 0.12.1 181 | 182 | - [fixed] Fix a case where Err(TooNarrow) was returned unnecessarily. (thanks sftse) 183 | - [added] Add new rendering options `Config::raw_mode()` and 184 | `Config::no_table_borders()` (thanks sftse) 185 | - [changed] Formatting, clippy and other tidy-ups (thanks sftse) 186 | - [changed] Cargo fmt now enforced in CI 187 | 188 | ### 0.12.0 189 | 190 | - [changed] Updated termion dev-dependency 191 | - [added] Support `` HTML elements 192 | - [added] Export `RcDom` publically. It was already returned by a pub function. 193 | - [added] Update handling of width overflow: 194 | With `Config::allow_width_overflow()`, prefer returning output wider 195 | than requested, instead of returning `Err(TooNarrow)`. 196 | `Config::min_wrap_width()` sets the minimum text wrap width (default 197 | 3). 
The minimum width (before overflow or `TooNarrow`) is now 198 | handled more cleanly. 199 | - [added] CSS: use color/bgcolor attributes on elements. 200 | 201 | ### 0.11.0 202 | 203 | - [fixed] CSS: rules marked !important were ignored. 204 | - [changed] html\_trace feature now uses the `log` crate. 205 | - [changed] Bumped MSRV to 1.63 (matching Debian stable) due to some dependencies. 206 | 207 | ### 0.10.3 208 | 209 | - [fixed] A panic on some unlucky text wrapping coincidences. 210 | - [fixed] Use dep:backtrace in Cargo.toml to avoid implicit feature. 211 | 212 | ### 0.10.2 213 | 214 | - [fixed] CSS: Ignore transparent colours. 215 | 216 | ### 0.10.1 217 | 218 | - [fixed] `max_width` was not working with some render methods. 219 | 220 | ### 0.10.0 221 | 222 | - [added] Simple support for ``, ``, and `` (thanks sgtatham) 223 | - [added] Added background-color support 224 | - [fixed] CSS support didn't work in some places, such as `` elements. 225 | - [added] Add support for `style` attributes. 226 | - [added] Styles apply to table borders 227 | - [changed] Update some dependencies 228 | - [fixed] Fix a few places which caused excess blank lines or empty tables 229 | 230 | ### 0.9.4 231 | 232 | - [changed] Updated the termion dev-dependency to 2.0. 233 | 234 | ### 0.9.3 235 | 236 | - [changed] Added cargo categories and update to 2021 edition. 237 | 238 | ### 0.9.2 239 | 240 | - [fixed] CSS didn't work inside `
    ` or `
      `. 241 | - [added] Add methods to get and use the intermediate HTML DOM and RenderTree 242 | from Config. 243 | - [fixed] Removed some clones which are no longer necessary now that Box 244 | works. 245 | 246 | ### 0.9.1 247 | 248 | - [fixed] Various documentation issues (thanks sgtatham) 249 | - [changed] CSS color rules now work for elements other than span. 250 | 251 | ### 0.9.0 252 | 253 | - [changed] `Config::add_css` now returns `Result` instead of panicking on 254 | CSS parse errors. Errors from parsing document CSS are ignored. 255 | - [added] Support `` when CSS is enabled. 256 | - [added] `Config::max_wrap_width()` to wrap text to a norrower width than 257 | the overal size available. 258 | - [added] Add --wrap-width and --css options to html2text example. 259 | 260 | ### 0.8.0 261 | 262 | - [added] CSS: Support more extensive selectors 263 | - [changed] CSS handling defaults to off; use `Config::use_doc_css()` 264 | or `Config::add_css` to use CSS. 265 | 266 | ### 0.7.1 267 | 268 | - [added] Now recognised CSS `display:none` 269 | - [added] Can now add extra CSS rules via `Config::add_css`. 270 | - [changed] StyleData::coloured is no longer public. 271 | 272 | ### 0.7.0 273 | 274 | - [changed] Remove some noisy stderr output when encoutering control chars 275 | (thanks sftse) 276 | - [added] A builder-based config API. 277 | - [changed] Updated MSRV to 1.60 278 | - [fixed] Fixed #88: panic when a width of zero passed in (thanks bingen13) 279 | - [fixed] Fixed #90: Fixed a divide-by-zero panic with colspan=0 (thanks mtorromeo) 280 | - [added] Add very basic CSS colour support (under the css feature flag) 281 | - [changed] Removed ansi\_colours feature (from\_read\_coloured is always available) 282 | - [changed] Overhauled error handling. Internally (and in the lower level 283 | API) errors (mainly "TooNarrow") are passed around with `Result`. Fixed 284 | some panics and infinite loops. 
(Thanks WIZeaz for fuzzing) 285 | 286 | ### 0.6.0 287 | 288 | - [changed] Improve layout of tables thanks to sftse: 289 | - Table column size estimates have been improved when the source HTML has a lot 290 | of unnecessary whitespace. 291 | - Move the URL footnotes out to the top level, also improving layout of tables 292 | containing links. 293 | - [changed] Some APIs have slightly changed as part of the table improvements, 294 | though most users should not be affeted. 295 | 296 | ### 0.5.1 297 | 298 | - [fixed] Some tables were rendered too wide. 299 | 300 | ### 0.5.0 301 | 302 | - [changed] Rich Image annotations now include the src attirbute (thanks spencerwi). 303 | 304 | ### 0.4.5 305 | 306 | - [fixed] Preserve empty lines in pre blocks (thanks kpagacz). 307 | 308 | ### 0.4.4 309 | 310 | - [fixed] Fix some panics when enumerated lists are in tables (thanks sfts). 311 | - [fixed] Impove table size estimation to include links. 312 | 313 | ### 0.4.3 314 | 315 | - [changed] MSRV is now 1.56. 316 | - [fixed] Fix some panics when very large widths are used with tables. 317 | 318 | ### 0.4.2 319 | 320 | - [changed] Moved the rcdom module directly into src/ 321 | 322 | ### 0.4.1 (unpublished) 323 | 324 | - [changed] rcdom now vendored as a module. 325 | 326 | ### 0.4.0 (unpublished) 327 | 328 | - [changed] Update html5ever to v0.26. 329 | - [changed] MSRV is now 1.49. 330 | 331 | ### 0.3.1 332 | 333 | - [changed] Update the build badges to reflect the updated CI configuration. 334 | 335 | ### 0.3.0 336 | 337 | - [added] New experimental `from_read_coloured()` (under `ansi_colours` feature). 
338 | - [added] Add `into_tagged_strings` and `tagged_strings` methods to `TaggedLine` 339 | (thanks Robin Krahl) 340 | - [added] Add `width` method to `TaggedString` (thanks Robin Krahl) 341 | - [changed] Keep annotations in `TextRenderer::into_lines` (thanks Robin Krahl) 342 | - [fixed] Add colon to reference style link (thanks zakaluka) 343 | - [added] Allow text decorators to customise block prefix strings (thanks SardineFish) 344 | - [fixed] Fixed some problems rendering some complicated tables, including a panic 345 | and near-infinite loops. 346 | - [changed] Tables which are too wide to possibly render in the given width are now 347 | arranged vertically instead (with `///`) lines. 348 | - [changed] A number of small table rendering improvements. 349 | - [changed] MSRV is now 1.41. 350 | 351 | ### 0.2.1 352 | 353 | - [added] New entry points - split HTML parsing from rendering the output, 354 | thanks Robin Krahl. 355 | - [fixed] Decorators weren't being used for preformatted text. 356 | 357 | ### 0.2.0 358 | 359 | - [added] Support `` strikeout text. 360 | 361 | ### 0.1.14 (2020-08-07) 362 | 363 | - [fixed] A table with an `id` attribute on `` would be hidden. 364 | 365 | ### 0.1.13 (2020-07-21) 366 | 367 | - [changed] Run cargo fmt (thanks crunchyjesus) 368 | - [added] CHANGELOG.md 369 | - [fixed] Some text near a fragment start (`id="foo"` attribute) could be 370 | lost if it needed breaking across lines. 371 | - [added] Experimentally add dependabot configuration. 372 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.24.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler2" 16 | version = "2.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "anstream" 31 | version = "0.6.18" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" 34 | dependencies = [ 35 | "anstyle", 36 | "anstyle-parse", 37 | "anstyle-query", 38 | "anstyle-wincon", 39 | "colorchoice", 40 | "is_terminal_polyfill", 41 | "utf8parse", 42 | ] 43 | 44 | [[package]] 45 | name = "anstyle" 46 | version = "1.0.10" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 49 | 50 | [[package]] 51 | name = "anstyle-parse" 52 | version = "0.2.6" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 55 | dependencies = [ 56 | "utf8parse", 57 | ] 58 | 59 | [[package]] 60 | name = "anstyle-query" 61 | version = "1.1.2" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" 64 | dependencies = [ 65 | "windows-sys", 66 | ] 67 | 68 | [[package]] 69 | name = "anstyle-wincon" 70 | version = 
"3.0.8" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" 73 | dependencies = [ 74 | "anstyle", 75 | "once_cell_polyfill", 76 | "windows-sys", 77 | ] 78 | 79 | [[package]] 80 | name = "argparse" 81 | version = "0.2.2" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "3f8ebf5827e4ac4fd5946560e6a99776ea73b596d80898f357007317a7141e47" 84 | 85 | [[package]] 86 | name = "autocfg" 87 | version = "1.4.0" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 90 | 91 | [[package]] 92 | name = "backtrace" 93 | version = "0.3.75" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" 96 | dependencies = [ 97 | "addr2line", 98 | "cfg-if", 99 | "libc", 100 | "miniz_oxide", 101 | "object", 102 | "rustc-demangle", 103 | "windows-targets", 104 | ] 105 | 106 | [[package]] 107 | name = "base64" 108 | version = "0.22.1" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" 111 | 112 | [[package]] 113 | name = "bincode" 114 | version = "1.3.3" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 117 | dependencies = [ 118 | "serde", 119 | ] 120 | 121 | [[package]] 122 | name = "bitflags" 123 | version = "1.3.2" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 126 | 127 | [[package]] 128 | name = "bitflags" 129 | version = "2.9.1" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = 
"1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" 132 | 133 | [[package]] 134 | name = "cc" 135 | version = "1.2.25" 136 | source = "registry+https://github.com/rust-lang/crates.io-index" 137 | checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" 138 | dependencies = [ 139 | "shlex", 140 | ] 141 | 142 | [[package]] 143 | name = "cfg-if" 144 | version = "1.0.0" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 147 | 148 | [[package]] 149 | name = "colorchoice" 150 | version = "1.0.3" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" 153 | 154 | [[package]] 155 | name = "crc32fast" 156 | version = "1.4.2" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" 159 | dependencies = [ 160 | "cfg-if", 161 | ] 162 | 163 | [[package]] 164 | name = "deranged" 165 | version = "0.4.0" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" 168 | dependencies = [ 169 | "powerfmt", 170 | ] 171 | 172 | [[package]] 173 | name = "env_filter" 174 | version = "0.1.3" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 177 | dependencies = [ 178 | "log", 179 | "regex", 180 | ] 181 | 182 | [[package]] 183 | name = "env_logger" 184 | version = "0.11.8" 185 | source = "registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 187 | dependencies = [ 188 | "anstream", 189 | "anstyle", 190 | "env_filter", 191 | "jiff", 192 | "log", 193 | ] 194 | 195 | [[package]] 196 
| name = "equivalent" 197 | version = "1.0.2" 198 | source = "registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 200 | 201 | [[package]] 202 | name = "flate2" 203 | version = "1.1.1" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" 206 | dependencies = [ 207 | "crc32fast", 208 | "miniz_oxide", 209 | ] 210 | 211 | [[package]] 212 | name = "fnv" 213 | version = "1.0.7" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 216 | 217 | [[package]] 218 | name = "futf" 219 | version = "0.1.5" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" 222 | dependencies = [ 223 | "mac", 224 | "new_debug_unreachable", 225 | ] 226 | 227 | [[package]] 228 | name = "gimli" 229 | version = "0.31.1" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" 232 | 233 | [[package]] 234 | name = "hashbrown" 235 | version = "0.15.3" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" 238 | 239 | [[package]] 240 | name = "html2text" 241 | version = "0.15.1" 242 | dependencies = [ 243 | "argparse", 244 | "backtrace", 245 | "env_logger", 246 | "html5ever", 247 | "log", 248 | "nom", 249 | "syntect", 250 | "tendril", 251 | "termion", 252 | "thiserror 2.0.12", 253 | "unicode-width", 254 | ] 255 | 256 | [[package]] 257 | name = "html5ever" 258 | version = "0.31.0" 259 | source = "registry+https://github.com/rust-lang/crates.io-index" 260 | checksum = 
"953cbbe631aae7fc0a112702ad5d3aaf09da38beaf45ea84610d6e1c358f569c" 261 | dependencies = [ 262 | "log", 263 | "mac", 264 | "markup5ever", 265 | "match_token", 266 | ] 267 | 268 | [[package]] 269 | name = "indexmap" 270 | version = "2.9.0" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" 273 | dependencies = [ 274 | "equivalent", 275 | "hashbrown", 276 | ] 277 | 278 | [[package]] 279 | name = "is_terminal_polyfill" 280 | version = "1.70.1" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 283 | 284 | [[package]] 285 | name = "itoa" 286 | version = "1.0.15" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 289 | 290 | [[package]] 291 | name = "jiff" 292 | version = "0.2.14" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "a194df1107f33c79f4f93d02c80798520551949d59dfad22b6157048a88cca93" 295 | dependencies = [ 296 | "jiff-static", 297 | "log", 298 | "portable-atomic", 299 | "portable-atomic-util", 300 | "serde", 301 | ] 302 | 303 | [[package]] 304 | name = "jiff-static" 305 | version = "0.2.14" 306 | source = "registry+https://github.com/rust-lang/crates.io-index" 307 | checksum = "6c6e1db7ed32c6c71b759497fae34bf7933636f75a251b9e736555da426f6442" 308 | dependencies = [ 309 | "proc-macro2", 310 | "quote", 311 | "syn", 312 | ] 313 | 314 | [[package]] 315 | name = "libc" 316 | version = "0.2.172" 317 | source = "registry+https://github.com/rust-lang/crates.io-index" 318 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 319 | 320 | [[package]] 321 | name = "libredox" 322 | version = "0.1.3" 323 | source = "registry+https://github.com/rust-lang/crates.io-index" 324 | checksum = 
"c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" 325 | dependencies = [ 326 | "bitflags 2.9.1", 327 | "libc", 328 | "redox_syscall", 329 | ] 330 | 331 | [[package]] 332 | name = "linked-hash-map" 333 | version = "0.5.6" 334 | source = "registry+https://github.com/rust-lang/crates.io-index" 335 | checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" 336 | 337 | [[package]] 338 | name = "lock_api" 339 | version = "0.4.13" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" 342 | dependencies = [ 343 | "autocfg", 344 | "scopeguard", 345 | ] 346 | 347 | [[package]] 348 | name = "log" 349 | version = "0.4.27" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 352 | 353 | [[package]] 354 | name = "mac" 355 | version = "0.1.1" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" 358 | 359 | [[package]] 360 | name = "markup5ever" 361 | version = "0.16.1" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "d0a8096766c229e8c88a3900c9b44b7e06aa7f7343cc229158c3e58ef8f9973a" 364 | dependencies = [ 365 | "log", 366 | "tendril", 367 | "web_atoms", 368 | ] 369 | 370 | [[package]] 371 | name = "match_token" 372 | version = "0.1.0" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" 375 | dependencies = [ 376 | "proc-macro2", 377 | "quote", 378 | "syn", 379 | ] 380 | 381 | [[package]] 382 | name = "memchr" 383 | version = "2.7.4" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 386 | 387 | 
[[package]] 388 | name = "minimal-lexical" 389 | version = "0.2.1" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 392 | 393 | [[package]] 394 | name = "miniz_oxide" 395 | version = "0.8.8" 396 | source = "registry+https://github.com/rust-lang/crates.io-index" 397 | checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" 398 | dependencies = [ 399 | "adler2", 400 | ] 401 | 402 | [[package]] 403 | name = "new_debug_unreachable" 404 | version = "1.0.6" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" 407 | 408 | [[package]] 409 | name = "nom" 410 | version = "7.1.3" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 413 | dependencies = [ 414 | "memchr", 415 | "minimal-lexical", 416 | ] 417 | 418 | [[package]] 419 | name = "num-conv" 420 | version = "0.1.0" 421 | source = "registry+https://github.com/rust-lang/crates.io-index" 422 | checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" 423 | 424 | [[package]] 425 | name = "numtoa" 426 | version = "0.2.4" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | checksum = "6aa2c4e539b869820a2b82e1aef6ff40aa85e65decdd5185e83fb4b1249cd00f" 429 | 430 | [[package]] 431 | name = "object" 432 | version = "0.36.7" 433 | source = "registry+https://github.com/rust-lang/crates.io-index" 434 | checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" 435 | dependencies = [ 436 | "memchr", 437 | ] 438 | 439 | [[package]] 440 | name = "once_cell" 441 | version = "1.21.3" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 444 | 445 | 
[[package]] 446 | name = "once_cell_polyfill" 447 | version = "1.70.1" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" 450 | 451 | [[package]] 452 | name = "onig" 453 | version = "6.5.1" 454 | source = "registry+https://github.com/rust-lang/crates.io-index" 455 | checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" 456 | dependencies = [ 457 | "bitflags 2.9.1", 458 | "libc", 459 | "once_cell", 460 | "onig_sys", 461 | ] 462 | 463 | [[package]] 464 | name = "onig_sys" 465 | version = "69.9.1" 466 | source = "registry+https://github.com/rust-lang/crates.io-index" 467 | checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" 468 | dependencies = [ 469 | "cc", 470 | "pkg-config", 471 | ] 472 | 473 | [[package]] 474 | name = "parking_lot" 475 | version = "0.12.4" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" 478 | dependencies = [ 479 | "lock_api", 480 | "parking_lot_core", 481 | ] 482 | 483 | [[package]] 484 | name = "parking_lot_core" 485 | version = "0.9.11" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" 488 | dependencies = [ 489 | "cfg-if", 490 | "libc", 491 | "redox_syscall", 492 | "smallvec", 493 | "windows-targets", 494 | ] 495 | 496 | [[package]] 497 | name = "phf" 498 | version = "0.11.3" 499 | source = "registry+https://github.com/rust-lang/crates.io-index" 500 | checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" 501 | dependencies = [ 502 | "phf_shared", 503 | ] 504 | 505 | [[package]] 506 | name = "phf_codegen" 507 | version = "0.11.3" 508 | source = "registry+https://github.com/rust-lang/crates.io-index" 509 | checksum = 
"aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" 510 | dependencies = [ 511 | "phf_generator", 512 | "phf_shared", 513 | ] 514 | 515 | [[package]] 516 | name = "phf_generator" 517 | version = "0.11.3" 518 | source = "registry+https://github.com/rust-lang/crates.io-index" 519 | checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" 520 | dependencies = [ 521 | "phf_shared", 522 | "rand", 523 | ] 524 | 525 | [[package]] 526 | name = "phf_shared" 527 | version = "0.11.3" 528 | source = "registry+https://github.com/rust-lang/crates.io-index" 529 | checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" 530 | dependencies = [ 531 | "siphasher", 532 | ] 533 | 534 | [[package]] 535 | name = "pkg-config" 536 | version = "0.3.32" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 539 | 540 | [[package]] 541 | name = "plist" 542 | version = "1.7.1" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "eac26e981c03a6e53e0aee43c113e3202f5581d5360dae7bd2c70e800dd0451d" 545 | dependencies = [ 546 | "base64", 547 | "indexmap", 548 | "quick-xml", 549 | "serde", 550 | "time", 551 | ] 552 | 553 | [[package]] 554 | name = "portable-atomic" 555 | version = "1.11.0" 556 | source = "registry+https://github.com/rust-lang/crates.io-index" 557 | checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" 558 | 559 | [[package]] 560 | name = "portable-atomic-util" 561 | version = "0.2.4" 562 | source = "registry+https://github.com/rust-lang/crates.io-index" 563 | checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 564 | dependencies = [ 565 | "portable-atomic", 566 | ] 567 | 568 | [[package]] 569 | name = "powerfmt" 570 | version = "0.2.0" 571 | source = "registry+https://github.com/rust-lang/crates.io-index" 572 | checksum = 
"439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" 573 | 574 | [[package]] 575 | name = "precomputed-hash" 576 | version = "0.1.1" 577 | source = "registry+https://github.com/rust-lang/crates.io-index" 578 | checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" 579 | 580 | [[package]] 581 | name = "proc-macro2" 582 | version = "1.0.95" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 585 | dependencies = [ 586 | "unicode-ident", 587 | ] 588 | 589 | [[package]] 590 | name = "quick-xml" 591 | version = "0.32.0" 592 | source = "registry+https://github.com/rust-lang/crates.io-index" 593 | checksum = "1d3a6e5838b60e0e8fa7a43f22ade549a37d61f8bdbe636d0d7816191de969c2" 594 | dependencies = [ 595 | "memchr", 596 | ] 597 | 598 | [[package]] 599 | name = "quote" 600 | version = "1.0.40" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 603 | dependencies = [ 604 | "proc-macro2", 605 | ] 606 | 607 | [[package]] 608 | name = "rand" 609 | version = "0.8.5" 610 | source = "registry+https://github.com/rust-lang/crates.io-index" 611 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 612 | dependencies = [ 613 | "rand_core", 614 | ] 615 | 616 | [[package]] 617 | name = "rand_core" 618 | version = "0.6.4" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 621 | 622 | [[package]] 623 | name = "redox_syscall" 624 | version = "0.5.12" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" 627 | dependencies = [ 628 | "bitflags 2.9.1", 629 | ] 630 | 631 | [[package]] 632 | name = "redox_termios" 633 | version = 
"0.1.3" 634 | source = "registry+https://github.com/rust-lang/crates.io-index" 635 | checksum = "20145670ba436b55d91fc92d25e71160fbfbdd57831631c8d7d36377a476f1cb" 636 | 637 | [[package]] 638 | name = "regex" 639 | version = "1.11.1" 640 | source = "registry+https://github.com/rust-lang/crates.io-index" 641 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 642 | dependencies = [ 643 | "aho-corasick", 644 | "memchr", 645 | "regex-automata", 646 | "regex-syntax", 647 | ] 648 | 649 | [[package]] 650 | name = "regex-automata" 651 | version = "0.4.9" 652 | source = "registry+https://github.com/rust-lang/crates.io-index" 653 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 654 | dependencies = [ 655 | "aho-corasick", 656 | "memchr", 657 | "regex-syntax", 658 | ] 659 | 660 | [[package]] 661 | name = "regex-syntax" 662 | version = "0.8.5" 663 | source = "registry+https://github.com/rust-lang/crates.io-index" 664 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 665 | 666 | [[package]] 667 | name = "rustc-demangle" 668 | version = "0.1.24" 669 | source = "registry+https://github.com/rust-lang/crates.io-index" 670 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" 671 | 672 | [[package]] 673 | name = "ryu" 674 | version = "1.0.20" 675 | source = "registry+https://github.com/rust-lang/crates.io-index" 676 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 677 | 678 | [[package]] 679 | name = "same-file" 680 | version = "1.0.6" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 683 | dependencies = [ 684 | "winapi-util", 685 | ] 686 | 687 | [[package]] 688 | name = "scopeguard" 689 | version = "1.2.0" 690 | source = "registry+https://github.com/rust-lang/crates.io-index" 691 | checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 692 | 693 | [[package]] 694 | name = "serde" 695 | version = "1.0.219" 696 | source = "registry+https://github.com/rust-lang/crates.io-index" 697 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 698 | dependencies = [ 699 | "serde_derive", 700 | ] 701 | 702 | [[package]] 703 | name = "serde_derive" 704 | version = "1.0.219" 705 | source = "registry+https://github.com/rust-lang/crates.io-index" 706 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 707 | dependencies = [ 708 | "proc-macro2", 709 | "quote", 710 | "syn", 711 | ] 712 | 713 | [[package]] 714 | name = "serde_json" 715 | version = "1.0.140" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" 718 | dependencies = [ 719 | "itoa", 720 | "memchr", 721 | "ryu", 722 | "serde", 723 | ] 724 | 725 | [[package]] 726 | name = "shlex" 727 | version = "1.3.0" 728 | source = "registry+https://github.com/rust-lang/crates.io-index" 729 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 730 | 731 | [[package]] 732 | name = "siphasher" 733 | version = "1.0.1" 734 | source = "registry+https://github.com/rust-lang/crates.io-index" 735 | checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" 736 | 737 | [[package]] 738 | name = "smallvec" 739 | version = "1.15.0" 740 | source = "registry+https://github.com/rust-lang/crates.io-index" 741 | checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" 742 | 743 | [[package]] 744 | name = "string_cache" 745 | version = "0.8.9" 746 | source = "registry+https://github.com/rust-lang/crates.io-index" 747 | checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" 748 | dependencies = [ 749 | "new_debug_unreachable", 750 | "parking_lot", 751 | "phf_shared", 752 | 
"precomputed-hash", 753 | "serde", 754 | ] 755 | 756 | [[package]] 757 | name = "string_cache_codegen" 758 | version = "0.5.4" 759 | source = "registry+https://github.com/rust-lang/crates.io-index" 760 | checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" 761 | dependencies = [ 762 | "phf_generator", 763 | "phf_shared", 764 | "proc-macro2", 765 | "quote", 766 | ] 767 | 768 | [[package]] 769 | name = "syn" 770 | version = "2.0.101" 771 | source = "registry+https://github.com/rust-lang/crates.io-index" 772 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 773 | dependencies = [ 774 | "proc-macro2", 775 | "quote", 776 | "unicode-ident", 777 | ] 778 | 779 | [[package]] 780 | name = "syntect" 781 | version = "5.2.0" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1" 784 | dependencies = [ 785 | "bincode", 786 | "bitflags 1.3.2", 787 | "flate2", 788 | "fnv", 789 | "once_cell", 790 | "onig", 791 | "plist", 792 | "regex-syntax", 793 | "serde", 794 | "serde_derive", 795 | "serde_json", 796 | "thiserror 1.0.69", 797 | "walkdir", 798 | "yaml-rust", 799 | ] 800 | 801 | [[package]] 802 | name = "tendril" 803 | version = "0.4.3" 804 | source = "registry+https://github.com/rust-lang/crates.io-index" 805 | checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" 806 | dependencies = [ 807 | "futf", 808 | "mac", 809 | "utf-8", 810 | ] 811 | 812 | [[package]] 813 | name = "termion" 814 | version = "4.0.5" 815 | source = "registry+https://github.com/rust-lang/crates.io-index" 816 | checksum = "3669a69de26799d6321a5aa713f55f7e2cd37bd47be044b50f2acafc42c122bb" 817 | dependencies = [ 818 | "libc", 819 | "libredox", 820 | "numtoa", 821 | "redox_termios", 822 | ] 823 | 824 | [[package]] 825 | name = "thiserror" 826 | version = "1.0.69" 827 | source = "registry+https://github.com/rust-lang/crates.io-index" 828 | 
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" 829 | dependencies = [ 830 | "thiserror-impl 1.0.69", 831 | ] 832 | 833 | [[package]] 834 | name = "thiserror" 835 | version = "2.0.12" 836 | source = "registry+https://github.com/rust-lang/crates.io-index" 837 | checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" 838 | dependencies = [ 839 | "thiserror-impl 2.0.12", 840 | ] 841 | 842 | [[package]] 843 | name = "thiserror-impl" 844 | version = "1.0.69" 845 | source = "registry+https://github.com/rust-lang/crates.io-index" 846 | checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" 847 | dependencies = [ 848 | "proc-macro2", 849 | "quote", 850 | "syn", 851 | ] 852 | 853 | [[package]] 854 | name = "thiserror-impl" 855 | version = "2.0.12" 856 | source = "registry+https://github.com/rust-lang/crates.io-index" 857 | checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" 858 | dependencies = [ 859 | "proc-macro2", 860 | "quote", 861 | "syn", 862 | ] 863 | 864 | [[package]] 865 | name = "time" 866 | version = "0.3.41" 867 | source = "registry+https://github.com/rust-lang/crates.io-index" 868 | checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" 869 | dependencies = [ 870 | "deranged", 871 | "itoa", 872 | "num-conv", 873 | "powerfmt", 874 | "serde", 875 | "time-core", 876 | "time-macros", 877 | ] 878 | 879 | [[package]] 880 | name = "time-core" 881 | version = "0.1.4" 882 | source = "registry+https://github.com/rust-lang/crates.io-index" 883 | checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" 884 | 885 | [[package]] 886 | name = "time-macros" 887 | version = "0.2.22" 888 | source = "registry+https://github.com/rust-lang/crates.io-index" 889 | checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" 890 | dependencies = [ 891 | "num-conv", 892 | "time-core", 893 | ] 894 | 895 | [[package]] 896 | name 
= "unicode-ident" 897 | version = "1.0.18" 898 | source = "registry+https://github.com/rust-lang/crates.io-index" 899 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 900 | 901 | [[package]] 902 | name = "unicode-width" 903 | version = "0.2.0" 904 | source = "registry+https://github.com/rust-lang/crates.io-index" 905 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 906 | 907 | [[package]] 908 | name = "utf-8" 909 | version = "0.7.6" 910 | source = "registry+https://github.com/rust-lang/crates.io-index" 911 | checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" 912 | 913 | [[package]] 914 | name = "utf8parse" 915 | version = "0.2.2" 916 | source = "registry+https://github.com/rust-lang/crates.io-index" 917 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 918 | 919 | [[package]] 920 | name = "walkdir" 921 | version = "2.5.0" 922 | source = "registry+https://github.com/rust-lang/crates.io-index" 923 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 924 | dependencies = [ 925 | "same-file", 926 | "winapi-util", 927 | ] 928 | 929 | [[package]] 930 | name = "web_atoms" 931 | version = "0.1.2" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "0b9c5f0bc545ea3b20b423e33b9b457764de0b3730cd957f6c6aa6c301785f6e" 934 | dependencies = [ 935 | "phf", 936 | "phf_codegen", 937 | "string_cache", 938 | "string_cache_codegen", 939 | ] 940 | 941 | [[package]] 942 | name = "winapi-util" 943 | version = "0.1.9" 944 | source = "registry+https://github.com/rust-lang/crates.io-index" 945 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 946 | dependencies = [ 947 | "windows-sys", 948 | ] 949 | 950 | [[package]] 951 | name = "windows-sys" 952 | version = "0.59.0" 953 | source = "registry+https://github.com/rust-lang/crates.io-index" 954 | checksum = 
"1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 955 | dependencies = [ 956 | "windows-targets", 957 | ] 958 | 959 | [[package]] 960 | name = "windows-targets" 961 | version = "0.52.6" 962 | source = "registry+https://github.com/rust-lang/crates.io-index" 963 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 964 | dependencies = [ 965 | "windows_aarch64_gnullvm", 966 | "windows_aarch64_msvc", 967 | "windows_i686_gnu", 968 | "windows_i686_gnullvm", 969 | "windows_i686_msvc", 970 | "windows_x86_64_gnu", 971 | "windows_x86_64_gnullvm", 972 | "windows_x86_64_msvc", 973 | ] 974 | 975 | [[package]] 976 | name = "windows_aarch64_gnullvm" 977 | version = "0.52.6" 978 | source = "registry+https://github.com/rust-lang/crates.io-index" 979 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 980 | 981 | [[package]] 982 | name = "windows_aarch64_msvc" 983 | version = "0.52.6" 984 | source = "registry+https://github.com/rust-lang/crates.io-index" 985 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 986 | 987 | [[package]] 988 | name = "windows_i686_gnu" 989 | version = "0.52.6" 990 | source = "registry+https://github.com/rust-lang/crates.io-index" 991 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 992 | 993 | [[package]] 994 | name = "windows_i686_gnullvm" 995 | version = "0.52.6" 996 | source = "registry+https://github.com/rust-lang/crates.io-index" 997 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 998 | 999 | [[package]] 1000 | name = "windows_i686_msvc" 1001 | version = "0.52.6" 1002 | source = "registry+https://github.com/rust-lang/crates.io-index" 1003 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1004 | 1005 | [[package]] 1006 | name = "windows_x86_64_gnu" 1007 | version = "0.52.6" 1008 | source = "registry+https://github.com/rust-lang/crates.io-index" 1009 | checksum 
= "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1010 | 1011 | [[package]] 1012 | name = "windows_x86_64_gnullvm" 1013 | version = "0.52.6" 1014 | source = "registry+https://github.com/rust-lang/crates.io-index" 1015 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1016 | 1017 | [[package]] 1018 | name = "windows_x86_64_msvc" 1019 | version = "0.52.6" 1020 | source = "registry+https://github.com/rust-lang/crates.io-index" 1021 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1022 | 1023 | [[package]] 1024 | name = "yaml-rust" 1025 | version = "0.4.5" 1026 | source = "registry+https://github.com/rust-lang/crates.io-index" 1027 | checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" 1028 | dependencies = [ 1029 | "linked-hash-map", 1030 | ] 1031 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "html2text" 3 | version = "0.15.1" 4 | authors = ["Chris Emerson "] 5 | description = "Render HTML as plain text." 
6 | repository = "https://github.com/jugglerchris/rust-html2text/" 7 | readme = "README.md" 8 | documentation = "https://docs.rs/html2text/" 9 | edition = "2021" 10 | rust-version = "1.72" 11 | categories = ["text-processing"] 12 | 13 | keywords = ["html", "text"] 14 | license = "MIT" 15 | 16 | [dependencies] 17 | html5ever = "0.31.0" 18 | tendril = "0.4" 19 | unicode-width = "0.2" 20 | backtrace = { version = "0.3", optional=true } 21 | thiserror = "2.0.0" 22 | log = { version = "0.4.20", optional = true } 23 | nom = { version = "7.1.3", optional = true } 24 | 25 | [features] 26 | html_trace = ["dep:log"] 27 | html_trace_bt = ["html_trace", "dep:backtrace"] 28 | default = [] 29 | css = [ "dep:nom" ] 30 | css_ext = ["css"] 31 | 32 | [[example]] 33 | name = "html2term" 34 | path = "examples/html2term.rs" 35 | 36 | [[example]] 37 | name = "html2text" 38 | path = "examples/html2text.rs" 39 | 40 | [dev-dependencies] 41 | env_logger = "0.11.6" 42 | argparse = "0.2.2" 43 | log = "0.4.20" 44 | syntect = "5.2.0" 45 | 46 | [target.'cfg(unix)'.dev-dependencies] 47 | termion = "4.0" 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Chris Emerson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![jugglerchris](https://circleci.com/gh/jugglerchris/rust-html2text.svg?branch=master&style=svg)](https://app.circleci.com/pipelines/github/jugglerchris/rust-html2text?filter=all) 2 | 3 | # html2text 4 | 5 | html2text is a [Rust](http://www.rust-lang.org/) crate which converts HTML to 6 | plain text (as in Rust `String`) or text spans with annotations like colours, 7 | e.g. optionally using CSS. See [the online demo](https://jugglerchris.github.io/rust-html2text/) 8 | for examples of the output. 9 | 10 | It makes use of the [Servo project](https://github.com/servo/servo)'s HTML 11 | parser, [html5ever](https://github.com/servo/html5ever/), using the DOM to 12 | generate text (which can optionally include annotations for some features such 13 | as hyperlinks). 14 | 15 | The project aims to do a reasonable job of rendering reasonable HTML in a 16 | terminal or other places where HTML needs to be converted to text (for 17 | example the text/plain fallback in HTML e-mails). 18 | 19 | With features (see below) some CSS/colour support is available. 20 | 21 | ## Examples 22 | 23 | The simple functions like `from_read()` return formatted text (in various 24 | formats including plain text). 25 | 26 | ```rust 27 | use html2text::from_read; 28 | let html = b" 29 |
        30 |
      • Item one
      • 31 |
      • Item two
      • 32 |
      • Item three
      • 33 |
      "; 34 | assert_eq!(from_read(&html[..], 20).unwrap(), 35 | "\ 36 | * Item one 37 | * Item two 38 | * Item three 39 | "); 40 | ``` 41 | 42 | A lower-level API gives a bit more control. This gives the same result (except for 43 | returning errors as Result instead of panicking): 44 | 45 | ```rust 46 | use html2text::config; 47 | 48 | let html = b" 49 |
        50 |
      • Item one
      • 51 |
      • Item two
      • 52 |
      • Item three
      • 53 |
      "; 54 | 55 | assert_eq!( 56 | config::plain() 57 | .string_from_read(&html[..], 20) 58 | .unwrap(), 59 | "\ 60 | * Item one 61 | * Item two 62 | * Item three 63 | "); 64 | ``` 65 | 66 | A couple of simple demonstration programs are included as examples: 67 | 68 | ### html2text 69 | 70 | The simplest example uses `from_read` to convert HTML on stdin into plain 71 | text: 72 | 73 | ```sh 74 | $ cargo run --example html2text < foo.html 75 | [...] 76 | ``` 77 | 78 | ### html2term 79 | 80 | A very simple example of using the rich interface (`from_read_rich`) for a 81 | slightly interactive console HTML viewer is provided as `html2term`. 82 | 83 | ```sh 84 | $ cargo run --example html2term foo.html 85 | [...] 86 | ``` 87 | 88 | Note that this example takes the HTML file as a parameter so that it can 89 | read keys from stdin. 90 | 91 | ## Cargo Features 92 | 93 | |Feature| Description| 94 | |-------|------------| 95 | |css | Limited handling of CSS, adding Coloured nodes to the render tree. | 96 | |html\_trace| Add verbose internal logging (not recommended) | 97 | |html\_trace\_bt| Add backtraces to the verbose internal logging | 98 | 99 | ### CSS support 100 | 101 | When the `css` feature is enabled, some simple CSS handling is available. 102 | 103 | Style rules are taken from: 104 | * If `Config::use_doc_css()` is called, then style from the document: 105 | * ` 19 | 38 | 39 | 40 |

      Html2text demo

      41 | CSS 42 |
      43 | Colour 44 |
      45 | 66 |
      67 | 68 |
      69 | 70 | 71 | -------------------------------------------------------------------------------- /html2text-web-demo/src/lib.rs: --------------------------------------------------------------------------------
use std::os::raw;

use wasm_bindgen::prelude::wasm_bindgen;

use ratzilla::ratatui::{
    style::{Color, Style, Stylize},
    text::{Text, Line, Span},
    widgets::{Block, Paragraph},
    Frame,
    Terminal,
};

use html2text::render::TextDecorator;
use ratzilla::DomBackend;

/// Options selected in the web demo.
///
/// Each field records one choice; [`Config::update_conf`] replays them onto
/// an `html2text::config::Config` just before rendering.
#[derive(Default)]
#[wasm_bindgen]
pub struct Config {
    // Apply `use_doc_css()` to the html2text config.
    css: bool,
    // Render through the rich (styled spans) path instead of plain text.
    colour: bool,
    // Extra CSS passed to `add_css()`; `None` when blank.
    user_css: Option<String>,
    // Extra CSS passed to `add_agent_css()`; `None` when blank.
    agent_css: Option<String>,
    pad_block_width: bool,
    // Value for `max_wrap_width()`, if set.
    wrap_width: Option<usize>,
    allow_overflow: bool,
    // Value for `min_wrap_width()`, if set.
    min_wrap_width: Option<usize>,
    raw_mode: bool,
    no_borders: bool,
    no_link_wrap: bool,
    // Value handed to `unicode_strikeout()`.
    unicode_so: bool,
    do_decorate: bool,
    link_footnotes: bool,
}

#[wasm_bindgen]
impl Config {
    /// Create a configuration with every option off or unset.
    pub fn new() -> Self {
        Self::default()
    }

    /// Select the rich (coloured) rendering path in [`format_html`].
    pub fn use_colour(&mut self) {
        self.colour = true;
    }

    /// Request `use_doc_css()` on the html2text config.
    pub fn use_css(&mut self) {
        self.css = true;
    }

    /// Set the user CSS; blank (whitespace-only) input clears it.
    pub fn add_user_css(&mut self, css: String) {
        self.user_css = if css.trim().is_empty() { None } else { Some(css) };
    }

    /// Set the agent CSS; blank (whitespace-only) input clears it.
    pub fn add_agent_css(&mut self, css: String) {
        self.agent_css = if css.trim().is_empty() { None } else { Some(css) };
    }

    /// Request `pad_block_width()` on the html2text config.
    pub fn pad_block_width(&mut self) {
        self.pad_block_width = true;
    }

    /// Set the value passed to `max_wrap_width()`.
    pub fn max_wrap_width(&mut self, width: usize) {
        self.wrap_width = Some(width);
    }

    /// Request `allow_width_overflow()` on the html2text config.
    pub fn allow_overflow(&mut self) {
        self.allow_overflow = true;
    }

    /// Set the value passed to `min_wrap_width()`.
    pub fn min_wrap_width(&mut self, width: usize) {
        self.min_wrap_width = Some(width);
    }

    /// Request `raw_mode(true)` on the html2text config.
    pub fn raw_mode(&mut self) {
        self.raw_mode = true;
    }

    /// Request `no_table_borders()` on the html2text config.
    pub fn no_borders(&mut self) {
        self.no_borders = true;
    }

    /// Request `no_link_wrapping()` on the html2text config.
    pub fn no_link_wrap(&mut self) {
        self.no_link_wrap = true;
    }

    /// Request `unicode_strikeout(true)` on the html2text config.
    pub fn unicode_so(&mut self) {
        self.unicode_so = true;
    }

    /// Request `do_decorate()` on the html2text config.
    pub fn do_decorate(&mut self) {
        self.do_decorate = true;
    }

    /// Set the value passed to `link_footnotes()`.
    pub fn link_footnotes(&mut self, value: bool) {
        self.link_footnotes = value;
    }

    /// Apply the recorded options to `conf` and return the updated config.
    ///
    /// # Errors
    ///
    /// Returns the formatted html2text error if the user or agent CSS is
    /// rejected by `add_css()` / `add_agent_css()`.
    fn update_conf<D: TextDecorator>(
        &self,
        conf: html2text::config::Config<D>,
    ) -> Result<html2text::config::Config<D>, String> {
        let mut conf = if self.css { conf.use_doc_css() } else { conf };
        if let Some(user_css) = &self.user_css {
            conf = conf.add_css(user_css).map_err(|e| e.to_string())?;
        }
        if let Some(agent_css) = &self.agent_css {
            conf = conf.add_agent_css(agent_css).map_err(|e| e.to_string())?;
        }
        if self.pad_block_width {
            conf = conf.pad_block_width();
        }
        if let Some(width) = self.wrap_width {
            conf = conf.max_wrap_width(width);
        }
        if self.allow_overflow {
            conf = conf.allow_width_overflow();
        }
        if let Some(width) = self.min_wrap_width {
            conf = conf.min_wrap_width(width);
        }
        if self.raw_mode {
            conf = conf.raw_mode(true);
        }
        if self.no_borders {
            conf = conf.no_table_borders();
        }
        if self.no_link_wrap {
            conf = conf.no_link_wrapping();
        }
        // Set the flag exactly once. Previously it was set to `true` here and
        // then unconditionally reset to `false` on return, so the option never
        // took effect.
        conf = conf.unicode_strikeout(self.unicode_so);
        if self.do_decorate {
            conf = conf.do_decorate();
        }
        Ok(conf.link_footnotes(self.link_footnotes))
    }
}

/// Render `input` as styled text inside a bordered "HTML" block filling the frame.
///
/// # Errors
///
/// Returns the formatted error if the configuration cannot be built or if
/// html2text fails to render the input.
fn do_render_colour(f: &mut Frame, config: &Config, input: &[u8]) -> Result<(), String> {
    let area = f.area();

    let conf = config.update_conf(html2text::config::rich())?;

    // The bordered block takes two columns; saturate so a very narrow frame
    // cannot underflow.
    let width = (area.width as usize).saturating_sub(2);
    let lines = conf
        .lines_from_read(input, width)
        .map_err(|e| e.to_string())?;

    let mut out = Text::default();
    for line in lines {
        let mut term_line = Line::default();
        for piece in line.tagged_strings() {
            let mut style = Style::new();
            for attr in &piece.tag {
                use html2text::render::RichAnnotation::*;
                match attr {
                    Default | Link(_) | Image(_) | Code | Preformat(_) => {}
                    Emphasis => {
                        style = style.italic();
                    }
                    Strong => {
                        style = style.bold();
                    }
                    Strikeout => {
                        style = style.crossed_out();
                    }
                    Colour(col) => {
                        style = style.fg(Color::Rgb(col.r, col.g, col.b));
                    }
                    BgColour(col) => {
                        style = style.bg(Color::Rgb(col.r, col.g, col.b));
                    }
                    _ => {}
                }
            }
            term_line.push_span(Span::from(piece.s.clone()).style(style));
        }
        out.push_line(term_line);
    }
    f.render_widget(
        Paragraph::new(out).block(Block::bordered().title("HTML").border_style(Color::Yellow)),
        area);
    Ok(())
}

/// Render `input` as plain text filling the frame.
///
/// # Errors
///
/// Returns the formatted error if the configuration cannot be built or if
/// html2text fails to render the input.
fn do_render_plain(f: &mut Frame, config: &Config, input: &[u8]) -> Result<(), String> {
    let area = f.area();

    let conf = config.update_conf(html2text::config::plain())?;
    let output = conf
        .string_from_read(input, area.width as usize)
        .map_err(|e| e.to_string())?;

    f.render_widget(Paragraph::new(output), area);
    Ok(())
}

/// Convert `input` HTML to text and draw it into the DOM element with id `lib`.
///
/// Uses the coloured renderer when `config` has colour enabled, otherwise the
/// plain-text renderer.
///
/// # Errors
///
/// Returns a message if the backend or terminal cannot be created, if drawing
/// fails, or if configuration/rendering fails.
#[wasm_bindgen]
pub fn format_html(config: Config, input: &str) -> Result<(), String> {
    // `{e:?}`: the original `.unwrap()` calls only guarantee `Debug` on these errors.
    let backend = DomBackend::new_by_id("lib").map_err(|e| format!("{e:?}"))?;
    let mut terminal = Terminal::new(backend).map_err(|e| format!("{e:?}"))?;

    // The draw callback returns `()`, so carry any rendering failure out
    // through this slot rather than panicking inside the callback.
    let mut outcome: Result<(), String> = Ok(());
    terminal.draw(|f| {
        outcome = if config.colour {
            do_render_colour(f, &config, input.as_bytes())
        } else {
            do_render_plain(f, &config, input.as_bytes())
        };
    }).map_err(|e| format!("{e}"))?;
    outcome
}
-------------------------------------------------------------------------------- /pages/.gitignore: -------------------------------------------------------------------------------- 1 | _site 2 |
-------------------------------------------------------------------------------- /pages/_config.yml: -------------------------------------------------------------------------------- 1 | lsi: false 2 | safe: true 3 | source: . 4 | incremental: false 5 | baseurl: "/rust-html2text" 6 | gist: 7 | noscript: false 8 | 9 | theme: minima 10 | 11 | github_username: jugglerchris 12 | -------------------------------------------------------------------------------- /pages/_includes/head.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {%- seo -%} 6 | 7 | {%- feed_meta -%} 8 | {%- if jekyll.environment == 'production' and site.google_analytics -%} 9 | {%- include google-analytics.html -%} 10 | {%- endif -%} 11 | {%- if page.h2t_wasm -%} 12 | 13 | 14 | 15 | {%- endif -%} 16 | {%- if page.h2t_js -%} 17 | 19 | {%- endif -%} 20 | 21 | -------------------------------------------------------------------------------- /pages/assets/demo-main.js: -------------------------------------------------------------------------------- 1 | const controls = [ 2 | // Element id, (conf, value) -> "rust code" 3 | ["conf_css", (conf, value) => { conf.use_css(); return ".use_doc_css()"; }], 4 | ["conf_user_css", (conf, value) => { conf.add_user_css(value); return `.add_css(r#"{value}"#)`; }], 5 | ["conf_agent_css", (conf, value) => { conf.add_agent_css(value); return `.add_agent_css(r#"{value}"#)`; }], 6 | ["conf_pad_block_width", (conf, value) => { conf.bad_block_width(); return `.pad_block_width()`; }], 7 | ["conf_wrap_width", (conf, value) => { conf.max_wrap_width(value); return `.max_wrap_width({value})`; }], 8 | ["conf_allow_overflow", (conf, value) => { conf.allow_overflow(); return `.allow_width_overflow()`; }], 9 | ["conf_min_wrap_width", (conf, value) => { conf.min_wrap_width(value); return `.min_wrap_width({value})`; }], 10 | ["conf_raw", (conf, value) => { conf.raw_mode(); return `.raw_mode(true)`; }], 11 | ["conf_no_borders", (conf, 
value) => { conf.no_borders(); return `.no_table_borders(true)`; }], 12 | ["conf_no_link_wrap", (conf, value) => { conf.no_link_wrap(); return `.no_link_wrapping(true)`; }], 13 | ["conf_unicode_so", (conf, value) => { conf.unicode_so(); return `.unicode_strikeout(true)`; }], 14 | ["conf_do_decorate", (conf, value) => { conf.do_decorate(); return `.do_decorate(true)`; }], 15 | ["conf_link_footnotes", (conf, value) => { conf.link_footnotes(value); return `.link_footnotes({value})`; }], 16 | 17 | ]; 18 | function update_html() { 19 | const text = document.getElementById("input_html").value; 20 | const colour = document.getElementById("conf_colour").checked; 21 | 22 | const raw = document.getElementById("conf_raw").checked; 23 | const no_borders = document.getElementById("conf_no_borders").checked; 24 | const no_link_wrap = document.getElementById("conf_no_link_wrap").checked; 25 | const unicode_so = document.getElementById("conf_unicode_so").checked; 26 | const do_decorate = document.getElementById("conf_do_decorate").checked; 27 | const link_footnotes = document.getElementById("conf_link_footnotes").checked; 28 | 29 | let rust_code = ""; 30 | 31 | let conf = wasmBindings.Config.new(); 32 | if (colour) { 33 | rust_code += "let config = html2text::config::rich()"; 34 | conf.use_colour(); 35 | } else { 36 | rust_code += "let config = html2text::config::plain()"; 37 | } 38 | for (const conf_desc of controls) { 39 | const elt_id = conf_desc[0]; 40 | const handler = conf_desc[1]; 41 | 42 | const elt = document.getElementById(elt_id); 43 | if (elt.type == "checkbox") { 44 | if (elt.checked) { 45 | let codefrag = handler(conf, elt.checked); 46 | if (codefrag) { 47 | rust_code += "\n " + codefrag; 48 | } 49 | } 50 | } else { 51 | if (elt.value) { 52 | let codefrag = handler(conf, elt.value); 53 | if (codefrag) { 54 | rust_code += "\n " + codefrag; 55 | } 56 | } 57 | } 58 | } 59 | 60 | rust_code += ";\n"; 61 | if (colour) { 62 | rust_code += ` 63 | let lines = 
conf.lines_from_read(input, width); 64 | for line in lines { 65 | for ts in line.tagged_strings() { 66 | // examine tags for each text span for colours etc. 67 | } 68 | } 69 | `; 70 | } else { 71 | rust_code += ` 72 | let text = conf.string_from_read(input, width); 73 | `; 74 | } 75 | 76 | let tn = document.createTextNode(rust_code); 77 | document.getElementById("rust-code").replaceChildren(tn); 78 | wasmBindings.format_html(conf, text); 79 | } 80 | 81 | function start() { 82 | const confItems = document.querySelectorAll("input"); 83 | confItems.forEach((elt) => { 84 | elt.addEventListener("change", update_html); 85 | }); 86 | // Do the first render 87 | update_html(); 88 | } 89 | window.addEventListener("TrunkApplicationStarted", start); 90 | -------------------------------------------------------------------------------- /pages/assets/demo.css: -------------------------------------------------------------------------------- 1 | #lib { 2 | background-color: black; 3 | height: 30em; 4 | overflow: scroll; 5 | } 6 | #input_html { 7 | height: 300px; 8 | width: 95%; 9 | overflow: scroll; 10 | } 11 | 12 | #lib pre { 13 | margin: 0; 14 | padding: 0; 15 | overflow: hidden; 16 | background-color: black; 17 | border: 0px; 18 | } 19 | 20 | .warning { 21 | color: red; 22 | } 23 | .warning::before { 24 | content: "⚠️"; 25 | } 26 | 27 | div.wrapper { 28 | max-width: 100%; 29 | } 30 | @media screen and (min-width: 1000px) { 31 | #h2tmain { 32 | display: grid; 33 | gap: 10px; 34 | grid-template-columns: 1fr 1fr; 35 | } 36 | #lib_container { 37 | grid-column: 1; 38 | min-width: 45%; 39 | } 40 | #input_container { 41 | grid-column: 1; 42 | grid-row-start: 2; 43 | min-width: 45%; 44 | } 45 | #configtable { 46 | grid-column: 2; 47 | grid-row-start: 1; 48 | grid-row-end: 3; 49 | min-width: 45%; 50 | } 51 | #rust-code-pre { 52 | grid-column: 2; 53 | min-width: 45%; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- 
/pages/index.markdown: -------------------------------------------------------------------------------- 1 | --- 2 | # Feel free to add content and custom Front Matter to this file. 3 | # To modify the layout, see https://jekyllrb.com/docs/themes/#overriding-theme-defaults 4 | 5 | title: html2text API demo 6 | layout: home 7 | 8 | # Local additions 9 | h2t_wasm: true 10 | h2t_js: "/assets/demo-main.js" 11 | --- 12 | 13 | 16 | 17 | An online demonstration of the 18 | [`html2text`](https://github.com/jugglerchris/rust-html2text) Rust crate. Edit 19 | the HTML below and see how `html2text` converts it for text or terminal 20 | display. 21 | 22 | This demo uses `html2text` compiled to WASM, which can run in any modern 23 | browser, with [ratzilla](https://github.com/orhun/ratzilla) for the web-based 24 | terminal output. 25 | 26 |
      27 | 28 |
      29 | 30 | ## Output 31 | 32 | The html2text output is updated here: 33 | 34 |
      35 | 36 |
      37 | 38 |
      39 |

      Input HTML

      40 |

      Edit the HTML here - the output will update live.

      41 | 63 |
      64 |
      65 | 66 | ## Configuration 67 | 68 | The following are the configuration settings (accessible via [`html2text::config`](https://docs.rs/html2text/latest/html2text/config/struct.Config.html)). 69 | 70 | | Use Rich output | The [`rich`](https://docs.rs/html2text/latest/html2text/config/fn.rich.html) mode returns spans with attributes (like hyperlinks, emphasis, or colours). When disabled ([`plain`](https://docs.rs/html2text/latest/html2text/config/fn.plain.html)), the output is a plain `String` (possibly with formatting depending on other settings, e.g. table borders or `**markdown-style**` characters added). Rich output adds extra information (annotations) to allow, for example, using terminal colours and other features for a nicer TUI. | 71 | | use_doc_css | Parse CSS from the HTML document (css) | 72 | | User CSS | Add user stylesheet rules (css) | 73 | | Agent CSS | Add browser stylesheet rules (css) | 74 | | Pad block width | Pad blocks to the width with spaces | 75 | | Text wrap width | Wrap text to this width even if overall width is wider | 76 | | Allow width overflow | Allow text to be too wide in extreme cases instead of returning an error | 77 | | Minimum wrap width | Set the minimum number of columns to use for text blocks. | 78 | | Raw mode | Render contents of tables as if they were just text. Implies `no_table_borders` | 79 | | Don't render table borders | Tables are shown without borders | 80 | | Don't wrap URLs at the end | Some terminals handle long URLs better if not pre-wrapped | 81 | | Use Unicode combining characters for strikeout | This allows crossed out text without terminal codes, but some environments don't render them correctly (e.g. offset). | 82 | | Add markdown-like decoration | Add characters, e.g. `*` around `<em>` text even with plain decorators. | 83 | | URL footnotes | Add numbered list of URLs at the end of the output | 84 | 85 |
      86 | 87 |
      88 | 89 | ## Rust API configuration 90 | 91 | The code below shows how to use the currently selected settings in the Rust API. 92 | 93 |
      94 |
      95 | 96 | 105 | -------------------------------------------------------------------------------- /rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- /src/ansi_colours.rs: -------------------------------------------------------------------------------- 1 | //! Convenience helper for producing coloured terminal output. 2 | //! 3 | //! This optional helper applies terminal colours (or other effects which 4 | //! can be achieved using inline characters sent to the terminal such as 5 | //! underlining in some terminals). 6 | 7 | use crate::RichAnnotation; 8 | use std::io; 9 | 10 | /// Reads HTML from `input`, and returns text wrapped to `width` columns. 11 | /// 12 | /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors 13 | /// of `RichAnnotation`. The "outer" annotation comes first in the `Vec`. 14 | /// 15 | /// The function `colour_map` is given a slice of `RichAnnotation` and should 16 | /// return a pair of static strings which should be inserted before/after a text 17 | /// span with that annotation; for example a string which sets text colour 18 | /// and a string which sets the colour back to the default.
19 | pub fn from_read_coloured( 20 | input: R, 21 | width: usize, 22 | colour_map: FMap, 23 | ) -> Result 24 | where 25 | R: io::Read, 26 | FMap: Fn(&[RichAnnotation], &str) -> String, 27 | { 28 | super::config::rich().coloured(input, width, colour_map) 29 | } 30 | -------------------------------------------------------------------------------- /src/css.rs: -------------------------------------------------------------------------------- 1 | //! Some basic CSS support. 2 | use std::ops::Deref; 3 | use std::rc::Rc; 4 | 5 | #[cfg(feature = "css")] 6 | mod parser; 7 | pub(crate) mod types; 8 | 9 | #[cfg(feature = "css")] 10 | use crate::{Colour, Result, WhiteSpace}; 11 | #[cfg(feature = "css")] 12 | use parser::parse_style_attribute; 13 | 14 | use types::Importance; 15 | 16 | use crate::{ 17 | markup5ever_rcdom::{ 18 | Handle, 19 | NodeData::{self, Comment, Document, Element}, 20 | }, 21 | ComputedStyle, Specificity, StyleOrigin, 22 | }; 23 | 24 | #[derive(Debug, Clone, PartialEq, Eq)] 25 | /// Attribute seletor operations 26 | pub(crate) enum AttrOperator { 27 | #[allow(unused)] 28 | Present, // foo[href] 29 | #[allow(unused)] 30 | Equal, // foo[href="foo"] 31 | } 32 | 33 | #[derive(Debug, Clone, PartialEq, Eq)] 34 | #[allow(unused)] 35 | pub(crate) enum SelectorComponent { 36 | Class(String), 37 | Element(String), 38 | Hash(String), 39 | Star, 40 | CombChild, 41 | CombDescendant, 42 | NthChild { 43 | /* An + B [of sel] */ 44 | a: i32, 45 | b: i32, 46 | sel: Selector, 47 | }, 48 | Attr { 49 | name: String, 50 | value: Option, 51 | op: AttrOperator, 52 | // TODO: other comparisions like $= 53 | // TODO: case sensitivity flags 54 | }, 55 | } 56 | 57 | #[derive(Debug, Clone, PartialEq, Eq)] 58 | pub(crate) enum PseudoElement { 59 | Before, 60 | After, 61 | } 62 | 63 | impl std::fmt::Display for SelectorComponent { 64 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 65 | match self { 66 | SelectorComponent::Class(name) => write!(f, ".{}", name), 67 | 
SelectorComponent::Element(name) => write!(f, "{}", name), 68 | SelectorComponent::Hash(val) => write!(f, "#{}", val), 69 | SelectorComponent::Star => write!(f, " * "), 70 | SelectorComponent::CombChild => write!(f, " > "), 71 | SelectorComponent::CombDescendant => write!(f, " "), 72 | SelectorComponent::NthChild { a, b, .. } => write!(f, ":nth-child({}n+{})", a, b), 73 | SelectorComponent::Attr { name, value, op } => match op { 74 | AttrOperator::Present => write!(f, "[{name}]"), 75 | AttrOperator::Equal => write!( 76 | f, 77 | "[{name} = \"{}\"]", 78 | value 79 | .as_ref() 80 | .expect("Missing value for attribute equality comparison") 81 | ), 82 | }, 83 | } 84 | } 85 | } 86 | 87 | #[derive(Debug, Clone, PartialEq, Eq, Default)] 88 | pub(crate) struct Selector { 89 | // List of components, right first so we match from the leaf. 90 | pub(crate) components: Vec, 91 | pub(crate) pseudo_element: Option, 92 | } 93 | 94 | impl std::fmt::Display for Selector { 95 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 96 | for comp in self.components.iter().rev() { 97 | comp.fmt(f)?; 98 | } 99 | match self.pseudo_element { 100 | Some(PseudoElement::Before) => write!(f, "::before")?, 101 | Some(PseudoElement::After) => write!(f, "::after")?, 102 | None => (), 103 | } 104 | Ok(()) 105 | } 106 | } 107 | 108 | impl Selector { 109 | fn do_matches(comps: &[SelectorComponent], node: &Handle) -> bool { 110 | match comps.first() { 111 | None => true, 112 | Some(comp) => match comp { 113 | SelectorComponent::Class(class) => match &node.data { 114 | Document 115 | | NodeData::Doctype { .. } 116 | | NodeData::Text { .. } 117 | | Comment { .. } 118 | | NodeData::ProcessingInstruction { .. } => false, 119 | Element { attrs, .. 
} => { 120 | let attrs = attrs.borrow(); 121 | for attr in attrs.iter() { 122 | if &attr.name.local == "class" { 123 | for cls in attr.value.split_whitespace() { 124 | if cls == class { 125 | return Self::do_matches(&comps[1..], node); 126 | } 127 | } 128 | } 129 | } 130 | false 131 | } 132 | }, 133 | SelectorComponent::Attr { name, value, op } => match &node.data { 134 | Document 135 | | NodeData::Doctype { .. } 136 | | NodeData::Text { .. } 137 | | Comment { .. } 138 | | NodeData::ProcessingInstruction { .. } => false, 139 | Element { attrs, .. } => { 140 | let attrs = attrs.borrow(); 141 | for attr in attrs.iter() { 142 | if &attr.name.local == name { 143 | match op { 144 | AttrOperator::Present => { 145 | return Self::do_matches(&comps[1..], node); 146 | } 147 | AttrOperator::Equal => { 148 | if &*attr.value 149 | == value 150 | .as_ref() 151 | .expect("No value in Attr equality comparison") 152 | { 153 | return Self::do_matches(&comps[1..], node); 154 | } else { 155 | return false; 156 | } 157 | } 158 | } 159 | } 160 | } 161 | false 162 | } 163 | }, 164 | SelectorComponent::Hash(hash) => { 165 | if let Element { attrs, .. } = &node.data { 166 | let attrs = attrs.borrow(); 167 | for attr in attrs.iter() { 168 | if &attr.name.local == "id" && &*attr.value == hash { 169 | return Self::do_matches(&comps[1..], node); 170 | } 171 | } 172 | } 173 | false 174 | } 175 | SelectorComponent::Element(name) => match &node.data { 176 | Element { name: eltname, .. 
} if name == eltname.expanded().local.deref() => { 177 | Self::do_matches(&comps[1..], node) 178 | } 179 | _ => false, 180 | }, 181 | SelectorComponent::Star => Self::do_matches(&comps[1..], node), 182 | SelectorComponent::CombChild => { 183 | if let Some(parent) = node.get_parent() { 184 | Self::do_matches(&comps[1..], &parent) 185 | } else { 186 | false 187 | } 188 | } 189 | SelectorComponent::CombDescendant => { 190 | if let Some(parent) = node.get_parent() { 191 | Self::do_matches(&comps[1..], &parent) || Self::do_matches(comps, &parent) 192 | } else { 193 | false 194 | } 195 | } 196 | SelectorComponent::NthChild { a, b, sel } => { 197 | let parent = if let Some(parent) = node.get_parent() { 198 | parent 199 | } else { 200 | return false; 201 | }; 202 | let mut idx = 0i32; 203 | for child in parent.children.borrow().iter() { 204 | if let Element { .. } = child.data { 205 | if sel.matches(child) { 206 | idx += 1; 207 | if Rc::ptr_eq(child, node) { 208 | break; 209 | } 210 | } else if Rc::ptr_eq(child, node) { 211 | return false; 212 | } 213 | } 214 | } 215 | if idx == 0 { 216 | // The child wasn't found(?) 217 | return false; 218 | } 219 | /* The selector matches if idx == a*n + b, where 220 | * n >= 0 221 | */ 222 | let idx_offset = idx - b; 223 | if *a == 0 { 224 | return idx_offset == 0 && Self::do_matches(&comps[1..], node); 225 | } 226 | if (idx_offset % a) != 0 { 227 | // Not a multiple 228 | return false; 229 | } 230 | let n = idx_offset / a; 231 | n >= 0 && Self::do_matches(&comps[1..], node) 232 | } 233 | }, 234 | } 235 | } 236 | fn matches(&self, node: &Handle) -> bool { 237 | Self::do_matches(&self.components, node) 238 | } 239 | fn specificity(&self) -> Specificity { 240 | let mut result: Specificity = Default::default(); 241 | 242 | for component in &self.components { 243 | match component { 244 | SelectorComponent::Class(_) | SelectorComponent::Attr { .. 
} => { 245 | result.class += 1; 246 | } 247 | SelectorComponent::Element(_) => { 248 | result.typ += 1; 249 | } 250 | SelectorComponent::Hash(_) => { 251 | result.id += 1; 252 | } 253 | SelectorComponent::Star => {} 254 | SelectorComponent::CombChild => {} 255 | SelectorComponent::CombDescendant => {} 256 | SelectorComponent::NthChild { sel, .. } => { 257 | result.class += 1; 258 | result += &sel.specificity(); 259 | } 260 | } 261 | } 262 | 263 | result 264 | } 265 | } 266 | 267 | #[cfg(feature = "css")] 268 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 269 | pub(crate) enum Display { 270 | /// display: none 271 | None, 272 | #[cfg(feature = "css_ext")] 273 | /// Show node as HTML DOM 274 | ExtRawDom, 275 | } 276 | 277 | #[derive(Debug, Clone, PartialEq, Eq)] 278 | pub(crate) struct PseudoContent { 279 | /// content: "foo" 280 | pub(crate) text: String, 281 | } 282 | 283 | #[cfg(feature = "css_ext")] 284 | #[derive(Debug, Clone, PartialEq, Eq)] 285 | pub(crate) struct SyntaxInfo { 286 | /// Highlight language 287 | pub(crate) language: String, 288 | } 289 | 290 | #[derive(Debug, Clone, PartialEq, Eq)] 291 | pub(crate) enum Style { 292 | #[cfg(feature = "css")] 293 | Colour(Colour), 294 | #[cfg(feature = "css")] 295 | BgColour(Colour), 296 | #[cfg(feature = "css")] 297 | Display(Display), 298 | #[cfg(feature = "css")] 299 | WhiteSpace(WhiteSpace), 300 | Content(PseudoContent), 301 | #[cfg(feature = "css_ext")] 302 | Syntax(SyntaxInfo), 303 | } 304 | 305 | #[derive(Debug, Clone, PartialEq, Eq)] 306 | pub(crate) struct StyleDecl { 307 | pub(crate) style: Style, 308 | pub(crate) importance: Importance, 309 | } 310 | 311 | impl std::fmt::Display for StyleDecl { 312 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 313 | match &self.style { 314 | #[cfg(feature = "css")] 315 | Style::Colour(col) => write!(f, "color: {}", col)?, 316 | #[cfg(feature = "css")] 317 | Style::BgColour(col) => write!(f, "background-color: {}", col)?, 318 | #[cfg(feature = 
"css")] 319 | Style::Display(Display::None) => write!(f, "display: none")?, 320 | #[cfg(feature = "css_ext")] 321 | Style::Display(Display::ExtRawDom) => write!(f, "display: x-raw-dom")?, 322 | #[cfg(feature = "css")] 323 | Style::WhiteSpace(ws) => match ws { 324 | WhiteSpace::Normal => write!(f, "white-space: normal")?, 325 | WhiteSpace::Pre => write!(f, "white-space: pre")?, 326 | WhiteSpace::PreWrap => write!(f, "white-space: pre-wrap")?, 327 | }, 328 | Style::Content(content) => write!(f, "content: \"{}\"", content.text)?, 329 | #[cfg(feature = "css_ext")] 330 | Style::Syntax(syntax_info) => write!(f, "x-syntax: {}", syntax_info.language)?, 331 | } 332 | match self.importance { 333 | Importance::Default => (), 334 | Importance::Important => write!(f, " !important")?, 335 | } 336 | Ok(()) 337 | } 338 | } 339 | 340 | #[derive(Debug, Clone, PartialEq, Eq)] 341 | pub(crate) struct Ruleset { 342 | pub(crate) selector: Selector, 343 | pub(crate) styles: Vec, 344 | } 345 | 346 | impl std::fmt::Display for Ruleset { 347 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 348 | writeln!(f, " {} {{", self.selector)?; 349 | for decl in &self.styles { 350 | writeln!(f, " {}", decl)?; 351 | } 352 | writeln!(f, " }}")?; 353 | Ok(()) 354 | } 355 | } 356 | 357 | /// Stylesheet data which can be used while building the render tree. 
#[derive(Clone, Default, Debug, PartialEq, Eq)]
pub(crate) struct StyleData {
    agent_rules: Vec<Ruleset>,
    user_rules: Vec<Ruleset>,
    author_rules: Vec<Ruleset>,
}

/// Convert parsed declarations into the supported `StyleDecl`s.
///
/// Unknown and unsupported declarations are dropped.  Extension
/// properties (`css_ext`) are only accepted when `_allow_extensions` is
/// true.  A zero `height`/`max-height` combined with `overflow: hidden`
/// is treated as `display: none`.
#[cfg(feature = "css")]
fn styles_from_properties(
    decls: &[parser::Declaration],
    _allow_extensions: bool,
) -> Vec<StyleDecl> {
    let mut styles = Vec::new();
    html_trace_quiet!("styles:from_properties2: {decls:?}");
    let mut overflow_hidden = false;
    let mut height_zero = false;
    for decl in decls {
        html_trace_quiet!("styles:from_properties2: {decl:?}");
        match &decl.data {
            parser::Decl::Unknown { .. } => {}
            parser::Decl::Color {
                value: parser::Colour::Rgb(r, g, b),
            } => {
                styles.push(StyleDecl {
                    style: Style::Colour(Colour {
                        r: *r,
                        g: *g,
                        b: *b,
                    }),
                    importance: decl.important,
                });
            }
            parser::Decl::BackgroundColor {
                value: parser::Colour::Rgb(r, g, b),
            } => {
                styles.push(StyleDecl {
                    style: Style::BgColour(Colour {
                        r: *r,
                        g: *g,
                        b: *b,
                    }),
                    importance: decl.important,
                });
            }
            // `height` and `max-height` are handled identically: only a
            // zero length is of interest.
            parser::Decl::Height { value } | parser::Decl::MaxHeight { value } => match value {
                parser::Height::Auto => (),
                parser::Height::Length(l, _) => {
                    if *l == 0.0 {
                        height_zero = true;
                    }
                }
            },
            parser::Decl::Overflow {
                value: parser::Overflow::Hidden,
            }
            | parser::Decl::OverflowY {
                value: parser::Overflow::Hidden,
            } => {
                overflow_hidden = true;
            }
            parser::Decl::Overflow { .. } | parser::Decl::OverflowY { .. } => {}
            parser::Decl::Display { value } => match value {
                parser::Display::None => {
                    styles.push(StyleDecl {
                        style: Style::Display(Display::None),
                        importance: decl.important,
                    });
                }
                #[cfg(feature = "css_ext")]
                parser::Display::RawDom => {
                    if !_allow_extensions {
                        continue;
                    }
                    styles.push(StyleDecl {
                        style: Style::Display(Display::ExtRawDom),
                        importance: decl.important,
                    });
                }
                _ => (),
            },
            parser::Decl::WhiteSpace { value } => {
                styles.push(StyleDecl {
                    style: Style::WhiteSpace(*value),
                    importance: decl.important,
                });
            }
            parser::Decl::Content { text } => {
                styles.push(StyleDecl {
                    style: Style::Content(PseudoContent { text: text.clone() }),
                    importance: decl.important,
                });
            }
            #[cfg(feature = "css_ext")]
            parser::Decl::XSyntax { language } => {
                if !_allow_extensions {
                    continue;
                }
                styles.push(StyleDecl {
                    style: Style::Syntax(SyntaxInfo {
                        language: language.clone(),
                    }),
                    importance: decl.important,
                });
            }
        }
    }
    // If the height is set to zero and overflow hidden, treat as display: none
    if height_zero && overflow_hidden {
        styles.push(StyleDecl {
            style: Style::Display(Display::None),
            importance: Importance::Default,
        });
    }
    styles
}

impl StyleData {
    #[cfg(feature = "css")]
    /// Add some CSS source to be included.  The source will be parsed
    /// and the relevant and supported features extracted.
490 | fn do_add_css(css: &str, rules: &mut Vec, allow_extensions: bool) -> Result<()> { 491 | let (_, ss) = parser::parse_stylesheet(css).map_err(|_| crate::Error::CssParseError)?; 492 | 493 | for rule in ss { 494 | let styles = styles_from_properties(&rule.declarations, allow_extensions); 495 | if !styles.is_empty() { 496 | for selector in rule.selectors { 497 | let ruleset = Ruleset { 498 | selector, 499 | styles: styles.clone(), 500 | }; 501 | html_trace_quiet!("Adding ruleset {ruleset:?}"); 502 | rules.push(ruleset); 503 | } 504 | } 505 | } 506 | Ok(()) 507 | } 508 | 509 | pub(crate) fn add_agent_rules(&mut self, rules: &[Ruleset]) { 510 | for rule in rules { 511 | self.agent_rules.push(rule.clone()); 512 | } 513 | } 514 | 515 | #[cfg(feature = "css")] 516 | /// Add some CSS source to be included as part of the user agent ("browser") CSS rules. 517 | pub fn add_agent_css(&mut self, css: &str) -> Result<()> { 518 | Self::do_add_css(css, &mut self.agent_rules, true) 519 | } 520 | 521 | #[cfg(feature = "css")] 522 | /// Add some CSS source to be included as part of the user CSS rules. 523 | pub fn add_user_css(&mut self, css: &str) -> Result<()> { 524 | Self::do_add_css(css, &mut self.user_rules, true) 525 | } 526 | 527 | #[cfg(feature = "css")] 528 | /// Add some CSS source to be included as part of the document/author CSS rules. 529 | pub fn add_author_css(&mut self, css: &str) -> Result<()> { 530 | Self::do_add_css(css, &mut self.author_rules, false) 531 | } 532 | 533 | #[cfg(feature = "css")] 534 | /// Merge style data from other into this one. 535 | /// Data on other takes precedence. 
536 | pub fn merge(&mut self, other: Self) { 537 | self.agent_rules.extend(other.agent_rules); 538 | self.user_rules.extend(other.user_rules); 539 | self.author_rules.extend(other.author_rules); 540 | } 541 | 542 | pub(crate) fn computed_style( 543 | &self, 544 | parent_style: &ComputedStyle, 545 | handle: &Handle, 546 | _use_doc_css: bool, 547 | ) -> ComputedStyle { 548 | let mut result = parent_style.inherit(); 549 | 550 | for (origin, ruleset) in [ 551 | (StyleOrigin::Agent, &self.agent_rules), 552 | (StyleOrigin::User, &self.user_rules), 553 | (StyleOrigin::Author, &self.author_rules), 554 | ] { 555 | for rule in ruleset { 556 | if rule.selector.matches(handle) { 557 | for style in rule.styles.iter() { 558 | Self::merge_computed_style( 559 | &mut result, 560 | style.importance == Importance::Important, 561 | origin, 562 | rule.selector.specificity(), 563 | rule.selector.pseudo_element.as_ref(), 564 | style, 565 | ); 566 | } 567 | } 568 | } 569 | } 570 | 571 | #[cfg(feature = "css")] 572 | if _use_doc_css { 573 | // Now look for a style attribute 574 | if let Element { attrs, .. 
} = &handle.data { 575 | let borrowed = attrs.borrow(); 576 | for attr in borrowed.iter() { 577 | if &attr.name.local == "style" { 578 | let rules = parse_style_attribute(&attr.value).unwrap_or_default(); 579 | for style in rules { 580 | Self::merge_computed_style( 581 | &mut result, 582 | false, 583 | StyleOrigin::Author, 584 | Specificity::inline(), 585 | None, 586 | &style, 587 | ); 588 | } 589 | } else if &*attr.name.local == "color" { 590 | if let Ok(colour) = parser::parse_color_attribute(&attr.value) { 591 | Self::merge_computed_style( 592 | &mut result, 593 | false, 594 | StyleOrigin::Author, 595 | Specificity::inline(), 596 | None, 597 | &StyleDecl { 598 | style: Style::Colour(colour.into()), 599 | importance: Importance::Default, 600 | }, 601 | ); 602 | } 603 | } else if &*attr.name.local == "bgcolor" { 604 | if let Ok(colour) = parser::parse_color_attribute(&attr.value) { 605 | Self::merge_computed_style( 606 | &mut result, 607 | false, 608 | StyleOrigin::Author, 609 | Specificity::inline(), 610 | None, 611 | &StyleDecl { 612 | style: Style::BgColour(colour.into()), 613 | importance: Importance::Default, 614 | }, 615 | ); 616 | } 617 | } 618 | } 619 | } 620 | } 621 | 622 | result 623 | } 624 | 625 | fn merge_computed_style( 626 | result: &mut ComputedStyle, 627 | important: bool, 628 | origin: StyleOrigin, 629 | specificity: Specificity, 630 | pseudo_selectors: Option<&PseudoElement>, 631 | style: &StyleDecl, 632 | ) { 633 | let result_target = match pseudo_selectors { 634 | None => result, 635 | Some(PseudoElement::Before) => { 636 | // TODO: ideally we should inherit from the parent; however we haven't finished 637 | // computing the parent yet. 
638 | result.content_before.get_or_insert_with(Default::default) 639 | } 640 | Some(PseudoElement::After) => result.content_after.get_or_insert_with(Default::default), 641 | }; 642 | // The increasing priority is: 643 | // * agent 644 | // * user 645 | // * author 646 | // * author !important 647 | // * user !important 648 | // * agent !important 649 | // Since we view in the order agent, user, author, we always want to 650 | // replace the value if we haven't yet seen an !important rule, and 651 | // never afterwards. 652 | match style.style { 653 | #[cfg(feature = "css")] 654 | Style::Colour(col) => { 655 | result_target 656 | .colour 657 | .maybe_update(important, origin, specificity, col); 658 | } 659 | #[cfg(feature = "css")] 660 | Style::BgColour(col) => { 661 | result_target 662 | .bg_colour 663 | .maybe_update(important, origin, specificity, col); 664 | } 665 | #[cfg(feature = "css")] 666 | Style::Display(disp) => { 667 | // We don't have a "not DisplayNone" - we might need to fix this. 
668 | result_target 669 | .display 670 | .maybe_update(important, origin, specificity, disp); 671 | } 672 | #[cfg(feature = "css")] 673 | Style::WhiteSpace(ws) => { 674 | result_target 675 | .white_space 676 | .maybe_update(important, origin, specificity, ws); 677 | } 678 | Style::Content(ref content) => { 679 | result_target 680 | .content 681 | .maybe_update(important, origin, specificity, content.clone()); 682 | } 683 | #[cfg(feature = "css_ext")] 684 | Style::Syntax(ref syntax_info) => { 685 | result_target.syntax.maybe_update( 686 | important, 687 | origin, 688 | specificity, 689 | syntax_info.clone(), 690 | ); 691 | } 692 | } 693 | } 694 | } 695 | 696 | impl std::fmt::Display for StyleData { 697 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 698 | if !self.agent_rules.is_empty() { 699 | writeln!(f, "Agent rules:")?; 700 | for ruleset in &self.agent_rules { 701 | ruleset.fmt(f)?; 702 | } 703 | } 704 | if !self.user_rules.is_empty() { 705 | writeln!(f, "User rules:")?; 706 | for ruleset in &self.user_rules { 707 | ruleset.fmt(f)?; 708 | } 709 | } 710 | if !self.author_rules.is_empty() { 711 | writeln!(f, "Author rules:")?; 712 | for ruleset in &self.author_rules { 713 | ruleset.fmt(f)?; 714 | } 715 | } 716 | Ok(()) 717 | } 718 | } 719 | 720 | #[cfg(feature = "css")] 721 | pub(crate) mod dom_extract { 722 | use std::io::Write; 723 | 724 | use crate::{expanded_name, local_name, namespace_url, ns}; 725 | 726 | use crate::{ 727 | markup5ever_rcdom::{ 728 | Handle, 729 | NodeData::{self, Comment, Document, Element}, 730 | }, 731 | tree_map_reduce, Result, TreeMapResult, 732 | }; 733 | 734 | use super::StyleData; 735 | 736 | fn pending(handle: Handle, f: F) -> TreeMapResult<'static, (), Handle, Vec> 737 | where 738 | F: Fn(&mut (), Vec>) -> Result>> + 'static, 739 | { 740 | TreeMapResult::PendingChildren { 741 | children: handle.children.borrow().clone(), 742 | cons: Box::new(f), 743 | prefn: None, 744 | postfn: None, 745 | } 746 | } 747 | 748 
| fn combine_vecs(vecs: Vec>) -> Vec { 749 | let mut it = vecs.into_iter(); 750 | let first = it.next(); 751 | match first { 752 | None => Vec::new(), 753 | Some(mut first) => { 754 | for v in it { 755 | first.extend(v.into_iter()); 756 | } 757 | first 758 | } 759 | } 760 | } 761 | 762 | fn extract_style_nodes( 763 | handle: Handle, 764 | _err_out: &mut T, 765 | ) -> TreeMapResult<'static, (), Handle, Vec> { 766 | use TreeMapResult::*; 767 | 768 | match handle.clone().data { 769 | Document => pending(handle, |&mut (), cs| Ok(Some(combine_vecs(cs)))), 770 | Comment { .. } => Nothing, 771 | Element { ref name, .. } => { 772 | match name.expanded() { 773 | expanded_name!(html "style") => { 774 | let mut result = String::new(); 775 | // Assume just a flat text node 776 | for child in handle.children.borrow().iter() { 777 | if let NodeData::Text { ref contents } = child.data { 778 | result += &contents.borrow(); 779 | } 780 | } 781 | Finished(vec![result]) 782 | } 783 | _ => pending(handle, |_, cs| Ok(Some(combine_vecs(cs)))), 784 | } 785 | } 786 | NodeData::Text { 787 | contents: ref _tstr, 788 | } => Nothing, 789 | _ => { 790 | // NodeData doesn't have a Debug impl. 791 | Nothing 792 | } 793 | } 794 | } 795 | 796 | /// Extract stylesheet data from document. 797 | pub(crate) fn dom_to_stylesheet( 798 | handle: Handle, 799 | err_out: &mut T, 800 | ) -> Result { 801 | let styles = tree_map_reduce(&mut (), handle, |_, handle| { 802 | Ok(extract_style_nodes(handle, err_out)) 803 | })?; 804 | 805 | let mut result = StyleData::default(); 806 | if let Some(styles) = styles { 807 | for css in styles { 808 | // Ignore CSS parse errors. 
809 | let _ = result.add_author_css(&css); 810 | } 811 | } 812 | Ok(result) 813 | } 814 | } 815 | 816 | #[cfg(feature = "css")] 817 | #[cfg(test)] 818 | mod tests { 819 | use crate::Specificity; 820 | 821 | use super::parser::parse_selector; 822 | 823 | #[test] 824 | fn test_specificity() { 825 | let sel_id1 = parse_selector("#foo").unwrap().1; 826 | assert_eq!( 827 | sel_id1.specificity(), 828 | Specificity { 829 | id: 1, 830 | ..Default::default() 831 | } 832 | ); 833 | 834 | let sel_cl3 = parse_selector(".foo .bar .baz").unwrap().1; 835 | assert_eq!( 836 | sel_cl3.specificity(), 837 | Specificity { 838 | class: 3, 839 | ..Default::default() 840 | } 841 | ); 842 | 843 | assert!(sel_id1.specificity() > sel_cl3.specificity()); 844 | } 845 | } 846 | -------------------------------------------------------------------------------- /src/css/types.rs: -------------------------------------------------------------------------------- 1 | #[derive(Copy, Clone, PartialEq, Eq, Debug)] 2 | pub(crate) enum Importance { 3 | Default, 4 | Important, 5 | } 6 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "html_trace_bt")] 2 | extern crate backtrace; 3 | 4 | /* This is to work around a false positive for the clippy warning 5 | * `match_on_same_arms`. 6 | * See https://github.com/Manishearth/rust-clippy/issues/1390 7 | */ 8 | #[cfg(not(feature = "html_trace"))] 9 | #[inline(always)] 10 | pub fn nop() {} 11 | 12 | #[cfg(feature = "html_trace")] 13 | #[macro_export] 14 | #[doc(hidden)] 15 | macro_rules! 
html_trace { 16 | ($fmt:expr) => { 17 | #[cfg(feature = "html_trace_bt")] 18 | { 19 | let bt = ::backtrace::Backtrace::new(); 20 | log::info!( concat!($fmt, " at {:?}"), bt ); 21 | } 22 | #[cfg(not(feature = "html_trace_bt"))] 23 | { 24 | log::info!($fmt); 25 | } 26 | }; 27 | ($fmt:expr, $( $args:expr ),*) => { 28 | #[cfg(feature = "html_trace_bt")] 29 | { 30 | let bt = ::backtrace::Backtrace::new(); 31 | log::info!( concat!($fmt, " at {:?}"), $( $args ),* , bt ); 32 | } 33 | #[cfg(not(feature = "html_trace_bt"))] 34 | { 35 | log::info!($fmt, $( $args ),*); 36 | } 37 | }; 38 | } 39 | #[cfg(not(feature = "html_trace"))] 40 | #[macro_export] 41 | #[doc(hidden)] 42 | macro_rules! html_trace { 43 | ($fmt:expr) => { 44 | $crate::macros::nop(); 45 | }; 46 | ($fmt:expr, $( $args:expr ),*) => { 47 | $crate::macros::nop(); 48 | }; 49 | } 50 | 51 | #[cfg(feature = "html_trace")] 52 | #[macro_export] 53 | #[doc(hidden)] 54 | macro_rules! html_trace_quiet { 55 | ($fmt:expr) => { 56 | log::trace!( $fmt ); 57 | }; 58 | ($fmt:expr, $( $args:expr ),*) => { 59 | log::trace!( $fmt, $( $args ),* ); 60 | }; 61 | } 62 | 63 | #[cfg(not(feature = "html_trace"))] 64 | #[macro_export] 65 | #[doc(hidden)] 66 | macro_rules! html_trace_quiet { 67 | ($fmt:expr) => { 68 | $crate::macros::nop(); 69 | }; 70 | ($fmt:expr, $( $args:expr ),*) => { 71 | $crate::macros::nop(); 72 | }; 73 | } 74 | -------------------------------------------------------------------------------- /src/markup5ever_rcdom.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014-2017 The html5ever Project Developers. See the 2 | // COPYRIGHT file at the top-level directory of this distribution. 3 | // 4 | // Licensed under the Apache License, Version 2.0 or the MIT license 6 | // , at your 7 | // option. This file may not be copied, modified, or distributed 8 | // except according to those terms. 9 | 10 | //! A simple reference-counted DOM. 11 | //! 12 | //! 
This is sufficient as a static parse tree, but don't build a 13 | //! web browser using it. :) 14 | //! 15 | //! A DOM is a [tree structure] with ordered children that can be represented in an XML-like 16 | //! format. For example, the following graph 17 | //! 18 | //! ```text 19 | //! div 20 | //! +- "text node" 21 | //! +- span 22 | //! ``` 23 | //! in HTML would be serialized as 24 | //! 25 | //! ```html 26 | //!
      <div>text node<span></span></div>
      27 | //! ``` 28 | //! 29 | //! See the [document object model article on wikipedia][dom wiki] for more information. 30 | //! 31 | //! This implementation stores the information associated with each node once, and then hands out 32 | //! refs to children. The nodes themselves are reference-counted to avoid copying - you can create 33 | //! a new ref and then a node will outlive the document. Nodes own their children, but only have 34 | //! weak references to their parents. 35 | //! 36 | //! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure) 37 | //! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model 38 | 39 | extern crate tendril; 40 | 41 | use std::borrow::Cow; 42 | use std::cell::{Cell, RefCell}; 43 | use std::collections::{HashSet, VecDeque}; 44 | use std::fmt; 45 | use std::io; 46 | use std::mem; 47 | use std::rc::{Rc, Weak}; 48 | 49 | use html5ever::interface::ElemName; 50 | use tendril::StrTendril; 51 | 52 | use html5ever::interface::tree_builder; 53 | use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; 54 | use html5ever::serialize::TraversalScope; 55 | use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode}; 56 | use html5ever::serialize::{Serialize, Serializer}; 57 | use html5ever::Attribute; 58 | use html5ever::ExpandedName; 59 | use html5ever::QualName; 60 | 61 | /// The different kinds of nodes in the DOM. 62 | #[derive(Debug)] 63 | pub enum NodeData { 64 | /// The `Document` itself - the root node of a HTML document. 65 | Document, 66 | 67 | /// A `DOCTYPE` with name, public id, and system id. See 68 | /// [document type declaration on wikipedia][dtd wiki]. 69 | /// 70 | /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration 71 | Doctype { 72 | name: StrTendril, 73 | public_id: StrTendril, 74 | system_id: StrTendril, 75 | }, 76 | 77 | /// A text node. 78 | Text { contents: RefCell }, 79 | 80 | /// A comment. 
81 | Comment { 82 | /// The comment text. 83 | contents: StrTendril, 84 | }, 85 | 86 | /// An element with attributes. 87 | Element { 88 | /// The qualified element name 89 | name: QualName, 90 | /// The element's attributes. 91 | attrs: RefCell>, 92 | 93 | /// For HTML \ elements, the [template contents]. 94 | /// 95 | /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents 96 | template_contents: RefCell>, 97 | 98 | /// Whether the node is a [HTML integration point]. 99 | /// 100 | /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point 101 | mathml_annotation_xml_integration_point: bool, 102 | }, 103 | 104 | /// A Processing instruction. 105 | ProcessingInstruction { 106 | target: StrTendril, 107 | contents: StrTendril, 108 | }, 109 | } 110 | 111 | /// A DOM node. 112 | pub struct Node { 113 | /// Parent node. 114 | pub parent: Cell>, 115 | /// Child nodes of this node. 116 | pub children: RefCell>, 117 | /// Represents this node's data. 118 | pub data: NodeData, 119 | } 120 | 121 | impl Node { 122 | /// Create a new node from its contents 123 | pub fn new(data: NodeData) -> Rc { 124 | Rc::new(Node { 125 | data, 126 | parent: Cell::new(None), 127 | children: RefCell::new(Vec::new()), 128 | }) 129 | } 130 | 131 | pub fn get_parent(&self) -> Option> { 132 | if let Some(parent) = self.parent.take() { 133 | let parent_handle = parent.upgrade(); 134 | self.parent.set(Some(parent)); 135 | parent_handle 136 | } else { 137 | None 138 | } 139 | } 140 | 141 | /// Return the nth child element of this node, or None. 142 | pub fn nth_child(&self, idx: usize) -> Option> { 143 | let mut element_idx = 0; 144 | for child in self.children.borrow().iter() { 145 | if let NodeData::Element { .. 
} = child.data { 146 | element_idx += 1; 147 | if element_idx == idx { 148 | return Some(child.clone()); 149 | } 150 | } 151 | } 152 | None 153 | } 154 | 155 | /// Return the element type (if an element) 156 | pub fn element_name(&self) -> Option { 157 | if let NodeData::Element { ref name, .. } = self.data { 158 | Some(format!("{}", name.local_name())) 159 | } else { 160 | None 161 | } 162 | } 163 | 164 | /// Serialise the node to a writable. 165 | pub fn serialize(self: &Rc, writer: impl io::Write) -> io::Result<()> { 166 | html5ever::serialize( 167 | writer, 168 | &SerializableHandle(self.clone()), 169 | html5ever::serialize::SerializeOpts { 170 | scripting_enabled: true, 171 | traversal_scope: html5ever::serialize::TraversalScope::IncludeNode, 172 | create_missing_parent: false, 173 | }, 174 | ) 175 | } 176 | } 177 | 178 | impl Drop for Node { 179 | fn drop(&mut self) { 180 | let mut nodes = mem::take(&mut *self.children.borrow_mut()); 181 | while let Some(node) = nodes.pop() { 182 | let children = mem::take(&mut *node.children.borrow_mut()); 183 | nodes.extend(children.into_iter()); 184 | if let NodeData::Element { 185 | ref template_contents, 186 | .. 187 | } = node.data 188 | { 189 | if let Some(template_contents) = template_contents.borrow_mut().take() { 190 | nodes.push(template_contents); 191 | } 192 | } 193 | } 194 | } 195 | } 196 | 197 | impl fmt::Debug for Node { 198 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 199 | fmt.debug_struct("Node") 200 | .field("data", &self.data) 201 | .field("children", &self.children) 202 | .finish() 203 | } 204 | } 205 | 206 | /// Reference to a DOM node. 207 | pub type Handle = Rc; 208 | 209 | /// Weak reference to a DOM node, used for parent pointers. 
210 | pub type WeakHandle = Weak; 211 | 212 | /// Append a parentless node to another nodes' children 213 | fn append(new_parent: &Handle, child: Handle) { 214 | let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); 215 | // Invariant: child cannot have existing parent 216 | assert!(previous_parent.is_none()); 217 | new_parent.children.borrow_mut().push(child); 218 | } 219 | 220 | /// If the node has a parent, get it and this node's position in its children 221 | fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> { 222 | let weak = target.parent.take()?; 223 | let parent = weak.upgrade().expect("dangling weak pointer"); 224 | target.parent.set(Some(weak)); 225 | let i = match parent 226 | .children 227 | .borrow() 228 | .iter() 229 | .enumerate() 230 | .find(|&(_, child)| Rc::ptr_eq(child, target)) 231 | { 232 | Some((i, _)) => i, 233 | None => panic!("have parent but couldn't find in parent's children!"), 234 | }; 235 | Some((parent, i)) 236 | } 237 | 238 | fn append_to_existing_text(prev: &Handle, text: &str) -> bool { 239 | match prev.data { 240 | NodeData::Text { ref contents } => { 241 | contents.borrow_mut().push_slice(text); 242 | true 243 | } 244 | _ => false, 245 | } 246 | } 247 | 248 | fn remove_from_parent(target: &Handle) { 249 | if let Some((parent, i)) = get_parent_and_index(target) { 250 | parent.children.borrow_mut().remove(i); 251 | target.parent.set(None); 252 | } 253 | } 254 | 255 | /// The DOM itself; the result of parsing. 256 | pub struct RcDom { 257 | /// The `Document` itself. 258 | pub document: Handle, 259 | 260 | /// Errors that occurred during parsing. 261 | pub errors: RefCell>>, 262 | 263 | /// The document's quirks mode. 
264 | pub quirks_mode: Cell, 265 | } 266 | 267 | impl RcDom { 268 | fn add_node_to_string(s: &mut String, node: &Handle, indent: usize) { 269 | use std::fmt::Write as _; 270 | match &node.data { 271 | NodeData::Document => { 272 | for child in &*node.children.borrow() { 273 | Self::add_node_to_string(s, child, indent); 274 | } 275 | } 276 | NodeData::Doctype { .. } => { 277 | writeln!(s, "{0:indent$}", "", indent = indent).unwrap(); 278 | } 279 | NodeData::Text { contents } => { 280 | let borrowed = contents.borrow(); 281 | let text = borrowed.to_string(); 282 | if !text.trim().is_empty() { 283 | writeln!(s, "{0:indent$}Text:{1}", "", text, indent = indent).unwrap(); 284 | } 285 | } 286 | NodeData::Comment { .. } => (), 287 | NodeData::Element { name, .. } => { 288 | writeln!(s, "{0:indent$}<{1}>", "", name.local, indent = indent).unwrap(); 289 | for child in &*node.children.borrow() { 290 | Self::add_node_to_string(s, child, indent + 1); 291 | } 292 | writeln!(s, "{0:indent$}", "", name.local, indent = indent).unwrap(); 293 | } 294 | NodeData::ProcessingInstruction { .. } => {} 295 | } 296 | } 297 | 298 | /// A low-quality debug DOM rendering. 299 | pub fn as_dom_string(&self) -> String { 300 | let mut s = String::new(); 301 | Self::add_node_to_string(&mut s, &self.document, 0); 302 | s 303 | } 304 | 305 | /// A low-quality debug DOM rendering of an individual node 306 | pub fn node_as_dom_string(node: &Handle) -> String { 307 | let mut s = String::new(); 308 | Self::add_node_to_string(&mut s, node, 0); 309 | s 310 | } 311 | 312 | /// Serialise the DOM to a writable. 
313 | pub fn serialize(&self, writer: impl io::Write) -> io::Result<()> { 314 | html5ever::serialize( 315 | writer, 316 | &SerializableHandle(self.document.clone()), 317 | html5ever::serialize::SerializeOpts { 318 | scripting_enabled: true, 319 | traversal_scope: html5ever::serialize::TraversalScope::IncludeNode, 320 | create_missing_parent: false, 321 | }, 322 | ) 323 | } 324 | 325 | /// Find the node at a child path starting from the root element. At each level, 1 is the 326 | /// first child element, and only elements are counted. 327 | pub fn get_node_by_path(&self, path: &[usize]) -> Option { 328 | let mut node = self.document.clone(); 329 | for idx in path { 330 | node = match node.nth_child(*idx) { 331 | Some(new_node) => new_node, 332 | None => return None, 333 | }; 334 | } 335 | Some(node) 336 | } 337 | } 338 | 339 | impl TreeSink for RcDom { 340 | type Output = Self; 341 | 342 | type ElemName<'a> = ExpandedName<'a>; 343 | fn finish(self) -> Self { 344 | self 345 | } 346 | 347 | type Handle = Handle; 348 | 349 | fn parse_error(&self, msg: Cow<'static, str>) { 350 | self.errors.borrow_mut().push(msg); 351 | } 352 | 353 | fn get_document(&self) -> Handle { 354 | self.document.clone() 355 | } 356 | 357 | fn get_template_contents(&self, target: &Handle) -> Handle { 358 | if let NodeData::Element { 359 | ref template_contents, 360 | .. 361 | } = target.data 362 | { 363 | template_contents 364 | .borrow() 365 | .as_ref() 366 | .expect("not a template element!") 367 | .clone() 368 | } else { 369 | panic!("not a template element!") 370 | } 371 | } 372 | 373 | fn set_quirks_mode(&self, mode: QuirksMode) { 374 | self.quirks_mode.set(mode); 375 | } 376 | 377 | fn same_node(&self, x: &Handle, y: &Handle) -> bool { 378 | Rc::ptr_eq(x, y) 379 | } 380 | 381 | fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> { 382 | match target.data { 383 | NodeData::Element { ref name, .. 
} => name.expanded(), 384 | _ => panic!("not an element!"), 385 | } 386 | } 387 | 388 | fn create_element(&self, name: QualName, attrs: Vec, flags: ElementFlags) -> Handle { 389 | Node::new(NodeData::Element { 390 | name, 391 | attrs: RefCell::new(attrs), 392 | template_contents: RefCell::new(if flags.template { 393 | Some(Node::new(NodeData::Document)) 394 | } else { 395 | None 396 | }), 397 | mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, 398 | }) 399 | } 400 | 401 | fn create_comment(&self, text: StrTendril) -> Handle { 402 | Node::new(NodeData::Comment { contents: text }) 403 | } 404 | 405 | fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle { 406 | Node::new(NodeData::ProcessingInstruction { 407 | target, 408 | contents: data, 409 | }) 410 | } 411 | 412 | fn append(&self, parent: &Handle, child: NodeOrText) { 413 | // Append to an existing Text node if we have one. 414 | if let NodeOrText::AppendText(text) = &child { 415 | if let Some(h) = parent.children.borrow().last() { 416 | if append_to_existing_text(h, text) { 417 | return; 418 | } 419 | } 420 | } 421 | 422 | append( 423 | parent, 424 | match child { 425 | NodeOrText::AppendText(text) => Node::new(NodeData::Text { 426 | contents: RefCell::new(text), 427 | }), 428 | NodeOrText::AppendNode(node) => node, 429 | }, 430 | ); 431 | } 432 | 433 | fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText) { 434 | let (parent, i) = get_parent_and_index(sibling) 435 | .expect("append_before_sibling called on node without parent"); 436 | 437 | let child = match (child, i) { 438 | // No previous node. 439 | (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text { 440 | contents: RefCell::new(text), 441 | }), 442 | 443 | // Look for a text node before the insertion point. 
444 | (NodeOrText::AppendText(text), i) => { 445 | let children = parent.children.borrow(); 446 | let prev = &children[i - 1]; 447 | if append_to_existing_text(prev, &text) { 448 | return; 449 | } 450 | Node::new(NodeData::Text { 451 | contents: RefCell::new(text), 452 | }) 453 | } 454 | 455 | // The tree builder promises we won't have a text node after 456 | // the insertion point. 457 | 458 | // Any other kind of node. 459 | (NodeOrText::AppendNode(node), _) => node, 460 | }; 461 | 462 | remove_from_parent(&child); 463 | 464 | child.parent.set(Some(Rc::downgrade(&parent))); 465 | parent.children.borrow_mut().insert(i, child); 466 | } 467 | 468 | fn append_based_on_parent_node( 469 | &self, 470 | element: &Self::Handle, 471 | prev_element: &Self::Handle, 472 | child: NodeOrText, 473 | ) { 474 | let parent = element.parent.take(); 475 | let has_parent = parent.is_some(); 476 | element.parent.set(parent); 477 | 478 | if has_parent { 479 | self.append_before_sibling(element, child); 480 | } else { 481 | self.append(prev_element, child); 482 | } 483 | } 484 | 485 | fn append_doctype_to_document( 486 | &self, 487 | name: StrTendril, 488 | public_id: StrTendril, 489 | system_id: StrTendril, 490 | ) { 491 | append( 492 | &self.document, 493 | Node::new(NodeData::Doctype { 494 | name, 495 | public_id, 496 | system_id, 497 | }), 498 | ); 499 | } 500 | 501 | fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec) { 502 | let mut existing = if let NodeData::Element { ref attrs, .. 
} = target.data { 503 | attrs.borrow_mut() 504 | } else { 505 | panic!("not an element") 506 | }; 507 | 508 | let existing_names = existing 509 | .iter() 510 | .map(|e| e.name.clone()) 511 | .collect::>(); 512 | existing.extend( 513 | attrs 514 | .into_iter() 515 | .filter(|attr| !existing_names.contains(&attr.name)), 516 | ); 517 | } 518 | 519 | fn remove_from_parent(&self, target: &Handle) { 520 | remove_from_parent(target); 521 | } 522 | 523 | fn reparent_children(&self, node: &Handle, new_parent: &Handle) { 524 | let mut children = node.children.borrow_mut(); 525 | let mut new_children = new_parent.children.borrow_mut(); 526 | for child in children.iter() { 527 | let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); 528 | assert!(Rc::ptr_eq( 529 | node, 530 | &previous_parent.unwrap().upgrade().expect("dangling weak") 531 | )) 532 | } 533 | new_children.extend(mem::take(&mut *children)); 534 | } 535 | 536 | fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool { 537 | if let NodeData::Element { 538 | mathml_annotation_xml_integration_point, 539 | .. 
540 | } = target.data 541 | { 542 | mathml_annotation_xml_integration_point 543 | } else { 544 | panic!("not an element!") 545 | } 546 | } 547 | } 548 | 549 | impl Default for RcDom { 550 | fn default() -> RcDom { 551 | RcDom { 552 | document: Node::new(NodeData::Document), 553 | errors: vec![].into(), 554 | quirks_mode: tree_builder::NoQuirks.into(), 555 | } 556 | } 557 | } 558 | 559 | enum SerializeOp { 560 | Open(Handle), 561 | Close(QualName), 562 | } 563 | 564 | pub struct SerializableHandle(Handle); 565 | 566 | impl From for SerializableHandle { 567 | fn from(h: Handle) -> SerializableHandle { 568 | SerializableHandle(h) 569 | } 570 | } 571 | 572 | impl Serialize for SerializableHandle { 573 | fn serialize(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()> 574 | where 575 | S: Serializer, 576 | { 577 | let mut ops = VecDeque::new(); 578 | match traversal_scope { 579 | IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())), 580 | ChildrenOnly(_) => ops.extend( 581 | self.0 582 | .children 583 | .borrow() 584 | .iter() 585 | .map(|h| SerializeOp::Open(h.clone())), 586 | ), 587 | } 588 | 589 | while let Some(op) = ops.pop_front() { 590 | match op { 591 | SerializeOp::Open(handle) => match handle.data { 592 | NodeData::Element { 593 | ref name, 594 | ref attrs, 595 | .. 596 | } => { 597 | serializer.start_elem( 598 | name.clone(), 599 | attrs.borrow().iter().map(|at| (&at.name, &at.value[..])), 600 | )?; 601 | 602 | ops.reserve(1 + handle.children.borrow().len()); 603 | ops.push_front(SerializeOp::Close(name.clone())); 604 | 605 | for child in handle.children.borrow().iter().rev() { 606 | ops.push_front(SerializeOp::Open(child.clone())); 607 | } 608 | } 609 | 610 | NodeData::Doctype { ref name, .. 
} => serializer.write_doctype(name)?, 611 | 612 | NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?, 613 | 614 | NodeData::Comment { ref contents } => serializer.write_comment(contents)?, 615 | 616 | NodeData::ProcessingInstruction { 617 | ref target, 618 | ref contents, 619 | } => serializer.write_processing_instruction(target, contents)?, 620 | 621 | NodeData::Document => panic!("Can't serialize Document node itself"), 622 | }, 623 | 624 | SerializeOp::Close(name) => { 625 | serializer.end_elem(name)?; 626 | } 627 | } 628 | } 629 | 630 | Ok(()) 631 | } 632 | } 633 | -------------------------------------------------------------------------------- /src/render/mod.rs: -------------------------------------------------------------------------------- 1 | //! Module containing the `Renderer` interface for constructing a 2 | //! particular text output. 3 | 4 | use crate::Colour; 5 | use crate::WhiteSpace; 6 | 7 | pub(crate) mod text_renderer; 8 | 9 | pub use text_renderer::{ 10 | PlainDecorator, RichAnnotation, RichDecorator, TaggedLine, TaggedLineElement, TextDecorator, 11 | TrivialDecorator, 12 | }; 13 | 14 | pub(crate) type Result = std::result::Result; 15 | 16 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 17 | pub(crate) struct TooNarrow; 18 | 19 | impl From for crate::Error { 20 | fn from(_: TooNarrow) -> crate::Error { 21 | crate::Error::TooNarrow 22 | } 23 | } 24 | 25 | /// A type which is a backend for HTML to text rendering. 26 | pub(crate) trait Renderer { 27 | /// Add an empty line to the output (ie between blocks). 28 | fn add_empty_line(&mut self) -> Result<()>; 29 | 30 | /// Create a sub-renderer for nested blocks. 31 | fn new_sub_renderer(&self, width: usize) -> Result 32 | where 33 | Self: Sized; 34 | 35 | /// Start a new block. 36 | fn start_block(&mut self) -> Result<()>; 37 | 38 | /// Mark the end of a block. 39 | fn end_block(&mut self); 40 | 41 | /// Start a new line, if necessary (but don't add a new line). 
42 | fn new_line(&mut self) -> Result<()>; 43 | 44 | /// Start a new line. 45 | fn new_line_hard(&mut self) -> Result<()>; 46 | 47 | /// Add a horizontal table border. 48 | fn add_horizontal_border(&mut self) -> Result<()>; 49 | 50 | /// Add a horizontal border which is not the full width 51 | fn add_horizontal_border_width( 52 | &mut self, 53 | #[allow(unused_variables)] width: usize, 54 | ) -> Result<()> { 55 | self.add_horizontal_border() 56 | } 57 | 58 | /// Begin a preformatted block. This indicates we are inside a
       <pre> element.
       59 |     /// The whitespace/wrapping behaviour is treated separately with `push_ws`.
       60 |     fn push_preformat(&mut self);
       61 | 
       62 |     /// End a preformatted block.
       63 |     fn pop_preformat(&mut self);
       64 | 
       65 |     /// Update the white-space CSS setting.
       66 |     fn push_ws(&mut self, ws: WhiteSpace);
       67 | 
       68 |     /// End the current white-space setting.
       69 |     fn pop_ws(&mut self);
       70 | 
       71 |     /// Add some inline text (which should be wrapped at the
       72 |     /// appropriate width) to the current block.
       73 |     fn add_inline_text(&mut self, text: &str) -> Result<()>;
       74 | 
       75 |     /// Return the current width in character cells
       76 |     fn width(&self) -> usize;
       77 | 
       78 |     /// Add a new block from a sub renderer (`other`), and prefix every line by the
       79 |     /// corresponding text from each iteration of `prefixes`.
       80 |     fn append_subrender<'a, I>(&mut self, other: Self, prefixes: I) -> Result<()>
       81 |     where
       82 |         I: Iterator<Item = &'a str>;
       83 | 
       84 |     /// Append a set of sub renderers (`cols`) joined left-to-right with a vertical
       85 |     /// line, and add a horizontal line below.
       86 |     /// If `collapse` is true, then merge top/bottom borders of the subrenderer
       87 |     /// with the surrounding one.
       88 |     fn append_columns_with_borders<I>(&mut self, cols: I, collapse: bool) -> Result<()>
       89 |     where
       90 |         I: IntoIterator<Item = Self>,
       91 |         Self: Sized;
       92 | 
       93 |     /// Append a set of sub renderers (`cols`) joined vertically with lines, for tables
       94 |     /// which would otherwise be too wide for the screen.
       95 |     fn append_vert_row<I>(&mut self, cols: I) -> Result<()>
       96 |     where
       97 |         I: IntoIterator<Item = Self>,
       98 |         Self: Sized;
       99 | 
      100 |     /// Returns true if this renderer has no content.
      101 |     fn empty(&self) -> bool;
      102 | 
      103 |     /// Start a hyperlink
      104 |     /// TODO: return sub-builder or similar to make misuse
      105 |     /// of start/link harder?
      106 |     fn start_link(&mut self, target: &str) -> Result<()>;
      107 | 
      108 |     /// Finish a hyperlink started earlier.
      109 |     fn end_link(&mut self) -> Result<()>;
      110 | 
      111 |     /// Start an emphasised region
      112 |     fn start_emphasis(&mut self) -> Result<()>;
      113 | 
      114 |     /// Finish emphasised text started earlier.
      115 |     fn end_emphasis(&mut self) -> Result<()>;
      116 | 
      117 |     /// Start a strong region
      118 |     fn start_strong(&mut self) -> Result<()>;
      119 | 
      120 |     /// Finish strong text started earlier.
      121 |     fn end_strong(&mut self) -> Result<()>;
      122 | 
      123 |     /// Start a strikeout region
      124 |     fn start_strikeout(&mut self) -> Result<()>;
      125 | 
      126 |     /// Finish strikeout text started earlier.
      127 |     fn end_strikeout(&mut self) -> Result<()>;
      128 | 
      129 |     /// Start a code region
      130 |     fn start_code(&mut self) -> Result<()>;
      131 | 
      132 |     /// End a code region
      133 |     fn end_code(&mut self) -> Result<()>;
      134 | 
      135 |     /// Add an image
      136 |     fn add_image(&mut self, src: &str, title: &str) -> Result<()>;
      137 | 
      138 |     /// Get prefix string of header in specific level.
      139 |     fn header_prefix(&mut self, level: usize) -> String;
      140 | 
      141 |     /// Get prefix string of quoted block.
      142 |     fn quote_prefix(&mut self) -> String;
      143 | 
      144 |     /// Get prefix string of unordered list item.
      145 |     fn unordered_item_prefix(&mut self) -> String;
      146 | 
      147 |     /// Get prefix string of ith ordered list item.
      148 |     fn ordered_item_prefix(&mut self, i: i64) -> String;
      149 | 
      150 |     /// Record the start of a named HTML fragment
      151 |     fn record_frag_start(&mut self, fragname: &str);
      152 | 
      153 |     #[allow(unused)]
      154 |     /// Push a new foreground colour
      155 |     fn push_colour(&mut self, colour: Colour);
      156 | 
      157 |     #[allow(unused)]
      158 |     /// Pop the last foreground colour
      159 |     fn pop_colour(&mut self);
      160 | 
      161 |     #[allow(unused)]
      162 |     /// Push a new background colour
      163 |     fn push_bgcolour(&mut self, colour: Colour);
      164 | 
      165 |     #[allow(unused)]
      166 |     /// Pop the last background colour
      167 |     fn pop_bgcolour(&mut self);
      168 | 
      169 |     /// Start a section of superscript text.
      170 |     fn start_superscript(&mut self) -> Result<()>;
      171 | 
      172 |     /// End a section of superscript text.
      173 |     fn end_superscript(&mut self) -> Result<()>;
      174 | }
      175 | 
      
      
      --------------------------------------------------------------------------------