├── .gitignore ├── pages ├── .gitignore ├── _config.yml ├── _includes │ └── head.html ├── assets │ ├── demo.css │ └── demo-main.js └── index.markdown ├── html2text-web-demo ├── .gitignore ├── .cargo │ └── config.toml ├── Trunk.toml ├── Cargo.toml ├── index.html └── src │ └── lib.rs ├── src ├── css │ └── types.rs ├── ansi_colours.rs ├── macros.rs ├── render │ └── mod.rs ├── markup5ever_rcdom.rs └── css.rs ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── jekyll-gh-pages.yml ├── rust.yml ├── LICENSE ├── Cargo.toml ├── .circleci └── config.yml ├── benches └── tables.rs ├── README.md ├── examples ├── html2text.rs └── html2term.rs ├── CHANGELOG.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /pages/.gitignore: -------------------------------------------------------------------------------- 1 | _site 2 | -------------------------------------------------------------------------------- /html2text-web-demo/.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | -------------------------------------------------------------------------------- /html2text-web-demo/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | target = "wasm32-wasip2" 3 | -------------------------------------------------------------------------------- /src/css/types.rs: -------------------------------------------------------------------------------- 1 | #[derive(Copy, Clone, PartialEq, Eq, Debug)] 2 | pub(crate) enum Importance { 3 | Default, 4 | Important, 5 | } 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "cargo" 4 | 
directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | day: "friday" 8 | rebase-strategy: "disabled" 9 | -------------------------------------------------------------------------------- /pages/_config.yml: -------------------------------------------------------------------------------- 1 | lsi: false 2 | safe: true 3 | source: . 4 | incremental: false 5 | baseurl: "/rust-html2text" 6 | gist: 7 | noscript: false 8 | 9 | theme: minima 10 | 11 | github_username: jugglerchris 12 | -------------------------------------------------------------------------------- /html2text-web-demo/Trunk.toml: -------------------------------------------------------------------------------- 1 | trunk-version = "^0.21.13" 2 | 3 | [build] 4 | public_url = "/rust-html2text/" 5 | release = true 6 | filehash = false 7 | inject_scripts = true #false 8 | offline = false #true 9 | frozen = true 10 | minify = "on_release" 11 | -------------------------------------------------------------------------------- /html2text-web-demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "html2text-web-demo" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | html2text = { path = "..", features = ["css"] } 8 | ratzilla = "0.0.6" 9 | wasm-bindgen = "0.2.100" 10 | 11 | [lib] 12 | crate-type = ["cdylib", "rlib"] 13 | -------------------------------------------------------------------------------- /rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- 
/.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | test-action: 14 | name: Check semver compatibility 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout sources 18 | uses: actions/checkout@v2 19 | 20 | - name: Install stable toolchain 21 | uses: actions-rs/toolchain@v1 22 | with: 23 | toolchain: stable 24 | profile: minimal 25 | override: true 26 | 27 | - name: Check semver 28 | uses: obi1kenobi/cargo-semver-checks-action@v2 29 | with: 30 | version-tag-prefix: '' 31 | -------------------------------------------------------------------------------- /pages/_includes/head.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {%- seo -%} 6 | 7 | {%- feed_meta -%} 8 | {%- if jekyll.environment == 'production' and site.google_analytics -%} 9 | {%- include google-analytics.html -%} 10 | {%- endif -%} 11 | {%- if page.h2t_wasm -%} 12 | 13 | 14 | 15 | {%- endif -%} 16 | {%- if page.h2t_js -%} 17 | 19 | {%- endif -%} 20 | 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Chris Emerson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or 
substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pages/assets/demo.css: -------------------------------------------------------------------------------- 1 | #lib { 2 | background-color: black; 3 | height: 30em; 4 | overflow: scroll; 5 | } 6 | #input_html { 7 | height: 300px; 8 | width: 95%; 9 | overflow: scroll; 10 | } 11 | 12 | #lib pre { 13 | margin: 0; 14 | padding: 0; 15 | overflow: hidden; 16 | background-color: black; 17 | border: 0px; 18 | } 19 | 20 | .warning { 21 | color: red; 22 | } 23 | .warning::before { 24 | content: "⚠️"; 25 | } 26 | 27 | div.wrapper { 28 | max-width: 100%; 29 | } 30 | @media screen and (min-width: 1000px) { 31 | #h2tmain { 32 | display: grid; 33 | gap: 10px; 34 | grid-template-columns: 1fr 1fr; 35 | } 36 | #lib_container { 37 | grid-column: 1; 38 | min-width: 45%; 39 | } 40 | #input_container { 41 | grid-column: 1; 42 | grid-row-start: 2; 43 | min-width: 45%; 44 | } 45 | #configtable { 46 | grid-column: 2; 47 | grid-row-start: 1; 48 | grid-row-end: 3; 49 | min-width: 45%; 50 | } 51 | #rust-code-pre { 52 | grid-column: 2; 53 | min-width: 45%; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/ansi_colours.rs: -------------------------------------------------------------------------------- 1 | //! Convenience helper for producing coloured terminal output. 2 | //! 3 | //! 
This optional helper applies terminal colours (or other effects which 4 | //! can be achieved using inline characters sent to the terminal such as 5 | //! underlining in some terminals). 6 | 7 | use crate::RichAnnotation; 8 | use std::io; 9 | 10 | /// Reads HTML from `input`, and returns text wrapped to `width` columns. 11 | /// 12 | /// The text is returned as a `Vec>`; the annotations are vectors 13 | /// of `RichAnnotation`. The "outer" annotation comes first in the `Vec`. 14 | /// 15 | /// The function `colour_map` is given a slice of `RichAnnotation` and should 16 | /// return a pair of static strings which should be inserted before/after a text 17 | /// span with that annotation; for example a string which sets text colour 18 | /// and a string which sets the colour back to the default. 19 | pub fn from_read_coloured( 20 | input: R, 21 | width: usize, 22 | colour_map: FMap, 23 | ) -> Result 24 | where 25 | R: io::Read, 26 | FMap: Fn(&[RichAnnotation], &str) -> String, 27 | { 28 | super::config::rich().coloured(input, width, colour_map) 29 | } 30 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "html2text" 3 | version = "0.16.5" 4 | authors = ["Chris Emerson "] 5 | description = "Render HTML as plain text." 
6 | repository = "https://github.com/jugglerchris/rust-html2text/" 7 | readme = "README.md" 8 | documentation = "https://docs.rs/html2text/" 9 | edition = "2021" 10 | rust-version = "1.85" 11 | categories = ["text-processing"] 12 | 13 | keywords = ["html", "text"] 14 | license = "MIT" 15 | 16 | [dependencies] 17 | html5ever = "0.36.1" 18 | tendril = "0.4" 19 | unicode-width = "0.2" 20 | backtrace = { version = "0.3", optional=true } 21 | thiserror = "2.0.0" 22 | log = { version = "0.4.20", optional = true } 23 | nom = { version = "8.0.0", optional = true } 24 | 25 | [features] 26 | html_trace = ["dep:log"] 27 | html_trace_bt = ["html_trace", "dep:backtrace"] 28 | default = [] 29 | css = [ "dep:nom" ] 30 | css_ext = ["css"] 31 | 32 | [[example]] 33 | name = "html2term" 34 | path = "examples/html2term.rs" 35 | 36 | [[example]] 37 | name = "html2text" 38 | path = "examples/html2text.rs" 39 | 40 | [dev-dependencies] 41 | env_logger = "0.11.6" 42 | argparse = "0.2.2" 43 | log = "0.4.20" 44 | syntect = "5.2.0" 45 | 46 | [target.'cfg(unix)'.dev-dependencies] 47 | termion = "4.0" 48 | -------------------------------------------------------------------------------- /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Build and deploy demo site 3 | 4 | on: 5 | # Allows you to run this workflow manually from the Actions tab 6 | workflow_dispatch: 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 15 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
16 | concurrency: 17 | group: "pages" 18 | cancel-in-progress: false 19 | 20 | jobs: 21 | # Build job: 22 | build: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Checkout 26 | uses: actions/checkout@v4 27 | - name: Setup Pages 28 | uses: actions/configure-pages@v5 29 | - name: Install trunk 30 | run: cargo install trunk --version=0.21.13 31 | - name: Install WASM rust target 32 | run: rustup target add wasm32-unknown-unknown 33 | - name: Build WASM module 34 | run: trunk build 35 | working-directory: ./html2text-web-demo 36 | - name: Copy WASM assets 37 | run: cp html2text-web-demo/dist/html2text-web-demo{.js,_bg.wasm} ./pages/assets/ 38 | - name: Build with Jekyll 39 | uses: actions/jekyll-build-pages@v1 40 | with: 41 | source: ./pages 42 | destination: ./_site 43 | - name: Upload artifact 44 | uses: actions/upload-pages-artifact@v3 45 | 46 | # Deployment job 47 | deploy: 48 | environment: 49 | name: github-pages 50 | url: ${{ steps.deployment.outputs.page_url }} 51 | runs-on: ubuntu-latest 52 | needs: build 53 | steps: 54 | - name: Deploy to GitHub Pages 55 | id: deployment 56 | uses: actions/deploy-pages@v4 57 | 58 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "html_trace_bt")] 2 | extern crate backtrace; 3 | 4 | /* This is to work around a false positive for the clippy warning 5 | * `match_on_same_arms`. 6 | * See https://github.com/Manishearth/rust-clippy/issues/1390 7 | */ 8 | #[cfg(not(feature = "html_trace"))] 9 | #[inline(always)] 10 | pub fn nop() {} 11 | 12 | #[cfg(feature = "html_trace")] 13 | #[macro_export] 14 | #[doc(hidden)] 15 | macro_rules! 
html_trace { 16 | ($fmt:expr) => { 17 | #[cfg(feature = "html_trace_bt")] 18 | { 19 | let bt = ::backtrace::Backtrace::new(); 20 | log::info!( concat!($fmt, " at {:?}"), bt ); 21 | } 22 | #[cfg(not(feature = "html_trace_bt"))] 23 | { 24 | log::info!($fmt); 25 | } 26 | }; 27 | ($fmt:expr, $( $args:expr ),*) => { 28 | #[cfg(feature = "html_trace_bt")] 29 | { 30 | let bt = ::backtrace::Backtrace::new(); 31 | log::info!( concat!($fmt, " at {:?}"), $( $args ),* , bt ); 32 | } 33 | #[cfg(not(feature = "html_trace_bt"))] 34 | { 35 | log::info!($fmt, $( $args ),*); 36 | } 37 | }; 38 | } 39 | #[cfg(not(feature = "html_trace"))] 40 | #[macro_export] 41 | #[doc(hidden)] 42 | macro_rules! html_trace { 43 | ($fmt:expr) => { 44 | $crate::macros::nop(); 45 | }; 46 | ($fmt:expr, $( $args:expr ),*) => { 47 | $crate::macros::nop(); 48 | }; 49 | } 50 | 51 | #[cfg(feature = "html_trace")] 52 | #[macro_export] 53 | #[doc(hidden)] 54 | macro_rules! html_trace_quiet { 55 | ($fmt:expr) => { 56 | log::trace!( $fmt ); 57 | }; 58 | ($fmt:expr, $( $args:expr ),*) => { 59 | log::trace!( $fmt, $( $args ),* ); 60 | }; 61 | } 62 | 63 | #[cfg(not(feature = "html_trace"))] 64 | #[macro_export] 65 | #[doc(hidden)] 66 | macro_rules! html_trace_quiet { 67 | ($fmt:expr) => { 68 | $crate::macros::nop(); 69 | }; 70 | ($fmt:expr, $( $args:expr ),*) => { 71 | $crate::macros::nop(); 72 | }; 73 | } 74 | -------------------------------------------------------------------------------- /html2text-web-demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 38 | 39 | 40 |

Html2text demo

41 | CSS 42 |
43 | Colour 44 |
45 | 66 |
67 | 68 |
69 | 70 | 71 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | win: circleci/windows@2.2.0 5 | 6 | jobs: 7 | build-stable: 8 | docker: 9 | - image: cimg/rust:1.85.1 10 | steps: 11 | - checkout 12 | - run: cargo --version 13 | - run: cargo build 14 | - run: cargo test 15 | - run: 16 | name: Install tools 17 | command: | 18 | rustup component add rustfmt clippy 19 | - run: 20 | name: Check formatting 21 | command: | 22 | cargo fmt --all -- --check --color=auto 23 | - run: 24 | name: Clippy 25 | command: | 26 | cargo clippy --all-features 27 | build-css: 28 | docker: 29 | - image: cimg/rust:1.90 30 | steps: 31 | - checkout 32 | - run: cargo --version 33 | - run: cargo build --features=css 34 | - run: cargo test --features=css 35 | build-1-85: 36 | docker: 37 | - image: cimg/rust:1.85 38 | steps: 39 | - checkout 40 | - run: cargo --version 41 | - run: cargo build --features=css 42 | - run: cargo test --features=css 43 | build-windows: 44 | executor: 45 | name: win/default 46 | size: medium 47 | shell: bash.exe 48 | environment: 49 | PATHk 50 | steps: 51 | - checkout 52 | - run: 53 | name: Install Rust 54 | command: | 55 | curl https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe --output rustup-init.exe 56 | ./rustup-init.exe -y 57 | - run: 58 | name: Update PATH and cargo config 59 | command: | 60 | echo "[net]" >> $USERPROFILE/.cargo/config 61 | echo "git-fetch-with-cli = true" >> $USERPROFILE/.cargo/config 62 | echo 'export PATH=$USERPROFILE/.cargo/bin:$PATH' >> $BASH_ENV 63 | - run: 64 | name: Build 65 | command: | 66 | cargo build 67 | - run: 68 | name: Tests 69 | command: | 70 | cargo test 71 | 72 | workflows: 73 | version: 2 74 | build: 75 | jobs: 76 | - "build-stable" 77 | - "build-css" 78 | - "build-1-85" 79 | - "build-windows" 80 | 
-------------------------------------------------------------------------------- /benches/tables.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate html2text; 3 | extern crate test; 4 | 5 | use ::test::Bencher; 6 | 7 | use html2text::from_read; 8 | 9 | fn make_html(content: &str) -> String { 10 | String::from("") + content + "" 11 | } 12 | 13 | fn make_tab(cell: &str, rows: usize, cols: usize) -> String { 14 | let mut result = String::from(""); 15 | for _ in 0..rows { 16 | result.push_str(""); 17 | for _ in 0..cols { 18 | result.push_str(""); 21 | } 22 | result.push_str(""); 23 | } 24 | result 25 | } 26 | 27 | #[bench] 28 | fn bench_empty(b: &mut Bencher) { 29 | b.iter(|| from_read(make_html("").as_bytes(), 80)); 30 | } 31 | 32 | #[bench] 33 | fn bench_tab_1_1(b: &mut Bencher) { 34 | b.iter(|| from_read(make_html(&make_tab("cell", 1, 1)).as_bytes(), 80)); 35 | } 36 | #[bench] 37 | fn bench_tab_2_2(b: &mut Bencher) { 38 | b.iter(|| from_read(make_html(&make_tab("cell", 2, 2)).as_bytes(), 80)); 39 | } 40 | #[bench] 41 | fn bench_tab_3_3(b: &mut Bencher) { 42 | b.iter(|| from_read(make_html(&make_tab("cell", 3, 3)).as_bytes(), 80)); 43 | } 44 | #[bench] 45 | fn bench_tab_4_4(b: &mut Bencher) { 46 | b.iter(|| from_read(make_html(&make_tab("cell", 4, 4)).as_bytes(), 80)); 47 | } 48 | #[bench] 49 | fn bench_tab_5_5(b: &mut Bencher) { 50 | b.iter(|| from_read(make_html(&make_tab("cell", 5, 5)).as_bytes(), 80)); 51 | } 52 | #[bench] 53 | fn bench_tab_6_6(b: &mut Bencher) { 54 | b.iter(|| from_read(make_html(&make_tab("cell", 6, 6)).as_bytes(), 80)); 55 | } 56 | // Try a table with `depth` nested tables each with `rows` rows and `cols` columns. 
57 | fn bench_tab_depth(b: &mut Bencher, content: &str, depth: usize, rows: usize, cols: usize) { 58 | let mut t = String::from(content); 59 | for _ in 0..depth { 60 | t = make_tab(&t, rows, cols); 61 | } 62 | let html = make_html(&t); 63 | b.iter(|| from_read(html.as_bytes(), 80)); 64 | } 65 | #[bench] 66 | fn bench_tab_2_1_depth_2(b: &mut Bencher) { 67 | bench_tab_depth(b, "cell", 2, 2, 1); 68 | } 69 | #[bench] 70 | fn bench_tab_3_1_depth_2(b: &mut Bencher) { 71 | bench_tab_depth(b, "cell", 2, 3, 1); 72 | } 73 | #[bench] 74 | fn bench_tab_4_1_depth_2(b: &mut Bencher) { 75 | bench_tab_depth(b, "cell", 2, 4, 1); 76 | } 77 | #[bench] 78 | fn bench_tab_1_2_depth_2(b: &mut Bencher) { 79 | bench_tab_depth(b, "cell", 2, 1, 2); 80 | } 81 | #[bench] 82 | fn bench_tab_1_3_depth_2(b: &mut Bencher) { 83 | bench_tab_depth(b, "cell", 2, 1, 3); 84 | } 85 | #[bench] 86 | fn bench_tab_1_4_depth_2(b: &mut Bencher) { 87 | bench_tab_depth(b, "cell", 2, 1, 4); 88 | } 89 | #[bench] 90 | fn bench_tab_2_depth_2(b: &mut Bencher) { 91 | bench_tab_depth(b, "cell", 2, 2, 2); 92 | } 93 | /* 94 | #[bench] 95 | fn bench_tab_2_depth_3(b: &mut Bencher) { 96 | bench_tab_depth(b, "cell", 3, 2, 2); 97 | } 98 | #[bench] 99 | fn bench_tab_2_depth_4(b: &mut Bencher) { 100 | bench_tab_depth(b, "cell", 4, 2, 2); 101 | } 102 | #[bench] 103 | fn bench_tab_2_depth_5(b: &mut Bencher) { 104 | bench_tab_depth(b, "cell", 5, 2, 2); 105 | } 106 | */ 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![jugglerchris](https://circleci.com/gh/jugglerchris/rust-html2text.svg?branch=master&style=svg)](https://app.circleci.com/pipelines/github/jugglerchris/rust-html2text?filter=all) 2 | 3 | # html2text 4 | 5 | html2text is a [Rust](http://www.rust-lang.org/) crate which converts HTML to 6 | plain text (as in Rust `String`) or text spans with annotations like colours, 7 | e.g. 
optionally using CSS. See [the online demo](https://jugglerchris.github.io/rust-html2text/) 8 | for examples of the output. 9 | 10 | It makes use of the [Servo project](https://github.com/servo/servo)'s HTML 11 | parser, [html5ever](https://github.com/servo/html5ever/), using the DOM to 12 | generate text (which can optionally include annotations for some features such 13 | as hyperlinks). 14 | 15 | The project aims to do a reasonable job of rendering reasonable HTML in a 16 | terminal or other places where HTML needs to be converted to text (for 17 | example the text/plain fallback in HTML e-mails). 18 | 19 | With features (see below) some CSS/colour support is available. 20 | 21 | ## Examples 22 | 23 | The simple functions like `from_read()` return formatted text (in various 24 | formats including plain text). 25 | 26 | ```rust 27 | use html2text::from_read; 28 | let html = b" 29 |
    30 |
  • Item one
  • 31 |
  • Item two
  • 32 |
  • Item three
  • 33 |
"; 34 | assert_eq!(from_read(&html[..], 20).unwrap(), 35 | "\ 36 | * Item one 37 | * Item two 38 | * Item three 39 | "); 40 | ``` 41 | 42 | A lower level API gives a bit more control. This gives the same result (except for 43 | returning errors as Result instead of panicking): 44 | 45 | ```rust 46 | use html2text::config; 47 | 48 | let html = b" 49 |
    50 |
  • Item one
  • 51 |
  • Item two
  • 52 |
  • Item three
  • 53 |
"; 54 | 55 | assert_eq!( 56 | config::plain() 57 | .string_from_read(&html[..], 20) 58 | .unwrap(), 59 | "\ 60 | * Item one 61 | * Item two 62 | * Item three 63 | "); 64 | ``` 65 | 66 | A couple of simple demonstration programs are included as examples: 67 | 68 | ### html2text 69 | 70 | The simplest example uses `from_read` to convert HTML on stdin into plain 71 | text: 72 | 73 | ```sh 74 | $ cargo run --example html2text < foo.html 75 | [...] 76 | ``` 77 | 78 | ### html2term 79 | 80 | A very simple example of using the rich interface (`from_read_rich`) for a 81 | slightly interactive console HTML viewer is provided as `html2term`. 82 | 83 | ```sh 84 | $ cargo run --example html2term foo.html 85 | [...] 86 | ``` 87 | 88 | Note that this example takes the HTML file as a parameter so that it can 89 | read keys from stdin. 90 | 91 | ## Cargo Features 92 | 93 | |Feature| Description| 94 | |-------|------------| 95 | |css | Limited handling of CSS, adding Coloured nodes to the render tree. | 96 | |html\_trace| Add verbose internal logging (not recommended) | 97 | |html\_trace\_bt| Add backtraces to the verbose internal logging | 98 | 99 | ### CSS support 100 | 101 | When the `css` feature is enabled, some simple CSS handling is available. 102 | 103 | Style rules are taken from: 104 | * If `Config::use_doc_css()` is called, then style from the document: 105 | * ` 48 | 49 |

Hi there

50 |

This is some simple text with a link to github

51 |
    52 |
  1. Item one
  2. 53 |
  3. Item two
  4. 54 |
  5. Item three
  6. 55 |
56 |
"); 19 | result.push_str(cell); 20 | result.push_str("
57 | 58 | 59 | 60 |
Heading 1Heading 2Heading 3
Data 1Data 2Data 3
Hello there
61 | 62 | 63 | 64 |
65 | 66 | ## Configuration 67 | 68 | The following are the configuration settings (accessible via [`html2text::config`](https://docs.rs/html2text/latest/html2text/config/struct.Config.html)). 69 | 70 | | Use Rich output | The [`rich`](https://docs.rs/html2text/latest/html2text/config/fn.rich.html) mode returns spans with attributes (like hyperlinks, emphasis, or colours). When disabled ([`plain`](https://docs.rs/html2text/latest/html2text/config/fn.plain.html)), the output is a plain `String` (possibly with formatting depending on other settings, e.g. table borders or `**markdown-style**` characters added). Rich output adds extra information (annotations) to allow, for example, using terminal colours and other features for a nicer TUI. | 71 | | use_doc_css | Parse CSS from the HTML document (css) | 72 | | User CSS | Add user stylesheet rules (css) | 73 | | Agent CSS | Add browser stylesheet rules (css) | 74 | | Pad block width | Pad blocks to the width with spaces | 75 | | Text wrap width | Wrap text to this width even if overall width is wider | 76 | | Allow width overflow | Allow text to be too wide in extreme cases instead of returning an error | 77 | | Minimum wrap width | Set the minimum number of columns to use for text blocks. | 78 | | Raw mode | Render contents of tables as if they were just text. Implies `no_table_borders` | 79 | | Don't render table borders | Tables are shown without borders | 80 | | Don't wrap URLs at the end | Some terminals handle long URLs better if not pre-wrapped | 81 | | Use Unicode combining characters for strikeout | This allows crossed out text without terminal codes, but some environments don't render them correctly (e.g. offset). | 82 | | Add markdown-like decoration | Add characters, e.g. `*` around `` text even with plain decorators. | 83 | | URL footnotes | Add numbered list of URLs at the end of the output | 84 | | | Configure how images with no `alt` text are handled | 85 | 86 |
87 | 88 |
89 | 90 | ## Rust API configuration 91 | 92 | The code below shows how to use the currently selected settings in the Rust API. 93 | 94 |
95 |
96 | 97 | 106 | -------------------------------------------------------------------------------- /src/render/mod.rs: -------------------------------------------------------------------------------- 1 | //! Module containing the `Renderer` interface for constructing a 2 | //! particular text output. 3 | 4 | use crate::Colour; 5 | use crate::WhiteSpace; 6 | 7 | pub(crate) mod text_renderer; 8 | 9 | pub use text_renderer::{ 10 | PlainDecorator, RichAnnotation, RichDecorator, TaggedLine, TaggedLineElement, TextDecorator, 11 | TrivialDecorator, 12 | }; 13 | 14 | pub(crate) type Result = std::result::Result; 15 | 16 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 17 | pub(crate) struct TooNarrow; 18 | 19 | impl From for crate::Error { 20 | fn from(_: TooNarrow) -> crate::Error { 21 | crate::Error::TooNarrow 22 | } 23 | } 24 | 25 | /// A type which is a backend for HTML to text rendering. 26 | pub(crate) trait Renderer { 27 | /// Add an empty line to the output (ie between blocks). 28 | fn add_empty_line(&mut self) -> Result<()>; 29 | 30 | /// Create a sub-renderer for nested blocks. 31 | fn new_sub_renderer(&self, width: usize) -> Result 32 | where 33 | Self: Sized; 34 | 35 | /// Start a new block. 36 | fn start_block(&mut self) -> Result<()>; 37 | 38 | /// Start a new table. 39 | fn start_table(&mut self) -> Result<()>; 40 | 41 | /// Mark the end of a block. 42 | fn end_block(&mut self); 43 | 44 | /// Start a new line, if necessary (but don't add a new line). 45 | fn new_line(&mut self) -> Result<()>; 46 | 47 | /// Start a new line. 48 | fn new_line_hard(&mut self) -> Result<()>; 49 | 50 | /// Add a horizontal table border. 51 | fn add_horizontal_border(&mut self) -> Result<()>; 52 | 53 | /// Add a horizontal border which is not the full width 54 | fn add_horizontal_border_width( 55 | &mut self, 56 | #[allow(unused_variables)] width: usize, 57 | ) -> Result<()> { 58 | self.add_horizontal_border() 59 | } 60 | 61 | /// Begin a preformatted block. 
This indicates we are inside a `<pre>` element.
 62 |     /// The whitespace/wrapping behaviour is treated separately with `push_ws`.
 63 |     fn push_preformat(&mut self);
 64 | 
 65 |     /// End a preformatted block.
 66 |     fn pop_preformat(&mut self);
 67 | 
 68 |     /// Update the white-space CSS setting.
 69 |     fn push_ws(&mut self, ws: WhiteSpace);
 70 | 
 71 |     /// End the current white-space setting.
 72 |     fn pop_ws(&mut self);
 73 | 
 74 |     /// Add some inline text (which should be wrapped at the
 75 |     /// appropriate width) to the current block.
 76 |     fn add_inline_text(&mut self, text: &str) -> Result<()>;
 77 | 
 78 |     /// Return the current width in character cells
 79 |     fn width(&self) -> usize;
 80 | 
 81 |     /// Add a new block from a sub renderer, and prefix every line by the
 82 |     /// corresponding text from each iteration of prefixes.
 83 |     fn append_subrender<'a, I>(&mut self, other: Self, prefixes: I) -> Result<()>
 84 |     where
 85 |         I: Iterator;
 86 | 
 87 |     /// Append a set of sub renderers joined left-to-right with a vertical line,
 88 |     /// and add a horizontal line below.
 89 |     /// If collapse is true, then merge top/bottom borders of the subrenderer
 90 |     /// with the surrounding one.
 91 |     fn append_columns_with_borders(&mut self, cols: I, collapse: bool) -> Result<()>
 92 |     where
 93 |         I: IntoIterator,
 94 |         Self: Sized;
 95 | 
 96 |     /// Append a set of sub renderers joined vertically with lines, for tables
 97 |     /// which would otherwise be too wide for the screen.
 98 |     fn append_vert_row(&mut self, cols: I) -> Result<()>
 99 |     where
100 |         I: IntoIterator,
101 |         Self: Sized;
102 | 
103 |     /// Returns true if this renderer has no content.
104 |     fn empty(&self) -> bool;
105 | 
106 |     /// Start a hyperlink
107 |     /// TODO: return sub-builder or similar to make misuse
108 |     /// of start/link harder?
109 |     fn start_link(&mut self, target: &str) -> Result<()>;
110 | 
111 |     /// Finish a hyperlink started earlier.
112 |     fn end_link(&mut self) -> Result<()>;
113 | 
114 |     /// Start an emphasised region
115 |     fn start_emphasis(&mut self) -> Result<()>;
116 | 
117 |     /// Finish emphasised text started earlier.
118 |     fn end_emphasis(&mut self) -> Result<()>;
119 | 
120 |     /// Start a strong region
121 |     fn start_strong(&mut self) -> Result<()>;
122 | 
123 |     /// Finish strong text started earlier.
124 |     fn end_strong(&mut self) -> Result<()>;
125 | 
126 |     /// Start a strikeout region
127 |     fn start_strikeout(&mut self) -> Result<()>;
128 | 
129 |     /// Finish strikeout text started earlier.
130 |     fn end_strikeout(&mut self) -> Result<()>;
131 | 
132 |     /// Start a code region
133 |     fn start_code(&mut self) -> Result<()>;
134 | 
135 |     /// End a code region
136 |     fn end_code(&mut self) -> Result<()>;
137 | 
138 |     /// Add an image
139 |     fn add_image(&mut self, src: &str, title: &str) -> Result<()>;
140 | 
141 |     /// Get prefix string of header in specific level.
142 |     fn header_prefix(&mut self, level: usize) -> String;
143 | 
144 |     /// Get prefix string of quoted block.
145 |     fn quote_prefix(&mut self) -> String;
146 | 
147 |     /// Get prefix string of unordered list item.
148 |     fn unordered_item_prefix(&mut self) -> String;
149 | 
150 |     /// Get prefix string of ith ordered list item.
151 |     fn ordered_item_prefix(&mut self, i: i64) -> String;
152 | 
153 |     /// Record the start of a named HTML fragment
154 |     fn record_frag_start(&mut self, fragname: &str);
155 | 
156 |     #[allow(unused)]
157 |     /// Push a new foreground colour
158 |     fn push_colour(&mut self, colour: Colour);
159 | 
160 |     #[allow(unused)]
161 |     /// Pop the last foreground colour
162 |     fn pop_colour(&mut self);
163 | 
164 |     #[allow(unused)]
165 |     /// Push a new background colour
166 |     fn push_bgcolour(&mut self, colour: Colour);
167 | 
168 |     #[allow(unused)]
169 |     /// Pop the last background colour
170 |     fn pop_bgcolour(&mut self);
171 | 
172 |     /// Start a section of superscript text.
173 |     fn start_superscript(&mut self) -> Result<()>;
174 | 
175 |     /// End a section of superscript text.
176 |     fn end_superscript(&mut self) -> Result<()>;
177 | }
178 | 


--------------------------------------------------------------------------------
/html2text-web-demo/src/lib.rs:
--------------------------------------------------------------------------------
  1 | use wasm_bindgen::prelude::wasm_bindgen;
  2 | 
  3 | use ratzilla::ratatui::{
  4 |     style::{Color, Style, Stylize},
  5 |     text::{Text, Line, Span},
  6 |     widgets::{Block, Paragraph},
  7 |     Frame,
  8 |     Terminal,
  9 | };
 10 | 
 11 | use html2text::{
 12 |     config::ImageRenderMode,
 13 |     render::TextDecorator,
 14 | };
 15 | use ratzilla::DomBackend;
 16 | 
 17 | #[derive(Default)]
 18 | #[wasm_bindgen]
 19 | pub struct Config { // Demo options set from JavaScript; applied to html2text in `update_conf`.
 20 |     css: bool,                     // use the document's own CSS (`use_doc_css`)
 21 |     colour: bool,                  // render through the rich/coloured path in `format_html`
 22 |     user_css: Option<String>,      // extra user CSS (`add_css`), None when blank
 23 |     agent_css: Option<String>,     // extra user-agent CSS (`add_agent_css`), None when blank
 24 |     pad_block_width: bool,         // call `pad_block_width`
 25 |     wrap_width: Option<usize>,     // passed to `max_wrap_width` when set
 26 |     allow_overflow: bool,          // call `allow_width_overflow`
 27 |     min_wrap_width: Option<usize>, // passed to `min_wrap_width` when set
 28 |     raw_mode: bool,                // call `raw_mode(true)`
 29 |     no_borders: bool,              // call `no_table_borders`
 30 |     no_link_wrap: bool,            // call `no_link_wrapping`
 31 |     unicode_so: bool,              // value for `unicode_strikeout`
 32 |     do_decorate: bool,             // call `do_decorate`
 33 |     link_footnotes: bool,          // value for `link_footnotes`
 34 |     image_mode: ImageRenderMode,   // passed to `empty_img_mode` unless IgnoreEmpty
 35 | }
 36 | 
 37 | #[wasm_bindgen]
 38 | impl Config {
 39 |     /// Create a configuration with every option at its default value.
 40 |     pub fn new() -> Self {
 41 |         Self::default()
 42 |     }
 43 | 
 44 |     /// Render through the rich (coloured/styled) path in `format_html`.
 45 |     pub fn use_colour(&mut self) {
 46 |         self.colour = true;
 47 |     }
 48 | 
 49 |     /// Apply the CSS found in the document itself (`use_doc_css`).
 50 |     pub fn use_css(&mut self) {
 51 |         self.css = true;
 52 |     }
 53 | 
 54 |     /// Set extra user CSS; empty or whitespace-only input clears it.
 55 |     pub fn add_user_css(&mut self, css: String) {
 56 |         self.user_css = if css.trim().is_empty() { None } else { Some(css) };
 57 |     }
 58 | 
 59 |     /// Set extra user-agent CSS; empty or whitespace-only input clears it.
 60 |     pub fn add_agent_css(&mut self, css: String) {
 61 |         self.agent_css = if css.trim().is_empty() { None } else { Some(css) };
 62 |     }
 63 | 
 64 |     /// Enable html2text's `pad_block_width` option.
 65 |     pub fn pad_block_width(&mut self) {
 66 |         self.pad_block_width = true;
 67 |     }
 68 | 
 69 |     /// Set the maximum text wrap width.
 70 |     pub fn max_wrap_width(&mut self, width: usize) {
 71 |         self.wrap_width = Some(width);
 72 |     }
 73 | 
 74 |     /// Enable html2text's `allow_width_overflow` option.
 75 |     pub fn allow_overflow(&mut self) {
 76 |         self.allow_overflow = true;
 77 |     }
 78 | 
 79 |     /// Set the minimum text wrap width.
 80 |     pub fn min_wrap_width(&mut self, width: usize) {
 81 |         self.min_wrap_width = Some(width);
 82 |     }
 83 |     /// Enable html2text's raw mode.
 84 |     pub fn raw_mode(&mut self) {
 85 |         self.raw_mode = true;
 86 |     }
 87 |     /// Enable html2text's `no_table_borders` option.
 88 |     pub fn no_borders(&mut self) {
 89 |         self.no_borders = true;
 90 |     }
 91 |     /// Enable html2text's `no_link_wrapping` option.
 92 |     pub fn no_link_wrap(&mut self) {
 93 |         self.no_link_wrap = true;
 94 |     }
 95 |     /// Ask for strikeout to be rendered with `unicode_strikeout`.
 96 |     pub fn unicode_so(&mut self) {
 97 |         self.unicode_so = true;
 98 |     }
 99 |     /// Enable html2text's `do_decorate` option.
100 |     pub fn do_decorate(&mut self) {
101 |         self.do_decorate = true;
102 |     }
103 |     /// Turn link footnotes on or off.
104 |     pub fn link_footnotes(&mut self, value: bool) {
105 |         self.link_footnotes = value;
106 |     }
107 | 
108 |     /// Select the image mode: "always", "replace" or "filename"; anything else is IgnoreEmpty.
109 |     pub fn image_mode(&mut self, value: &str) {
110 |         self.image_mode = match value {
111 |             "always" => ImageRenderMode::ShowAlways,
112 |             "replace" => ImageRenderMode::Replace("XX"),
113 |             "filename" => ImageRenderMode::Filename,
114 |             _ => ImageRenderMode::IgnoreEmpty,
115 |         };
116 |     }
117 | 
118 |     /// Apply these options to an html2text `conf`; returns `Err` if either CSS string is rejected.
119 |     fn update_conf<D: TextDecorator>(&self, conf: html2text::config::Config<D>) -> Result<html2text::config::Config<D>, String> {
120 |         let mut conf = if self.css { conf.use_doc_css() } else { conf };
121 |         if let Some(user_css) = &self.user_css {
122 |             conf = conf.add_css(user_css).map_err(|e| format!("{}", e))?;
123 |         }
124 |         if let Some(agent_css) = &self.agent_css {
125 |             conf = conf.add_agent_css(agent_css).map_err(|e| format!("{}", e))?;
126 |         }
127 |         if self.pad_block_width {
128 |             conf = conf.pad_block_width();
129 |         }
130 |         if let Some(width) = self.wrap_width {
131 |             conf = conf.max_wrap_width(width);
132 |         }
133 |         if self.allow_overflow {
134 |             conf = conf.allow_width_overflow();
135 |         }
136 |         if let Some(width) = self.min_wrap_width {
137 |             conf = conf.min_wrap_width(width);
138 |         }
139 |         if self.raw_mode {
140 |             conf = conf.raw_mode(true);
141 |         }
142 |         if self.no_borders {
143 |             conf = conf.no_table_borders();
144 |         }
145 |         if self.no_link_wrap {
146 |             conf = conf.no_link_wrapping();
147 |         }
148 |         // Set once from the option: off unless requested (a trailing `false` used to override it).
149 |         conf = conf.unicode_strikeout(self.unicode_so);
150 |         if self.do_decorate {
151 |             conf = conf.do_decorate();
152 |         }
153 |         conf = conf.link_footnotes(self.link_footnotes);
154 |         if self.image_mode != ImageRenderMode::IgnoreEmpty {
155 |             conf = conf.empty_img_mode(self.image_mode);
156 |         }
157 |         Ok(conf)
158 |     }
159 | }
160 | 
161 | /// Render `input` HTML into frame `f` as styled text inside a bordered "HTML" block.
162 | ///
163 | /// Returns `Err` with a message if applying `config` or converting the HTML fails.
164 | fn do_render_colour(f: &mut Frame, config: &Config, input: &[u8]) -> Result<(), String> {
165 |     let area = f.area();
166 | 
167 |     let conf = config.update_conf(html2text::config::rich())?;
168 | 
169 |     // Two columns are reserved (presumably for the block border); saturate so
170 |     // that an area narrower than two columns cannot underflow.
171 |     let width = (area.width as usize).saturating_sub(2);
172 |     let lines = conf
173 |         .lines_from_read(input, width)
174 |         .map_err(|e| format!("{}", e))?;
175 |     let mut out = Text::default();
176 |     for line in lines {
177 |         let mut term_line = Line::default();
178 |         for piece in line.tagged_strings() {
179 |             let span = Span::from(piece.s.clone());
180 |             let mut style = Style::new();
181 |             for attr in &piece.tag {
182 |                 use html2text::render::RichAnnotation::*;
183 |                 // Fold each annotation into the span's terminal style.
184 |                 style = match attr {
185 |                     Emphasis => style.italic(),
186 |                     Strong => style.bold(),
187 |                     Strikeout => style.crossed_out(),
188 |                     Colour(col) => style.fg(Color::Rgb(col.r, col.g, col.b)),
189 |                     BgColour(col) => style.bg(Color::Rgb(col.r, col.g, col.b)),
190 |                     // Links, images, code and any other annotation add no styling here.
191 |                     _ => style,
192 |                 };
193 |             }
194 |             term_line.push_span(span.style(style));
195 |         }
196 |         out.push_line(term_line);
197 |     }
198 |     f.render_widget(
199 |         Paragraph::new(out).block(Block::bordered().title("HTML").border_style(Color::Yellow)),
200 |         area,
201 |     );
202 |     Ok(())
203 | }
204 | 
205 | /// Render `input` HTML into the page element with id "lib", using the options in `config`.
206 | /// Backend, configuration and conversion failures are returned as `Err` strings.
207 | #[wasm_bindgen]
208 | pub fn format_html(config: Config, input: &str) -> Result<(), String> {
209 |     let backend = DomBackend::new_by_id("lib").map_err(|e| format!("{e:?}"))?;
210 |     let mut terminal = Terminal::new(backend).map_err(|e| format!("{e:?}"))?;
211 |     // The draw callback returns nothing, so the render outcome is carried out through this.
212 |     let mut rendered: Result<(), String> = Ok(());
213 |     terminal.draw(|f| {
214 |         rendered = if config.colour {
215 |             do_render_colour(f, &config, input.as_bytes())
216 |         } else {
217 |             config.update_conf(html2text::config::plain()).and_then(|conf| {
218 |                 let width = f.area().width as usize;
219 |                 let output = conf.string_from_read(input.as_bytes(), width);
220 |                 output.map(|text| f.render_widget(Paragraph::new(text), f.area()))
221 |                     .map_err(|e| format!("{}", e))
222 |             })
223 |         };
224 |     }).map_err(|e| format!("{e}"))?;
225 |     rendered
226 | }
227 | 


--------------------------------------------------------------------------------
/examples/html2text.rs:
--------------------------------------------------------------------------------
  1 | extern crate argparse;
  2 | extern crate html2text;
  3 | use argparse::{ArgumentParser, Store, StoreOption, StoreTrue};
  4 | use html2text::config::{self, Config};
  5 | use html2text::render::{TextDecorator, TrivialDecorator};
  6 | use log::trace;
  7 | use std::io;
  8 | use std::io::Write;
  9 | 
 10 | #[cfg(unix)]
 11 | use html2text::render::RichAnnotation;
 12 | #[cfg(unix)]
 13 | /// Wrap `s` in the terminal escape sequences implied by `annotations`.
 14 | ///
 15 | /// Mapping used:
 16 | /// * links are underlined and emphasis is shown bold;
 17 | /// * images, code and preformatted text are blue, strong text is light yellow
 18 | ///   and strikeout is light black (unless default colours are suppressed);
 19 | /// * CSS foreground/background colours are emitted as RGB sequences.
 20 | ///
 21 | /// * `annotations` - rich-text annotations attached to this piece of text.
 22 | /// * `s` - the text itself, emitted unchanged between the escape sequences.
 23 | /// * `use_css_colours` - when false, `Colour`/`BgColour` annotations are ignored.
 24 | /// * `no_default_colours` - when true, the built-in default colours are
 25 | ///   suppressed from the start.
 26 | ///
 27 | /// Closing sequences are appended in the reverse order of the opening ones.
 28 | fn default_colour_map(
 29 |     annotations: &[RichAnnotation],
 30 |     s: &str,
 31 |     use_css_colours: bool,
 32 |     no_default_colours: bool,
 33 | ) -> String {
 34 |     use termion::color::*;
 35 |     use RichAnnotation::*;
 36 | 
 37 |     // Becomes true once an explicit CSS foreground colour has been applied;
 38 |     // later annotations then skip the built-in default colours.
 39 |     let mut css_colour_seen = no_default_colours;
 40 |     let mut opening = String::new();
 41 |     let mut closing: Vec<String> = Vec::new();
 42 |     trace!("default_colour_map: str={s}, annotations={annotations:?}");
 43 | 
 44 |     for annotation in annotations {
 45 |         // Each arm yields an (open, close) pair; an arm whose guard fails falls
 46 |         // through to the catch-all and contributes nothing.
 47 |         let (open, close) = match annotation {
 48 |             Link(_) => (
 49 |                 termion::style::Underline.to_string(),
 50 |                 termion::style::Reset.to_string(),
 51 |             ),
 52 |             Emphasis => (
 53 |                 termion::style::Bold.to_string(),
 54 |                 termion::style::Reset.to_string(),
 55 |             ),
 56 |             Image(_) if !css_colour_seen => {
 57 |                 (Fg(Blue).to_string(), Fg(Reset).to_string())
 58 |             }
 59 |             Strong if !css_colour_seen => {
 60 |                 (Fg(LightYellow).to_string(), Fg(Reset).to_string())
 61 |             }
 62 |             Strikeout if !css_colour_seen => {
 63 |                 (Fg(LightBlack).to_string(), Fg(Reset).to_string())
 64 |             }
 65 |             Code if !css_colour_seen => {
 66 |                 (Fg(Blue).to_string(), Fg(Reset).to_string())
 67 |             }
 68 |             Preformat(_) if !css_colour_seen => {
 69 |                 (Fg(Blue).to_string(), Fg(Reset).to_string())
 70 |             }
 71 |             Colour(c) if use_css_colours => {
 72 |                 css_colour_seen = true;
 73 |                 (Fg(Rgb(c.r, c.g, c.b)).to_string(), Fg(Reset).to_string())
 74 |             }
 75 |             BgColour(c) if use_css_colours => {
 76 |                 (Bg(Rgb(c.r, c.g, c.b)).to_string(), Bg(Reset).to_string())
 77 |             }
 78 |             _ => continue,
 79 |         };
 80 |         opening.push_str(&open);
 81 |         closing.push(close);
 82 |     }
 83 | 
 84 |     // Assemble: opening sequences, the text, then the closings innermost-first.
 85 |     let mut result = opening;
 86 |     result.push_str(s);
 87 |     for close in closing.iter().rev() {
 88 |         result.push_str(close);
 89 |     }
 90 |     trace!("default_colour_map: output={result}");
 91 |     result
 92 | }
 93 | 
 94 | #[cfg(feature = "css_ext")]
 95 | /// Syntax-highlight `text` with syntect, treating `language` as a file extension (e.g. "rs").
 96 | ///
 97 | /// Returns the pieces of `text`, in order, each paired with its colours. An
 98 | /// extension syntect does not know is highlighted as plain text instead of panicking.
 99 | fn do_syntect_highlight<'t>(text: &'t str, language: &str) -> Vec<(html2text::TextStyle, &'t str)> {
100 |     use html2text::{Colour, TextStyle};
101 |     use syntect::{
102 |         easy::HighlightLines, highlighting::ThemeSet, parsing::SyntaxSet, util::LinesWithEndings,
103 |     };
104 | 
105 |     // Copy the r/g/b fields of a syntect colour into html2text's colour type.
106 |     fn convert(c: syntect::highlighting::Color) -> Colour {
107 |         Colour { r: c.r, g: c.g, b: c.b }
108 |     }
109 | 
110 |     let ps = SyntaxSet::load_defaults_newlines();
111 |     let ts = ThemeSet::load_defaults();
112 | 
113 |     let syntax = ps
114 |         .find_syntax_by_extension(language)
115 |         .unwrap_or_else(|| ps.find_syntax_plain_text());
116 |     let mut h = HighlightLines::new(syntax, &ts.themes["Solarized (dark)"]);
117 | 
118 |     let mut results = Vec::new();
119 |     for line in LinesWithEndings::from(text) {
120 |         let ranges = h.highlight_line(line, &ps).unwrap();
121 |         for (sty, piece) in ranges {
122 |             let style = TextStyle::colours(convert(sty.foreground), convert(sty.background));
123 |             results.push((style, piece));
124 |         }
125 |     }
126 |     results
127 | }
128 | 
129 | /// Apply the command-line `flags` to `config` and return the updated configuration.
130 | /// Generic over the decorator so it works for rich, plain and literal output.
131 | /// Exits with status 1 if both --link-footnotes and --no-link-footnotes were given.
132 | fn update_config<T: TextDecorator>(mut config: Config<T>, flags: &Flags) -> Config<T> {
133 |     if let Some(wrap_width) = flags.wrap_width {
134 |         config = config.max_wrap_width(wrap_width);
135 |     }
136 |     #[cfg(feature = "css")]
137 |     if flags.use_css {
138 |         config = config.use_doc_css();
139 |     }
140 |     #[cfg(feature = "css")]
141 |     if !flags.agent_css.is_empty() {
142 |         config = config.add_agent_css(&flags.agent_css).expect("Invalid CSS");
143 |     }
144 |     #[cfg(feature = "css_ext")]
145 |     if flags.syntax_highlight {
146 |         config = config
147 |             .register_highlighter("rs", Box::new(|text| do_syntect_highlight(text, "rs")))
148 |             .register_highlighter("html", Box::new(|text| do_syntect_highlight(text, "html")));
149 |     }
150 |     if flags.link_footnotes && flags.no_link_footnotes {
151 |         eprintln!("Error: can't specify both --link-footnotes and --no-link-footnotes");
152 |         std::process::exit(1);
153 |     } else if flags.link_footnotes || flags.no_link_footnotes {
154 |         config = config.link_footnotes(flags.link_footnotes);
155 |     }
156 |     if flags.pad_width {
157 |         config = config.pad_block_width();
158 |     }
159 |     config
160 | }
161 | 
162 | /// Convert the HTML read from `input` to a string, in the mode selected by `flags`.
163 | ///
164 | /// Mode precedence: colour (unix only), `--show-css`, `--show-dom`, `--show-render`,
165 | /// literal text when `literal` is true, and plain text otherwise.
166 | /// Panics if parsing or rendering fails.
167 | fn translate<R>(input: R, flags: Flags, literal: bool) -> String
168 | where
169 |     R: io::Read,
170 | {
171 |     #[cfg(unix)]
172 |     {
173 |         if flags.use_colour {
174 |             let conf = update_config(config::rich(), &flags);
175 |             #[cfg(feature = "css_ext")]
176 |             let conf = if flags.show_dom {
177 |                 conf.add_agent_css("body { display: x-raw-dom !important; }")
178 |                     .unwrap()
179 |             } else {
180 |                 conf
181 |             };
182 |             // The CSS-related flags only exist when the `css` feature is enabled.
183 |             #[cfg(feature = "css")]
184 |             let use_css_colours = !flags.ignore_css_colours;
185 |             #[cfg(not(feature = "css"))]
186 |             let use_css_colours = false;
187 |             #[cfg(feature = "css")]
188 |             let use_only_css = flags.use_only_css;
189 |             #[cfg(not(feature = "css"))]
190 |             let use_only_css = false;
191 |             return conf
192 |                 .coloured(input, flags.width, move |anns, s| {
193 |                     default_colour_map(anns, s, use_css_colours, use_only_css)
194 |                 })
195 |                 .unwrap();
196 |         }
197 |     }
198 |     #[cfg(feature = "css")]
199 |     {
200 |         if flags.show_css {
201 |             let conf = update_config(config::plain(), &flags);
202 |             let dom = conf.parse_html(input).unwrap();
203 |             return html2text::dom_to_parsed_style(&dom).expect("Parsing CSS");
204 |         }
205 |     }
206 |     if flags.show_dom {
207 |         let conf = update_config(config::plain(), &flags);
208 |         let dom = conf.parse_html(input).unwrap();
209 |         dom.as_dom_string()
210 |     } else if flags.show_render {
211 |         let conf = update_config(config::plain(), &flags);
212 |         let dom = conf.parse_html(input).unwrap();
213 |         let rendertree = conf.dom_to_render_tree(&dom).unwrap();
214 |         rendertree.to_string()
215 |     } else if literal {
216 |         let conf = update_config(config::with_decorator(TrivialDecorator::new()), &flags);
217 |         conf.string_from_read(input, flags.width).unwrap()
218 |     } else {
219 |         let conf = update_config(config::plain(), &flags);
220 |         conf.string_from_read(input, flags.width).unwrap()
221 |     }
222 | }
223 | 
224 | #[derive(Debug)]
225 | struct Flags { // Command-line options filled in by `main` and consumed by `translate`.
226 |     width: usize,              // -w/--width: column width to format to
227 |     wrap_width: Option<usize>, // -W/--wrap-width: maximum text wrap width, if given
228 |     #[allow(unused)]
229 |     use_colour: bool, // --colour: use ANSI terminal colours (option only registered on unix)
230 |     #[cfg(feature = "css")]
231 |     use_css: bool, // --css: enable CSS
232 |     #[cfg(feature = "css")]
233 |     ignore_css_colours: bool, // --ignore-css-colour
234 |     #[cfg(feature = "css")]
235 |     use_only_css: bool, // --only-css: don't use default non-CSS colours
236 |     show_dom: bool,    // --show-dom: output the parsed DOM
237 |     show_render: bool, // --show-render: output the render tree
238 |     #[cfg(feature = "css")]
239 |     show_css: bool, // --show-css: output the parsed CSS
240 |     pad_width: bool,         // --pad-width: pad blocks to their full width
241 |     link_footnotes: bool,    // --link-footnotes
242 |     no_link_footnotes: bool, // --no-link-footnotes
243 |     #[cfg(feature = "css_ext")]
244 |     syntax_highlight: bool, // --syntax
245 |     #[cfg(feature = "css")]
246 |     agent_css: String, // --agent-css: extra agent CSS rules, empty when not given
247 | }
248 | 
249 | /// Command-line entry point: parse the arguments, convert the HTML and write out the result.
250 | fn main() {
251 |     #[cfg(feature = "html_trace")]
252 |     env_logger::init();
253 | 
254 |     let mut infile: Option<String> = None;
255 |     let mut outfile: Option<String> = None;
256 |     let mut flags = Flags {
257 |         width: 80,
258 |         wrap_width: None,
259 |         use_colour: false,
260 |         #[cfg(feature = "css")]
261 |         use_css: false,
262 |         #[cfg(feature = "css")]
263 |         ignore_css_colours: false,
264 |         #[cfg(feature = "css")]
265 |         use_only_css: false,
266 |         show_dom: false,
267 |         show_render: false,
268 |         #[cfg(feature = "css")]
269 |         show_css: false,
270 |         #[cfg(feature = "css")]
271 |         agent_css: Default::default(),
272 |         pad_width: false,
273 |         link_footnotes: false,
274 |         no_link_footnotes: false,
275 |         #[cfg(feature = "css_ext")]
276 |         syntax_highlight: false,
277 |     };
278 |     let mut literal: bool = false;
279 | 
280 |     {
281 |         let mut ap = ArgumentParser::new();
282 |         ap.refer(&mut infile).add_argument(
283 |             "infile",
284 |             StoreOption,
285 |             "Input HTML file (default is standard input)",
286 |         );
287 |         ap.refer(&mut flags.width).add_option(
288 |             &["-w", "--width"],
289 |             Store,
290 |             "Column width to format to (default is 80)",
291 |         );
292 |         ap.refer(&mut flags.wrap_width).add_option(
293 |             &["-W", "--wrap-width"],
294 |             StoreOption,
295 |             "Maximum text wrap width (default same as width)",
296 |         );
297 |         ap.refer(&mut outfile).add_option(
298 |             &["-o", "--output"],
299 |             StoreOption,
300 |             "Output file (default is standard output)",
301 |         );
302 |         ap.refer(&mut literal).add_option(
303 |             &["-L", "--literal"],
304 |             StoreTrue,
305 |             "Output only literal text (no decorations)",
306 |         );
307 |         ap.refer(&mut flags.pad_width).add_option(
308 |             &["--pad-width"],
309 |             StoreTrue,
310 |             "Pad blocks to their full width",
311 |         );
312 |         ap.refer(&mut flags.link_footnotes).add_option(
313 |             &["--link-footnotes"],
314 |             StoreTrue,
315 |             "Enable link footnotes",
316 |         );
317 |         ap.refer(&mut flags.no_link_footnotes).add_option(
318 |             &["--no-link-footnotes"],
319 |             StoreTrue,
320 |             "Disable link footnotes",
321 |         );
322 |         #[cfg(unix)]
323 |         ap.refer(&mut flags.use_colour).add_option(
324 |             &["--colour"],
325 |             StoreTrue,
326 |             "Use ANSI terminal colours",
327 |         );
328 |         #[cfg(feature = "css")]
329 |         ap.refer(&mut flags.use_css)
330 |             .add_option(&["--css"], StoreTrue, "Enable CSS");
331 |         #[cfg(feature = "css")]
332 |         ap.refer(&mut flags.ignore_css_colours)
333 |             .add_option(&["--ignore-css-colour"], StoreTrue, "With --css, ignore CSS colour information (still hides elements with e.g. display: none)");
334 |         #[cfg(feature = "css")]
335 |         ap.refer(&mut flags.use_only_css).add_option(
336 |             &["--only-css"],
337 |             StoreTrue,
338 |             "Don't use default non-CSS colours",
339 |         );
340 |         ap.refer(&mut flags.show_dom).add_option(
341 |             &["--show-dom"],
342 |             StoreTrue,
343 |             "Show the parsed HTML DOM instead of rendered output",
344 |         );
345 |         ap.refer(&mut flags.show_render).add_option(
346 |             &["--show-render"],
347 |             StoreTrue,
348 |             "Show the computed render tree instead of the rendered output",
349 |         );
350 |         #[cfg(feature = "css")]
351 |         ap.refer(&mut flags.show_css).add_option(
352 |             &["--show-css"],
353 |             StoreTrue,
354 |             "Show the parsed CSS instead of rendered output",
355 |         );
356 |         #[cfg(feature = "css")]
357 |         ap.refer(&mut flags.agent_css).add_option(
358 |             &["--agent-css"],
359 |             Store,
360 |             "Add some CSS rules (to the agent stylesheet)",
361 |         );
362 |         #[cfg(feature = "css_ext")]
363 |         ap.refer(&mut flags.syntax_highlight).add_option(
364 |             &["--syntax"],
365 |             StoreTrue,
366 |             "Enable syntax highlighting of <pre> blocks.",
367 |         );
368 |         ap.parse_args_or_exit();
369 |     }
370 | 
371 |     // Read from the named file, or from standard input when no file was given.
372 |     let data = match infile {
373 |         None => {
374 |             let stdin = io::stdin();
375 |             translate(&mut stdin.lock(), flags, literal)
376 |         }
377 |         Some(name) => {
378 |             let mut file = std::fs::File::open(name).expect("Tried to open file");
379 |             translate(&mut file, flags, literal)
380 |         }
381 |     };
382 | 
383 |     // Write to the requested output file, or to standard output by default.
384 |     match outfile {
385 |         None => print!("{}", data),
386 |         Some(name) => {
387 |             let mut file = std::fs::File::create(name).expect("Tried to create file");
388 |             write!(file, "{}", data).unwrap();
389 |         }
390 |     };
391 | }
392 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | Possible log types:
  4 | 
  5 | - `[added]` for new features.
  6 | - `[changed]` for changes in existing functionality.
  7 | - `[deprecated]` for once-stable features removed in upcoming releases.
  8 | - `[removed]` for deprecated features removed in this release.
  9 | - `[fixed]` for any bug fixes.
 10 | - `[security]` to invite users to upgrade in case of vulnerabilities.
 11 | 
 12 | ### 0.16.5
 13 | 
 14 | - [fixed] Fix a subtraction underflow with rowspans and empty rows (thanks
 15 |   mdierksen)
 16 | 
 17 | ### 0.16.4
 18 | 
 19 | - [fixed] Further fix for RcDom::serialize() when there is a ``.
 20 | 
 21 | ### 0.16.3
 22 | 
 23 | - [fixed] RcDom::serialize() panicked.
 24 | - [changed] Bumped html5ever dependency
 25 | - [fixed] Fixed a subtraction underflow in the `html2term` example.
 26 | 
 27 | ### 0.16.2
 28 | 
 29 | - [fixed] Removed spurious `dbg!()` accidentally left in.
 30 | 
 31 | ### 0.16.1
 32 | 
 33 | - [added] Add `Config::empty_img_mode()` to configure how images with no alt text
 34 |   are handled.
 35 | 
 36 | ### 0.16.0
 37 | 
 38 | - [changed] Updated MSRV to 1.85.
 39 | - [fixed] Fix a panic in debug mode (subtraction underflow) with some table/rowspan
 40 |   edge cases (thanks mtorromeo)
 41 | 
 42 | ### 0.15.5
 43 | 
 44 | - [fixed] Fix an assertion and some missing styles with rowspan cells in rich mode.
 45 | 
 46 | ### 0.15.4
 47 | 
 48 | - [added] Support handling `rowspan` in tables.
 49 | 
 50 | ### 0.15.3
 51 | 
 52 | - [fixed] Parse `