├── .editorconfig ├── .github └── workflows │ ├── lint.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── composer.json ├── convert.php ├── html2text.php ├── phpstan.neon.dist ├── phpunit.xml ├── src ├── Html2Text.php └── Html2TextException.php └── tests ├── Html2TextTest.php ├── failures └── .gitignore ├── html ├── anchors.html ├── basic.html ├── blockquotes.html ├── dom-processing.html ├── empty.html ├── full_email.html ├── huge-msoffice.html ├── images.html ├── invalid.html ├── lists.html ├── more-anchors.html ├── msoffice.html ├── nbsp.html ├── nested-divs.html ├── newlines.html ├── non-breaking-spaces.html ├── pre.html ├── table.html ├── test3.html ├── test4.html ├── utf8-example.html ├── windows-1252-example.html └── zero-width-non-joiners.html └── txt ├── anchors.no-links.txt ├── anchors.txt ├── basic.no-links.txt ├── basic.txt ├── blockquotes.txt ├── dom-processing.txt ├── empty.txt ├── full_email.txt ├── huge-msoffice.txt ├── images.txt ├── invalid.txt ├── lists.txt ├── more-anchors.txt ├── msoffice.txt ├── nbsp.txt ├── nested-divs.txt ├── newlines.txt ├── non-breaking-spaces.txt ├── pre.txt ├── table.txt ├── test3.txt ├── test4.txt ├── utf8-example.txt ├── windows-1252-example.txt └── zero-width-non-joiners.txt /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | charset = utf-8 10 | insert_final_newline = true 11 | trim_trailing_whitespace = true 12 | indent_style = tab 13 | indent_size = 4 14 | 15 | [*.md] 16 | indent_style = space 17 | indent_size = 2 18 | 19 | # don't add newlines to test files 20 | [tests/*] 21 | indent_style = tab 22 | trim_trailing_whitespace = false 23 | insert_final_newline = false 24 | 
-------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | on: 3 | - push 4 | jobs: 5 | lint: 6 | name: Lint 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v3 11 | - name: Setup PHP 12 | uses: shivammathur/setup-php@v2 13 | with: 14 | php-version: '7.4' 15 | tools: phplint 16 | - name: Check syntax 17 | run: phplint . 18 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | - push 4 | jobs: 5 | test: 6 | strategy: 7 | matrix: 8 | operating-system: 9 | - ubuntu-latest 10 | php-version: 11 | - '7.3' 12 | - '7.4' 13 | - '8.0' 14 | - '8.1' 15 | - '8.2' 16 | name: php ${{ matrix.php-version }} on ${{ matrix.operating-system }} 17 | runs-on: ${{ matrix.operating-system }} 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v3 21 | - name: Setup PHP 22 | uses: shivammathur/setup-php@v2 23 | with: 24 | php-version: ${{ matrix.php-version }} 25 | extensions: mbstring 26 | coverage: none 27 | - name: Get composer cache directory 28 | id: composer-cache 29 | run: echo "::set-output name=dir::$(composer config cache-files-dir)" 30 | - name: Setup composer cache 31 | uses: actions/cache@v3 32 | with: 33 | path: ${{ steps.composer-cache.outputs.dir }} 34 | key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }} 35 | restore-keys: ${{ runner.os }}-composer- 36 | - name: Install composer dependencies 37 | env: 38 | COMPOSER_AUTH: ${{ secrets.COMPOSER_AUTH }} 39 | run: composer install --no-ansi --no-interaction --no-scripts --no-progress --prefer-dist 40 | - name: Run tests 41 | run: vendor/bin/phpunit 42 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | tests/*.output 2 | *.sublime-project 3 | *.sublime-workspace 4 | vendor/ 5 | **/*.DS_Store 6 | .phpunit.result.cache 7 | composer.lock 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [1.1.0] - 2019-02-15 10 | ### Added 11 | - Zero-width non-joiners are now stripped to prevent output issues, similar to non-breaking whitespace 12 | 13 | ### Fixed 14 | - Fix namespace in composer [#67](https://github.com/soundasleep/html2text/pull/67) 15 | 16 | ## [1.0.0] - 2019-02-14 17 | ### Added 18 | - Added `drop_links` option to render links without the target href [#65](https://github.com/soundasleep/html2text/pull/65) 19 | 20 | ### Changed 21 | - **Important:** Changed namespace from `\Html2Text\Html2Text` to `\Soundasleep\Html2text` [#45](https://github.com/soundasleep/html2text/issues/45) 22 | - Treat non-breaking spaces consistently: never include them in output text [#64](https://github.com/soundasleep/html2text/pull/64) 23 | - Second argument to `convert()` is now an array, rather than boolean [#65](https://github.com/soundasleep/html2text/pull/65) 24 | - Optimise/improve newline & whitespace handling [#47](https://github.com/soundasleep/html2text/pull/47) 25 | - Upgrade PHP support to PHP 7.3+ 26 | - Upgrade PHPUnit to 7.x 27 | - Re-release project under MIT license [#58](https://github.com/soundasleep/html2text/issues/58) 28 | 29 | ## [0.5.0] - 2017-04-20 30 | ### Added 31 | - Add ignore_error optional argument 
[#63](https://github.com/soundasleep/html2text/pull/63) 32 | - Blockquote support [#50](https://github.com/soundasleep/html2text/pull/50) 33 | 34 | [Unreleased]: https://github.com/soundasleep/html2text/compare/1.1.0...HEAD 35 | [1.1.0]: https://github.com/soundasleep/html2text/compare/1.0.0...1.1.0 36 | [1.0.0]: https://github.com/soundasleep/html2text/compare/0.5.0...1.0.0 37 | [0.5.0]: https://github.com/soundasleep/html2text/compare/0.3.4...0.5.0 38 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jevon Wright 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |  [](https://packagist.org/packages/soundasleep/html2text) 2 | ========= 3 | 4 | html2text is a very simple script that uses DOM methods to convert HTML into a format similar to what would be 5 | rendered by a browser - perfect for places where you need a quick text representation. For example: 6 | 7 | ```html 8 | 9 |
This is some e-mail content. 14 | Even though it has whitespace and newlines, the e-mail converter 15 | will handle it correctly. 16 | 17 |
Even mismatched tags.
18 | 19 |In particular, it tries to maintain the following features: 21 | *
This is some e-mail content. 7 | Even though it has whitespace and newlines, the e-mail converter 8 | will handle it correctly. 9 | 10 |
Even mismatched tags.
11 | 12 |Another line
Yet another line
4 | Nest some block quotes with preformated text 5 |42 | Some ending text 43 | just to make sure -------------------------------------------------------------------------------- /tests/html/dom-processing.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | Hello 7 | 8 | -------------------------------------------------------------------------------- /tests/html/empty.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/soundasleep/html2text/83502b6f8f1aaef8e2e238897199d64f284b4af3/tests/html/empty.html -------------------------------------------------------------------------------- /tests/html/full_email.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 22 |6 | Here is the code 7 |20 | 21 | Some text and tags here 22 | 23 |8 | #include <stdlib.h> 9 | #include <stdio.h> 10 | 11 | int main(){ 12 | return 0; 13 | }; 14 | 15 |16 | 17 | Put some tags 18 | at the end 19 |24 | First line 25 |41 |Header 1
26 | Some text 27 |
28 | Some more text 29 |Paragraph tag!
30 |Header 2
31 |
32 |Header 3
33 | Some text 34 |Header 4
35 |36 | More quoted text! 37 |38 |Paragraph tag!
39 | Final line 40 |
25 |
|
38 |
44 | 45 | Hi Susan 46 |47 | 50 | 51 | |
52 |
61 | You have found 5 cats less than anyone else
62 |
63 |
68 | |
69 |
75 |
76 |
95 |
96 |
97 | Down the road77 |Across the hall 78 | 79 |Your achievements80 |
98 |
120 |
121 | 99 | 100 | Your last cat was found two days ago.101 |One type of cat is a kitten. 102 | 103 |
|
122 |
134 |
|
188 |
198 | 199 | Contact us 200 |201 |
202 | cats@cats.com |
215 |
3 | One:
4 |
7 | Two:
8 |
11 | Three:
12 |
15 | Four:
16 |