├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── package.json ├── rollup.config.js ├── src ├── config.ts ├── index.ts ├── misc.spec.ts ├── parse.spec.ts ├── parse.ts ├── safeHtml.spec.ts ├── safeHtml.ts ├── test │ ├── issue_6.spec.ts │ └── issue_7.spec.ts ├── tokenize.spec.ts ├── tokenize.ts ├── types.ts ├── utils.ts └── walk.ts ├── tsconfig.json └── yarn.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | tab_width = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | max_line_length = 80 12 | 13 | [*.md] 14 | trim_trailing_whitespace = false 15 | indent_size = 4 16 | 17 | [*.go] 18 | indent_style = tab 19 | 20 | [*.py] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | .cache/ 3 | temp/ 4 | *.log 5 | .DS_Store 6 | .DS_Store* 7 | *~ 8 | .*~ 9 | *.swp 10 | .*.swp 11 | *.tgz 12 | .idea/ 13 | .vscode/ 14 | dist/ 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 acrazing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # html5parser 2 | 3 | `html5parser` is a super fast and tiny **HTML5** parser. 4 | 5 | ## Highlights 6 | 7 | - **[Fast](#benchmark)**: maybe the fastest one you can find on GitHub. 8 | - **Tiny**: the fully packaged bundle size is less than `5kb`. 9 | - **Cross platform**: works in modern browsers and Node.js. 10 | - **[HTML5 only](#warnings)**: anything not in the specification will be ignored. 11 | - **Accurate**: every token can be located in the source file. 12 | 13 | ## Table of Contents 14 | 15 | - [Installation](#installation) 16 | - [Quick start](#quick-start) 17 | - [API Reference](#api-reference) 18 | - Core 19 | - [tokenize()](#tokenizeinput) 20 | - [parse()](#parseinput) 21 | - Utilities 22 | - [walk()](#walkast-options) 23 | - [safeHtml()](#safehtmlinput) 24 | - [safeHtmlDefaultOptions](#safehtmldefaultoptions) 25 | - [Warnings](#warnings) 26 | - [Benchmark](#benchmark) 27 | 28 | ## Installation 29 | 30 | 1. Package manager 31 | 32 | ```bash 33 | npm i -S html5parser 34 | 35 | # or via yarn 36 | yarn add html5parser 37 | ``` 38 | 39 | 2. 
CDN 40 | 41 | ```html 42 | 43 | ``` 44 | 45 | ## Quick start 46 | 47 | [](https://codesandbox.io/s/keen-wind-2mpwr?fontsize=14&hidenavigation=1&theme=dark) 48 | 49 | ```typescript jsx 50 | import { parse, walk, SyntaxKind } from 'html5parser'; 51 | 52 | const ast = parse('