├── .github └── workflows │ └── test.yml ├── .gitignore ├── .tm_properties ├── LICENSE ├── README.md ├── SECURITY.md ├── config ├── rollup.config.browser.cjs.js ├── rollup.config.browser.es.js ├── rollup.config.browser.umd.js ├── rollup.config.cjs.js ├── rollup.config.es.js ├── rollup.config.iife.js ├── rollup.config.js └── rollup.config.umd.js ├── index.html ├── package-lock.json ├── package.json ├── src ├── collapse-whitespace.js ├── commonmark-rules.js ├── html-parser.js ├── node.js ├── root-node.js ├── rules.js ├── turndown.js └── utilities.js └── test ├── index.html ├── internals-test.js └── turndown-test.js /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node_version: [18, 20, 21] 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Use Node.js ${{ matrix.node_version }} 22 | uses: actions/setup-node@v4 23 | with: 24 | node-version: ${{ matrix.node_version }} 25 | cache: npm 26 | 27 | - name: Install dependencies 28 | run: npm ci 29 | 30 | - name: Run tests 31 | run: npm run test 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | lib 3 | node_modules 4 | npm-debug.log 5 | test/*browser.js 6 | -------------------------------------------------------------------------------- /.tm_properties: -------------------------------------------------------------------------------- 1 | [test/index.html] 2 | scopeAttributes = attr.keep-whitespace 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 
(c) 2017 Dom Christie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Turndown 2 | 3 | Convert HTML into Markdown with JavaScript. 4 | 5 | ## Project Updates 6 | * `to-markdown` has been renamed to Turndown. See the [migration guide](https://github.com/domchristie/to-markdown/wiki/Migrating-from-to-markdown-to-Turndown) for details. 7 | * Turndown repository has changed its URL to https://github.com/mixmark-io/turndown. 8 | 9 | ## Installation 10 | 11 | npm: 12 | 13 | ``` 14 | npm install turndown 15 | ``` 16 | 17 | Browser: 18 | 19 | ```html 20 | 21 | ``` 22 | 23 | For usage with RequireJS, UMD versions are located in `lib/turndown.umd.js` (for Node.js) and `lib/turndown.browser.umd.js` for browser usage. 
These files are generated when the npm package is published. To generate them manually, clone this repo and run `npm run build`. 24 | 25 | ## Usage 26 | 27 | ```js 28 | // For Node.js 29 | var TurndownService = require('turndown') 30 | 31 | var turndownService = new TurndownService() 32 | var markdown = turndownService.turndown('

<h1>Hello world!</h1>

') 33 | ``` 34 | 35 | Turndown also accepts DOM nodes as input (either element nodes, document nodes, or document fragment nodes): 36 | 37 | ```js 38 | var markdown = turndownService.turndown(document.getElementById('content')) 39 | ``` 40 | 41 | ## Options 42 | 43 | Options can be passed in to the constructor on instantiation. For example: 44 | 45 | ```js 46 | var turndownService = new TurndownService({ option: 'value' }) 47 | ``` 48 | 49 | | Option | Valid values | Default | 50 | | :-------------------- | :------------ | :------ | 51 | | `headingStyle` | `setext` or `atx` | `setext` | 52 | | `hr` | Any [Thematic break](http://spec.commonmark.org/0.27/#thematic-breaks) | `* * *` | 53 | | `bulletListMarker` | `-`, `+`, or `*` | `*` | 54 | | `codeBlockStyle` | `indented` or `fenced` | `indented` | 55 | | `fence` | ` ``` ` or `~~~` | ` ``` ` | 56 | | `emDelimiter` | `_` or `*` | `_` | 57 | | `strongDelimiter` | `**` or `__` | `**` | 58 | | `linkStyle` | `inlined` or `referenced` | `inlined` | 59 | | `linkReferenceStyle` | `full`, `collapsed`, or `shortcut` | `full` | 60 | | `preformattedCode` | `false` or [`true`](https://github.com/lucthev/collapse-whitespace/issues/16) | `false` | 61 | 62 | ### Advanced Options 63 | 64 | | Option | Valid values | Default | 65 | | :-------------------- | :------------ | :------ | 66 | | `blankReplacement` | rule replacement function | See **Special Rules** below | 67 | | `keepReplacement` | rule replacement function | See **Special Rules** below | 68 | | `defaultReplacement` | rule replacement function | See **Special Rules** below | 69 | 70 | ## Methods 71 | 72 | ### `addRule(key, rule)` 73 | 74 | The `key` parameter is a unique name for the rule for easy reference. 
Example: 75 | 76 | ```js 77 | turndownService.addRule('strikethrough', { 78 | filter: ['del', 's', 'strike'], 79 | replacement: function (content) { 80 | return '~' + content + '~' 81 | } 82 | }) 83 | ``` 84 | 85 | `addRule` returns the `TurndownService` instance for chaining. 86 | 87 | See **Extending with Rules** below. 88 | 89 | ### `keep(filter)` 90 | 91 | Determines which elements are to be kept and rendered as HTML. By default, Turndown does not keep any elements. The filter parameter works like a rule filter (see section on filters below). Example: 92 | 93 | ```js 94 | turndownService.keep(['del', 'ins']) 95 | turndownService.turndown('

<p>Hello <del>world</del><ins>World</ins></p>

') // 'Hello <del>world</del><ins>World</ins>' 96 | ``` 97 | 98 | This will render `<del>` and `<ins>` elements as HTML when converted. 99 | 100 | `keep` can be called multiple times, with the newly added keep filters taking precedence over older ones. Keep filters will be overridden by the standard CommonMark rules and any added rules. To keep elements that are normally handled by those rules, add a rule with the desired behaviour. 101 | 102 | `keep` returns the `TurndownService` instance for chaining. 103 | 104 | ### `remove(filter)` 105 | 106 | Determines which elements are to be removed altogether i.e. converted to an empty string. By default, Turndown does not remove any elements. The filter parameter works like a rule filter (see section on filters below). Example: 107 | 108 | ```js 109 | turndownService.remove('del') 110 | turndownService.turndown('

<p>Hello <del>world</del><ins>World</ins></p>

') // 'Hello World' 111 | ``` 112 | 113 | This will remove `<del>` elements (and contents). 114 | 115 | `remove` can be called multiple times, with the newly added remove filters taking precedence over older ones. Remove filters will be overridden by the keep filters, standard CommonMark rules, and any added rules. To remove elements that are normally handled by those rules, add a rule with the desired behaviour. 116 | 117 | `remove` returns the `TurndownService` instance for chaining. 118 | 119 | ### `use(plugin|array)` 120 | 121 | Use a plugin, or an array of plugins. Example: 122 | 123 | ```js 124 | // Import plugins from turndown-plugin-gfm 125 | var turndownPluginGfm = require('turndown-plugin-gfm') 126 | var gfm = turndownPluginGfm.gfm 127 | var tables = turndownPluginGfm.tables 128 | var strikethrough = turndownPluginGfm.strikethrough 129 | 130 | // Use the gfm plugin 131 | turndownService.use(gfm) 132 | 133 | // Use the table and strikethrough plugins only 134 | turndownService.use([tables, strikethrough]) 135 | ``` 136 | 137 | `use` returns the `TurndownService` instance for chaining. 138 | 139 | See **Plugins** below. 140 | 141 | ## Extending with Rules 142 | 143 | Turndown can be extended by adding **rules**. A rule is a plain JavaScript object with `filter` and `replacement` properties. For example, the rule for converting `<p>
` elements is as follows: 144 | 145 | ```js 146 | { 147 | filter: 'p', 148 | replacement: function (content) { 149 | return '\n\n' + content + '\n\n' 150 | } 151 | } 152 | ``` 153 | 154 | The filter selects `<p>
` elements, and the replacement function returns the `<p>
` contents separated by two new lines. 155 | 156 | ### `filter` String|Array|Function 157 | 158 | The filter property determines whether or not an element should be replaced with the rule's `replacement`. DOM nodes can be selected simply using a tag name or an array of tag names: 159 | 160 | * `filter: 'p'` will select `<p>
` elements 161 | * `filter: ['em', 'i']` will select `<em>` or `<i>` elements 162 | 163 | The tag names in the `filter` property are expected in lowercase, regardless of their form in the document. 164 | 165 | Alternatively, the filter can be a function that returns a boolean depending on whether a given node should be replaced. The function is passed a DOM node as well as the `TurndownService` options. For example, the following rule selects `<a>` elements (with an `href`) when the `linkStyle` option is `inlined`: 166 | 167 | ```js 168 | filter: function (node, options) { 169 | return ( 170 | options.linkStyle === 'inlined' && 171 | node.nodeName === 'A' && 172 | node.getAttribute('href') 173 | ) 174 | } 175 | ``` 176 | 177 | ### `replacement` Function 178 | 179 | The replacement function determines how an element should be converted. It should return the Markdown string for a given node. The function is passed the node's content, the node itself, and the `TurndownService` options. 180 | 181 | The following rule shows how `<em>` elements are converted: 182 | 183 | ```js 184 | rules.emphasis = { 185 | filter: ['em', 'i'], 186 | 187 | replacement: function (content, node, options) { 188 | return options.emDelimiter + content + options.emDelimiter 189 | } 190 | } 191 | ``` 192 | 193 | ### Special Rules 194 | 195 | **Blank rule** determines how to handle blank elements. It overrides every rule (even those added via `addRule`). A node is blank if it only contains whitespace, and it's not an `<a>`, `<td>`, `<th>` or a void element. Its behaviour can be customised using the `blankReplacement` option. 196 | 197 | **Keep rules** determine how to handle the elements that should not be converted, i.e. rendered as HTML in the Markdown output. By default, no elements are kept. Block-level elements will be separated from surrounding content by blank lines. Its behaviour can be customised using the `keepReplacement` option. 198 | 199 | **Remove rules** determine which elements to remove altogether.
By default, no elements are removed. 200 | 201 | **Default rule** handles nodes which are not recognised by any other rule. By default, it outputs the node's text content (separated by blank lines if it is a block-level element). Its behaviour can be customised with the `defaultReplacement` option. 202 | 203 | ### Rule Precedence 204 | 205 | Turndown iterates over the set of rules, and picks the first one that matches the `filter`. The following list describes the order of precedence: 206 | 207 | 1. Blank rule 208 | 2. Added rules (optional) 209 | 3. Commonmark rules 210 | 4. Keep rules 211 | 5. Remove rules 212 | 6. Default rule 213 | 214 | ## Plugins 215 | 216 | The plugin API provides a convenient way for developers to apply multiple extensions. A plugin is just a function that is called with the `TurndownService` instance. 217 | 218 | ## Escaping Markdown Characters 219 | 220 | Turndown uses backslashes (`\`) to escape Markdown characters in the HTML input. This ensures that these characters are not interpreted as Markdown when the output is compiled back to HTML. For example, the contents of `

<h1>1. Hello world</h1>

` needs to be escaped to `1\. Hello world`, otherwise it will be interpreted as a list item rather than a heading. 221 | 222 | To avoid the complexity and the performance implications of parsing the content of every HTML element as Markdown, Turndown uses a group of regular expressions to escape potential Markdown syntax. As a result, the escaping rules can be quite aggressive. 223 | 224 | ### Overriding `TurndownService.prototype.escape` 225 | 226 | If you are confident in doing so, you may want to customise the escaping behaviour to suit your needs. This can be done by overriding `TurndownService.prototype.escape`. `escape` takes the text of each HTML element and should return a version with the Markdown characters escaped. 227 | 228 | Note: text in code elements is never passed to`escape`. 229 | 230 | ## License 231 | 232 | turndown is copyright © 2017+ Dom Christie and released under the MIT license. 233 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | Remark | 6 | | ------- | ------------------ | -------| 7 | | 7.0.x | :white_check_mark: | | 8 | | < 7.0 | :x: | jsdom | 9 | 10 | ## DOM Parser Notice 11 | 12 | Turndown input is 13 | * either a string that is passed to a DOM parser 14 | * or an `HTMLElement` referring to an already built DOM tree 15 | 16 | When a string input is passed, the DOM parser is picked as follows. 17 | * For web browser usage, the corresponding native web parser is used, which is typically `DOMImplementation`. 18 | * For standalone usage, custom [domino](https://github.com/mixmark-io/domino) parser is used. 19 | 20 | Please note that a malicious string input can cause undesired effects within the DOM parser 21 | even before Turndown code starts processing the document itself. 
22 | These effects especially include script execution and downloading external resources. 23 | 24 | For critical applications with untrusted inputs, you should consider either cleaning up 25 | the input with a dedicated HTML sanitizer library or using an alternate DOM parser that 26 | better suits your security needs. 27 | 28 | In particular, Turndown version 6 and below used [jsdom](https://github.com/jsdom/jsdom) as the 29 | standalone DOM parser. As `jsdom` is a fully featured DOM parser with script execution support, 30 | it imposes an inherent security risk. We recommend upgrading to version 7, which uses custom 31 | [domino](https://github.com/mixmark-io/domino) that doesn't even support executing scripts nor 32 | downloading external resources. 33 | 34 | ## Reporting a Vulnerability 35 | 36 | If you've found a vulnerability, please report it to disclosure@orchitech.cz and we'll get back to you. 37 | -------------------------------------------------------------------------------- /config/rollup.config.browser.cjs.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.browser.cjs.js', 6 | format: 'cjs', 7 | exports: 'auto' 8 | }, 9 | browser: true 10 | }) 11 | -------------------------------------------------------------------------------- /config/rollup.config.browser.es.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.browser.es.js', 6 | format: 'es' 7 | }, 8 | browser: true 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.browser.umd.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | 
file: 'lib/turndown.browser.umd.js', 6 | format: 'umd', 7 | name: 'TurndownService' 8 | }, 9 | browser: true 10 | }) 11 | -------------------------------------------------------------------------------- /config/rollup.config.cjs.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.cjs.js', 6 | format: 'cjs', 7 | exports: 'auto' 8 | }, 9 | browser: false 10 | }) 11 | -------------------------------------------------------------------------------- /config/rollup.config.es.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.es.js', 6 | format: 'es' 7 | }, 8 | browser: false 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.iife.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'dist/turndown.js', 6 | format: 'iife', 7 | name: 'TurndownService' 8 | }, 9 | browser: true 10 | }) 11 | -------------------------------------------------------------------------------- /config/rollup.config.js: -------------------------------------------------------------------------------- 1 | import commonjs from '@rollup/plugin-commonjs' 2 | import replace from '@rollup/plugin-replace' 3 | import resolve from '@rollup/plugin-node-resolve' 4 | 5 | export default function (config) { 6 | return { 7 | input: 'src/turndown.js', 8 | output: config.output, 9 | external: ['@mixmark-io/domino'], 10 | plugins: [ 11 | commonjs(), 12 | replace({ 'process.browser': JSON.stringify(!!config.browser), preventAssignment: true }), 13 | resolve() 14 | ] 15 | } 16 | } 17 | 
-------------------------------------------------------------------------------- /config/rollup.config.umd.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.umd.js', 6 | format: 'umd', 7 | name: 'TurndownService' 8 | } 9 | }) 10 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Turndown Demo 6 | 7 | 112 | 113 | 114 | 115 |
116 |

turndown

117 |
Source on GitHub 118 |
119 |
120 |
121 | 122 | 149 |
150 |
151 | 152 | 153 |
154 |
155 | 156 |
157 |
158 |
159 | 160 | 164 |
165 | 166 |
167 | 168 | 173 |
174 | 175 |
176 | 177 | 182 |
183 | 184 |
185 | 186 | 190 |
191 | 192 |
193 | 194 | 198 |
199 | 200 |
201 | 202 | 206 |
207 | 208 |
209 | 210 | 214 |
215 | 216 |
217 | 218 | 222 |
223 | 224 |
225 | 226 | 231 |
232 |
233 |
234 | 235 |

Turndown is copyright © 2017 Dom Christie and is released under the MIT license

236 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "turndown", 3 | "description": "A library that converts HTML to Markdown", 4 | "version": "7.2.0", 5 | "author": "Dom Christie", 6 | "main": "lib/turndown.cjs.js", 7 | "module": "lib/turndown.es.js", 8 | "jsnext:main": "lib/turndown.es.js", 9 | "browser": { 10 | "@mixmark-io/domino": false, 11 | "./lib/turndown.cjs.js": "./lib/turndown.browser.cjs.js", 12 | "./lib/turndown.es.js": "./lib/turndown.browser.es.js", 13 | "./lib/turndown.umd.js": "./lib/turndown.browser.umd.js" 14 | }, 15 | "dependencies": { 16 | "@mixmark-io/domino": "^2.2.0" 17 | }, 18 | "devDependencies": { 19 | "@rollup/plugin-commonjs": "^19.0.0", 20 | "@rollup/plugin-node-resolve": "13.0.0", 21 | "@rollup/plugin-replace": "2.4.2", 22 | "browserify": "17.0.0", 23 | "rewire": "^6.0.0", 24 | "rollup": "2.52.3", 25 | "standard": "^10.0.3", 26 | "turndown-attendant": "0.0.3" 27 | }, 28 | "files": [ 29 | "lib", 30 | "dist" 31 | ], 32 | "keywords": [ 33 | "converter", 34 | "html", 35 | "markdown" 36 | ], 37 | "license": "MIT", 38 | "repository": { 39 | "type": "git", 40 | "url": "https://github.com/mixmark-io/turndown.git" 41 | }, 42 | "scripts": { 43 | "build": "npm run build-cjs && npm run build-es && npm run build-umd && npm run build-iife", 44 | "build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js", 45 | "build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js", 46 | "build-umd": "rollup -c config/rollup.config.umd.js && rollup -c config/rollup.config.browser.umd.js", 47 | "build-iife": "rollup -c config/rollup.config.iife.js", 48 | "build-test": "browserify test/turndown-test.js --outfile test/turndown-test.browser.js", 49 | "prepare": "npm run build", 50 | "test": "npm run build && npm run 
build-test && standard ./src/**/*.js && node test/internals-test.js && node test/turndown-test.js" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/collapse-whitespace.js: -------------------------------------------------------------------------------- 1 | /** 2 | * The collapseWhitespace function is adapted from collapse-whitespace 3 | * by Luc Thevenard. 4 | * 5 | * The MIT License (MIT) 6 | * 7 | * Copyright (c) 2014 Luc Thevenard 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to deal 11 | * in the Software without restriction, including without limitation the rights 12 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | * copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | * THE SOFTWARE. 26 | */ 27 | 28 | /** 29 | * collapseWhitespace(options) removes extraneous whitespace from the given element.
 *
 * The subtree under `options.element` is walked in document order and
 * modified in place: text nodes are rewritten or removed, and nodes that are
 * neither text/CDATA nor elements are removed. Nothing is returned.
 *
 * @param {Object} options
 * @param {Node} options.element - root of the subtree to process
 * @param {Function} options.isBlock - called with an element node; a truthy
 *   result makes the element a boundary at which adjacent spaces are trimmed
 * @param {Function} options.isVoid - called with an element node; a truthy
 *   result protects the leading space of the text that follows it
 * @param {Function} [options.isPre] - called with a node; when omitted, a node
 *   counts as preformatted if its nodeName is 'PRE'
 */
function collapseWhitespace (options) {
  var element = options.element
  var isBlock = options.isBlock
  var isVoid = options.isVoid
  var isPre = options.isPre || function (node) {
    return node.nodeName === 'PRE'
  }

  // Nothing to do for a childless root, and a preformatted root is left as is.
  if (!element.firstChild || isPre(element)) return

  // Most recent text node that was kept; its trailing space may still be
  // trimmed once we know what follows it.
  var prevText = null
  // When true, the next text node keeps its leading space.
  var keepLeadingWs = false

  var prev = null
  var node = next(prev, element, isPre)

  // The walk ends when the traversal climbs back up to the root element.
  while (node !== element) {
    if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
      // Collapse every run of spaces, CRs, LFs and tabs into one space.
      var text = node.data.replace(/[ \r\n\t]+/g, ' ')

      // Drop the leading space when there is no preceding text in the current
      // run or that text already ends with a space, unless it is protected.
      if ((!prevText || / $/.test(prevText.data)) &&
          !keepLeadingWs && text[0] === ' ') {
        text = text.substr(1)
      }

      // `text` might be empty at this point.
      if (!text) {
        // remove() hands back the node to continue from, so skip the
        // next()/prev bookkeeping at the bottom of the loop.
        node = remove(node)
        continue
      }

      node.data = text

      prevText = node
    } else if (node.nodeType === 1) { // Node.ELEMENT_NODE
      if (isBlock(node) || node.nodeName === 'BR') {
        // A block or <br> ends the current run of text: trim the space just
        // before it and start the next run afresh.
        if (prevText) {
          prevText.data = prevText.data.replace(/ $/, '')
        }

        prevText = null
        keepLeadingWs = false
      } else if (isVoid(node) || isPre(node)) {
        // Avoid trimming space around non-block, non-BR void elements and inline PRE.
        prevText = null
        keepLeadingWs = true
      } else if (prevText) {
        // Drop protection if set previously.
        keepLeadingWs = false
      }
    } else {
      // Any other node type is dropped from the tree.
      node = remove(node)
      continue
    }

    var nextNode = next(prev, node, isPre)
    prev = node
    node = nextNode
  }

  // Trim the trailing space of the last text node, and drop the node entirely
  // if that leaves it empty.
  if (prevText) {
    prevText.data = prevText.data.replace(/ $/, '')
    if (!prevText.data) {
      remove(prevText)
    }
  }
}

/**
 * remove(node) removes the given node from the DOM and returns the
 * next node in the sequence.
104 | * 105 | * @param {Node} node 106 | * @return {Node} node 107 | */ 108 | function remove (node) { 109 | var next = node.nextSibling || node.parentNode 110 | 111 | node.parentNode.removeChild(node) 112 | 113 | return next 114 | } 115 | 116 | /** 117 | * next(prev, current, isPre) returns the next node in the sequence, given the 118 | * current and previous nodes. 119 | * 120 | * @param {Node} prev 121 | * @param {Node} current 122 | * @param {Function} isPre 123 | * @return {Node} 124 | */ 125 | function next (prev, current, isPre) { 126 | if ((prev && prev.parentNode === current) || isPre(current)) { 127 | return current.nextSibling || current.parentNode 128 | } 129 | 130 | return current.firstChild || current.nextSibling || current.parentNode 131 | } 132 | 133 | export default collapseWhitespace 134 | -------------------------------------------------------------------------------- /src/commonmark-rules.js: -------------------------------------------------------------------------------- 1 | import { repeat } from './utilities' 2 | 3 | var rules = {} 4 | 5 | rules.paragraph = { 6 | filter: 'p', 7 | 8 | replacement: function (content) { 9 | return '\n\n' + content + '\n\n' 10 | } 11 | } 12 | 13 | rules.lineBreak = { 14 | filter: 'br', 15 | 16 | replacement: function (content, node, options) { 17 | return options.br + '\n' 18 | } 19 | } 20 | 21 | rules.heading = { 22 | filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], 23 | 24 | replacement: function (content, node, options) { 25 | var hLevel = Number(node.nodeName.charAt(1)) 26 | 27 | if (options.headingStyle === 'setext' && hLevel < 3) { 28 | var underline = repeat((hLevel === 1 ? 
'=' : '-'), content.length) 29 | return ( 30 | '\n\n' + content + '\n' + underline + '\n\n' 31 | ) 32 | } else { 33 | return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n' 34 | } 35 | } 36 | } 37 | 38 | rules.blockquote = { 39 | filter: 'blockquote', 40 | 41 | replacement: function (content) { 42 | content = content.replace(/^\n+|\n+$/g, '') 43 | content = content.replace(/^/gm, '> ') 44 | return '\n\n' + content + '\n\n' 45 | } 46 | } 47 | 48 | rules.list = { 49 | filter: ['ul', 'ol'], 50 | 51 | replacement: function (content, node) { 52 | var parent = node.parentNode 53 | if (parent.nodeName === 'LI' && parent.lastElementChild === node) { 54 | return '\n' + content 55 | } else { 56 | return '\n\n' + content + '\n\n' 57 | } 58 | } 59 | } 60 | 61 | rules.listItem = { 62 | filter: 'li', 63 | 64 | replacement: function (content, node, options) { 65 | content = content 66 | .replace(/^\n+/, '') // remove leading newlines 67 | .replace(/\n+$/, '\n') // replace trailing newlines with just a single one 68 | .replace(/\n/gm, '\n ') // indent 69 | var prefix = options.bulletListMarker + ' ' 70 | var parent = node.parentNode 71 | if (parent.nodeName === 'OL') { 72 | var start = parent.getAttribute('start') 73 | var index = Array.prototype.indexOf.call(parent.children, node) 74 | prefix = (start ? Number(start) + index : index + 1) + '. ' 75 | } 76 | return ( 77 | prefix + content + (node.nextSibling && !/\n$/.test(content) ? 
'\n' : '') 78 | ) 79 | } 80 | } 81 | 82 | rules.indentedCodeBlock = { 83 | filter: function (node, options) { 84 | return ( 85 | options.codeBlockStyle === 'indented' && 86 | node.nodeName === 'PRE' && 87 | node.firstChild && 88 | node.firstChild.nodeName === 'CODE' 89 | ) 90 | }, 91 | 92 | replacement: function (content, node, options) { 93 | return ( 94 | '\n\n ' + 95 | node.firstChild.textContent.replace(/\n/g, '\n ') + 96 | '\n\n' 97 | ) 98 | } 99 | } 100 | 101 | rules.fencedCodeBlock = { 102 | filter: function (node, options) { 103 | return ( 104 | options.codeBlockStyle === 'fenced' && 105 | node.nodeName === 'PRE' && 106 | node.firstChild && 107 | node.firstChild.nodeName === 'CODE' 108 | ) 109 | }, 110 | 111 | replacement: function (content, node, options) { 112 | var className = node.firstChild.getAttribute('class') || '' 113 | var language = (className.match(/language-(\S+)/) || [null, ''])[1] 114 | var code = node.firstChild.textContent 115 | 116 | var fenceChar = options.fence.charAt(0) 117 | var fenceSize = 3 118 | var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm') 119 | 120 | var match 121 | while ((match = fenceInCodeRegex.exec(code))) { 122 | if (match[0].length >= fenceSize) { 123 | fenceSize = match[0].length + 1 124 | } 125 | } 126 | 127 | var fence = repeat(fenceChar, fenceSize) 128 | 129 | return ( 130 | '\n\n' + fence + language + '\n' + 131 | code.replace(/\n$/, '') + 132 | '\n' + fence + '\n\n' 133 | ) 134 | } 135 | } 136 | 137 | rules.horizontalRule = { 138 | filter: 'hr', 139 | 140 | replacement: function (content, node, options) { 141 | return '\n\n' + options.hr + '\n\n' 142 | } 143 | } 144 | 145 | rules.inlineLink = { 146 | filter: function (node, options) { 147 | return ( 148 | options.linkStyle === 'inlined' && 149 | node.nodeName === 'A' && 150 | node.getAttribute('href') 151 | ) 152 | }, 153 | 154 | replacement: function (content, node) { 155 | var href = node.getAttribute('href') 156 | if (href) href = 
href.replace(/([()])/g, '\\$1') 157 | var title = cleanAttribute(node.getAttribute('title')) 158 | if (title) title = ' "' + title.replace(/"/g, '\\"') + '"' 159 | return '[' + content + '](' + href + title + ')' 160 | } 161 | } 162 | 163 | rules.referenceLink = { 164 | filter: function (node, options) { 165 | return ( 166 | options.linkStyle === 'referenced' && 167 | node.nodeName === 'A' && 168 | node.getAttribute('href') 169 | ) 170 | }, 171 | 172 | replacement: function (content, node, options) { 173 | var href = node.getAttribute('href') 174 | var title = cleanAttribute(node.getAttribute('title')) 175 | if (title) title = ' "' + title + '"' 176 | var replacement 177 | var reference 178 | 179 | switch (options.linkReferenceStyle) { 180 | case 'collapsed': 181 | replacement = '[' + content + '][]' 182 | reference = '[' + content + ']: ' + href + title 183 | break 184 | case 'shortcut': 185 | replacement = '[' + content + ']' 186 | reference = '[' + content + ']: ' + href + title 187 | break 188 | default: 189 | var id = this.references.length + 1 190 | replacement = '[' + content + '][' + id + ']' 191 | reference = '[' + id + ']: ' + href + title 192 | } 193 | 194 | this.references.push(reference) 195 | return replacement 196 | }, 197 | 198 | references: [], 199 | 200 | append: function (options) { 201 | var references = '' 202 | if (this.references.length) { 203 | references = '\n\n' + this.references.join('\n') + '\n\n' 204 | this.references = [] // Reset references 205 | } 206 | return references 207 | } 208 | } 209 | 210 | rules.emphasis = { 211 | filter: ['em', 'i'], 212 | 213 | replacement: function (content, node, options) { 214 | if (!content.trim()) return '' 215 | return options.emDelimiter + content + options.emDelimiter 216 | } 217 | } 218 | 219 | rules.strong = { 220 | filter: ['strong', 'b'], 221 | 222 | replacement: function (content, node, options) { 223 | if (!content.trim()) return '' 224 | return options.strongDelimiter + content + 
options.strongDelimiter 225 | } 226 | } 227 | 228 | rules.code = { 229 | filter: function (node) { 230 | var hasSiblings = node.previousSibling || node.nextSibling 231 | var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings 232 | 233 | return node.nodeName === 'CODE' && !isCodeBlock 234 | }, 235 | 236 | replacement: function (content) { 237 | if (!content) return '' 238 | content = content.replace(/\r?\n|\r/g, ' ') 239 | 240 | var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '' 241 | var delimiter = '`' 242 | var matches = content.match(/`+/gm) || [] 243 | while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`' 244 | 245 | return delimiter + extraSpace + content + extraSpace + delimiter 246 | } 247 | } 248 | 249 | rules.image = { 250 | filter: 'img', 251 | 252 | replacement: function (content, node) { 253 | var alt = cleanAttribute(node.getAttribute('alt')) 254 | var src = node.getAttribute('src') || '' 255 | var title = cleanAttribute(node.getAttribute('title')) 256 | var titlePart = title ? ' "' + title + '"' : '' 257 | return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : '' 258 | } 259 | } 260 | 261 | function cleanAttribute (attribute) { 262 | return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : '' 263 | } 264 | 265 | export default rules 266 | -------------------------------------------------------------------------------- /src/html-parser.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Set up window for Node.js 3 | */ 4 | 5 | var root = (typeof window !== 'undefined' ? 
window : {}) 6 | 7 | /* 8 | * Parsing HTML strings 9 | */ 10 | 11 | function canParseHTMLNatively () { 12 | var Parser = root.DOMParser 13 | var canParse = false 14 | 15 | // Adapted from https://gist.github.com/1129031 16 | // Firefox/Opera/IE throw errors on unsupported types 17 | try { 18 | // WebKit returns null on unsupported types 19 | if (new Parser().parseFromString('', 'text/html')) { 20 | canParse = true 21 | } 22 | } catch (e) {} 23 | 24 | return canParse 25 | } 26 | 27 | function createHTMLParser () { 28 | var Parser = function () {} 29 | 30 | if (process.browser) { 31 | if (shouldUseActiveX()) { 32 | Parser.prototype.parseFromString = function (string) { 33 | var doc = new window.ActiveXObject('htmlfile') 34 | doc.designMode = 'on' // disable on-page scripts 35 | doc.open() 36 | doc.write(string) 37 | doc.close() 38 | return doc 39 | } 40 | } else { 41 | Parser.prototype.parseFromString = function (string) { 42 | var doc = document.implementation.createHTMLDocument('') 43 | doc.open() 44 | doc.write(string) 45 | doc.close() 46 | return doc 47 | } 48 | } 49 | } else { 50 | var domino = require('@mixmark-io/domino') 51 | Parser.prototype.parseFromString = function (string) { 52 | return domino.createDocument(string) 53 | } 54 | } 55 | return Parser 56 | } 57 | 58 | function shouldUseActiveX () { 59 | var useActiveX = false 60 | try { 61 | document.implementation.createHTMLDocument('').open() 62 | } catch (e) { 63 | if (root.ActiveXObject) useActiveX = true 64 | } 65 | return useActiveX 66 | } 67 | 68 | export default canParseHTMLNatively() ? 
root.DOMParser : createHTMLParser() 69 | -------------------------------------------------------------------------------- /src/node.js: -------------------------------------------------------------------------------- 1 | import { isBlock, isVoid, hasVoid, isMeaningfulWhenBlank, hasMeaningfulWhenBlank } from './utilities' 2 | 3 | export default function Node (node, options) { 4 | node.isBlock = isBlock(node) 5 | node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode 6 | node.isBlank = isBlank(node) 7 | node.flankingWhitespace = flankingWhitespace(node, options) 8 | return node 9 | } 10 | 11 | function isBlank (node) { 12 | return ( 13 | !isVoid(node) && 14 | !isMeaningfulWhenBlank(node) && 15 | /^\s*$/i.test(node.textContent) && 16 | !hasVoid(node) && 17 | !hasMeaningfulWhenBlank(node) 18 | ) 19 | } 20 | 21 | function flankingWhitespace (node, options) { 22 | if (node.isBlock || (options.preformattedCode && node.isCode)) { 23 | return { leading: '', trailing: '' } 24 | } 25 | 26 | var edges = edgeWhitespace(node.textContent) 27 | 28 | // abandon leading ASCII WS if left-flanked by ASCII WS 29 | if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) { 30 | edges.leading = edges.leadingNonAscii 31 | } 32 | 33 | // abandon trailing ASCII WS if right-flanked by ASCII WS 34 | if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) { 35 | edges.trailing = edges.trailingNonAscii 36 | } 37 | 38 | return { leading: edges.leading, trailing: edges.trailing } 39 | } 40 | 41 | function edgeWhitespace (string) { 42 | var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/) 43 | return { 44 | leading: m[1], // whole string for whitespace-only strings 45 | leadingAscii: m[2], 46 | leadingNonAscii: m[3], 47 | trailing: m[4], // empty for whitespace-only strings 48 | trailingNonAscii: m[5], 49 | trailingAscii: m[6] 50 | } 51 | } 52 | 53 | function isFlankedByWhitespace (side, node, options) { 54 | var sibling 
55 | var regExp 56 | var isFlanked 57 | 58 | if (side === 'left') { 59 | sibling = node.previousSibling 60 | regExp = / $/ 61 | } else { 62 | sibling = node.nextSibling 63 | regExp = /^ / 64 | } 65 | 66 | if (sibling) { 67 | if (sibling.nodeType === 3) { 68 | isFlanked = regExp.test(sibling.nodeValue) 69 | } else if (options.preformattedCode && sibling.nodeName === 'CODE') { 70 | isFlanked = false 71 | } else if (sibling.nodeType === 1 && !isBlock(sibling)) { 72 | isFlanked = regExp.test(sibling.textContent) 73 | } 74 | } 75 | return isFlanked 76 | } 77 | -------------------------------------------------------------------------------- /src/root-node.js: -------------------------------------------------------------------------------- 1 | import collapseWhitespace from './collapse-whitespace' 2 | import HTMLParser from './html-parser' 3 | import { isBlock, isVoid } from './utilities' 4 | 5 | export default function RootNode (input, options) { 6 | var root 7 | if (typeof input === 'string') { 8 | var doc = htmlParser().parseFromString( 9 | // DOM parsers arrange elements in the <head> and <body>. 10 | // Wrapping in a custom element ensures elements are reliably arranged in 11 | // a single element. 12 | '' + input + '', 13 | 'text/html' 14 | ) 15 | root = doc.getElementById('turndown-root') 16 | } else { 17 | root = input.cloneNode(true) 18 | } 19 | collapseWhitespace({ 20 | element: root, 21 | isBlock: isBlock, 22 | isVoid: isVoid, 23 | isPre: options.preformattedCode ? 
isPreOrCode : null 24 | }) 25 | 26 | return root 27 | } 28 | 29 | var _htmlParser 30 | function htmlParser () { 31 | _htmlParser = _htmlParser || new HTMLParser() 32 | return _htmlParser 33 | } 34 | 35 | function isPreOrCode (node) { 36 | return node.nodeName === 'PRE' || node.nodeName === 'CODE' 37 | } 38 | -------------------------------------------------------------------------------- /src/rules.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Manages a collection of rules used to convert HTML to Markdown 3 | */ 4 | 5 | export default function Rules (options) { 6 | this.options = options 7 | this._keep = [] 8 | this._remove = [] 9 | 10 | this.blankRule = { 11 | replacement: options.blankReplacement 12 | } 13 | 14 | this.keepReplacement = options.keepReplacement 15 | 16 | this.defaultRule = { 17 | replacement: options.defaultReplacement 18 | } 19 | 20 | this.array = [] 21 | for (var key in options.rules) this.array.push(options.rules[key]) 22 | } 23 | 24 | Rules.prototype = { 25 | add: function (key, rule) { 26 | this.array.unshift(rule) 27 | }, 28 | 29 | keep: function (filter) { 30 | this._keep.unshift({ 31 | filter: filter, 32 | replacement: this.keepReplacement 33 | }) 34 | }, 35 | 36 | remove: function (filter) { 37 | this._remove.unshift({ 38 | filter: filter, 39 | replacement: function () { 40 | return '' 41 | } 42 | }) 43 | }, 44 | 45 | forNode: function (node) { 46 | if (node.isBlank) return this.blankRule 47 | var rule 48 | 49 | if ((rule = findRule(this.array, node, this.options))) return rule 50 | if ((rule = findRule(this._keep, node, this.options))) return rule 51 | if ((rule = findRule(this._remove, node, this.options))) return rule 52 | 53 | return this.defaultRule 54 | }, 55 | 56 | forEach: function (fn) { 57 | for (var i = 0; i < this.array.length; i++) fn(this.array[i], i) 58 | } 59 | } 60 | 61 | function findRule (rules, node, options) { 62 | for (var i = 0; i < rules.length; i++) { 63 | var 
rule = rules[i] 64 | if (filterValue(rule, node, options)) return rule 65 | } 66 | return void 0 67 | } 68 | 69 | function filterValue (rule, node, options) { 70 | var filter = rule.filter 71 | if (typeof filter === 'string') { 72 | if (filter === node.nodeName.toLowerCase()) return true 73 | } else if (Array.isArray(filter)) { 74 | if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true 75 | } else if (typeof filter === 'function') { 76 | if (filter.call(rule, node, options)) return true 77 | } else { 78 | throw new TypeError('`filter` needs to be a string, array, or function') 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/turndown.js: -------------------------------------------------------------------------------- 1 | import COMMONMARK_RULES from './commonmark-rules' 2 | import Rules from './rules' 3 | import { extend, trimLeadingNewlines, trimTrailingNewlines } from './utilities' 4 | import RootNode from './root-node' 5 | import Node from './node' 6 | var reduce = Array.prototype.reduce 7 | var escapes = [ 8 | [/\\/g, '\\\\'], 9 | [/\*/g, '\\*'], 10 | [/^-/g, '\\-'], 11 | [/^\+ /g, '\\+ '], 12 | [/^(=+)/g, '\\$1'], 13 | [/^(#{1,6}) /g, '\\$1 '], 14 | [/`/g, '\\`'], 15 | [/^~~~/g, '\\~~~'], 16 | [/\[/g, '\\['], 17 | [/\]/g, '\\]'], 18 | [/^>/g, '\\>'], 19 | [/_/g, '\\_'], 20 | [/^(\d+)\. /g, '$1\\. '] 21 | ] 22 | 23 | export default function TurndownService (options) { 24 | if (!(this instanceof TurndownService)) return new TurndownService(options) 25 | 26 | var defaults = { 27 | rules: COMMONMARK_RULES, 28 | headingStyle: 'setext', 29 | hr: '* * *', 30 | bulletListMarker: '*', 31 | codeBlockStyle: 'indented', 32 | fence: '```', 33 | emDelimiter: '_', 34 | strongDelimiter: '**', 35 | linkStyle: 'inlined', 36 | linkReferenceStyle: 'full', 37 | br: ' ', 38 | preformattedCode: false, 39 | blankReplacement: function (content, node) { 40 | return node.isBlock ? 
'\n\n' : '' 41 | }, 42 | keepReplacement: function (content, node) { 43 | return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML 44 | }, 45 | defaultReplacement: function (content, node) { 46 | return node.isBlock ? '\n\n' + content + '\n\n' : content 47 | } 48 | } 49 | this.options = extend({}, defaults, options) 50 | this.rules = new Rules(this.options) 51 | } 52 | 53 | TurndownService.prototype = { 54 | /** 55 | * The entry point for converting a string or DOM node to Markdown 56 | * @public 57 | * @param {String|HTMLElement} input The string or DOM node to convert 58 | * @returns A Markdown representation of the input 59 | * @type String 60 | */ 61 | 62 | turndown: function (input) { 63 | if (!canConvert(input)) { 64 | throw new TypeError( 65 | input + ' is not a string, or an element/document/fragment node.' 66 | ) 67 | } 68 | 69 | if (input === '') return '' 70 | 71 | var output = process.call(this, new RootNode(input, this.options)) 72 | return postProcess.call(this, output) 73 | }, 74 | 75 | /** 76 | * Add one or more plugins 77 | * @public 78 | * @param {Function|Array} plugin The plugin or array of plugins to add 79 | * @returns The Turndown instance for chaining 80 | * @type Object 81 | */ 82 | 83 | use: function (plugin) { 84 | if (Array.isArray(plugin)) { 85 | for (var i = 0; i < plugin.length; i++) this.use(plugin[i]) 86 | } else if (typeof plugin === 'function') { 87 | plugin(this) 88 | } else { 89 | throw new TypeError('plugin must be a Function or an Array of Functions') 90 | } 91 | return this 92 | }, 93 | 94 | /** 95 | * Adds a rule 96 | * @public 97 | * @param {String} key The unique key of the rule 98 | * @param {Object} rule The rule 99 | * @returns The Turndown instance for chaining 100 | * @type Object 101 | */ 102 | 103 | addRule: function (key, rule) { 104 | this.rules.add(key, rule) 105 | return this 106 | }, 107 | 108 | /** 109 | * Keep a node (as HTML) that matches the filter 110 | * @public 111 | * @param 
{String|Array|Function} filter The tag name, array of tag names, or predicate function matching the nodes to keep 112 | * @returns The Turndown instance for chaining 113 | * @type Object 114 | */ 115 | 116 | keep: function (filter) { 117 | this.rules.keep(filter) 118 | return this 119 | }, 120 | 121 | /** 122 | * Remove a node that matches the filter 123 | * @public 124 | * @param {String|Array|Function} filter The tag name, array of tag names, or predicate function matching the nodes to remove 125 | * @returns The Turndown instance for chaining 126 | * @type Object 127 | */ 128 | 129 | remove: function (filter) { 130 | this.rules.remove(filter) 131 | return this 132 | }, 133 | 134 | /** 135 | * Escapes Markdown syntax 136 | * @public 137 | * @param {String} string The string to escape 138 | * @returns A string with Markdown syntax escaped 139 | * @type String 140 | */ 141 | 142 | escape: function (string) { 143 | return escapes.reduce(function (accumulator, escape) { 144 | return accumulator.replace(escape[0], escape[1]) 145 | }, string) 146 | } 147 | } 148 | 149 | /** 150 | * Reduces a DOM node down to its Markdown string equivalent 151 | * @private 152 | * @param {HTMLElement} parentNode The node to convert 153 | * @returns A Markdown representation of the node 154 | * @type String 155 | */ 156 | 157 | function process (parentNode) { 158 | var self = this 159 | return reduce.call(parentNode.childNodes, function (output, node) { 160 | node = new Node(node, self.options) 161 | 162 | var replacement = '' 163 | if (node.nodeType === 3) { 164 | replacement = node.isCode ? 
node.nodeValue : self.escape(node.nodeValue) 165 | } else if (node.nodeType === 1) { 166 | replacement = replacementForNode.call(self, node) 167 | } 168 | 169 | return join(output, replacement) 170 | }, '') 171 | } 172 | 173 | /** 174 | * Appends strings as each rule requires and trims the output 175 | * @private 176 | * @param {String} output The conversion output 177 | * @returns A trimmed version of the output 178 | * @type String 179 | */ 180 | 181 | function postProcess (output) { 182 | var self = this 183 | this.rules.forEach(function (rule) { 184 | if (typeof rule.append === 'function') { 185 | output = join(output, rule.append(self.options)) 186 | } 187 | }) 188 | 189 | return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '') 190 | } 191 | 192 | /** 193 | * Converts an element node to its Markdown equivalent 194 | * @private 195 | * @param {HTMLElement} node The node to convert 196 | * @returns A Markdown representation of the node 197 | * @type String 198 | */ 199 | 200 | function replacementForNode (node) { 201 | var rule = this.rules.forNode(node) 202 | var content = process.call(this, node) 203 | var whitespace = node.flankingWhitespace 204 | if (whitespace.leading || whitespace.trailing) content = content.trim() 205 | return ( 206 | whitespace.leading + 207 | rule.replacement(content, node, this.options) + 208 | whitespace.trailing 209 | ) 210 | } 211 | 212 | /** 213 | * Joins replacement to the current output with appropriate number of new lines 214 | * @private 215 | * @param {String} output The current conversion output 216 | * @param {String} replacement The string to append to the output 217 | * @returns Joined output 218 | * @type String 219 | */ 220 | 221 | function join (output, replacement) { 222 | var s1 = trimTrailingNewlines(output) 223 | var s2 = trimLeadingNewlines(replacement) 224 | var nls = Math.max(output.length - s1.length, replacement.length - s2.length) 225 | var separator = '\n\n'.substring(0, nls) 226 | 227 | return s1 
+ separator + s2 228 | } 229 | 230 | /** 231 | * Determines whether an input can be converted 232 | * @private 233 | * @param {String|HTMLElement} input The string or DOM node to check 234 | * @returns A truthy value if the input is a string or an element, document, or fragment node; a falsy value otherwise 235 | * @type String|Object|Array|Boolean|Number 236 | */ 237 | 238 | function canConvert (input) { 239 | return ( 240 | input != null && ( 241 | typeof input === 'string' || 242 | (input.nodeType && ( 243 | input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11 244 | )) 245 | ) 246 | ) 247 | } 248 | -------------------------------------------------------------------------------- /src/utilities.js: -------------------------------------------------------------------------------- 1 | export function extend (destination) { 2 | for (var i = 1; i < arguments.length; i++) { 3 | var source = arguments[i] 4 | for (var key in source) { 5 | if (source.hasOwnProperty(key)) destination[key] = source[key] 6 | } 7 | } 8 | return destination 9 | } 10 | 11 | export function repeat (character, count) { 12 | return Array(count + 1).join(character) 13 | } 14 | 15 | export function trimLeadingNewlines (string) { 16 | return string.replace(/^\n*/, '') 17 | } 18 | 19 | export function trimTrailingNewlines (string) { 20 | // avoid match-at-end regexp bottleneck, see #370 21 | var indexEnd = string.length 22 | while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd-- 23 | return string.substring(0, indexEnd) 24 | } 25 | 26 | export var blockElements = [ 27 | 'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS', 28 | 'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 29 | 'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER', 30 | 'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES', 31 | 'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD', 32 | 'TFOOT', 'TH', 'THEAD', 'TR', 'UL' 33 | ] 34 | 35 | export function isBlock (node) { 36 | return 
is(node, blockElements) 37 | } 38 | 39 | export var voidElements = [ 40 | 'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT', 41 | 'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR' 42 | ] 43 | 44 | export function isVoid (node) { 45 | return is(node, voidElements) 46 | } 47 | 48 | export function hasVoid (node) { 49 | return has(node, voidElements) 50 | } 51 | 52 | var meaningfulWhenBlankElements = [ 53 | 'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT', 54 | 'AUDIO', 'VIDEO' 55 | ] 56 | 57 | export function isMeaningfulWhenBlank (node) { 58 | return is(node, meaningfulWhenBlankElements) 59 | } 60 | 61 | export function hasMeaningfulWhenBlank (node) { 62 | return has(node, meaningfulWhenBlankElements) 63 | } 64 | 65 | function is (node, tagNames) { 66 | return tagNames.indexOf(node.nodeName) >= 0 67 | } 68 | 69 | function has (node, tagNames) { 70 | return ( 71 | node.getElementsByTagName && 72 | tagNames.some(function (tagName) { 73 | return node.getElementsByTagName(tagName).length 74 | }) 75 | ) 76 | } 77 | -------------------------------------------------------------------------------- /test/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | turndown test runner 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |

Lorem ipsum

14 |
Lorem ipsum
15 |
16 | 17 |
18 |
19 |

Lorem

20 |

ipsum

21 |

sit

22 |
23 |
Lorem
  24 | 
  25 | ipsum
  26 | 
  27 | sit
28 |
29 | 30 |
31 |
em element
32 |
_em element_
33 |
34 | 35 |
36 |
i element
37 |
_i element_
38 |
39 | 40 |
41 |
strong element
42 |
**strong element**
43 |
44 | 45 |
46 |
b element
47 |
**b element**
48 |
49 | 50 |
51 |
code element
52 |
`code element`
53 |
54 | 55 |
56 |
There is a literal backtick (`) here
57 |
``There is a literal backtick (`) here``
58 |
59 | 60 |
61 |
here are three ``` here are four ```` that's it
62 |
`here are three ``` here are four ```` that's it`
63 |
64 | 65 |
66 |
here are three ``` here are four ```` here is one ` that's it
67 |
``here are three ``` here are four ```` here is one ` that's it``
68 |
69 | 70 |
71 |
`starting with a backtick
72 |
`` `starting with a backtick ``
73 |
74 | 75 |
76 |
_emphasis_
77 |
`_emphasis_`
78 |
79 | 80 |
81 |
_emphasis_
82 |
`_emphasis_`
83 |
84 | 85 |
86 |

Level One Heading

87 |
Level One Heading
  88 | =================
89 |
90 | 91 |
92 |
===
93 |
\===
94 |
95 | 96 |
97 |
A sentence containing =
98 |
A sentence containing =
99 |
100 | 101 |
102 |

Level One Heading with ATX

103 |
# Level One Heading with ATX
104 |
105 | 106 |
107 |

Level Two Heading

108 |
Level Two Heading
 109 | -----------------
110 |
111 | 112 |
113 |

Level Two Heading with ATX

114 |
## Level Two Heading with ATX
115 |
116 | 117 |
118 |

Level Three Heading

119 |
### Level Three Heading
120 |
121 | 122 |
123 |

Level Four Heading with child

124 |
#### Level Four Heading with `child`
125 |
126 | 127 |
128 |
Level Seven Heading?
129 |
Level Seven Heading?
130 |
131 | 132 |
133 |

134 |
* * *
135 |
136 | 137 |
138 |

139 |
* * *
140 |
141 | 142 |
143 |

144 |
- - -
145 |
146 | 147 |
148 |
More
after the break
149 |
More  
 150 | after the break
151 |
152 | 153 |
154 |
More
after the break
155 |
More\
 156 | after the break
157 |
158 | 159 |
160 |
161 |
![](http://example.com/logo.png)
162 |
163 | 164 |
165 |
166 |
![](logo.png)
167 |
168 | 169 |
170 |
img with alt
171 |
![img with alt](logo.png)
172 |
173 | 174 |
175 |
176 |

 177 | 
178 | 179 |
180 |
img with
 181 |     alt
182 |
![img with
 183 | alt](logo.png)
184 |
185 | 186 |
187 |
img with
 188 |     
 189 |     alt
190 |
![img with
 191 | alt](logo.png)
192 |
193 | 194 |
195 |
198 |
![](logo.png "the
 199 | title")
200 |
201 | 202 |
203 | 204 |
[An anchor](http://example.com)
205 |
206 | 207 |
208 | 209 |
[An anchor](http://example.com "Title for link")
210 |
211 | 212 |
213 | 216 |
[An anchor](http://example.com "Title for
 217 | link")
218 |
219 | 220 |
221 | 222 |
[An anchor](http://example.com "\"hello\"")
223 |
224 | 225 |
226 | 227 |
[An anchor](http://example.com?\(query\))
228 |
229 | 230 |
231 | 232 |
Anchor without a title
233 |
234 | 235 |
236 | 237 |
[Some `code`](http://example.com/code)
238 |
239 | 240 |
241 | 242 |
[Reference link][1]
 243 | 
 244 | [1]: http://example.com
245 |
246 | 247 |
248 | 249 |
[Reference link with collapsed style][]
 250 | 
 251 | [Reference link with collapsed style]: http://example.com
252 |
253 | 254 |
255 | 256 |
[Reference link with shortcut style]
 257 | 
 258 | [Reference link with shortcut style]: http://example.com
259 |
260 | 261 |
262 |
def code_block
 263 |   # 42 < 9001
 264 |   "Hello world!"
 265 | end
266 | 267 |
    def code_block
 268 |       # 42 < 9001
 269 |       "Hello world!"
 270 |     end
271 |
272 | 273 |
274 |
def first_code_block
 275 |   # 42 < 9001
 276 |   "Hello world!"
 277 | end
278 | 279 |

next:

280 | 281 |
def second_code_block
 282 |   # 42 < 9001
 283 |   "Hello world!"
 284 | end
285 | 286 |
    def first_code_block
 287 |       # 42 < 9001
 288 |       "Hello world!"
 289 |     end
 290 | 
 291 | next:
 292 | 
 293 |     def second_code_block
 294 |       # 42 < 9001
 295 |       "Hello world!"
 296 |     end
297 |
298 | 299 |
300 |
Multiple new lines
 301 | 
 302 | 
 303 | should not be
 304 | 
 305 | 
 306 | removed
307 | 308 |
    Multiple new lines
 309 |     
 310 |     
 311 |     should not be
 312 |     
 313 |     
 314 |     removed
315 |
316 | 317 |
318 |
319 |
def a_fenced_code block; end
320 |
321 |
```
 322 | def a_fenced_code block; end
 323 | ```
324 |
325 | 326 |
327 |
328 |
def a_fenced_code block; end
329 |
330 |
~~~
 331 | def a_fenced_code block; end
 332 | ~~~
333 |
334 | 335 |
336 |
337 |
~~~ foo
338 |
339 |
\~~~ foo
340 |
341 | 342 |
343 |
A sentence containing ~~~
344 |
A sentence containing ~~~
345 |
346 | 347 |
348 |
349 |
def a_fenced_code block; end
350 |
351 |
```ruby
 352 | def a_fenced_code block; end
 353 | ```
354 |
355 | 356 |
357 |
358 |

 359 |   
360 |

 361 | 
362 | 363 |
364 |
365 |
    366 |
  1. Ordered list item 1
  2. 367 |
  3. Ordered list item 2
  4. 368 |
  5. Ordered list item 3
  6. 369 |
370 |
371 |
1.  Ordered list item 1
 372 | 2.  Ordered list item 2
 373 | 3.  Ordered list item 3
374 |
375 | 376 |
377 |
378 |
    379 |
  1. Ordered list item 42
  2. 380 |
  3. Ordered list item 43
  4. 381 |
  5. Ordered list item 44
  6. 382 |
383 |
384 |
42.  Ordered list item 42
 385 | 43.  Ordered list item 43
 386 | 44.  Ordered list item 44
387 |
388 | 389 |
390 |
391 |

A paragraph.

392 |
    393 |
  1. Ordered list item 1
  2. 394 |
  3. Ordered list item 2
  4. 395 |
  5. Ordered list item 3
  6. 396 |
397 |

Another paragraph.

398 |
    399 |
  • Unordered list item 1
  • 400 |
  • Unordered list item 2
  • 401 |
  • Unordered list item 3
  • 402 |
403 |
404 |
A paragraph.
 405 | 
 406 | 1.  Ordered list item 1
 407 | 2.  Ordered list item 2
 408 | 3.  Ordered list item 3
 409 | 
 410 | Another paragraph.
 411 | 
 412 | *   Unordered list item 1
 413 | *   Unordered list item 2
 414 | *   Unordered list item 3
415 |
416 | 417 |
418 |
419 |
    420 |
  • Unordered list item 1
  • 421 |
  • Unordered list item 2
  • 422 |
  • Unordered list item 3
  • 423 |
424 |
425 |
*   Unordered list item 1
 426 | *   Unordered list item 2
 427 | *   Unordered list item 3
428 |
429 | 430 |
431 |
432 |
    433 |
  • Unordered list item 1
  • 434 |
  • Unordered list item 2
  • 435 |
  • Unordered list item 3
  • 436 |
437 |
438 |
-   Unordered list item 1
 439 | -   Unordered list item 2
 440 | -   Unordered list item 3
441 |
442 | 443 |
444 |
445 |
    446 |
  • List item with paragraph

  • 447 |
  • List item without paragraph
  • 448 |
449 |
450 |
*   List item with paragraph
 451 |     
 452 | *   List item without paragraph
453 |
454 | 455 |
456 |
457 |
    458 |
  1. 459 |

    This is a paragraph in a list item.

    460 |

    This is a paragraph in the same list item as above.

    461 |
  2. 462 |
  3. 463 |

    A paragraph in a second list item.

    464 |
  4. 465 |
466 |
467 |
1.  This is a paragraph in a list item.
 468 |     
 469 |     This is a paragraph in the same list item as above.
 470 |     
 471 | 2.  A paragraph in a second list item.
472 |
473 | 474 |
475 |
476 |
    477 |
  • This is a list item at root level
  • 478 |
  • This is another item at root level
  • 479 |
  • 480 |
      481 |
    • This is a nested list item
    • 482 |
    • This is another nested list item
    • 483 |
    • 484 |
        485 |
      • This is a deeply nested list item
      • 486 |
      • This is another deeply nested list item
      • 487 |
      • This is a third deeply nested list item
      • 488 |
      489 |
    • 490 |
    491 |
  • 492 |
  • This is a third item at root level
  • 493 |
494 |
495 |
*   This is a list item at root level
 496 | *   This is another item at root level
 497 | *   *   This is a nested list item
 498 |     *   This is another nested list item
 499 |     *   *   This is a deeply nested list item
 500 |         *   This is another deeply nested list item
 501 |         *   This is a third deeply nested list item
 502 | *   This is a third item at root level
503 |
504 | 505 |
506 |
507 |
    508 |
  • This is a list item at root level
  • 509 |
  • This is another item at root level
  • 510 |
  • 511 |
      512 |
    1. This is a nested list item
    2. 513 |
    3. This is another nested list item
    4. 514 |
    5. 515 |
        516 |
      • This is a deeply nested list item
      • 517 |
      • This is another deeply nested list item
      • 518 |
      • This is a third deeply nested list item
      • 519 |
      520 |
    6. 521 |
    522 |
  • 523 |
  • This is a third item at root level
  • 524 |
525 |
526 |
*   This is a list item at root level
 527 | *   This is another item at root level
 528 | *   1.  This is a nested list item
 529 |     2.  This is another nested list item
 530 |     3.  *   This is a deeply nested list item
 531 |         *   This is another deeply nested list item
 532 |         *   This is a third deeply nested list item
 533 | *   This is a third item at root level
534 |
535 | 536 |
537 |
538 |
    539 |
  • 540 |

    A list item with a blockquote:

    541 |
    542 |

    This is a blockquote inside a list item.

    543 |
    544 |
  • 545 |
546 |
547 |
*   A list item with a blockquote:
 548 |     
 549 |     > This is a blockquote inside a list item.
550 |
551 | 552 |
553 |
554 |
555 |

This is a paragraph within a blockquote.

556 |

This is another paragraph within a blockquote.

557 |
558 |
559 |
> This is a paragraph within a blockquote.
 560 | > 
 561 | > This is another paragraph within a blockquote.
562 |
563 | 564 |
565 |
566 |
567 |

This is the first level of quoting.

568 |
569 |

This is a paragraph in a nested blockquote.

570 |
571 |

Back to the first level.

572 |
573 |
574 |
> This is the first level of quoting.
 575 | > 
 576 | > > This is a paragraph in a nested blockquote.
 577 | > 
 578 | > Back to the first level.
579 |
580 | 581 |
582 |
583 |
584 |

This is a header.

585 |
    586 |
  1. This is the first list item.
  2. 587 |
  3. This is the second list item.
  4. 588 |
589 |

A code block:

590 |
return 1 < 2 ? shell_exec('echo $input | $markdown_script') : 0;
591 |
592 |
593 |
> This is a header.
 594 | > -----------------
 595 | > 
 596 | > 1.  This is the first list item.
 597 | > 2.  This is the second list item.
 598 | > 
 599 | > A code block:
 600 | > 
 601 | >     return 1 < 2 ? shell_exec('echo $input | $markdown_script') : 0;
602 |
603 | 604 |
605 |
606 |
A div
607 |
Another div
608 |
609 |
A div
 610 | 
 611 | Another div
612 |
613 | 614 |
615 |
616 |
A div
617 |
Another div
618 |
619 |
A div
 620 | 
 621 | Another div
622 |
623 | 624 |
625 |
626 |

 627 | 
628 | 629 |
630 |
631 |
Hello world
632 |
633 |
    Hello world
634 |
635 | 636 |
637 |

638 | h3 with leading whitespace

639 |
### h3 with leading whitespace
640 |
641 | 642 |
643 |
644 |
    645 |
  1. Chapter One 646 |
      647 |
    1. Section One
    2. 648 |
    3. Section Two with trailing whitespace
    4. 649 |
    5. Section Three with trailing whitespace
    6. 650 |
    651 |
  2. 652 |
  3. Chapter Two
  4. 653 |
  5. Chapter Three with trailing whitespace
  6. 654 |
655 |
656 |
1.  Chapter One
 657 |     1.  Section One
 658 |     2.  Section Two with trailing whitespace
 659 |     3.  Section Three with trailing whitespace
 660 | 2.  Chapter Two
 661 | 3.  Chapter Three with trailing whitespace
662 |
663 | 664 |
665 |
666 |
    667 |
  • 668 | Indented li with leading/trailing newlines 669 |
  • 670 |
  • 671 | Strong with trailing space inside li with leading/trailing whitespace
  • 672 |
  • li without whitespace
  • 673 |
  • Leading space, text, lots of whitespace … 674 | text 675 |
  • 676 | 677 |
678 |
*   Indented li with leading/trailing newlines
 679 | *   **Strong with trailing space inside li with leading/trailing whitespace**
 680 | *   li without whitespace
 681 | *   Leading space, text, lots of whitespace … text
682 |
683 | 684 |
685 |
686 |

I need more spaces!

687 |
688 |
I [need](http://example.com/need) [more](http://www.example.com/more) spaces!
689 |
690 | 691 |
692 |
Text with no space after the period. Text in em with leading/trailing spaces text in strong with trailing space
693 |
Text with no space after the period. _Text in em with leading/trailing spaces_ **text in strong with trailing space**
694 |
695 | 696 |
697 |
Text at root link text with trailing space in strong more text at root
698 |
Text at root **[link text with trailing space in strong](http://www.example.com)** more text at root
699 |
700 | 701 |
702 |
703 | Text before blank em … text after blank em 704 |
705 |
Text before blank em … text after blank em
706 |
707 | 708 |
709 |
710 | Text before blank div …
text after blank div 711 |
712 |
Text before blank div …
 713 | 
 714 | text after blank div
715 |
716 | 717 |
718 |

719 |

 720 | 
721 | 722 |
723 |
Content in a nested div
724 |
Content in another div
725 |
Content in a nested div
 726 | 
 727 | Content in another div
728 |
729 | 730 |
731 |
backslash \
732 |
backslash \\
733 |
734 | 735 |
736 |
### This is not a heading
737 |
\### This is not a heading
738 |
739 | 740 |
741 |
#This is not # a heading
742 |
#This is not # a heading
743 |
744 | 745 |
746 |
To add emphasis, surround text with *. For example: *this is emphasis*
747 |
To add emphasis, surround text with \*. For example: \*this is emphasis\*
748 |
749 | 750 |
751 |
To add emphasis, surround text with _. For example: _this is emphasis_
752 |
To add emphasis, surround text with \_. For example: \_this is emphasis\_
753 |
754 | 755 |
756 |
def this_is_a_method; end;
757 |
    def this_is_a_method; end;
758 |
759 | 760 |
761 |
To add strong emphasis, surround text with **. For example: **this is strong**
762 |
To add strong emphasis, surround text with \*\*. For example: \*\*this is strong\*\*
763 |
764 | 765 |
766 |
To add strong emphasis, surround text with __. For example: __this is strong__
767 |
To add strong emphasis, surround text with \_\_. For example: \_\_this is strong\_\_
768 |
769 | 770 |
771 |
* * *
772 |
\* \* \*
773 |
774 | 775 |
776 |
- - -
777 |
\- - -
778 |
779 | 780 |
781 |
_ _ _
782 |
\_ \_ \_
783 |
784 | 785 |
786 |
***
787 |
\*\*\*
788 |
789 | 790 |
791 |
* * * * *
792 |
\* \* \* \* \*
793 |
794 | 795 |
796 |
1984. by George Orwell
797 |
1984\. by George Orwell
798 |
799 | 800 |
801 |
1984.George Orwell wrote 1984.
802 |
1984.George Orwell wrote 1984.
803 |
804 | 805 |
806 |
* An unordered list item
807 |
\* An unordered list item
808 |
809 | 810 |
811 |
- An unordered list item
812 |
\- An unordered list item
813 |
814 | 815 |
816 |
+ An unordered list item
817 |
\+ An unordered list item
818 |
819 | 820 |
821 |
Hello-world, 45 - 3 is 42
822 |
Hello-world, 45 - 3 is 42
823 |
824 | 825 |
826 |
+1 and another +
827 |
+1 and another +
828 |
829 | 830 |
831 |
You can use * for multiplication
832 |
You can use \* for multiplication
833 |
834 | 835 |
836 |
**test
837 |
**\*\*test**
838 |
839 | 840 |
841 |
test_italics
842 |
_test\_italics_
843 |
844 | 845 |
846 |
> Blockquote in markdown
847 |
\> Blockquote in markdown
848 |
849 | 850 |
851 |
>Blockquote in markdown
852 |
\>Blockquote in markdown
853 |
854 | 855 |
856 |
42 > 1
857 |
42 > 1
858 |
859 | 860 |
861 |
`not code`
862 |
\`not code\`
863 |
864 | 865 |
866 |
[This] is a sentence with brackets
867 |
\[This\] is a sentence with brackets
868 |
869 | 870 |
871 | 872 |
[c\[iao](http://www.example.com)
873 |
874 | 875 | 876 |
877 |
fasdf *883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf
878 |
fasdf \*883 asdf wer qweasd fsd asdf asdfaqwe rqwefrsdf
879 |
880 | 881 |
882 |

* * ** It aims to be*

883 |
\* \* \*\* It aims to be\*
884 |
885 | 886 |
887 |

_Really_? Is that what it _is_? A **2000** year-old computer?

888 |
\_Really\_? Is that what it \_is\_? A \*\*2000\*\* year-old computer?
889 |
890 | 891 |
892 |
893 | Foo 894 |
Bar
895 | Baz 896 |
897 |
Foo
 898 | 
 899 | Bar
 900 | 
 901 | Baz
902 |
903 | 904 |
905 |
906 | Foo Bar 907 |
908 |
Foo Bar
909 |
910 | 911 |
912 |
913 | Hello world 914 |
915 |
Hello world
916 |
917 | 918 |
919 |
920 |

921 |
922 |
![](http://example.com/logo.png)
923 |
924 | 925 |
926 |
927 |

928 |
929 |
![](http://example.com/logo.png)
930 |
931 | 932 |
933 |
934 |

Foo Bar

935 |
936 |
Foo Bar
937 |
938 | 939 |
940 |
941 |

Foo Bar

942 |
943 |
Foo Bar
944 |
945 | 946 |
947 |
948 |
~~~
 949 | Code
 950 | ~~~
 951 | 
952 |
953 |
~~~~
 954 | ~~~
 955 | Code
 956 | ~~~
 957 | ~~~~
958 |
959 | 960 |
961 |
962 |
```
 963 | Code
 964 | ```
 965 | 
966 |
967 |
````
 968 | ```
 969 | Code
 970 | ```
 971 | ````
972 |
973 | 974 |
975 |
976 |
````
 977 | Code
 978 | ````
 979 | 
980 |
981 |
`````
 982 | ````
 983 | Code
 984 | ````
 985 | `````
986 |
987 | 988 |
989 |
990 |

 991 | Code
 992 | 
 993 | 
994 |
995 |
```
 996 | 
 997 | Code
 998 | 
 999 | ```
1000 |
1001 | 1002 |
1003 |
1004 |

Foo   Bar

1005 |
1006 |
Foo   Bar
1007 |
1008 | 1009 |
1010 |
 1. First
 2. Second
1011 |
 1. First  
1012 |  2. Second
1013 |
1014 | 1015 |
1016 |
foo bar
1017 |
_foo_ bar
1018 |
1019 | 1020 |
1021 |
foo  bar
1022 |
_foo_  bar
1023 |
1024 | 1025 |
1026 |
foo  bar
1027 |
_foo_  bar
1028 |
1029 | 1030 |
1031 |
foo  bar
1032 |
_foo_  bar
1033 |
1034 | 1035 |
1036 |
foo bar
1037 |
foo _bar_
1038 |
1039 | 1040 |
1041 |
foo  bar
1042 |
foo  _bar_
1043 |
1044 | 1045 |
1046 |
foo  bar
1047 |
foo  _bar_
1048 |
1049 | 1050 |
1051 |
foo  bar
1052 |
foo  _bar_
1053 |
1054 | 1055 | 1056 |
1057 |
Four spaces make an indented code block in Markdown
1058 |
Four spaces `    make an indented code block in Markdown`
1059 |
1060 | 1061 |
1062 |
A line break note the spaces
1063 |
`A line break  ` **note the spaces**
1064 |
1065 | 1066 |
1067 |
tightcodewrap
1068 |
**tight**`code`**wrap**
1069 |
1070 | 1071 |
1072 |
not so tight code wrap
1073 |
**not so tight** `code` **wrap**
1074 |
1075 | 1076 | 1077 |
1078 |
1079 | 1080 | 1081 | nasty 1082 | code 1083 | 1084 | 1085 |
1086 |
`    nasty code   `
1087 |
1088 | 1089 | 1090 | 1091 | 1092 | 1093 | 1094 | -------------------------------------------------------------------------------- /test/internals-test.js: -------------------------------------------------------------------------------- 1 | var test = require('tape').test 2 | var rewire = require('rewire') 3 | var turndownModule = rewire('../lib/turndown.cjs') 4 | 5 | test('edge whitespace detection',function (t) { 6 | function ews (leadingAscii, leadingNonAscii, trailingNonAscii, trailingAscii) { 7 | return { 8 | leading: leadingAscii + leadingNonAscii, 9 | leadingAscii: leadingAscii, 10 | leadingNonAscii: leadingNonAscii, 11 | trailing: trailingNonAscii + trailingAscii, 12 | trailingNonAscii: trailingNonAscii, 13 | trailingAscii: trailingAscii 14 | } 15 | } 16 | var WS = '\r\n \t' 17 | var TEST_CASES = [ 18 | [`${WS}HELLO WORLD${WS}`, ews(WS, '', '', WS)], 19 | [`${WS}H${WS}`, ews(WS, '', '', WS)], 20 | [`${WS}\xa0${WS}HELLO${WS}WORLD${WS}\xa0${WS}`, ews(WS, `\xa0${WS}`, `${WS}\xa0`, WS)], 21 | [`\xa0${WS}HELLO${WS}WORLD${WS}\xa0`, ews('', `\xa0${WS}`, `${WS}\xa0`, '')], 22 | [`\xa0${WS}\xa0`, ews('', `\xa0${WS}\xa0`, '', '')], 23 | [`${WS}\xa0${WS}`, ews(WS, `\xa0${WS}`, '', '')], 24 | [`${WS}\xa0`, ews(WS, `\xa0`, '', '')], 25 | [`HELLO WORLD`, ews('', '', '', '')], 26 | [``, ews('', '', '', '')], 27 | [`TEST${Array(32768).join(' ')}END`, ews('', '', '', '')], // performance check 28 | ] 29 | t.plan(TEST_CASES.length) 30 | t.timeoutAfter(300) 31 | var edgeWhitespace = turndownModule.__get__('edgeWhitespace') 32 | TEST_CASES.forEach(function (c) { 33 | t.deepEqual(edgeWhitespace(c[0]), c[1]) 34 | }) 35 | }) 36 | -------------------------------------------------------------------------------- /test/turndown-test.js: -------------------------------------------------------------------------------- 1 | var Attendant = require('turndown-attendant') 2 | var TurndownService = require('../lib/turndown.cjs') 3 | 4 | var attendant = new Attendant({ 5 | file: __dirname + 
'/index.html', 6 | TurndownService: TurndownService 7 | }) 8 | var test = attendant.test 9 | 10 | attendant.run() 11 | 12 | test('malformed documents', function (t) { 13 | t.plan(0) 14 | var turndownService = new TurndownService() 15 | turndownService.turndown('') 16 | t.end() 17 | }) 18 | 19 | test('null input', function (t) { 20 | t.plan(1) 21 | var turndownService = new TurndownService() 22 | t.throws( 23 | function () { turndownService.turndown(null) }, /null is not a string/ 24 | ) 25 | }) 26 | 27 | test('undefined input', function (t) { 28 | t.plan(1) 29 | var turndownService = new TurndownService() 30 | t.throws( 31 | function () { turndownService.turndown(void (0)) }, 32 | /undefined is not a string/ 33 | ) 34 | }) 35 | 36 | test('#addRule returns the instance', function (t) { 37 | t.plan(1) 38 | var turndownService = new TurndownService() 39 | var rule = { 40 | filter: ['del', 's', 'strike'], 41 | replacement: function (content) { 42 | return '~~' + content + '~~' 43 | } 44 | } 45 | t.equal(turndownService.addRule('strikethrough', rule), turndownService) 46 | }) 47 | 48 | test('#addRule adds the rule', function (t) { 49 | t.plan(2) 50 | var turndownService = new TurndownService() 51 | var rule = { 52 | filter: ['del', 's', 'strike'], 53 | replacement: function (content) { 54 | return '~~' + content + '~~' 55 | } 56 | } 57 | // Assert rules#add is called 58 | turndownService.rules.add = function (key, r) { 59 | t.equal(key, 'strikethrough') 60 | t.equal(rule, r) 61 | } 62 | turndownService.addRule('strikethrough', rule) 63 | }) 64 | 65 | test('#use returns the instance for chaining', function (t) { 66 | t.plan(1) 67 | var turndownService = new TurndownService() 68 | t.equal(turndownService.use(function plugin () {}), turndownService) 69 | }) 70 | 71 | test('#use with a single plugin calls the fn with instance', function (t) { 72 | t.plan(1) 73 | var turndownService = new TurndownService() 74 | function plugin (service) { 75 | t.equal(service, 
turndownService) 76 | } 77 | turndownService.use(plugin) 78 | }) 79 | 80 | test('#use with multiple plugins calls each fn with instance', function (t) { 81 | t.plan(2) 82 | var turndownService = new TurndownService() 83 | function plugin1 (service) { 84 | t.equal(service, turndownService) 85 | } 86 | function plugin2 (service) { 87 | t.equal(service, turndownService) 88 | } 89 | turndownService.use([plugin1, plugin2]) 90 | }) 91 | 92 | test('#keep keeps elements as HTML', function (t) { 93 | t.plan(2) 94 | var turndownService = new TurndownService() 95 | var input = '

Hello worldWorld

' 96 | 97 | // Without `.keep(['del', 'ins'])` 98 | t.equal(turndownService.turndown(input), 'Hello worldWorld') 99 | 100 | // With `.keep(['del', 'ins'])` 101 | turndownService.keep(['del', 'ins']) 102 | t.equal( 103 | turndownService.turndown('

Hello worldWorld

'), 104 | 'Hello worldWorld' 105 | ) 106 | }) 107 | 108 | test('#keep returns the TurndownService instance for chaining', function (t) { 109 | t.plan(1) 110 | var turndownService = new TurndownService() 111 | t.equal(turndownService.keep(['del', 'ins']), turndownService) 112 | }) 113 | 114 | test('keep rules are overridden by the standard rules', function (t) { 115 | t.plan(1) 116 | var turndownService = new TurndownService() 117 | turndownService.keep('p') 118 | t.equal(turndownService.turndown('

Hello world

'), 'Hello world') 119 | }) 120 | 121 | test('keeping elements that have a blank textContent but contain significant elements', function (t) { 122 | t.plan(1) 123 | var turndownService = new TurndownService() 124 | turndownService.keep('figure') 125 | t.equal( 126 | turndownService.turndown('
'), 127 | '
' 128 | ) 129 | }) 130 | 131 | test('keepReplacement can be customised', function (t) { 132 | t.plan(1) 133 | var turndownService = new TurndownService({ 134 | keepReplacement: function (content, node) { 135 | return '\n\n' + node.outerHTML + '\n\n' 136 | } 137 | }) 138 | turndownService.keep(['del', 'ins']) 139 | t.equal(turndownService.turndown( 140 | '

Hello worldWorld

'), 141 | 'Hello \n\nworld\n\nWorld' 142 | ) 143 | }) 144 | 145 | test('#remove removes elements', function (t) { 146 | t.plan(2) 147 | var turndownService = new TurndownService() 148 | var input = 'Please redact me' 149 | 150 | // Without `.remove('del')` 151 | t.equal(turndownService.turndown(input), 'Please redact me') 152 | 153 | // With `.remove('del')` 154 | turndownService.remove('del') 155 | t.equal(turndownService.turndown(input), '') 156 | }) 157 | 158 | test('#remove returns the TurndownService instance for chaining', function (t) { 159 | t.plan(1) 160 | var turndownService = new TurndownService() 161 | t.equal(turndownService.remove(['del', 'ins']), turndownService) 162 | }) 163 | 164 | test('remove elements are overridden by rules', function (t) { 165 | t.plan(1) 166 | var turndownService = new TurndownService() 167 | turndownService.remove('p') 168 | t.equal(turndownService.turndown('

Hello world

'), 'Hello world') 169 | }) 170 | 171 | test('remove elements are overridden by keep', function (t) { 172 | t.plan(1) 173 | var turndownService = new TurndownService() 174 | turndownService.keep(['del', 'ins']) 175 | turndownService.remove(['del', 'ins']) 176 | t.equal(turndownService.turndown( 177 | '

Hello worldWorld

'), 178 | 'Hello worldWorld' 179 | ) 180 | }) 181 | --------------------------------------------------------------------------------