├── .bowerrc ├── .gitignore ├── test ├── index.html ├── gfm-test.js └── to-markdown-test.js ├── .travis.yml ├── bower.json ├── package.json ├── LICENSE ├── testem.yml ├── lib ├── gfm-converters.js └── md-converters.js ├── README.md ├── index.js └── dist └── to-markdown.js /.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory": "bower_components" 3 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bower_components/ 2 | node_modules/ 3 | bower_components/ 4 | npm-debug.log 5 | -------------------------------------------------------------------------------- /test/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | QUnit Example 6 | 7 | 8 | 9 |
10 |
11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 4 4 | 5 | before_script: 6 | - npm install -g bower 7 | - bower install --dev 8 | - npm install --dev 9 | 10 | script: 11 | - npm test 12 | - testem ci --port 8080 13 | 14 | env: 15 | global: 16 | - secure: gPidjA+ITMh8LixuPvvf/haZXOiM4eLbixFZupkeUo8jHgiBDBCoytzOvrXlBOWFx5QEByr7t4Q/UOHVVFM5/eRykN0RxO7JUiiosU49WFPB6uFiTEqmZx3Kb8L2bG8SITvCoRJSLVi1JS9dX5bQeeLA5HD/me+9ak4Ef4A0rlk= 17 | - secure: PnHNOs9Ld/kO4qXhjxtE04j/zC0rgPMev5FpbTQ7gEqDGn1PBTfKWLAcbUpPkMO9hKADJvBCWU8+VCmPZ0Qk6nu6KKAAkvE5acpwM3yeLckgzVxeAncxS9mvGSTNRv9EHbxIN9g8Jo1FtN0UhY9Y23xq8MxyDC0ManeLin7Q6qs= 18 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "to-markdown", 3 | "version": "1.3.0", 4 | "homepage": "https://github.com/domchristie/to-markdown", 5 | "authors": [ 6 | "Dom Christie " 7 | ], 8 | "description": "An HTML to Markdown converter written in JavaScript", 9 | "main": "dist/to-markdown.js", 10 | "keywords": [ 11 | "markdown", 12 | "html" 13 | ], 14 | "license": "MIT", 15 | "ignore": [ 16 | "**/.*", 17 | "node_modules", 18 | "bower_components", 19 | "package.json", 20 | "test", 21 | "lib", 22 | "index.js" 23 | ], 24 | "devDependencies": { 25 | "qunit": "~1.14.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "to-markdown", 3 | "description": "HTML-to-Markdown converter", 4 | "url": "http://domchristie.github.com/to-markdown/", 5 | "keywords": "markdown", 6 | "author": "Dom Christie", 7 | "main": "index.js", 8 | "version": "2.0.1", 9 | 
"repository": { 10 | "type": "git", 11 | "url": "https://github.com/domchristie/to-markdown.git" 12 | }, 13 | "scripts": { 14 | "start": "watchify -s toMarkdown -o dist/to-markdown.js index.js -v", 15 | "test": "qunit -c ./index.js -t ./test/to-markdown-test.js ./test/gfm-test.js" 16 | }, 17 | "devDependencies": { 18 | "saucie": "0.1.3", 19 | "testem": "^0.8.2", 20 | "qunit": "^0.7.6", 21 | "watchify": "^2.5.0" 22 | }, 23 | "browser": { 24 | "jsdom": false 25 | }, 26 | "dependencies": { 27 | "collapse-whitespace": "1.1.2", 28 | "jsdom": "^6.5.1" 29 | }, 30 | "engines": { 31 | "node": "^4" 32 | }, 33 | "license": "MIT" 34 | } 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011-2015 Dom Christie 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /testem.yml: -------------------------------------------------------------------------------- 1 | framework: qunit 2 | 3 | src_files: [dist/to-markdown.js, test/to-markdown-test.js] 4 | 5 | launchers: 6 | sl_ff_mac: 7 | command: saucie --browserNameSL="firefox" --platformSL="OS X 10.10" 8 | protocol: tap 9 | 10 | sl_chrome_mac: 11 | command: saucie --browserNameSL="chrome" --platformSL="OS X 10.10" 12 | protocol: tap 13 | 14 | sl_safari_mac: 15 | command: saucie --browserNameSL="safari" --platformSL="OS X 10.10" 16 | protocol: tap 17 | 18 | sl_ff_win: 19 | command: saucie --browserNameSL="firefox" --platformSL="Windows 8.1" 20 | protocol: tap 21 | 22 | sl_chrome_win: 23 | command: saucie --browserNameSL="chrome" --platformSL="Windows 8.1" 24 | protocol: tap 25 | 26 | sl_ie_11: 27 | command: saucie --browserNameSL="internet explorer" --versionSL="11" --platformSL="Windows 8.1" 28 | protocol: tap 29 | 30 | sl_ie_10: 31 | command: saucie --browserNameSL="internet explorer" --versionSL="10" --platformSL="Windows 8" 32 | protocol: tap 33 | 34 | sl_ie_9: 35 | command: saucie --browserNameSL="internet explorer" --versionSL="9" --platformSL="Windows 7" 36 | protocol: tap 37 | 38 | launch_in_ci: [sl_ff_mac, sl_chrome_mac, sl_safari_mac, sl_ff_win, sl_chrome_win, sl_ie_9, sl_ie_10, sl_ie_11] 39 | launch_in_dev: [Chrome, Firefox, Safari, Opera] -------------------------------------------------------------------------------- /lib/gfm-converters.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | function cell(content, node) { 4 | var index = Array.prototype.indexOf.call(node.parentNode.childNodes, node); 5 | var prefix = ' '; 6 | if (index === 0) { prefix = '| '; } 7 | return prefix + content + ' |'; 8 | } 9 | 10 | var highlightRegEx = /highlight highlight-(\S+)/; 11 | 12 | module.exports = [ 13 | { 14 | filter: 'br', 15 | replacement: 
function () { 16 | return '\n'; 17 | } 18 | }, 19 | { 20 | filter: ['del', 's', 'strike'], 21 | replacement: function (content) { 22 | return '~~' + content + '~~'; 23 | } 24 | }, 25 | 26 | { 27 | filter: function (node) { 28 | return node.type === 'checkbox' && node.parentNode.nodeName === 'LI'; 29 | }, 30 | replacement: function (content, node) { 31 | return (node.checked ? '[x]' : '[ ]') + ' '; 32 | } 33 | }, 34 | 35 | { 36 | filter: ['th', 'td'], 37 | replacement: function (content, node) { 38 | return cell(content, node); 39 | } 40 | }, 41 | 42 | { 43 | filter: 'tr', 44 | replacement: function (content, node) { 45 | var borderCells = ''; 46 | var alignMap = { left: ':--', right: '--:', center: ':-:' }; 47 | 48 | if (node.parentNode.nodeName === 'THEAD') { 49 | for (var i = 0; i < node.childNodes.length; i++) { 50 | var align = node.childNodes[i].attributes.align; 51 | var border = '---'; 52 | 53 | if (align) { border = alignMap[align.value] || border; } 54 | 55 | borderCells += cell(border, node.childNodes[i]); 56 | } 57 | } 58 | return '\n' + content + (borderCells ? 
'\n' + borderCells : ''); 59 | } 60 | }, 61 | 62 | { 63 | filter: 'table', 64 | replacement: function (content) { 65 | return '\n\n' + content + '\n\n'; 66 | } 67 | }, 68 | 69 | { 70 | filter: ['thead', 'tbody', 'tfoot'], 71 | replacement: function (content) { 72 | return content; 73 | } 74 | }, 75 | 76 | // Fenced code blocks 77 | { 78 | filter: function (node) { 79 | return node.nodeName === 'PRE' && 80 | node.firstChild && 81 | node.firstChild.nodeName === 'CODE'; 82 | }, 83 | replacement: function(content, node) { 84 | return '\n\n```\n' + node.firstChild.textContent + '\n```\n\n'; 85 | } 86 | }, 87 | 88 | // Syntax-highlighted code blocks 89 | { 90 | filter: function (node) { 91 | return node.nodeName === 'PRE' && 92 | node.parentNode.nodeName === 'DIV' && 93 | highlightRegEx.test(node.parentNode.className); 94 | }, 95 | replacement: function (content, node) { 96 | var language = node.parentNode.className.match(highlightRegEx)[1]; 97 | return '\n\n```' + language + '\n' + node.textContent + '\n```\n\n'; 98 | } 99 | }, 100 | 101 | { 102 | filter: function (node) { 103 | return node.nodeName === 'DIV' && 104 | highlightRegEx.test(node.className); 105 | }, 106 | replacement: function (content) { 107 | return '\n\n' + content + '\n\n'; 108 | } 109 | } 110 | ]; 111 | -------------------------------------------------------------------------------- /lib/md-converters.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = [ 4 | { 5 | filter: 'p', 6 | replacement: function (content) { 7 | return '\n\n' + content + '\n\n'; 8 | } 9 | }, 10 | 11 | { 12 | filter: 'br', 13 | replacement: function () { 14 | return ' \n'; 15 | } 16 | }, 17 | 18 | { 19 | filter: ['h1', 'h2', 'h3', 'h4','h5', 'h6'], 20 | replacement: function(content, node) { 21 | var hLevel = node.nodeName.charAt(1); 22 | var hPrefix = ''; 23 | for(var i = 0; i < hLevel; i++) { 24 | hPrefix += '#'; 25 | } 26 | return '\n\n' + hPrefix + ' 
' + content + '\n\n'; 27 | } 28 | }, 29 | 30 | { 31 | filter: 'hr', 32 | replacement: function () { 33 | return '\n\n* * *\n\n'; 34 | } 35 | }, 36 | 37 | { 38 | filter: ['em', 'i'], 39 | replacement: function(content) { 40 | return content.split(/\s*?(?:\r|\n)/).map(function(line) { 41 | return line.trim().length ? '_' + line + '_' : line; 42 | }).join("\n"); 43 | } 44 | }, 45 | 46 | { 47 | filter: ['strong', 'b'], 48 | replacement: function(content) { 49 | return content.split(/\s*?(?:\r|\n)/).map(function(line) { 50 | return line.trim().length ? '**' + line + '**' : line; 51 | }).join("\n"); 52 | } 53 | }, 54 | 55 | // Inline code 56 | { 57 | filter: function (node) { 58 | var hasSiblings = node.previousSibling || node.nextSibling; 59 | var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings; 60 | 61 | return node.nodeName === 'CODE' && !isCodeBlock; 62 | }, 63 | replacement: function(content) { 64 | return '`' + content + '`'; 65 | } 66 | }, 67 | 68 | { 69 | filter: function (node) { 70 | return node.nodeName === 'A' && node.getAttribute('href'); 71 | }, 72 | replacement: function(content, node) { 73 | var titlePart = node.title ? ' "'+ node.title +'"' : ''; 74 | return '[' + content + '](' + node.getAttribute('href') + titlePart + ')'; 75 | } 76 | }, 77 | 78 | { 79 | filter: 'img', 80 | replacement: function(content, node) { 81 | var alt = node.alt || ''; 82 | var src = node.getAttribute('src') || ''; 83 | var title = node.title || ''; 84 | var titlePart = title ? ' "'+ title +'"' : ''; 85 | return src ? 
'![' + alt + ']' + '(' + src + titlePart + ')' : ''; 86 | } 87 | }, 88 | 89 | // Code blocks 90 | { 91 | filter: function (node) { 92 | return node.nodeName === 'PRE' && node.firstChild.nodeName === 'CODE'; 93 | }, 94 | replacement: function(content, node) { 95 | return '\n\n ' + node.firstChild.textContent.replace(/\n/g, '\n ') + '\n\n'; 96 | } 97 | }, 98 | 99 | { 100 | filter: 'blockquote', 101 | replacement: function (content) { 102 | content = this.trim(content); 103 | content = content.replace(/\n{3,}/g, '\n\n'); 104 | content = content.replace(/^/gm, '> '); 105 | return '\n\n' + content + '\n\n'; 106 | } 107 | }, 108 | 109 | { 110 | filter: 'li', 111 | replacement: function (content, node) { 112 | content = content.replace(/^\s+/, '').replace(/\n/gm, '\n '); 113 | var prefix = '* '; 114 | var parent = node.parentNode; 115 | var index = Array.prototype.indexOf.call(parent.children, node) + 1; 116 | 117 | prefix = /ol/i.test(parent.nodeName) ? index + '. ' : '* '; 118 | return prefix + content; 119 | } 120 | }, 121 | 122 | { 123 | filter: ['ul', 'ol'], 124 | replacement: function (content, node) { 125 | var strings = []; 126 | for (var i = 0; i < node.childNodes.length; i++) { 127 | strings.push(node.childNodes[i]._replacement); 128 | } 129 | 130 | if (/li/i.test(node.parentNode.nodeName)) { 131 | return '\n' + strings.join('\n'); 132 | } 133 | return '\n\n' + strings.join('\n') + '\n\n'; 134 | } 135 | }, 136 | 137 | { 138 | filter: function (node) { 139 | return this.isBlock(node); 140 | }, 141 | replacement: function (content, node) { 142 | return '\n\n' + this.outer(node, content) + '\n\n'; 143 | } 144 | }, 145 | 146 | // Anything else! 
147 | { 148 | filter: function () { 149 | return true; 150 | }, 151 | replacement: function (content, node) { 152 | return this.outer(node, content); 153 | } 154 | } 155 | ]; -------------------------------------------------------------------------------- /test/gfm-test.js: -------------------------------------------------------------------------------- 1 | /* global QUnit, test, equal */ 2 | 3 | 'use strict'; 4 | 5 | if (typeof module !== 'undefined' && module.exports) { 6 | var toMarkdown = require('../index'); 7 | } 8 | 9 | QUnit.module('GitHub Flavored Markdown'); 10 | 11 | // Test cases are in the format: [html, expectedMarkdown, message]; 12 | function runGfmTestCases(testCases) { 13 | for (var i = 0; i < testCases.length; i++) { 14 | var testCase = testCases[i]; 15 | equal(toMarkdown(testCase[0], { gfm: true }), testCase[1], testCase[2]); 16 | } 17 | } 18 | 19 | test('line breaks', function () { 20 | runGfmTestCases([ 21 | ['

Hello
world

', 'Hello\nworld'] 22 | ]); 23 | }); 24 | 25 | test('strikethroughs', function() { 26 | runGfmTestCases([ 27 | ['Lorem ipsum', '~~Lorem ipsum~~', 'del'], 28 | ['Lorem ipsum', '~~Lorem ipsum~~', 's'], 29 | ['Lorem ipsum', '~~Lorem ipsum~~', 'strike'] 30 | ]); 31 | }); 32 | 33 | test('task lists', function() { 34 | runGfmTestCases([ 35 | [ 36 | '', 37 | '* [ ] Check Me!', 38 | 'Unchecked inputs' 39 | ], 40 | [ 41 | '', 42 | '* [x] Checked!', 43 | 'Checked inputs' 44 | ] 45 | ]); 46 | }); 47 | 48 | test('tables', function() { 49 | runGfmTestCases([ 50 | [ 51 | ['', 52 | ' ', 53 | ' ', 54 | ' ', 55 | ' ', 56 | ' ', 57 | ' ', 58 | ' ', 59 | ' ', 60 | ' ', 61 | ' ', 62 | ' ', 63 | ' ', 64 | ' ', 65 | ' ', 66 | ' ', 67 | ' '].join('\n'), 69 | 70 | ['| Column 1 | Column 2 |', 71 | '| --- | --- |', 72 | '| Row 1, Column 1 | Row 1, Column 2 |', 73 | '| Row 2, Column 1 | Row 2, Column 2 |'].join('\n'), 74 | 75 | 'Basic table' 76 | ], 77 | [ 78 | ['
Column 1Column 2
Row 1, Column 1Row 1, Column 2
Row 2, Column 1Row 2, Column 2
', 79 | ' ', 80 | ' ', 81 | ' ', 82 | ' ', 83 | ' ', 84 | ' ', 85 | ' ', 86 | ' ', 87 | ' ', 88 | ' ', 89 | ' ', 90 | ' ', 91 | ' ', 92 | ' ', 93 | ' ', 94 | ' ', 95 | ' ', 96 | ' ', 97 | ' ', 98 | ' ', 99 | ' ', 100 | ' '].join('\n'), 102 | 103 | ['| Column 1 | Column 2 | Column 3 | Column 4 |', 104 | '| :-- | :-: | --: | --- |', 105 | '| Row 1, Column 1 | Row 1, Column 2 | Row 1, Column 3 | Row 1, Column 4 |', 106 | '| Row 2, Column 1 | Row 2, Column 2 | Row 2, Column 3 | Row 2, Column 4 |'].join('\n'), 107 | 108 | 'Cell alignment' 109 | ] 110 | ]); 111 | }); 112 | 113 | test('fenced code blocks', function () { 114 | runGfmTestCases([ 115 | [ 116 | ['
This is a regular paragraph.',
117 |       '',
118 |       '<table>',
119 |       '    <tr>',
120 |       '        <td>Foo</td>',
121 |       '    </tr>',
122 |       '</table>',
123 |       '',
124 |       'This is another regular paragraph.',
125 |       '
'].join('\n'), 126 | 127 | ['```', 128 | 'This is a regular paragraph.', 129 | '', 130 | '
Column 1Column 2Column 3Column 4
Row 1, Column 1Row 1, Column 2Row 1, Column 3Row 1, Column 4
Row 2, Column 1Row 2, Column 2Row 2, Column 3Row 2, Column 4
', 131 | ' ', 132 | ' ', 133 | ' ', 134 | '
Foo
', 135 | '', 136 | 'This is another regular paragraph.', 137 | '', 138 | '```'].join('\n') 139 | ] 140 | ]); 141 | }); 142 | 143 | test('syntax highlighting', function () { 144 | runGfmTestCases([ 145 | [ 146 | ['
<table>',
147 |       '    <tr>',
148 |       '        <td>Foo</td>',
149 |       '    </tr>',
150 |       '</table>
'].join('\n'), 151 | 152 | ['```html', 153 | '', 154 | ' ', 155 | ' ', 156 | ' ', 157 | '
Foo
', 158 | '```'].join('\n'), 159 | 160 | 'HTML' 161 | ] 162 | ]); 163 | }); 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # to-markdown 2 | 3 | An HTML to Markdown converter written in JavaScript. 4 | 5 | The API is as follows: 6 | 7 | ```js 8 | toMarkdown(stringOfHTML, options); 9 | ``` 10 | 11 | **Note** to-markdown v2 runs on Node 4+. For a version compatible with Node 0.10 - 0.12, please use [to-markdown v1.x](https://github.com/domchristie/to-markdown/tree/1.x). 12 | 13 | ## Installation 14 | 15 | ### Browser 16 | 17 | Download the compiled script located at `dist/to-markdown.js`. 18 | 19 | ```html 20 | 21 | 22 | ``` 23 | 24 | Or with **Bower**: 25 | 26 | ```sh 27 | $ bower install to-markdown 28 | ``` 29 | 30 | ```html 31 | 32 | 33 | ``` 34 | 35 | ### Node.js 36 | 37 | Install the `to-markdown` module: 38 | 39 | ```sh 40 | $ npm install to-markdown 41 | ``` 42 | 43 | Then you can use it like below: 44 | 45 | ```js 46 | var toMarkdown = require('to-markdown'); 47 | toMarkdown('

Hello world!

'); 48 | ``` 49 | 50 | (Note it is no longer necessary to call `.toMarkdown` on the required module as of v1.) 51 | 52 | ## Options 53 | 54 | ### `converters` (array) 55 | 56 | to-markdown can be extended by passing in an array of converters to the options object: 57 | 58 | ```js 59 | toMarkdown(stringOfHTML, { converters: [converter1, converter2, …] }); 60 | ``` 61 | 62 | A converter object consists of a **filter**, and a **replacement**. This example from the source replaces `code` elements: 63 | 64 | ```js 65 | { 66 | filter: 'code', 67 | replacement: function(content) { 68 | return '`' + content + '`'; 69 | } 70 | } 71 | ``` 72 | 73 | #### `filter` (string|array|function) 74 | 75 | The filter property determines whether or not an element should be replaced. DOM nodes can be selected simply by filtering by tag name, with strings or with arrays of strings: 76 | 77 | * `filter: 'p'` will select `p` elements 78 | * `filter: ['em', 'i']` will select `em` or `i` elements 79 | 80 | Alternatively, the filter can be a function that returns a boolean depending on whether a given node should be replaced. The function is passed a DOM node as its only argument. For example, the following will match any `span` element with an `italic` font style: 81 | 82 | ```js 83 | filter: function (node) { 84 | return node.nodeName === 'SPAN' && /italic/i.test(node.style.fontStyle); 85 | } 86 | ``` 87 | 88 | #### `replacement` (function) 89 | 90 | The replacement function determines how an element should be converted. It should return the markdown string for a given node. The function is passed the node’s content, as well as the node itself (used in more complex conversions). It is called in the context of `toMarkdown`, and therefore has access to the methods detailed below. 
91 | 92 | The following converter replaces heading elements (`h1`-`h6`): 93 | 94 | ```js 95 | { 96 | filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], 97 | 98 | replacement: function(innerHTML, node) { 99 | var hLevel = node.tagName.charAt(1); 100 | var hPrefix = ''; 101 | for(var i = 0; i < hLevel; i++) { 102 | hPrefix += '#'; 103 | } 104 | return '\n' + hPrefix + ' ' + innerHTML + '\n\n'; 105 | } 106 | } 107 | ``` 108 | 109 | ### `gfm` (boolean) 110 | 111 | to-markdown has beta support for GitHub flavored markdown (GFM). Set the `gfm` option to true: 112 | 113 | ```js 114 | toMarkdown('Hello world!', { gfm: true }); 115 | ``` 116 | 117 | ## Methods 118 | 119 | The following methods can be called on the `toMarkdown` object. 120 | 121 | ### `isBlock(node)` 122 | 123 | Returns `true`/`false` depending on whether the element is block level. 124 | 125 | ### `isVoid(node)` 126 | 127 | Returns `true`/`false` depending on whether the element is [void](http://www.w3.org/TR/html-markup/syntax.html#syntax-elements). 128 | 129 | ### `trim(string)` 130 | 131 | Returns the string with leading and trailing whitespace removed. 132 | 133 | ### `outer(node, content)` 134 | 135 | Returns the HTML string of the element itself (its opening and closing tags, without its original children) with `content` inserted between the tags. 136 | 137 | ## Development 138 | 139 | First make sure you have node.js/npm installed, then: 140 | 141 | ```sh 142 | $ npm install --dev 143 | $ bower install --dev 144 | ``` 145 | 146 | Automatically browserify the module when source files change by running: 147 | 148 | ```sh 149 | $ npm start 150 | ``` 151 | 152 | ### Tests 153 | 154 | To run the tests in the browser, open `test/index.html`. 155 | 156 | To run in node.js: 157 | 158 | ```sh 159 | $ npm test 160 | ``` 161 | 162 | ## Credits 163 | 164 | Thanks to all [contributors](https://github.com/domchristie/to-markdown/graphs/contributors). Also, thanks to [Alex Cornejo](https://github.com/acornejo) for advice and inspiration for the breadth-first search algorithm. 
165 | 166 | ## Licence 167 | 168 | to-markdown is copyright © 2011-15 [Dom Christie](http://domchristie.co.uk) and released under the MIT license. 169 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /* 2 | * to-markdown - an HTML to Markdown converter 3 | * 4 | * Copyright 2011-15, Dom Christie 5 | * Licenced under the MIT licence 6 | * 7 | */ 8 | 9 | 'use strict'; 10 | 11 | var toMarkdown; 12 | var converters; 13 | var mdConverters = require('./lib/md-converters'); 14 | var gfmConverters = require('./lib/gfm-converters'); 15 | var collapse = require('collapse-whitespace'); 16 | 17 | /* 18 | * Set up window and document for Node.js 19 | */ 20 | 21 | var _window = (typeof window !== 'undefined' ? window : this), _document; 22 | if (typeof document === 'undefined') { 23 | _document = require('jsdom').jsdom(); 24 | } 25 | else { 26 | _document = document; 27 | } 28 | 29 | /* 30 | * Utilities 31 | */ 32 | 33 | function trim(string) { 34 | return string.replace(/^[ \r\n\t]+|[ \r\n\t]+$/g, ''); 35 | } 36 | 37 | var blocks = ['address', 'article', 'aside', 'audio', 'blockquote', 'body', 38 | 'canvas', 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 39 | 'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4','h5', 'h6', 40 | 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav', 41 | 'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 42 | 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul' 43 | ]; 44 | 45 | function isBlock(node) { 46 | return blocks.indexOf(node.nodeName.toLowerCase()) !== -1; 47 | } 48 | 49 | var voids = [ 50 | 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 51 | 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' 52 | ]; 53 | 54 | function isVoid(node) { 55 | return voids.indexOf(node.nodeName.toLowerCase()) !== -1; 56 | } 57 | 
58 | /* 59 | * Parsing HTML strings 60 | */ 61 | 62 | function canParseHtml() { 63 | var Parser = _window.DOMParser, canParse = false; 64 | 65 | // Adapted from https://gist.github.com/1129031 66 | // Firefox/Opera/IE throw errors on unsupported types 67 | try { 68 | // WebKit returns null on unsupported types 69 | if (new Parser().parseFromString('', 'text/html')) { 70 | canParse = true; 71 | } 72 | } catch (e) {} 73 | return canParse; 74 | } 75 | 76 | function createHtmlParser() { 77 | var Parser = function () {}; 78 | 79 | Parser.prototype.parseFromString = function (string) { 80 | var newDoc = _document.implementation.createHTMLDocument(''); 81 | 82 | if (string.toLowerCase().indexOf(' -1) { 83 | newDoc.documentElement.innerHTML = string; 84 | } 85 | else { 86 | newDoc.body.innerHTML = string; 87 | } 88 | return newDoc; 89 | }; 90 | return Parser; 91 | } 92 | 93 | var HtmlParser = canParseHtml() ? _window.DOMParser : createHtmlParser(); 94 | 95 | function htmlToDom(string) { 96 | var tree = new HtmlParser().parseFromString(string, 'text/html'); 97 | collapse(tree, isBlock); 98 | return tree; 99 | } 100 | 101 | /* 102 | * Flattens DOM tree into single array 103 | */ 104 | 105 | function bfsOrder(node) { 106 | var inqueue = [node], 107 | outqueue = [], 108 | elem, children, i; 109 | 110 | while (inqueue.length > 0) { 111 | elem = inqueue.shift(); 112 | outqueue.push(elem); 113 | children = elem.childNodes; 114 | for (i = 0 ; i < children.length; i++) { 115 | if (children[i].nodeType === 1) { inqueue.push(children[i]); } 116 | } 117 | } 118 | outqueue.shift(); 119 | return outqueue; 120 | } 121 | 122 | /* 123 | * Contructs a Markdown string of replacement text for a given node 124 | */ 125 | 126 | function getContent(node) { 127 | var text = ''; 128 | for (var i = 0; i < node.childNodes.length; i++) { 129 | if (node.childNodes[i].nodeType === 1) { 130 | text += node.childNodes[i]._replacement; 131 | } 132 | else if (node.childNodes[i].nodeType === 3) { 133 | text 
+= node.childNodes[i].data; 134 | } 135 | else { continue; } 136 | } 137 | return text; 138 | } 139 | 140 | /* 141 | * Returns the HTML string of an element with its contents converted 142 | */ 143 | 144 | function outer(node, content) { 145 | return node.cloneNode(false).outerHTML.replace('><', '>'+ content +'<'); 146 | } 147 | 148 | function canConvert(node, filter) { 149 | if (typeof filter === 'string') { 150 | return filter === node.nodeName.toLowerCase(); 151 | } 152 | if (Array.isArray(filter)) { 153 | return filter.indexOf(node.nodeName.toLowerCase()) !== -1; 154 | } 155 | else if (typeof filter === 'function') { 156 | return filter.call(toMarkdown, node); 157 | } 158 | else { 159 | throw new TypeError('`filter` needs to be a string, array, or function'); 160 | } 161 | } 162 | 163 | function isFlankedByWhitespace(side, node) { 164 | var sibling, regExp, isFlanked; 165 | 166 | if (side === 'left') { 167 | sibling = node.previousSibling; 168 | regExp = / $/; 169 | } 170 | else { 171 | sibling = node.nextSibling; 172 | regExp = /^ /; 173 | } 174 | 175 | if (sibling) { 176 | if (sibling.nodeType === 3) { 177 | isFlanked = regExp.test(sibling.nodeValue); 178 | } 179 | else if(sibling.nodeType === 1 && !isBlock(sibling)) { 180 | isFlanked = regExp.test(sibling.textContent); 181 | } 182 | } 183 | return isFlanked; 184 | } 185 | 186 | function flankingWhitespace(node) { 187 | var leading = '', trailing = ''; 188 | 189 | if (!isBlock(node)) { 190 | var hasLeading = /^[ \r\n\t]/.test(node.innerHTML), 191 | hasTrailing = /[ \r\n\t]$/.test(node.innerHTML); 192 | 193 | if (hasLeading && !isFlankedByWhitespace('left', node)) { 194 | leading = ' '; 195 | } 196 | if (hasTrailing && !isFlankedByWhitespace('right', node)) { 197 | trailing = ' '; 198 | } 199 | } 200 | 201 | return { leading: leading, trailing: trailing }; 202 | } 203 | 204 | /* 205 | * Finds a Markdown converter, gets the replacement, and sets it on 206 | * `_replacement` 207 | */ 208 | 209 | function 
process(node) { 210 | var replacement, content = getContent(node); 211 | 212 | // Remove blank nodes 213 | if (!isVoid(node) && !/A/.test(node.nodeName) && /^\s*$/i.test(content)) { 214 | node._replacement = ''; 215 | return; 216 | } 217 | 218 | for (var i = 0; i < converters.length; i++) { 219 | var converter = converters[i]; 220 | 221 | if (canConvert(node, converter.filter)) { 222 | if (typeof converter.replacement !== 'function') { 223 | throw new TypeError( 224 | '`replacement` needs to be a function that returns a string' 225 | ); 226 | } 227 | 228 | var whitespace = flankingWhitespace(node); 229 | 230 | if (whitespace.leading || whitespace.trailing) { 231 | content = trim(content); 232 | } 233 | replacement = whitespace.leading + 234 | converter.replacement.call(toMarkdown, content, node) + 235 | whitespace.trailing; 236 | break; 237 | } 238 | } 239 | 240 | node._replacement = replacement; 241 | } 242 | 243 | toMarkdown = function (input, options) { 244 | options = options || {}; 245 | 246 | if (typeof input !== 'string') { 247 | throw new TypeError(input + ' is not a string'); 248 | } 249 | 250 | // Escape potential ol triggers 251 | input = input.replace(/(\d+)\. /g, '$1\\. '); 252 | 253 | var clone = htmlToDom(input).body, 254 | nodes = bfsOrder(clone), 255 | output; 256 | 257 | converters = mdConverters.slice(0); 258 | if (options.gfm) { 259 | converters = gfmConverters.concat(converters); 260 | } 261 | 262 | if (options.converters) { 263 | converters = options.converters.concat(converters); 264 | } 265 | 266 | // Process through nodes in reverse (so deepest child elements are first). 
267 | for (var i = nodes.length - 1; i >= 0; i--) { 268 | process(nodes[i]); 269 | } 270 | output = getContent(clone); 271 | 272 | return output.replace(/^[\t\r\n]+|[\t\r\n\s]+$/g, '') 273 | .replace(/\n\s+\n/g, '\n\n') 274 | .replace(/\n{3,}/g, '\n\n'); 275 | }; 276 | 277 | toMarkdown.isBlock = isBlock; 278 | toMarkdown.isVoid = isVoid; 279 | toMarkdown.trim = trim; 280 | toMarkdown.outer = outer; 281 | 282 | module.exports = toMarkdown; 283 | -------------------------------------------------------------------------------- /test/to-markdown-test.js: -------------------------------------------------------------------------------- 1 | /* global QUnit, test, equal, throws, asyncTest, start */ 2 | 3 | 'use strict'; 4 | 5 | if (typeof module !== 'undefined' && module.exports) { 6 | var toMarkdown = require('../index'); 7 | } 8 | 9 | // Test cases are in the format: [html, expectedMarkdown, message]; 10 | function runTestCases(testCases) { 11 | for (var i = 0; i < testCases.length; i++) { 12 | var testCase = testCases[i]; 13 | equal(toMarkdown(testCase[0]), testCase[1], testCase[2]); 14 | } 15 | } 16 | 17 | QUnit.module('Markdown'); 18 | 19 | test('paragraphs', function() { 20 | runTestCases([ 21 | ['

Lorem ipsum

', 'Lorem ipsum', 'p'], 22 | ['

Lorem

ipsum

', 'Lorem\n\nipsum', 'Multiple ps'] 23 | ]); 24 | }); 25 | 26 | test('emphasis', function() { 27 | runTestCases([ 28 | ['Hello world', '**Hello world**', 'b'], 29 | ['Hello world', '**Hello world**', 'strong'], 30 | ['Hello world', '_Hello world_', 'i'], 31 | ['Hello world', '_Hello world_', 'em'], 32 | ['Hello world', '_Hello_ _world_', 'Multiple ems'], 33 | ['Hello

world
', '_Hello_\n\n_world_', 'em with two newlines'], 34 | ['Hello

world', '**Hello**\n\n**world**', 'strong with two newlines'], 35 | ]); 36 | }); 37 | 38 | test('code', function() { 39 | runTestCases([ 40 | ['print()', '`print()`'] 41 | ]); 42 | }); 43 | 44 | test('headings', function() { 45 | runTestCases([ 46 | ['

Hello world

', '# Hello world', 'h1'], 47 | ['

Hello world

', '### Hello world', 'h3'], 48 | ['
Hello world
', '###### Hello world', 'h6'], 49 | ['

Hello world

', '#### _Hello_ world', 'h4 with child'], 50 | ['Hello world', 'Hello world', 'invalid heading'] 51 | ]); 52 | }); 53 | 54 | test('horizontal rules', function() { 55 | runTestCases([ 56 | ['
', '* * *', 'hr'], 57 | ['
', '* * *', 'open/closed hr'] 58 | ]); 59 | }); 60 | 61 | test('line breaks', function() { 62 | runTestCases([ 63 | ['Hello
world', 'Hello \nworld'] 64 | ]); 65 | }); 66 | 67 | test('images', function() { 68 | runTestCases([ 69 | ['', '![](http://example.com/logo.png)', 'img with no alt'], 70 | ['', '![](logo.png)', 'img with relative src'], 71 | ['Example logo', '![Example logo](logo.png)', 'img with alt'], 72 | ['', '', 'img no src'] 73 | ]); 74 | }); 75 | 76 | test('anchors', function() { 77 | runTestCases([ 78 | ['About us', '[About us](http://example.com/about)', 'a'], 79 | ['About us', '[About us](http://example.com/about "About this company")', 'a with title'], 80 | ['About us', 'About us', 'a with no src'], 81 | ['About us', '[About us](http://example.com/about)', 'with a span'] 82 | ]); 83 | }); 84 | 85 | test('pre/code blocks', function() { 86 | runTestCases([ 87 | [ 88 | ['
def hello_world',
 89 |       '  # 42 < 9001',
 90 |       '  "Hello world!"',
 91 |       'end
'].join('\n'), 92 | 93 | [' def hello_world', 94 | ' # 42 < 9001', 95 | ' "Hello world!"', 96 | ' end'].join('\n') 97 | ], 98 | [ 99 | ['
def foo',
100 |       '  # 42 < 9001',
101 |       '  \'Hello world!\'',
102 |       'end
', 103 | '

next:

', 104 | '
def bar',
105 |       '  # 42 < 9001',
106 |       '  \'Hello world!\'',
107 |       'end
'].join('\n'), 108 | 109 | [' def foo', 110 | ' # 42 < 9001', 111 | ' \'Hello world!\'', 112 | ' end', 113 | '', 114 | 'next:', 115 | '', 116 | ' def bar', 117 | ' # 42 < 9001', 118 | ' \'Hello world!\'', 119 | ' end'].join('\n'), 120 | 121 | 'Multiple pre/code blocks' 122 | ], 123 | ['
preformatted
', '
preformatted
', 'Plain pre'] 124 | ]); 125 | }); 126 | 127 | test('lists', function() { 128 | runTestCases([ 129 | ['1986. What a great season.', '1986\\. What a great season.', 'ol triggers are escaped'], 130 | ['
    \n\t
  1. Hello world
  2. \n\t
  3. Foo bar
  4. \n
', '1. Hello world\n2. Foo bar', 'ol'], 131 | ['
    \n\t
  • Hello world
  • \n\t
  • Foo bar
  • \n
', '* Hello world\n* Foo bar', 'ul'], 132 | [ 133 | ['
    ', 134 | '
  • Hello world
  • ', 135 | '
  • Lorem ipsum
  • ', 136 | '
', 137 | '
    ', 138 | '
  • Hello world
  • ', 139 | '
  • Lorem ipsum
  • ', 140 | '
'].join('\n'), 141 | 142 | ['* Hello world', 143 | '* Lorem ipsum', 144 | '', 145 | '* Hello world', 146 | '* Lorem ipsum'].join('\n'), 147 | 148 | 'Multiple uls' 149 | ], 150 | [ 151 | '
  • Hello world

  • Lorem ipsum
', 152 | '* Hello world\n\n* Lorem ipsum', 153 | 'ul with p' 154 | ], 155 | [ 156 | ['
    ', 157 | '
  1. ', 158 | '

    This is a list item with two paragraphs.

    ', 159 | '

    Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

    ', 160 | '
  2. ', 161 | '
  3. ', 162 | '

    Suspendisse id sem consectetuer libero luctus adipiscing.

    ', 163 | '
  4. ', 164 | '
'].join('\n'), 165 | 166 | ['1. This is a list item with two paragraphs.', 167 | '', 168 | ' Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.', 169 | '', 170 | '2. Suspendisse id sem consectetuer libero luctus adipiscing.'].join('\n'), 171 | 172 | 'ol with multiple ps' 173 | ], 174 | [ 175 | ['
    ', 176 | '
  • This is a list item at root level
  • ', 177 | '
  • This is another item at root level
  • ', 178 | '
  • ', 179 | '
      ', 180 | '
    • This is a nested list item
    • ', 181 | '
    • This is another nested list item
    • ', 182 | '
    • ', 183 | '
        ', 184 | '
      • This is a deeply nested list item
      • ', 185 | '
      • This is another deeply nested list item
      • ', 186 | '
      • This is a third deeply nested list item
      • ', 187 | '
      ', 188 | '
    • ', 189 | '
    ', 190 | '
  • ', 191 | '
  • This is a third item at root level
  • ', 192 | '
'].join('\n'), 193 | 194 | ['* This is a list item at root level', 195 | '* This is another item at root level', 196 | '* * This is a nested list item', 197 | ' * This is another nested list item', 198 | ' * * This is a deeply nested list item', 199 | ' * This is another deeply nested list item', 200 | ' * This is a third deeply nested list item', 201 | '* This is a third item at root level'].join('\n'), 202 | 203 | 'Nested uls' 204 | ], 205 | [ 206 | ['
    ', 207 | '
  • This is a list item at root level
  • ', 208 | '
  • This is another item at root level
  • ', 209 | '
  • ', 210 | '
      ', 211 | '
    1. This is a nested list item
    2. ', 212 | '
    3. This is another nested list item
    4. ', 213 | '
    5. ', 214 | '
        ', 215 | '
      • This is a deeply nested list item
      • ', 216 | '
      • This is another deeply nested list item
      • ', 217 | '
      • This is a third deeply nested list item
      • ', 218 | '
      ', 219 | '
    6. ', 220 | '
    ', 221 | '
  • ', 222 | '
  • This is a third item at root level
  • ', 223 | '
'].join('\n'), 224 | 225 | ['* This is a list item at root level', 226 | '* This is another item at root level', 227 | '* 1. This is a nested list item', 228 | ' 2. This is another nested list item', 229 | ' 3. * This is a deeply nested list item', 230 | ' * This is another deeply nested list item', 231 | ' * This is a third deeply nested list item', 232 | '* This is a third item at root level'].join('\n'), 233 | 234 | 'Nested ols' 235 | ], 236 | [ 237 | ['
    ', 238 | '
  • ', 239 | '

    A list item with a blockquote:

    ', 240 | '
    ', 241 | '

    This is a blockquote inside a list item.

    ', 242 | '
    ', 243 | '
  • ', 244 | '
'].join('\n'), 245 | 246 | ['* A list item with a blockquote:', 247 | '', 248 | ' > This is a blockquote inside a list item.'].join('\n'), 249 | 250 | 'ul with blockquote' 251 | ] 252 | ]); 253 | }); 254 | 255 | test('blockquotes', function() { 256 | runTestCases([ 257 | [ 258 | ['
', 259 | '

This is a blockquote with two paragraphs.

', 260 | '', 261 | '

Donec sit amet nisl.

', 262 | '
'].join('\n'), 263 | 264 | ['> This is a blockquote with two paragraphs.', 265 | '> ', 266 | '> Donec sit amet nisl.'].join('\n'), 267 | 268 | 'blockquote with two ps' 269 | ], 270 | [ 271 | ['
', 272 | '

This is the first level of quoting.

', 273 | '', 274 | '
', 275 | '

This is nested blockquote.

', 276 | '
', 277 | '', 278 | '

Back to the first level.

', 279 | '
'].join('\n'), 280 | 281 | ['> This is the first level of quoting.', 282 | '> ', 283 | '> > This is nested blockquote.', 284 | '> ', 285 | '> Back to the first level.'].join('\n'), 286 | 287 | 'Nested blockquotes' 288 | ], 289 | [ 290 | ['
', 291 | '

This is a header.

', 292 | '
    ', 293 | '
  1. This is the first list item.
  2. ', 294 | '
  3. This is the second list item.
  4. ', 295 | '
', 296 | '

Here\'s some example code:

', 297 | '
return 1 < 2 ? shell_exec(\'echo $input | $markdown_script\') : 0;
', 298 | '
'].join('\n'), 299 | 300 | ['> ## This is a header.', 301 | '> ', 302 | '> 1. This is the first list item.', 303 | '> 2. This is the second list item.', 304 | '> ', 305 | '> Here\'s some example code:', 306 | '> ', 307 | '> return 1 < 2 ? shell_exec(\'echo $input | $markdown_script\') : 0;'].join('\n'), 308 | 309 | 'html in blockquote' 310 | ] 311 | ]); 312 | }); 313 | 314 | test('block-level', function () { 315 | runTestCases([ 316 | ['
Hello
world
', '
Hello
\n\n
world
', 'divs separated by \\n\\n'], 317 | ['
hello
', '
_hello_
'] 318 | ]); 319 | }); 320 | 321 | test('comments', function () { 322 | equal(toMarkdown(''), '', 'comments removed'); 323 | }); 324 | 325 | test('leading/trailing whitespace', function() { 326 | runTestCases([ 327 | [ 328 | '

I need more spaces!

', 329 | 'I [need](http://example.com) [more](http://www.example.com) spaces!', 330 | 'Whitespace between inline elements' 331 | ], 332 | ['

\n Header text', '# Header text', 'Leading whitespace in h1'], 333 | [ 334 | ['
    ', 335 | '
  1. Chapter One', 336 | '
      ', 337 | '
    1. Section One
    2. ', 338 | '
    3. Section Two
    4. ', 339 | '
    5. Section Three
    6. ', 340 | '
    ', 341 | '
  2. ', 342 | '
  3. Chapter Two
  4. ', 343 | '
  5. Chapter Three
  6. ', 344 | '
'].join('\n'), 345 | 346 | ['1. Chapter One', 347 | ' 1. Section One', 348 | ' 2. Section Two', 349 | ' 3. Section Three', 350 | '2. Chapter Two', 351 | '3. Chapter Three'].join('\n'), 352 | 353 | 'Trailing whitespace in li' 354 | ], 355 | [ 356 | ['
    ', 357 | '
  • ', // Multilined 358 | ' Foo ', 359 | '
  • ', 360 | '
  • ', // Bizarre formatting 361 | ' Bar
  • ', 362 | '
  • Baz
  • ', 363 | '
', 364 | '
    ', 365 | '
  1. Hello', 366 | ' world', 367 | '
  2. ', 368 | '
'].join('\n'), 369 | 370 | ['* Foo', 371 | '* **Bar**', 372 | '* Baz', 373 | '', 374 | '1. Hello world'].join('\n') 375 | ], 376 | [ 377 | 'Hello world. Foo bar ', 378 | 'Hello world. _Foo_ **bar**', 379 | 'Whitespace in inline elements' 380 | ], 381 | [ 382 | '

Hello world.

', 383 | '# ![](image.png) Hello world.', 384 | 'Whitespace and void elements' 385 | ] 386 | ]); 387 | }); 388 | 389 | test('blank', function () { 390 | runTestCases([ 391 | ['
', '', 'Blank div'], 392 | ['', '', 'Blank em'], 393 | ['
', '', 'Blank strong with br'], 394 | ['', '[](#foo)', 'Blank a'], 395 | ]); 396 | }); 397 | 398 | test('custom converters', function() { 399 | var html, converter, md = '*Hello world*'; 400 | var replacement = function (innerHTML) { 401 | return '*' + innerHTML + '*'; 402 | }; 403 | 404 | html = 'Hello world'; 405 | converter = { 406 | filter: 'span', 407 | replacement: replacement 408 | }; 409 | equal(toMarkdown(html, {converters: [converter]}), md, 'Custom filter string'); 410 | 411 | html = 'Hello world'; 412 | converter = { 413 | filter: ['span'], 414 | replacement: replacement 415 | }; 416 | equal(toMarkdown(html, {converters: [converter]}), md, 'Custom filter array'); 417 | 418 | html = 'Hello world'; 419 | converter = { 420 | filter: function (node) { 421 | return node.tagName === 'SPAN' && /italic/i.test(node.style.fontStyle); 422 | }, 423 | replacement: replacement 424 | }; 425 | equal(toMarkdown(html, {converters: [converter]}), md, 'Custom filter function'); 426 | }); 427 | 428 | test('invalid input', function () { 429 | throws(function () { toMarkdown(null); }, /null is not a string/, 'null input'); 430 | throws(function () { toMarkdown(void(0)); }, /undefined is not a string/, 'undefined input'); 431 | 432 | throws(function () { toMarkdown(null); }, function (e) { 433 | return e.name === 'TypeError'; 434 | }, 'error type'); 435 | }); 436 | 437 | asyncTest('img[onerror]', 1, function () { 438 | start(); 439 | equal(toMarkdown('>\'>">'), '>\'>">![](x)', 'We expect img[onerror] functions not to run'); 440 | }); 441 | -------------------------------------------------------------------------------- /dist/to-markdown.js: -------------------------------------------------------------------------------- 1 | (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else 
if(typeof self!=="undefined"){g=self}else{g=this}g.toMarkdown = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o -1) { 84 | newDoc.documentElement.innerHTML = string; 85 | } 86 | else { 87 | newDoc.body.innerHTML = string; 88 | } 89 | return newDoc; 90 | }; 91 | return Parser; 92 | } 93 | 94 | var HtmlParser = canParseHtml() ? _window.DOMParser : createHtmlParser(); 95 | 96 | function htmlToDom(string) { 97 | var tree = new HtmlParser().parseFromString(string, 'text/html'); 98 | collapse(tree, isBlock); 99 | return tree; 100 | } 101 | 102 | /* 103 | * Flattens DOM tree into single array 104 | */ 105 | 106 | function bfsOrder(node) { 107 | var inqueue = [node], 108 | outqueue = [], 109 | elem, children, i; 110 | 111 | while (inqueue.length > 0) { 112 | elem = inqueue.shift(); 113 | outqueue.push(elem); 114 | children = elem.childNodes; 115 | for (i = 0 ; i < children.length; i++) { 116 | if (children[i].nodeType === 1) { inqueue.push(children[i]); } 117 | } 118 | } 119 | outqueue.shift(); 120 | return outqueue; 121 | } 122 | 123 | /* 124 | * Contructs a Markdown string of replacement text for a given node 125 | */ 126 | 127 | function getContent(node) { 128 | var text = ''; 129 | for (var i = 0; i < node.childNodes.length; i++) { 130 | if (node.childNodes[i].nodeType === 1) { 131 | text += node.childNodes[i]._replacement; 132 | } 133 | else if (node.childNodes[i].nodeType === 3) { 134 | text += node.childNodes[i].data; 135 | } 136 | else { continue; } 137 | } 138 | return text; 139 | } 140 | 141 | /* 142 | * Returns the HTML string of an element with its 
contents converted 143 | */ 144 | 145 | function outer(node, content) { 146 | return node.cloneNode(false).outerHTML.replace('><', '>'+ content +'<'); 147 | } 148 | 149 | function canConvert(node, filter) { 150 | if (typeof filter === 'string') { 151 | return filter === node.nodeName.toLowerCase(); 152 | } 153 | if (Array.isArray(filter)) { 154 | return filter.indexOf(node.nodeName.toLowerCase()) !== -1; 155 | } 156 | else if (typeof filter === 'function') { 157 | return filter.call(toMarkdown, node); 158 | } 159 | else { 160 | throw new TypeError('`filter` needs to be a string, array, or function'); 161 | } 162 | } 163 | 164 | function isFlankedByWhitespace(side, node) { 165 | var sibling, regExp, isFlanked; 166 | 167 | if (side === 'left') { 168 | sibling = node.previousSibling; 169 | regExp = / $/; 170 | } 171 | else { 172 | sibling = node.nextSibling; 173 | regExp = /^ /; 174 | } 175 | 176 | if (sibling) { 177 | if (sibling.nodeType === 3) { 178 | isFlanked = regExp.test(sibling.nodeValue); 179 | } 180 | else if(sibling.nodeType === 1 && !isBlock(sibling)) { 181 | isFlanked = regExp.test(sibling.textContent); 182 | } 183 | } 184 | return isFlanked; 185 | } 186 | 187 | function flankingWhitespace(node) { 188 | var leading = '', trailing = ''; 189 | 190 | if (!isBlock(node)) { 191 | var hasLeading = /^[ \r\n\t]/.test(node.innerHTML), 192 | hasTrailing = /[ \r\n\t]$/.test(node.innerHTML); 193 | 194 | if (hasLeading && !isFlankedByWhitespace('left', node)) { 195 | leading = ' '; 196 | } 197 | if (hasTrailing && !isFlankedByWhitespace('right', node)) { 198 | trailing = ' '; 199 | } 200 | } 201 | 202 | return { leading: leading, trailing: trailing }; 203 | } 204 | 205 | /* 206 | * Finds a Markdown converter, gets the replacement, and sets it on 207 | * `_replacement` 208 | */ 209 | 210 | function process(node) { 211 | var replacement, content = getContent(node); 212 | 213 | // Remove blank nodes 214 | if (!isVoid(node) && !/A/.test(node.nodeName) && 
/^\s*$/i.test(content)) { 215 | node._replacement = ''; 216 | return; 217 | } 218 | 219 | for (var i = 0; i < converters.length; i++) { 220 | var converter = converters[i]; 221 | 222 | if (canConvert(node, converter.filter)) { 223 | if (typeof converter.replacement !== 'function') { 224 | throw new TypeError( 225 | '`replacement` needs to be a function that returns a string' 226 | ); 227 | } 228 | 229 | var whitespace = flankingWhitespace(node); 230 | 231 | if (whitespace.leading || whitespace.trailing) { 232 | content = trim(content); 233 | } 234 | replacement = whitespace.leading + 235 | converter.replacement.call(toMarkdown, content, node) + 236 | whitespace.trailing; 237 | break; 238 | } 239 | } 240 | 241 | node._replacement = replacement; 242 | } 243 | 244 | toMarkdown = function (input, options) { 245 | options = options || {}; 246 | 247 | if (typeof input !== 'string') { 248 | throw new TypeError(input + ' is not a string'); 249 | } 250 | 251 | // Escape potential ol triggers 252 | input = input.replace(/(\d+)\. /g, '$1\\. '); 253 | 254 | var clone = htmlToDom(input).body, 255 | nodes = bfsOrder(clone), 256 | output; 257 | 258 | converters = mdConverters.slice(0); 259 | if (options.gfm) { 260 | converters = gfmConverters.concat(converters); 261 | } 262 | 263 | if (options.converters) { 264 | converters = options.converters.concat(converters); 265 | } 266 | 267 | // Process through nodes in reverse (so deepest child elements are first). 
268 | for (var i = nodes.length - 1; i >= 0; i--) { 269 | process(nodes[i]); 270 | } 271 | output = getContent(clone); 272 | 273 | return output.replace(/^[\t\r\n]+|[\t\r\n\s]+$/g, '') 274 | .replace(/\n\s+\n/g, '\n\n') 275 | .replace(/\n{3,}/g, '\n\n'); 276 | }; 277 | 278 | toMarkdown.isBlock = isBlock; 279 | toMarkdown.isVoid = isVoid; 280 | toMarkdown.trim = trim; 281 | toMarkdown.outer = outer; 282 | 283 | module.exports = toMarkdown; 284 | 285 | },{"./lib/gfm-converters":2,"./lib/md-converters":3,"collapse-whitespace":4,"jsdom":7}],2:[function(require,module,exports){ 286 | 'use strict'; 287 | 288 | function cell(content, node) { 289 | var index = Array.prototype.indexOf.call(node.parentNode.childNodes, node); 290 | var prefix = ' '; 291 | if (index === 0) { prefix = '| '; } 292 | return prefix + content + ' |'; 293 | } 294 | 295 | var highlightRegEx = /highlight highlight-(\S+)/; 296 | 297 | module.exports = [ 298 | { 299 | filter: 'br', 300 | replacement: function () { 301 | return '\n'; 302 | } 303 | }, 304 | { 305 | filter: ['del', 's', 'strike'], 306 | replacement: function (content) { 307 | return '~~' + content + '~~'; 308 | } 309 | }, 310 | 311 | { 312 | filter: function (node) { 313 | return node.type === 'checkbox' && node.parentNode.nodeName === 'LI'; 314 | }, 315 | replacement: function (content, node) { 316 | return (node.checked ? 
'[x]' : '[ ]') + ' '; 317 | } 318 | }, 319 | 320 | { 321 | filter: ['th', 'td'], 322 | replacement: function (content, node) { 323 | return cell(content, node); 324 | } 325 | }, 326 | 327 | { 328 | filter: 'tr', 329 | replacement: function (content, node) { 330 | var borderCells = ''; 331 | var alignMap = { left: ':--', right: '--:', center: ':-:' }; 332 | 333 | if (node.parentNode.nodeName === 'THEAD') { 334 | for (var i = 0; i < node.childNodes.length; i++) { 335 | var align = node.childNodes[i].attributes.align; 336 | var border = '---'; 337 | 338 | if (align) { border = alignMap[align.value] || border; } 339 | 340 | borderCells += cell(border, node.childNodes[i]); 341 | } 342 | } 343 | return '\n' + content + (borderCells ? '\n' + borderCells : ''); 344 | } 345 | }, 346 | 347 | { 348 | filter: 'table', 349 | replacement: function (content) { 350 | return '\n\n' + content + '\n\n'; 351 | } 352 | }, 353 | 354 | { 355 | filter: ['thead', 'tbody', 'tfoot'], 356 | replacement: function (content) { 357 | return content; 358 | } 359 | }, 360 | 361 | // Fenced code blocks 362 | { 363 | filter: function (node) { 364 | return node.nodeName === 'PRE' && 365 | node.firstChild && 366 | node.firstChild.nodeName === 'CODE'; 367 | }, 368 | replacement: function(content, node) { 369 | return '\n\n```\n' + node.firstChild.textContent + '\n```\n\n'; 370 | } 371 | }, 372 | 373 | // Syntax-highlighted code blocks 374 | { 375 | filter: function (node) { 376 | return node.nodeName === 'PRE' && 377 | node.parentNode.nodeName === 'DIV' && 378 | highlightRegEx.test(node.parentNode.className); 379 | }, 380 | replacement: function (content, node) { 381 | var language = node.parentNode.className.match(highlightRegEx)[1]; 382 | return '\n\n```' + language + '\n' + node.textContent + '\n```\n\n'; 383 | } 384 | }, 385 | 386 | { 387 | filter: function (node) { 388 | return node.nodeName === 'DIV' && 389 | highlightRegEx.test(node.className); 390 | }, 391 | replacement: function (content) { 392 
| return '\n\n' + content + '\n\n'; 393 | } 394 | } 395 | ]; 396 | 397 | },{}],3:[function(require,module,exports){ 398 | 'use strict'; 399 | 400 | module.exports = [ 401 | { 402 | filter: 'p', 403 | replacement: function (content) { 404 | return '\n\n' + content + '\n\n'; 405 | } 406 | }, 407 | 408 | { 409 | filter: 'br', 410 | replacement: function () { 411 | return ' \n'; 412 | } 413 | }, 414 | 415 | { 416 | filter: ['h1', 'h2', 'h3', 'h4','h5', 'h6'], 417 | replacement: function(content, node) { 418 | var hLevel = node.nodeName.charAt(1); 419 | var hPrefix = ''; 420 | for(var i = 0; i < hLevel; i++) { 421 | hPrefix += '#'; 422 | } 423 | return '\n\n' + hPrefix + ' ' + content + '\n\n'; 424 | } 425 | }, 426 | 427 | { 428 | filter: 'hr', 429 | replacement: function () { 430 | return '\n\n* * *\n\n'; 431 | } 432 | }, 433 | 434 | { 435 | filter: ['em', 'i'], 436 | replacement: function (content) { 437 | return '_' + content + '_'; 438 | } 439 | }, 440 | 441 | { 442 | filter: ['strong', 'b'], 443 | replacement: function (content) { 444 | return '**' + content + '**'; 445 | } 446 | }, 447 | 448 | // Inline code 449 | { 450 | filter: function (node) { 451 | var hasSiblings = node.previousSibling || node.nextSibling; 452 | var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings; 453 | 454 | return node.nodeName === 'CODE' && !isCodeBlock; 455 | }, 456 | replacement: function(content) { 457 | return '`' + content + '`'; 458 | } 459 | }, 460 | 461 | { 462 | filter: function (node) { 463 | return node.nodeName === 'A' && node.getAttribute('href'); 464 | }, 465 | replacement: function(content, node) { 466 | var titlePart = node.title ? 
' "'+ node.title +'"' : ''; 467 | return '[' + content + '](' + node.getAttribute('href') + titlePart + ')'; 468 | } 469 | }, 470 | 471 | { 472 | filter: 'img', 473 | replacement: function(content, node) { 474 | var alt = node.alt || ''; 475 | var src = node.getAttribute('src') || ''; 476 | var title = node.title || ''; 477 | var titlePart = title ? ' "'+ title +'"' : ''; 478 | return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''; 479 | } 480 | }, 481 | 482 | // Code blocks 483 | { 484 | filter: function (node) { 485 | return node.nodeName === 'PRE' && node.firstChild.nodeName === 'CODE'; 486 | }, 487 | replacement: function(content, node) { 488 | return '\n\n ' + node.firstChild.textContent.replace(/\n/g, '\n ') + '\n\n'; 489 | } 490 | }, 491 | 492 | { 493 | filter: 'blockquote', 494 | replacement: function (content) { 495 | content = this.trim(content); 496 | content = content.replace(/\n{3,}/g, '\n\n'); 497 | content = content.replace(/^/gm, '> '); 498 | return '\n\n' + content + '\n\n'; 499 | } 500 | }, 501 | 502 | { 503 | filter: 'li', 504 | replacement: function (content, node) { 505 | content = content.replace(/^\s+/, '').replace(/\n/gm, '\n '); 506 | var prefix = '* '; 507 | var parent = node.parentNode; 508 | var index = Array.prototype.indexOf.call(parent.children, node) + 1; 509 | 510 | prefix = /ol/i.test(parent.nodeName) ? index + '. 
' : '* '; 511 | return prefix + content; 512 | } 513 | }, 514 | 515 | { 516 | filter: ['ul', 'ol'], 517 | replacement: function (content, node) { 518 | var strings = []; 519 | for (var i = 0; i < node.childNodes.length; i++) { 520 | strings.push(node.childNodes[i]._replacement); 521 | } 522 | 523 | if (/li/i.test(node.parentNode.nodeName)) { 524 | return '\n' + strings.join('\n'); 525 | } 526 | return '\n\n' + strings.join('\n') + '\n\n'; 527 | } 528 | }, 529 | 530 | { 531 | filter: function (node) { 532 | return this.isBlock(node); 533 | }, 534 | replacement: function (content, node) { 535 | return '\n\n' + this.outer(node, content) + '\n\n'; 536 | } 537 | }, 538 | 539 | // Anything else! 540 | { 541 | filter: function () { 542 | return true; 543 | }, 544 | replacement: function (content, node) { 545 | return this.outer(node, content); 546 | } 547 | } 548 | ]; 549 | },{}],4:[function(require,module,exports){ 550 | 'use strict'; 551 | 552 | var voidElements = require('void-elements'); 553 | Object.keys(voidElements).forEach(function (name) { 554 | voidElements[name.toUpperCase()] = 1; 555 | }); 556 | 557 | var blockElements = {}; 558 | require('block-elements').forEach(function (name) { 559 | blockElements[name.toUpperCase()] = 1; 560 | }); 561 | 562 | /** 563 | * isBlockElem(node) determines if the given node is a block element. 564 | * 565 | * @param {Node} node 566 | * @return {Boolean} 567 | */ 568 | function isBlockElem(node) { 569 | return !!(node && blockElements[node.nodeName]); 570 | } 571 | 572 | /** 573 | * isVoid(node) determines if the given node is a void element. 574 | * 575 | * @param {Node} node 576 | * @return {Boolean} 577 | */ 578 | function isVoid(node) { 579 | return !!(node && voidElements[node.nodeName]); 580 | } 581 | 582 | /** 583 | * whitespace(elem [, isBlock]) removes extraneous whitespace from an 584 | * the given element. 
The function isBlock may optionally be passed in 585 | * to determine whether or not an element is a block element; if none 586 | * is provided, defaults to using the list of block elements provided 587 | * by the `block-elements` module. 588 | * 589 | * @param {Node} elem 590 | * @param {Function} blockTest 591 | */ 592 | function collapseWhitespace(elem, isBlock) { 593 | if (!elem.firstChild || elem.nodeName === 'PRE') return; 594 | 595 | if (typeof isBlock !== 'function') { 596 | isBlock = isBlockElem; 597 | } 598 | 599 | var prevText = null; 600 | var prevVoid = false; 601 | 602 | var prev = null; 603 | var node = next(prev, elem); 604 | 605 | while (node !== elem) { 606 | if (node.nodeType === 3) { 607 | // Node.TEXT_NODE 608 | var text = node.data.replace(/[ \r\n\t]+/g, ' '); 609 | 610 | if ((!prevText || / $/.test(prevText.data)) && !prevVoid && text[0] === ' ') { 611 | text = text.substr(1); 612 | } 613 | 614 | // `text` might be empty at this point. 615 | if (!text) { 616 | node = remove(node); 617 | continue; 618 | } 619 | 620 | node.data = text; 621 | prevText = node; 622 | } else if (node.nodeType === 1) { 623 | // Node.ELEMENT_NODE 624 | if (isBlock(node) || node.nodeName === 'BR') { 625 | if (prevText) { 626 | prevText.data = prevText.data.replace(/ $/, ''); 627 | } 628 | 629 | prevText = null; 630 | prevVoid = false; 631 | } else if (isVoid(node)) { 632 | // Avoid trimming space around non-block, non-BR void elements. 633 | prevText = null; 634 | prevVoid = true; 635 | } 636 | } else { 637 | node = remove(node); 638 | continue; 639 | } 640 | 641 | var nextNode = next(prev, node); 642 | prev = node; 643 | node = nextNode; 644 | } 645 | 646 | if (prevText) { 647 | prevText.data = prevText.data.replace(/ $/, ''); 648 | if (!prevText.data) { 649 | remove(prevText); 650 | } 651 | } 652 | } 653 | 654 | /** 655 | * remove(node) removes the given node from the DOM and returns the 656 | * next node in the sequence. 
657 | * 658 | * @param {Node} node 659 | * @return {Node} node 660 | */ 661 | function remove(node) { 662 | var next = node.nextSibling || node.parentNode; 663 | 664 | node.parentNode.removeChild(node); 665 | 666 | return next; 667 | } 668 | 669 | /** 670 | * next(prev, current) returns the next node in the sequence, given the 671 | * current and previous nodes. 672 | * 673 | * @param {Node} prev 674 | * @param {Node} current 675 | * @return {Node} 676 | */ 677 | function next(prev, current) { 678 | if (prev && prev.parentNode === current || current.nodeName === 'PRE') { 679 | return current.nextSibling || current.parentNode; 680 | } 681 | 682 | return current.firstChild || current.nextSibling || current.parentNode; 683 | } 684 | 685 | module.exports = collapseWhitespace; 686 | 687 | },{"block-elements":5,"void-elements":6}],5:[function(require,module,exports){ 688 | /** 689 | * This file automatically generated from `build.js`. 690 | * Do not manually edit. 691 | */ 692 | 693 | module.exports = [ 694 | "address", 695 | "article", 696 | "aside", 697 | "audio", 698 | "blockquote", 699 | "canvas", 700 | "dd", 701 | "div", 702 | "dl", 703 | "fieldset", 704 | "figcaption", 705 | "figure", 706 | "footer", 707 | "form", 708 | "h1", 709 | "h2", 710 | "h3", 711 | "h4", 712 | "h5", 713 | "h6", 714 | "header", 715 | "hgroup", 716 | "hr", 717 | "main", 718 | "nav", 719 | "noscript", 720 | "ol", 721 | "output", 722 | "p", 723 | "pre", 724 | "section", 725 | "table", 726 | "tfoot", 727 | "ul", 728 | "video" 729 | ]; 730 | 731 | },{}],6:[function(require,module,exports){ 732 | /** 733 | * This file automatically generated from `pre-publish.js`. 734 | * Do not manually edit. 
735 | */ 736 | 737 | module.exports = { 738 | "area": true, 739 | "base": true, 740 | "br": true, 741 | "col": true, 742 | "embed": true, 743 | "hr": true, 744 | "img": true, 745 | "input": true, 746 | "keygen": true, 747 | "link": true, 748 | "menuitem": true, 749 | "meta": true, 750 | "param": true, 751 | "source": true, 752 | "track": true, 753 | "wbr": true 754 | }; 755 | 756 | },{}],7:[function(require,module,exports){ 757 | 758 | },{}]},{},[1])(1) 759 | }); --------------------------------------------------------------------------------