├── publish.sh ├── .travis.yml ├── .gitignore ├── .tm_properties ├── config ├── rollup.config.umd.js ├── rollup.config.cjs.js ├── rollup.config.es.js ├── rollup.config.iife.js ├── rollup.config.browser.cjs.js ├── rollup.config.browser.es.js ├── rollup.config.browser.umd.js └── rollup.config.js ├── src ├── root-node.js ├── utilities.js ├── node.js ├── html-parser.js ├── rules.js ├── collapse-whitespace.js ├── turndown.js └── commonmark-rules.js ├── LICENSE ├── package.json ├── test ├── turndown-test.js └── index.html ├── index.html └── README.md /publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | npm version patch 3 | npm publish -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "node" 4 | - "6" 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | lib 3 | node_modules 4 | npm-debug.log 5 | test/*browser.js 6 | -------------------------------------------------------------------------------- /.tm_properties: -------------------------------------------------------------------------------- 1 | [test/index.html] 2 | scopeAttributes = attr.keep-whitespace 3 | -------------------------------------------------------------------------------- /config/rollup.config.umd.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.umd.js', 6 | format: 'umd' 7 | } 8 | }) 9 | -------------------------------------------------------------------------------- /config/rollup.config.cjs.js: -------------------------------------------------------------------------------- 1 | import config from 
'./rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.cjs.js', 6 | format: 'cjs' 7 | }, 8 | browser: false 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.es.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.es.js', 6 | format: 'es' 7 | }, 8 | browser: false 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.iife.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'dist/turndown.js', 6 | format: 'iife' 7 | }, 8 | browser: true 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.browser.cjs.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.browser.cjs.js', 6 | format: 'cjs' 7 | }, 8 | browser: true 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.browser.es.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.browser.es.js', 6 | format: 'es' 7 | }, 8 | browser: true 9 | }) 10 | -------------------------------------------------------------------------------- /config/rollup.config.browser.umd.js: -------------------------------------------------------------------------------- 1 | import config from './rollup.config' 2 | 3 | export default config({ 4 | output: { 5 | file: 'lib/turndown.browser.umd.js', 6 | format: 'umd' 7 | }, 
/**
 * Builds the Rollup configuration shared by every bundle variant.
 *
 * @param {Object} config Variant settings; `output` is passed through as-is
 *   and `browser` is coerced to a boolean that replaces `process.browser`
 * @returns {Object} A Rollup configuration object
 */
export default function (config) {
  var output = config.output
  var plugins = [
    commonjs(),
    // The flag is substituted textually into the sources at build time.
    replace({ 'process.browser': JSON.stringify(Boolean(config.browser)) }),
    resolve()
  ]

  return {
    input: 'src/turndown.js',
    name: 'TurndownService',
    output: output,
    external: ['jsdom'],
    plugins: plugins
  }
}
/**
 * Produces the root DOM node that the converter walks.
 *
 * A string is parsed as HTML inside a wrapper element; any other input is
 * deep-cloned, so whitespace collapsing operates on a copy. Whitespace is
 * collapsed in place on the resulting node before it is returned.
 *
 * @param {String|Node} input HTML source or a DOM node
 * @returns {Node} The whitespace-collapsed root node
 */
export default function RootNode (input) {
  var root
  if (typeof input === 'string') {
    var doc = htmlParser().parseFromString(
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      '<x-turndown id="turndown-root">' + input + '</x-turndown>',
      'text/html'
    )
    // The id must match the wrapper above; without the wrapper this lookup
    // returns null and collapseWhitespace would dereference it.
    root = doc.getElementById('turndown-root')
  } else {
    root = input.cloneNode(true)
  }
  collapseWhitespace({
    element: root,
    isBlock: isBlock,
    isVoid: isVoid
  })

  return root
}

// Lazily created parser instance, reused by every RootNode call.
var _htmlParser
function htmlParser () {
  _htmlParser = _htmlParser || new HTMLParser()
  return _htmlParser
}
/**
 * Copies the own enumerable properties of every source onto `destination`.
 * Later sources win. Inherited properties are skipped.
 *
 * @param {Object} destination The object to copy onto (mutated)
 * @param {...Object} sources Objects to copy from; null/undefined are ignored
 * @returns {Object} `destination`
 */
export function extend (destination) {
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i]
    for (var key in source) {
      // Go through Object.prototype so that sources without a prototype, or
      // with their own `hasOwnProperty` key, are still handled.
      if (Object.prototype.hasOwnProperty.call(source, key)) {
        destination[key] = source[key]
      }
    }
  }
  return destination
}

/**
 * Repeats a string.
 *
 * @param {String} character The string to repeat
 * @param {Number} count How many times to repeat it
 * @returns {String} `character` repeated `count` times
 */
export function repeat (character, count) {
  return Array(count + 1).join(character)
}

// Lower-case tag names treated as block-level elements.
export var blockElements = [
  'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
  'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
  'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
  'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
]

/**
 * @param {Node} node Any object exposing `nodeName`
 * @returns {Boolean} Whether the node's tag is listed in `blockElements`
 */
export function isBlock (node) {
  return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
}

// Lower-case tag names of void elements (elements without content).
export var voidElements = [
  'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
  'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
]

/**
 * @param {Node} node Any object exposing `nodeName`
 * @returns {Boolean} Whether the node's tag is listed in `voidElements`
 */
export function isVoid (node) {
  return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1
}

// Comma-separated selector that matches any void element.
var voidSelector = voidElements.join()

/**
 * Looks for a void element among the node's descendants.
 *
 * @param {Node} node The node to search
 * @returns The first matching descendant, or a falsy value when there is none
 *   or the node has no `querySelector` method
 */
export function hasVoid (node) {
  return node.querySelector && node.querySelector(voidSelector)
}
/**
 * Decorates a DOM node with the flags the converter relies on and returns
 * the same node: `isBlock`, `isCode`, `isBlank` and `flankingWhitespace`.
 *
 * @param {Node} node The DOM node to decorate (mutated)
 * @returns {Node} The decorated node
 */
export default function Node (node) {
  node.isBlock = isBlock(node)
  var isCodeElement = node.nodeName.toLowerCase() === 'code'
  // Code-ness is inherited from the (already decorated) parent.
  node.isCode = isCodeElement || node.parentNode.isCode
  node.isBlank = isBlank(node)
  node.flankingWhitespace = flankingWhitespace(node)
  return node
}

/**
 * A node is blank when it holds only whitespace, is not a link or table
 * cell, and neither is nor contains a void element.
 */
function isBlank (node) {
  if (['A', 'TH', 'TD'].indexOf(node.nodeName) !== -1) return false
  if (!/^\s*$/i.test(node.textContent)) return false
  if (isVoid(node)) return false
  return !hasVoid(node)
}

/**
 * Works out which single spaces must be re-added around an inline node whose
 * own leading/trailing whitespace is not already provided by a sibling.
 *
 * @returns {Object} `{ leading, trailing }`, each either '' or ' '
 */
function flankingWhitespace (node) {
  var flanking = { leading: '', trailing: '' }
  if (node.isBlock) return flanking

  var text = node.textContent
  if (/^[ \r\n\t]/.test(text) && !isFlankedByWhitespace('left', node)) {
    flanking.leading = ' '
  }
  if (/[ \r\n\t]$/.test(text) && !isFlankedByWhitespace('right', node)) {
    flanking.trailing = ' '
  }
  return flanking
}

/**
 * Checks whether the sibling on the given side already supplies a space at
 * the shared edge. Yields undefined when there is no usable sibling.
 *
 * @param {String} side 'left' for the previous sibling, anything else for the next
 * @param {Node} node The node whose sibling is inspected
 */
function isFlankedByWhitespace (side, node) {
  var onLeft = side === 'left'
  var sibling = onLeft ? node.previousSibling : node.nextSibling
  var edgeSpace = onLeft ? / $/ : /^ /

  if (!sibling) return undefined
  if (sibling.nodeType === 3) return edgeSpace.test(sibling.nodeValue)
  if (sibling.nodeType === 1 && !isBlock(sibling)) {
    return edgeSpace.test(sibling.textContent)
  }
  return undefined
}
"devDependencies": { 18 | "browserify": "^14.5.0", 19 | "rollup": "^0.50.0", 20 | "rollup-plugin-commonjs": "^8.2.6", 21 | "rollup-plugin-node-resolve": "^3.0.0", 22 | "rollup-plugin-replace": "^2.0.0", 23 | "standard": "^10.0.3", 24 | "turndown-attendant": "0.0.2" 25 | }, 26 | "files": [ 27 | "lib", 28 | "dist" 29 | ], 30 | "keywords": [ 31 | "converter", 32 | "html", 33 | "markdown" 34 | ], 35 | "license": "MIT", 36 | "repository": { 37 | "type": "git", 38 | "url": "https://github.com/laurent22/joplin-turndown.git" 39 | }, 40 | "scripts": { 41 | "build": "npm run build-cjs && npm run build-es && npm run build-umd && npm run build-iife", 42 | "build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js", 43 | "build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js", 44 | "build-umd": "rollup -c config/rollup.config.umd.js && rollup -c config/rollup.config.browser.umd.js", 45 | "build-iife": "rollup -c config/rollup.config.iife.js", 46 | "build-test": "browserify test/turndown-test.js --outfile test/turndown-test.browser.js", 47 | "prepublish": "npm run build", 48 | "test": "npm run build && npm run build-test && standard ./src/**/*.js && node test/turndown-test.js" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/html-parser.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Set up window for Node.js 3 | */ 4 | 5 | var root = (typeof window !== 'undefined' ? 
/*
 * Set up window for Node.js
 */

var root = typeof window === 'undefined' ? {} : window

/*
 * Parsing HTML strings
 */

/**
 * Probes whether the environment's DOMParser can parse 'text/html'.
 * Adapted from https://gist.github.com/1129031
 */
function canParseHTMLNatively () {
  var Parser = root.DOMParser

  try {
    // WebKit returns null on unsupported types
    return Boolean(new Parser().parseFromString('', 'text/html'))
  } catch (e) {
    // Firefox/Opera/IE throw errors on unsupported types; treating any
    // failure as "cannot parse natively" is deliberate.
    return false
  }
}

// Feeds markup into a freshly created document and hands the document back.
function writeDocument (doc, markup) {
  doc.open()
  doc.write(markup)
  doc.close()
  return doc
}

/**
 * Builds a minimal parser class exposing `parseFromString`, used when no
 * native HTML-capable DOMParser is available.
 */
function createHTMLParser () {
  function Parser () {}

  // `process.browser` is substituted at build time by the bundler config.
  if (process.browser) {
    if (shouldUseActiveX()) {
      Parser.prototype.parseFromString = function (string) {
        var activeDoc = new window.ActiveXObject('htmlfile')
        activeDoc.designMode = 'on' // disable on-page scripts
        return writeDocument(activeDoc, string)
      }
    } else {
      Parser.prototype.parseFromString = function (string) {
        return writeDocument(document.implementation.createHTMLDocument(''), string)
      }
    }
  } else {
    var JSDOM = require('jsdom').JSDOM
    Parser.prototype.parseFromString = function (string) {
      return new JSDOM(string).window.document
    }
  }
  return Parser
}

/**
 * True only when opening a freshly created HTML document throws and an
 * ActiveXObject constructor is available to fall back on.
 */
function shouldUseActiveX () {
  try {
    document.implementation.createHTMLDocument('').open()
  } catch (e) {
    return Boolean(window.ActiveXObject)
  }
  return false
}

export default canParseHTMLNatively() ? root.DOMParser : createHTMLParser()
/**
 * Manages a collection of rules used to convert HTML to Markdown
 */

export default function Rules (options) {
  this.options = options
  this._keep = []
  this._remove = []

  this.blankRule = { replacement: options.blankReplacement }
  this.keepReplacement = options.keepReplacement
  this.defaultRule = { replacement: options.defaultReplacement }

  // Flatten the keyed rule map into an ordered list.
  var list = []
  for (var name in options.rules) list.push(options.rules[name])
  this.array = list
}

Rules.prototype = {
  // Newly added rules take precedence over existing ones; the key is unused.
  add: function (key, rule) {
    this.array.unshift(rule)
  },

  // Registers a filter whose matches are rendered with `keepReplacement`.
  keep: function (filter) {
    var keepRule = { filter: filter, replacement: this.keepReplacement }
    this._keep.unshift(keepRule)
  },

  // Registers a filter whose matches are rendered as the empty string.
  remove: function (filter) {
    var removeRule = {
      filter: filter,
      replacement: function () {
        return ''
      }
    }
    this._remove.unshift(removeRule)
  },

  // Picks the rule for a node: blank rule first, then regular rules, keep
  // rules, remove rules, and finally the default rule.
  forNode: function (node) {
    if (node.isBlank) return this.blankRule

    var tiers = [this.array, this._keep, this._remove]
    for (var t = 0; t < tiers.length; t++) {
      var match = findRule(tiers[t], node, this.options)
      if (match) return match
    }
    return this.defaultRule
  },

  // Calls `fn(rule, index)` for every regular rule, in precedence order.
  forEach: function (fn) {
    var index = 0
    while (index < this.array.length) {
      fn(this.array[index], index)
      index++
    }
  }
}

// Returns the first rule whose filter accepts the node, or undefined.
function findRule (rules, node, options) {
  var position = 0
  while (position < rules.length) {
    var candidate = rules[position]
    if (filterValue(candidate, node, options)) return candidate
    position++
  }
  return undefined
}

// Evaluates a rule's filter (tag name, list of tag names, or predicate)
// against the node. Yields true on a match and undefined otherwise.
function filterValue (rule, node, options) {
  var filter = rule.filter
  var matched

  if (typeof filter === 'string') {
    matched = filter === node.nodeName.toLowerCase()
  } else if (Array.isArray(filter)) {
    matched = filter.indexOf(node.nodeName.toLowerCase()) > -1
  } else if (typeof filter === 'function') {
    matched = filter.call(rule, node, options)
  } else {
    throw new TypeError('`filter` needs to be a string, array, or function')
  }

  return matched ? true : undefined
}
/**
 * The collapseWhitespace function is adapted from collapse-whitespace
 * by Luc Thevenard.
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2014 Luc Thevenard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * containsOnlySpaces(text) reports whether the text is non-empty and made up
 * solely of space characters.
 *
 * @param {String} text
 * @return {Boolean}
 */
function containsOnlySpaces (text) {
  if (!text) return false
  for (var i = 0; i < text.length; i++) {
    if (text[i] !== ' ') return false
  }
  return true
}

/**
 * collapseWhitespace(options) removes extraneous whitespace from the given
 * element, editing its text nodes in place.
 *
 * @param {Object} options
 * @param {Node} options.element The element to process
 * @param {Function} options.isBlock Predicate for block-level nodes
 * @param {Function} options.isVoid Predicate for void nodes
 * @param {Function} [options.isPre] Predicate for preformatted nodes, which
 *   are left untouched; defaults to matching nodes named 'PRE'
 */
function collapseWhitespace (options) {
  var element = options.element
  var isBlock = options.isBlock
  var isVoid = options.isVoid
  var isPre = options.isPre || function (node) {
    return node.nodeName === 'PRE'
  }

  if (!element.firstChild || isPre(element)) return

  var prevText = null
  var prevVoid = false

  var prev = null
  var node = next(prev, element, isPre)

  // We keep track of whether the previous text was only spaces or not. This
  // prevents the case where multiple empty blocks are added, which results in
  // multiple spaces. These spaces are then incorrectly interpreted as a code
  // block by renderers. So by keeping track of this, we make sure that only
  // one space at most is added.
  var prevTextIsOnlySpaces = false

  while (node !== element) {
    if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
      var text = node.data.replace(/[ \r\n\t]+/g, ' ')

      // Drop a leading space when the preceding text already ends in one (or
      // there is no preceding text), unless a void element sits in between.
      if ((!prevText || / $/.test(prevText.data)) &&
          !prevVoid && text[0] === ' ') {
        text = text.slice(1)
      }

      var textIsOnlySpaces = containsOnlySpaces(text)

      // `text` might be empty at this point.
      if (!text || (textIsOnlySpaces && prevTextIsOnlySpaces)) {
        node = remove(node)
        continue
      }

      prevTextIsOnlySpaces = textIsOnlySpaces
      node.data = text

      prevText = node
    } else if (node.nodeType === 1) { // Node.ELEMENT_NODE
      if (isBlock(node) || node.nodeName === 'BR') {
        if (prevText) {
          prevText.data = prevText.data.replace(/ $/, '')
        }

        prevText = null
        prevVoid = false
      } else if (isVoid(node)) {
        // Avoid trimming space around non-block, non-BR void elements.
        prevText = null
        prevVoid = true
      }
    } else {
      // Any other node type (comments, for instance) is dropped.
      node = remove(node)
      continue
    }

    var nextNode = next(prev, node, isPre)
    prev = node
    node = nextNode
  }

  // Trim the trailing space of the last text node, removing it if now empty.
  if (prevText) {
    prevText.data = prevText.data.replace(/ $/, '')
    if (!prevText.data) {
      remove(prevText)
    }
  }
}

/**
 * remove(node) removes the given node from the DOM and returns the
 * next node in the sequence.
 *
 * @param {Node} node
 * @return {Node} node
 */
function remove (node) {
  var following = node.nextSibling || node.parentNode

  node.parentNode.removeChild(node)

  return following
}

/**
 * next(prev, current, isPre) returns the next node in the sequence, given the
 * current and previous nodes.
 *
 * @param {Node} prev
 * @param {Node} current
 * @param {Function} isPre
 * @return {Node}
 */
function next (prev, current, isPre) {
  // Coming back up from a child, or standing on preformatted content: do not
  // descend, move sideways or up instead.
  if ((prev && prev.parentNode === current) || isPre(current)) {
    return current.nextSibling || current.parentNode
  }

  return current.firstChild || current.nextSibling || current.parentNode
}

export default collapseWhitespace
TurndownService() 51 | var rule = { 52 | filter: ['del', 's', 'strike'], 53 | replacement: function (content) { 54 | return '~~' + content + '~~' 55 | } 56 | } 57 | // Assert rules#add is called 58 | turndownService.rules.add = function (key, r) { 59 | t.equal(key, 'strikethrough') 60 | t.equal(rule, r) 61 | } 62 | turndownService.addRule('strikethrough', rule) 63 | }) 64 | 65 | test('#use returns the instance for chaining', function (t) { 66 | t.plan(1) 67 | var turndownService = new TurndownService() 68 | t.equal(turndownService.use(function plugin () {}), turndownService) 69 | }) 70 | 71 | test('#use with a single plugin calls the fn with instance', function (t) { 72 | t.plan(1) 73 | var turndownService = new TurndownService() 74 | function plugin (service) { 75 | t.equal(service, turndownService) 76 | } 77 | turndownService.use(plugin) 78 | }) 79 | 80 | test('#use with multiple plugins calls each fn with instance', function (t) { 81 | t.plan(2) 82 | var turndownService = new TurndownService() 83 | function plugin1 (service) { 84 | t.equal(service, turndownService) 85 | } 86 | function plugin2 (service) { 87 | t.equal(service, turndownService) 88 | } 89 | turndownService.use([plugin1, plugin2]) 90 | }) 91 | 92 | test('#keep keeps elements as HTML', function (t) { 93 | t.plan(2) 94 | var turndownService = new TurndownService() 95 | var input = '

Hello worldWorld

' 96 | 97 | // Without `.keep(['del', 'ins'])` 98 | t.equal(turndownService.turndown(input), 'Hello worldWorld') 99 | 100 | // With `.keep(['del', 'ins'])` 101 | turndownService.keep(['del', 'ins']) 102 | t.equal( 103 | turndownService.turndown('

Hello worldWorld

'), 104 | 'Hello worldWorld' 105 | ) 106 | }) 107 | 108 | test('#keep returns the TurndownService instance for chaining', function (t) { 109 | t.plan(1) 110 | var turndownService = new TurndownService() 111 | t.equal(turndownService.keep(['del', 'ins']), turndownService) 112 | }) 113 | 114 | test('keep rules are overridden by the standard rules', function (t) { 115 | t.plan(1) 116 | var turndownService = new TurndownService() 117 | turndownService.keep('p') 118 | t.equal(turndownService.turndown('

Hello world

'), 'Hello world') 119 | }) 120 | 121 | test('keepReplacement can be customised', function (t) { 122 | t.plan(1) 123 | var turndownService = new TurndownService({ 124 | keepReplacement: function (content, node) { 125 | return '\n\n' + node.outerHTML + '\n\n' 126 | } 127 | }) 128 | turndownService.keep(['del', 'ins']) 129 | t.equal(turndownService.turndown( 130 | '

Hello worldWorld

'), 131 | 'Hello \n\nworld\n\nWorld' 132 | ) 133 | }) 134 | 135 | test('#remove removes elements', function (t) { 136 | t.plan(2) 137 | var turndownService = new TurndownService() 138 | var input = 'Please redact me' 139 | 140 | // Without `.remove('del')` 141 | t.equal(turndownService.turndown(input), 'Please redact me') 142 | 143 | // With `.remove('del')` 144 | turndownService.remove('del') 145 | t.equal(turndownService.turndown(input), '') 146 | }) 147 | 148 | test('#remove returns the TurndownService instance for chaining', function (t) { 149 | t.plan(1) 150 | var turndownService = new TurndownService() 151 | t.equal(turndownService.remove(['del', 'ins']), turndownService) 152 | }) 153 | 154 | test('remove elements are overridden by rules', function (t) { 155 | t.plan(1) 156 | var turndownService = new TurndownService() 157 | turndownService.remove('p') 158 | t.equal(turndownService.turndown('

Hello world

'), 'Hello world') 159 | }) 160 | 161 | test('remove elements are overridden by keep', function (t) { 162 | t.plan(1) 163 | var turndownService = new TurndownService() 164 | turndownService.keep(['del', 'ins']) 165 | turndownService.remove(['del', 'ins']) 166 | t.equal(turndownService.turndown( 167 | '

Hello worldWorld

'), 168 | 'Hello worldWorld' 169 | ) 170 | }) 171 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Turndown Demo 6 | 7 | 126 | 127 | 128 | 129 |
130 |

turndown

131 | Source on GitHub 132 |
133 |
134 |
135 |

HTML

136 | 163 |
164 |
165 |

Markdown

166 | 167 |
168 |
169 | 170 |
171 |
172 |
173 | 174 | 178 |
179 | 180 |
181 | 182 | 187 |
188 | 189 |
190 | 191 | 196 |
197 | 198 |
199 | 200 | 204 |
205 | 206 |
207 | 208 | 212 |
213 | 214 |
215 | 216 | 220 |
221 | 222 |
223 | 224 | 228 |
229 | 230 |
231 | 232 | 236 |
237 | 238 |
239 | 240 | 245 |
246 |
247 |
/**
 * Creates a converter. May be called with or without `new`.
 * @public
 * @param {Object} options Overrides for the default conversion options
 */

export default function TurndownService (options) {
  if (!(this instanceof TurndownService)) return new TurndownService(options)

  // Rendering of nodes that have no content
  function blankReplacement (content, node) {
    if (node.isBlock) return '\n\n'
    return ''
  }

  // Rendering of nodes kept as raw HTML
  function keepReplacement (content, node) {
    if (node.isBlock) return '\n\n' + node.outerHTML + '\n\n'
    return node.outerHTML
  }

  // Rendering of nodes that no rule matched
  function defaultReplacement (content, node) {
    if (node.isBlock) return '\n\n' + content + '\n\n'
    return content
  }

  var defaults = {
    rules: COMMONMARK_RULES,
    headingStyle: 'setext',
    hr: '* * *',
    bulletListMarker: '*',
    codeBlockStyle: 'indented',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full',
    anchorNames: [],
    br: ' ',
    blankReplacement: blankReplacement,
    keepReplacement: keepReplacement,
    defaultReplacement: defaultReplacement
  }

  this.options = extend({}, defaults, options)
  this.rules = new Rules(this.options)
}

TurndownService.prototype = {
  /**
   * The entry point for converting a string or DOM node to Markdown
   * @public
   * @param {String|HTMLElement} input The string or DOM node to convert
   * @returns A Markdown representation of the input
   * @type String
   */

  turndown: function (input) {
    if (canConvert(input)) {
      if (input === '') return ''

      var rootNode = new RootNode(input)
      return postProcess.call(this, process.call(this, rootNode))
    }

    throw new TypeError(
      input + ' is not a string, or an element/document/fragment node.'
    )
  },

  /**
   * Add one or more plugins
   * @public
   * @param {Function|Array} plugin The plugin or array of plugins to add
   * @returns The Turndown instance for chaining
   * @type Object
   */

  use: function (plugin) {
    if (typeof plugin === 'function' && !Array.isArray(plugin)) {
      plugin(this)
      return this
    }
    if (!Array.isArray(plugin)) {
      throw new TypeError('plugin must be a Function or an Array of Functions')
    }

    var index = 0
    while (index < plugin.length) {
      this.use(plugin[index])
      index++
    }
    return this
  },

  /**
   * Adds a rule
   * @public
   * @param {String} key The unique key of the rule
   * @param {Object} rule The rule
   * @returns The Turndown instance for chaining
   * @type Object
   */

  addRule: function (key, rule) {
    this.rules.add(key, rule)
    return this
  },

  /**
   * Keep a node (as HTML) that matches the filter
   * @public
   * @param {String|Array|Function} filter Tag name(s) or predicate selecting the nodes
   * @returns The Turndown instance for chaining
   * @type Object
   */

  keep: function (filter) {
    this.rules.keep(filter)
    return this
  },

  /**
   * Remove a node that matches the filter
   * @public
   * @param {String|Array|Function} filter Tag name(s) or predicate selecting the nodes
   * @returns The Turndown instance for chaining
   * @type Object
   */

  remove: function (filter) {
    this.rules.remove(filter)
    return this
  },

  /**
   * Escapes Markdown syntax
   * @public
   * @param {String} string The string to escape
   * @returns A string with Markdown syntax escaped
   * @type String
   */

  escape: function (string) {
    // Prefixes every occurrence of the matched marker family with a backslash
    function escapeStars (match) {
      return match.replace(/\*/g, '\\*')
    }
    function escapeUnderscores (match) {
      return match.replace(/_/g, '\\_')
    }
    function escapeBackticks (match) {
      return match.replace(/`/g, '\\`')
    }

    // Escape backslash escapes!
    var escaped = string.replace(/\\(\S)/g, '\\\\$1')

    // Escape headings
    escaped = escaped.replace(/^(#{1,6} )/gm, '\\$1')

    // Escape hr
    escaped = escaped.replace(/^([-*_] *){3,}$/gm, function (match, character) {
      return match.split(character).join('\\' + character)
    })

    // Escape ol bullet points
    escaped = escaped.replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ')

    // Escape ul bullet points
    escaped = escaped.replace(/^([^\\\w]*)[*+-] /gm, function (match) {
      return match.replace(/([*+-])/g, '\\$1')
    })

    // Escape blockquote indents
    escaped = escaped.replace(/^(\W* {0,3})> /gm, '$1\\> ')

    // Escape em/strong *
    escaped = escaped.replace(/\*+(?![*\s\W]).+?\*+/g, escapeStars)

    // Escape em/strong _
    escaped = escaped.replace(/_+(?![_\s\W]).+?_+/g, escapeUnderscores)

    // Escape code `
    escaped = escaped.replace(/`+(?![`\s\W]).+?`+/g, escapeBackticks)

    // Escape link brackets
    return escaped.replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape
  }
}
/**
 * Reduces a DOM node down to its Markdown string equivalent
 * @private
 * @param {HTMLElement} parentNode The node to convert
 * @param {*} escapeContent Pass `false` to emit text nodes verbatim; any
 *   other value (default 'auto') escapes text outside of code
 * @returns A Markdown representation of the node
 * @type String
 */

function process (parentNode, escapeContent = 'auto') {
  var self = this
  return reduce.call(parentNode.childNodes, function (output, node) {
    node = new Node(node)

    var replacement = ''
    if (node.nodeType === 3) {
      if (node.isCode || escapeContent === false) {
        replacement = node.nodeValue
      } else {
        replacement = self.escape(node.nodeValue)

        // Escape < and > so that, for example, this kind of HTML text:
        // "This is a tag: &lt;p&gt;" is still rendered as
        // "This is a tag: &lt;p&gt;" and not "This is a tag: <p>". If the
        // latter, it means the HTML will be rendered if the viewer supports
        // HTML (which, in Joplin, it does).
        replacement = replacement.replace(/<(.+?)>/g, '&lt;$1&gt;')
      }
    } else if (node.nodeType === 1) {
      replacement = replacementForNode.call(self, node)
    }
    // Other node types contribute nothing.

    return join(output, replacement)
  }, '')
}

/**
 * Appends strings as each rule requires and trims the output
 * @private
 * @param {String} output The conversion output
 * @returns A trimmed version of the output
 * @type String
 */

function postProcess (output) {
  var self = this
  this.rules.forEach(function (rule) {
    if (typeof rule.append === 'function') {
      output = join(output, rule.append(self.options))
    }
  })

  // Leading spaces are kept on purpose; only tabs and line breaks are
  // stripped from the start, while all trailing whitespace is removed.
  return output.replace(/^[\t\r\n]+/, '').replace(/\s+$/, '')
}
rule.escapeContent() : 'auto') 237 | var whitespace = node.flankingWhitespace 238 | if (whitespace.leading || whitespace.trailing) content = content.trim() 239 | return ( 240 | whitespace.leading + 241 | rule.replacement(content, node, this.options) + 242 | whitespace.trailing 243 | ) 244 | } 245 | 246 | /** 247 | * Determines the new lines between the current output and the replacement 248 | * @private 249 | * @param {String} output The current conversion output 250 | * @param {String} replacement The string to append to the output 251 | * @returns The whitespace to separate the current output and the replacement 252 | * @type String 253 | */ 254 | 255 | function separatingNewlines (output, replacement) { 256 | var newlines = [ 257 | output.match(trailingNewLinesRegExp)[0], 258 | replacement.match(leadingNewLinesRegExp)[0] 259 | ].sort() 260 | var maxNewlines = newlines[newlines.length - 1] 261 | return maxNewlines.length < 2 ? maxNewlines : '\n\n' 262 | } 263 | 264 | function join (string1, string2) { 265 | var separator = separatingNewlines(string1, string2) 266 | 267 | // Remove trailing/leading newlines and replace with separator 268 | string1 = string1.replace(trailingNewLinesRegExp, '') 269 | string2 = string2.replace(leadingNewLinesRegExp, '') 270 | 271 | return string1 + separator + string2 272 | } 273 | 274 | /** 275 | * Determines whether an input can be converted 276 | * @private 277 | * @param {String|HTMLElement} input Describe this parameter 278 | * @returns Describe what it returns 279 | * @type String|Object|Array|Boolean|Number 280 | */ 281 | 282 | function canConvert (input) { 283 | return ( 284 | input != null && ( 285 | typeof input === 'string' || 286 | (input.nodeType && ( 287 | input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11 288 | )) 289 | ) 290 | ) 291 | } 292 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Turndown 2 | 3 | [![Build Status](https://travis-ci.org/domchristie/turndown.svg?branch=master)](https://travis-ci.org/domchristie/turndown) 4 | 5 | Convert HTML into Markdown with JavaScript. 6 | 7 | ## Modifications 8 | 9 | **This is a mod of the original turndown package for use with Joplin.** The following changes have been made: 10 | 11 | - Remove JavaScript code from links. 12 | - Prevent newlines inside link text. 13 | - Fixed ordered lists indentation when there are more than 9 items. 14 | - Added support for `` tags. 15 | - Fixed encoding of anchor URLs. 16 | - Support named anchors (`Internal link`, which would link to ``) 17 | - Detect more types of code blocks based on special cases. 18 | - Handle MathJax blocks 19 | - Allow a rule to specify whether it wants its content escaped or not 20 | 21 | ### to-markdown has been renamed to Turndown. See the [migration guide](https://github.com/domchristie/to-markdown/wiki/Migrating-from-to-markdown-to-Turndown) for details. 22 | 23 | ## Installation 24 | 25 | npm: 26 | 27 | ``` 28 | npm install joplin-turndown 29 | ``` 30 | 31 | Browser: 32 | 33 | ```html 34 | 35 | ``` 36 | 37 | For usage with RequireJS, UMD versions are located in `lib/turndown.umd.js` (for Node.js) and `lib/turndown.browser.umd.js` for browser usage. These files are generated when the npm package is published. To generate them manually, clone this repo and run `npm run build`. 38 | 39 | ## Usage 40 | 41 | ```js 42 | // For Node.js 43 | var TurndownService = require('turndown') 44 | 45 | var turndownService = new TurndownService() 46 | var markdown = turndownService.turndown('

<h1>Hello world!</h1>

') 47 | ``` 48 | 49 | Turndown also accepts DOM nodes as input (either element nodes, document nodes, or document fragment nodes): 50 | 51 | ```js 52 | var markdown = turndownService.turndown(document.getElementById('content')) 53 | ``` 54 | 55 | ## Options 56 | 57 | Options can be passed in to the constructor on instantiation. 58 | 59 | | Option | Valid values | Default | 60 | | :-------------------- | :------------ | :------ | 61 | | `headingStyle` | `setext` or `atx` | `setext` | 62 | | `hr` | Any [Thematic break](http://spec.commonmark.org/0.27/#thematic-breaks) | `* * *` | 63 | | `bulletListMarker` | `-`, `+`, or `*` | `*` | 64 | | `codeBlockStyle` | `indented` or `fenced` | `indented` | 65 | | `fence` | ` ``` ` or `~~~` | ` ``` ` | 66 | | `emDelimiter` | `_` or `*` | `_` | 67 | | `strongDelimiter` | `**` or `__` | `**` | 68 | | `linkStyle` | `inlined` or `referenced` | `inlined` | 69 | | `linkReferenceStyle` | `full`, `collapsed`, or `shortcut` | `full` | 70 | 71 | ### Advanced Options 72 | 73 | | Option | Valid values | Default | 74 | | :-------------------- | :------------ | :------ | 75 | | `blankReplacement` | rule replacement function | See **Special Rules** below | 76 | | `keepReplacement` | rule replacement function | See **Special Rules** below | 77 | | `defaultReplacement` | rule replacement function | See **Special Rules** below | 78 | 79 | ## Methods 80 | 81 | ### `addRule(key, rule)` 82 | 83 | The `key` parameter is a unique name for the rule for easy reference. Example: 84 | 85 | ```js 86 | turndownService.addRule('strikethrough', { 87 | filter: ['del', 's', 'strike'], 88 | replacement: function (content) { 89 | return '~' + content + '~' 90 | } 91 | }) 92 | ``` 93 | 94 | `addRule` returns the `TurndownService` instance for chaining. 95 | 96 | See **Extending with Rules** below. 97 | 98 | ### `keep(filter)` 99 | 100 | Determines which elements are to be kept and rendered as HTML. By default, Turndown does not keep any elements. 
The filter parameter works like a rule filter (see section on filters belows). Example: 101 | 102 | ```js 103 | turndownService.keep(['del', 'ins']) 104 | turndownService.turndown('

<p>Hello <del>world</del><ins>World</ins></p>

') // 'Hello worldWorld' 105 | ``` 106 | 107 | This will render `` and `` elements as HTML when converted. 108 | 109 | `keep` can be called multiple times, with the newly added keep filters taking precedence over older ones. Keep filters will be overridden by the standard CommonMark rules and any added rules. To keep elements that are normally handled by those rules, add a rule with the desired behaviour. 110 | 111 | `keep` returns the `TurndownService` instance for chaining. 112 | 113 | ### `remove(filter)` 114 | 115 | Determines which elements are to be removed altogether i.e. converted to an empty string. By default, Turndown does not remove any elements. The filter parameter works like a rule filter (see section on filters belows). Example: 116 | 117 | ```js 118 | turndownService.remove('del') 119 | turndownService.turndown('

<p>Hello <del>world</del><ins>World</ins></p>

') // 'Hello World' 120 | ``` 121 | 122 | This will remove `` elements (and contents). 123 | 124 | `remove` can be called multiple times, with the newly added remove filters taking precedence over older ones. Remove filters will be overridden by the keep filters, standard CommonMark rules, and any added rules. To remove elements that are normally handled by those rules, add a rule with the desired behaviour. 125 | 126 | `remove` returns the `TurndownService` instance for chaining. 127 | 128 | ### `use(plugin|array)` 129 | 130 | Use a plugin, or an array of plugins. Example: 131 | 132 | ```js 133 | // Import plugins from turndown-plugin-gfm 134 | var turndownPluginGfm = require('turndown-plugin-gfm') 135 | var gfm = turndownPluginGfm.gfm 136 | var tables = turndownPluginGfm.tables 137 | var strikethrough = turndownPluginGfm.strikethrough 138 | 139 | // Use the gfm plugin 140 | turndownService.use(gfm) 141 | 142 | // Use the table and strikethrough plugins only 143 | turndownService.use([tables, strikethrough]) 144 | ``` 145 | 146 | `use` returns the `TurndownService` instance for chaining. 147 | 148 | See **Plugins** below. 149 | 150 | ## Extending with Rules 151 | 152 | Turndown can be extended by adding **rules**. A rule is a plain JavaScript object with `filter` and `replacement` properties. For example, the rule for converting `

<p>

<p>

<p>

<p>
By default, it outputs the node's text content (separated by blank lines if it is a block-level element). Its behaviour can be customised with the `defaultReplacement` option. 209 | 210 | ### Rule Precedence 211 | 212 | Turndown iterates over the set of rules, and picks the first one that matches the `filter`. The following list describes the order of precedence: 213 | 214 | 1. Blank rule 215 | 2. Added rules (optional) 216 | 3. Commonmark rules 217 | 4. Keep rules 218 | 5. Remove rules 219 | 6. Default rule 220 | 221 | ## Plugins 222 | 223 | The plugin API provides a convenient way for developers to apply multiple extensions. A plugin is just a function that is called with the `TurndownService` instance. 224 | 225 | ## License 226 | 227 | turndown is copyright © 2017+ Dom Christie and released under the MIT license. 228 | -------------------------------------------------------------------------------- /src/commonmark-rules.js: -------------------------------------------------------------------------------- 1 | import { repeat } from './utilities' 2 | const Entities = require('html-entities').AllHtmlEntities; 3 | const htmlentities = (new Entities()).encode; 4 | const css = require('css'); 5 | 6 | var rules = {} 7 | 8 | rules.paragraph = { 9 | filter: 'p', 10 | 11 | replacement: function (content) { 12 | return '\n\n' + content + '\n\n' 13 | } 14 | } 15 | 16 | rules.lineBreak = { 17 | filter: 'br', 18 | 19 | replacement: function (content, node, options) { 20 | return options.br + '\n' 21 | } 22 | } 23 | 24 | rules.heading = { 25 | filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], 26 | 27 | replacement: function (content, node, options) { 28 | var hLevel = Number(node.nodeName.charAt(1)) 29 | 30 | if (options.headingStyle === 'setext' && hLevel < 3) { 31 | var underline = repeat((hLevel === 1 ? 
// Converts <blockquote>: strips the surrounding blank lines and prefixes
// every remaining line with "> ".
rules.blockquote = {
  filter: 'blockquote',

  replacement: function (content) {
    content = content.replace(/^\n+|\n+$/g, '')
    content = content.replace(/^/gm, '> ')
    return '\n\n' + content + '\n\n'
  }
}

// Converts <ul>/<ol>. A list that is the last element child of an <li> is
// nested, so it is attached with a single newline instead of blank lines.
rules.list = {
  filter: ['ul', 'ol'],

  replacement: function (content, node) {
    var parent = node.parentNode
    if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
      return '\n' + content
    } else {
      return '\n\n' + content + '\n\n'
    }
  }
}

// Converts <li> into a bullet, a numbered item, or a "- [ ]" / "- [x]"
// checkbox item when joplinCheckboxInfo() recognises the node.
rules.listItem = {
  filter: 'li',

  replacement: function (content, node, options) {
    const joplinCheckbox = joplinCheckboxInfo(node);

    content = content
      .replace(/^\n+/, '') // remove leading newlines
      .replace(/\n+$/, '\n') // replace trailing newlines with just a single one

    if (joplinCheckbox) {
      return '- [' + (joplinCheckbox.checked ? 'x' : ' ') + '] ' + content;
    }

    var prefix = options.bulletListMarker + ' '
    // NOTE(review): whitespace runs in this chunk were collapsed by the
    // extraction. The four-space indent is reconstructed from the prefix
    // arithmetic below (index + '.' + padding is four columns wide); confirm
    // against the repository.
    var indent = '    '
    var parent = node.parentNode
    if (parent.nodeName === 'OL') {
      var start = parent.getAttribute('start')
      var index = Array.prototype.indexOf.call(parent.children, node)
      var indexStr = (start ? Number(start) + index : index + 1) + ''
      // The content of the line that contains the bullet must align with the
      // following lines, i.e. it should be:
      //
      // 9.  my content
      //     second line
      // 10. next one
      //     second line
      //
      // Always keep at least one space after the dot: without it a 3-digit
      // index yields "100.text", and a longer index makes repeat() throw a
      // RangeError because the count goes negative.
      prefix = indexStr + '.' + ' '.repeat(Math.max(1, 3 - indexStr.length))
      // Wide prefixes need a matching continuation indent to stay aligned
      indent = ' '.repeat(Math.max(indent.length, prefix.length))
    }

    content = content.replace(/\n/gm, '\n' + indent) // indent
    return (
      prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
    )
  }
}

113 | //     def ma_fonction
114 | //   
// Detects code presented as a highlighted table cell, i.e. a PRE element
// whose parent is a TD carrying the "code" class
// (see https://github.com/laurent22/joplin/issues/573).
function isCodeBlockSpecialCase1(node) {
  if (node.nodeName !== 'PRE') return false;
  const parent = node.parentNode;
  // A detached node has no parent, and not every parent exposes classList
  if (!parent || parent.nodeName !== 'TD' || !parent.classList) return false;
  return parent.classList.contains('code');
}

// To handle PRE tags that have a monospace font family. In that case
// we assume it is a code block.
function isCodeBlockSpecialCase2(node) {
  if (node.nodeName !== 'PRE') return false;

  const style = node.getAttribute('style');
  if (!style) return false;

  let parsed;
  try {
    parsed = css.parse('pre {' + style + '}');
  } catch (error) {
    // A malformed inline style tells us nothing about the font. Treat it as
    // "not a code block" rather than aborting the whole conversion.
    return false;
  }

  const rule = parsed && parsed.stylesheet && parsed.stylesheet.rules[0];
  if (!rule || !Array.isArray(rule.declarations)) return false;

  // Entries without a string `property` are skipped, and a style that sets
  // no font-family at all is simply not monospace.
  const fontFamily = rule.declarations.find(
    d => typeof d.property === 'string' && d.property.toLowerCase() === 'font-family'
  );
  if (!fontFamily || typeof fontFamily.value !== 'string') return false;

  return fontFamily.value.split(',').map(e => e.trim().toLowerCase()).includes('monospace');
}

// True for the standard <pre><code>…</code></pre> structure.
function isPreWithCodeChild(node) {
  return Boolean(
    node.nodeName === 'PRE' &&
    node.firstChild &&
    node.firstChild.nodeName === 'CODE'
  );
}

// Returns the node whose text is the code: the PRE itself for the two special
// cases, which need not contain a CODE child, otherwise its first child.
function codeBlockContentNode(node) {
  if (isCodeBlockSpecialCase1(node) || isCodeBlockSpecialCase2(node)) return node;
  return node.firstChild;
}

rules.indentedCodeBlock = {
  filter: function (node, options) {
    if (options.codeBlockStyle !== 'indented') return false
    if (isCodeBlockSpecialCase1(node) || isCodeBlockSpecialCase2(node)) return true

    return isPreWithCodeChild(node)
  },

  replacement: function (content, node, options) {
    const handledNode = codeBlockContentNode(node)

    // An indented code block requires four leading spaces on every line
    return (
      '\n\n    ' +
      handledNode.textContent.replace(/\n/g, '\n    ') +
      '\n\n'
    )
  }
}

rules.fencedCodeBlock = {
  filter: function (node, options) {
    if (options.codeBlockStyle !== 'fenced') return false;
    if (isCodeBlockSpecialCase1(node) || isCodeBlockSpecialCase2(node)) return true

    return isPreWithCodeChild(node)
  },

  replacement: function (content, node, options) {
    const handledNode = codeBlockContentNode(node);

    // A "language-xxx" class becomes the info string of the fence
    var className = handledNode.className || ''
    var language = (className.match(/language-(\S+)/) || [null, ''])[1]

    return (
      '\n\n' + options.fence + language + '\n' +
      handledNode.textContent +
      '\n' + options.fence + '\n\n'
    )
  }
}

rules.horizontalRule = {
  filter: 'hr',

  replacement: function (content, node, options) {
    return '\n\n' + options.hr + '\n\n'
  }
}

// Link text must stay on one line: trim it and turn line breaks into <br>.
function filterLinkContent (content) {
  return content.trim().replace(/[\n\r]+/g, '<br>')
}
// Normalises a link target: trims it, drops javascript: URLs and
// percent-encodes spaces. Returns '' when there is nothing usable.
function filterLinkHref (href) {
  if (!href) return ''
  href = href.trim()
  if (href.toLowerCase().indexOf('javascript:') === 0) return '' // We don't want to keep js code in the markdown
  // Replace the spaces with %20 because otherwise they can cause problems for some
  // renderer and space is not a valid URL character anyway.
  href = href.replace(/ /g, '%20');
  return href
}

// Returns an empty HTML anchor for the node's id (or name) when that
// identifier is listed in options.anchorNames (compared in lower case);
// otherwise returns ''.
// NOTE(review): the anchor markup was stripped from this chunk by the
// extraction and is reconstructed here; confirm against the repository.
function getNamedAnchorFromLink(node, options) {
  var id = node.getAttribute('id')
  if (!id) id = node.getAttribute('name')
  if (id) id = id.trim();

  // Callers may not supply anchorNames; treat that as "no known anchors"
  var anchorNames = options && Array.isArray(options.anchorNames) ? options.anchorNames : []

  if (id && anchorNames.indexOf(id.toLowerCase()) >= 0) {
    return '<a id="' + htmlentities(id) + '"></a>';
  } else {
    return '';
  }
}

// Formats the optional title part of a link. Double quotes are escaped so a
// title containing them cannot terminate the quoted string early.
function linkTitleSuffix (node) {
  return node.title ? ' "' + node.title.replace(/"/g, '\\"') + '"' : ''
}

rules.inlineLink = {
  filter: function (node, options) {
    return (
      options.linkStyle === 'inlined' &&
      node.nodeName === 'A' &&
      (node.getAttribute('href') || node.getAttribute('name') || node.getAttribute('id'))
    )
  },

  replacement: function (content, node, options) {
    var href = filterLinkHref(node.getAttribute('href'))
    var anchor = getNamedAnchorFromLink(node, options)
    var text = filterLinkContent(content)

    // Without a usable target only the named anchor (if any) and the text remain
    if (!href) return anchor + text

    return anchor + '[' + text + '](' + href + linkTitleSuffix(node) + ')'
  }
}

// Normally a named anchor would be an A element, but the id/name can also be
// found on other elements, so the rule below handles this.
// Fixes https://github.com/laurent22/joplin/issues/1876
rules.otherNamedAnchors = {
  filter: function (node, options) {
    return !!getNamedAnchorFromLink(node, options);
  },

  replacement: function (content, node, options) {
    return getNamedAnchorFromLink(node, options) + content;
  }
}

rules.referenceLink = {
  filter: function (node, options) {
    return (
      options.linkStyle === 'referenced' &&
      node.nodeName === 'A' &&
      node.getAttribute('href')
    )
  },

  replacement: function (content, node, options) {
    var href = filterLinkHref(node.getAttribute('href'))
    var title = href ? linkTitleSuffix(node) : ''
    var replacement
    var reference

    content = filterLinkContent(content)

    switch (options.linkReferenceStyle) {
      case 'collapsed':
        replacement = '[' + content + '][]'
        reference = '[' + content + ']: ' + href + title
        break
      case 'shortcut':
        replacement = '[' + content + ']'
        reference = '[' + content + ']: ' + href + title
        break
      default:
        var id = this.references.length + 1
        replacement = '[' + content + '][' + id + ']'
        reference = '[' + id + ']: ' + href + title
    }

    this.references.push(reference)
    return replacement
  },

  // Reference definitions collected by replacement(); emitted by append()
  references: [],

  append: function (options) {
    var references = ''
    if (this.references.length) {
      references = '\n\n' + this.references.join('\n') + '\n\n'
      this.references = [] // Reset references
    }
    return references
  }
}
rules.emphasis = {
  filter: ['em', 'i'],

  replacement: function (content, node, options) {
    // Delimiters around blank content would not form valid emphasis
    if (!content.trim()) return ''
    return options.emDelimiter + content + options.emDelimiter
  }
}

rules.strong = {
  filter: ['strong', 'b'],

  replacement: function (content, node, options) {
    if (!content.trim()) return ''
    return options.strongDelimiter + content + options.strongDelimiter
  }
}

rules.code = {
  filter: function (node) {
    var hasSiblings = node.previousSibling || node.nextSibling
    // A CODE that is the only child of a PRE is a code block, handled elsewhere
    var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings

    return node.nodeName === 'CODE' && !isCodeBlock
  },

  replacement: function (content) {
    if (!content.trim()) return ''

    var delimiter = '`'
    var leadingSpace = ''
    var trailingSpace = ''
    var matches = content.match(/`+/gm)
    if (matches) {
      // Pad so a backtick at either edge does not merge with the delimiter
      if (/^`/.test(content)) leadingSpace = ' '
      if (/`$/.test(content)) trailingSpace = ' '
      // Grow the delimiter until it differs from every backtick run inside
      while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'
    }

    return delimiter + leadingSpace + content + trailingSpace + delimiter
  }
}

// Builds the Markdown image syntax for an IMG node, or '' when it has no src.
// Square brackets in the alt text and double quotes in the title are escaped
// so they cannot break the surrounding syntax.
function imageMarkdownFromNode(node) {
  var alt = node.alt || ''
  var src = node.getAttribute('src') || ''
  var title = node.title || ''
  var titlePart = title ? ' "' + title.replace(/"/g, '\\"') + '"' : ''
  return src ? '![' + alt.replace(/([[\]])/g, '\\$1') + ']' + '(' + src + titlePart + ')' : ''
}

// Returns the URL of the first candidate in a SOURCE node's srcset (or
// data-srcset) attribute, or '' when there is none.
function imageUrlFromSource(node) {
  // Format of srcset can be:
  // srcset="kitten.png"
  // or:
  // srcset="kitten.png, kitten@2X.png 2x"
  // NOTE(review): splitting on commas mishandles URLs that themselves contain
  // a comma; unchanged from the original behaviour.

  const srcset = node.getAttribute('srcset') || node.getAttribute('data-srcset');
  if (!srcset) return '';

  // Candidates may be surrounded by whitespace; skip empty ones
  const firstCandidate = srcset.split(',').map(s => s.trim()).find(Boolean);
  if (!firstCandidate) return '';

  // Drop the optional descriptor ("2x", "480w") that follows the URL
  return firstCandidate.split(/\s+/)[0];
}

rules.image = {
  filter: 'img',

  replacement: function (content, node) {
    return imageMarkdownFromNode(node);
  }
}

rules.picture = {
  filter: 'picture',

  replacement: function (content, node) {
    if (!node.childNodes) return '';

    let firstSource = null;
    let firstImg = null;

    for (let i = 0; i < node.childNodes.length; i++) {
      const child = node.childNodes[i];

      if (child.nodeName === 'SOURCE' && !firstSource) firstSource = child;
      if (child.nodeName === 'IMG' && !firstImg) firstImg = child;
    }

    if (firstImg && firstImg.getAttribute('src')) {
      return imageMarkdownFromNode(firstImg);
    } else if (firstSource) {
      // A PICTURE can hold several SOURCE elements and the browser should
      // decide which one to download, but for now let's pick the first one.
      const src = imageUrlFromSource(firstSource);
      return src ? '![](' + src + ')' : '';
    }

    return '';
  }
}
// True when `node` matches the criterion: byType 'class' checks for a class
// name, byType 'nodeName' compares the node name. Nodes without a classList
// (text nodes, the document node) never match by class.
function nodeMatches(node, byType, name) {
  if (byType === 'class') return Boolean(node.classList && node.classList.contains(name));
  if (byType === 'nodeName') return node.nodeName === name;
  return false;
}

// Depth-first search for the first descendant of `node` that matches.
// Returns the matching node, or null when there is none.
function findFirstDescendant(node, byType, name) {
  for (const childNode of node.childNodes || []) {
    if (nodeMatches(childNode, byType, name)) return childNode;

    const sub = findFirstDescendant(childNode, byType, name);
    if (sub) return sub;
  }
  return null;
}

// Walks up the parentNode chain and returns the closest ancestor that
// matches, or null when the chain ends without a match.
function findParent(node, byType, name) {
  for (let p = node.parentNode; p; p = p.parentNode) {
    if (nodeMatches(p, byType, name)) return p;
  }
  return null;
}