├── .gitignore ├── README.asc ├── bower.json ├── to-asciidoc.html └── to-asciidoc.js /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | bower_components 3 | .bower.json -------------------------------------------------------------------------------- /README.asc: -------------------------------------------------------------------------------- 1 | = to-asciidoc 2 | 3 | An HTML to Asciidoc converter written in JavaScript. Inspired by https://github.com/domchristie/to-markdown[to-markdown] 4 | 5 | == Installation 6 | 7 | Use `to-asciidoc.js` directly or install it via `bower` 8 | 9 | [source,javascript] 10 | ---- 11 | bower install to-asciidoc 12 | ---- 13 | 14 | == Usage 15 | 16 | [source,javascript] 17 | ---- 18 | 19 | 20 | 21 | ---- 22 | 23 | == State 24 | 25 | This project is currently developed and used as part of the https://github.com/asciidocfx/AsciidocFX[AsciidocFX] project. 26 | 27 | == Licence 28 | 29 | MIT 30 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "to-asciidoc", 3 | "main": "to-asciidoc.js", 4 | "version": "1.0.1", 5 | "homepage": "https://github.com/asciidocfx/to-asciidoc", 6 | "authors": [ 7 | "rahmanusta@kodcu.com", 8 | "ozler.hakan@gmail.com" 9 | ], 10 | "description": "An HTML to Asciidoc converter written in JavaScript", 11 | "moduleType": [ 12 | "globals", 13 | "node" 14 | ], 15 | "keywords": [ 16 | "asciidoc", 17 | "html", 18 | "converter", 19 | "javascript" 20 | ], 21 | "license": "MIT", 22 | "ignore": [ 23 | "**/.*", 24 | "node_modules", 25 | "bower_components", 26 | "test", 27 | "tests" 28 | ], 29 | "dependencies": { 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /to-asciidoc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | HTML to Asciidoc 
Converter 6 | 7 | 8 | 13 | 14 | 15 | 16 | 17 |

18 | 26 | 27 | -------------------------------------------------------------------------------- /to-asciidoc.js: -------------------------------------------------------------------------------- 1 | /* 2 | * to-asciidoc - an HTML to Asciidoc converter 3 | * 4 | * Copyright 2015, asciidocfx 5 | * Licenced under the MIT licence 6 | * 7 | */ 8 | /* Global regex that matches every character with code 160 (non-breaking space). */ 9 | var nbspRegex = new RegExp(String.fromCharCode(160), "g"); 10 | /** Converts an HTML string to Asciidoc text. The input is assigned to the innerHTML of a detached div created with document.createElement, so a DOM must be available; $ is presumably jQuery (find/each/append/remove/replaceWith are called on it) - confirm. Steps, in order: replace non-breaking spaces with plain spaces; replace each span.Apple-converted-space with a single-space text node; flatten crayon-syntax and syntaxhighlighter markup to their text; replace each table with a text node holding a |==== delimited block; replace pre/code elements whose innerHTML contains a line break with a [source,java] listing; replace an anchor that directly wraps an img with the img itself; run traverse() over the resulting HTML; apply the regex-based pre, list and blockquote conversions; return cleanUp(string). @param {string} string HTML to convert. @returns {string} the converted text. */ 11 | var toAsciidoc = function (string) { 12 | 13 | string = string.replace(nbspRegex, " "); 14 | 15 | var all = document.createElement("div"); 16 | all.innerHTML = string; 17 | 18 | // fix for apple converted space 19 | var spans = all.querySelectorAll("span.Apple-converted-space"); 20 | for (var i = 0; i < spans.length; i++) { 21 | var parentNode = spans[i].parentNode || ""; 22 | if (parentNode) 23 | parentNode.replaceChild(document.createTextNode(" "), spans[i]); 24 | } 25 | 26 | // crayon-syntax highlighter fix 27 | $(all).find("div[class*='crayon-syntax']").each(function () { 28 | var elem = $(this); 29 | elem.find(".crayon-line").append("\n"); 30 | elem.find(".crayon-num").remove(); 31 | var code = $(""); /* NOTE(review): the string passed to $() is empty in this copy - confirm which element is meant to be created */ 32 | code.append(elem.text()); 33 | elem.replaceWith(code); 34 | }); 35 | 36 | 37 | // google syntax highlighter fix 38 | $(all).find("div.syntaxhighlighter").each(function () { 39 | var elem = $(this); 40 | elem.find(".line").append("\n"); 41 | elem.find(".number,.toolbar").remove(); 42 | console.log(elem.text()); /* NOTE(review): looks like leftover debug output - confirm it is intended */ 43 | var code = $(""); 44 | code.append(elem.text()); 45 | elem.replaceWith(code); 46 | }); 47 | 48 | // table converter 49 | var tables = all.querySelectorAll("table"); 50 | for (var i = 0; i < tables.length; i++) { 51 | var tableBoundary = "|====\n"; 52 | var tableText = ""; 53 | var table = tables[i]; 54 | var trs = table.querySelectorAll("tr"); 55 | var caption = table.querySelector("caption"); 56 | 57 | tableText += "\n\n"; 58 | if (caption) 59 | tableText += "." + caption.innerText.replace(/Table \d+\. 
/, "") + "\n"; 60 | tableText += tableBoundary; 61 | 62 | for (var j = 0; j < trs.length; j++) { 63 | var tr = trs[j]; 64 | var columns = tr.querySelectorAll("td"); 65 | if (columns.length == 0) 66 | columns = tr.querySelectorAll("th"); 67 | var row = [].slice.call(columns).map(function (e) { 68 | return "|" + (e.innerHTML ? traverse(e.innerHTML) : ""); 69 | }).join(" "); 70 | tableText += row + "\n"; 71 | } 72 | 73 | tableText += tableBoundary; 74 | 75 | if (table.parentNode) 76 | table.parentNode.replaceChild(document.createTextNode(tableText), table); 77 | } 78 | 79 | 80 | // fix pre > code block 81 | var codes = all.querySelectorAll("pre,code,pre>code"); 82 | for (var i = 0; i < codes.length; i++) { 83 | var code = codes[i]; 84 | if (code.innerHTML.split(/\n|\r|
|<\/br>/).length > 1) { 85 | if (code.parentNode) 86 | code.parentNode.replaceChild(document.createTextNode("\n[source,java]\n----\n" + code.innerText + "\n----\n"), code); /* NOTE(review): the listing language is hard-coded to java */ 87 | } 88 | } 89 | 90 | // remove anchor surrounding an img 91 | var images = all.querySelectorAll("img"); 92 | for (var i = 0; i < images.length; i++) { 93 | var parentNode = images[i].parentNode || ""; 94 | if (parentNode.parentNode) 95 | if (parentNode.constructor == HTMLAnchorElement) 96 | parentNode.parentNode.replaceChild(images[i], parentNode); 97 | } 98 | string = traverse(all.innerHTML); 99 | /* traverse: applies the ELEMENTS table to the given HTML string in array order and returns the result; each entry maps one tag name or a list of tag names to a replacement string or callback, and every tag is processed over the whole string by replaceEls(). */ 100 | function traverse(string) { 101 | var ELEMENTS = [ 102 | { 103 | patterns: ["script", "iframe", "meta","embed"], 104 | replacement: function (str, attrs, innerHTML) { 105 | return ""; 106 | } 107 | }, 108 | { 109 | patterns: ["div", "span", "body", "i", "section", "html"], /* NOTE(review): "i" is also listed in the italic entry further down; this entry is applied first and returns only the inner content */ 110 | replacement: function (str, attrs, innerHTML) { 111 | return innerHTML ? innerHTML : ''; 112 | } 113 | }, 114 | { 115 | patterns: 'p', 116 | replacement: function (str, attrs, innerHTML) { 117 | return innerHTML ? "\n" + innerHTML + "\n" : ''; 118 | } 119 | }, 120 | { 121 | patterns: 'br', 122 | type: 'void', 123 | replacement: ' \n' 124 | }, 125 | { 126 | patterns: 'h([1-6])', 127 | replacement: function (str, hLevel, attrs, innerHTML) { 128 | var hPrefix = ''; 129 | for (var i = 0; i < hLevel; i++) { 130 | hPrefix += '='; 131 | } 132 | return '\n\n' + hPrefix + ' ' + innerHTML + '\n'; 133 | } 134 | }, 135 | { 136 | patterns: 'hr', 137 | type: 'void', 138 | replacement: "\n\n'''\n" 139 | }, 140 | { 141 | patterns: 'a', 142 | replacement: function (str, attrs, innerHTML) { 143 | var href = attrs.match(attrRegExp('href')), 144 | title = attrs.match(attrRegExp('title')); 145 | 146 | return href ? href[1] + '[' + innerHTML + ']' : ''; 147 | //return href ? '[' + innerHTML + ']' + '(' + href[1] + (title && title[1] ? 
' "' + title[1] + '"' : '') + ')' : str; 148 | } 149 | }, 150 | { 151 | patterns: ['b', 'strong'], 152 | replacement: function (str, attrs, innerHTML) { 153 | return innerHTML ? '**' + innerHTML + '**' : ''; 154 | } 155 | }, 156 | { 157 | patterns: ['i', 'em'], 158 | replacement: function (str, attrs, innerHTML) { 159 | return innerHTML ? '__' + innerHTML + '__' : ''; 160 | } 161 | }, 162 | { 163 | patterns: 'sub', 164 | replacement: function (str, attrs, innerHTML) { 165 | return innerHTML ? '~' + innerHTML + '~' : ''; 166 | } 167 | }, 168 | { 169 | patterns: 'sup', 170 | replacement: function (str, attrs, innerHTML) { 171 | return innerHTML ? '^' + innerHTML + '^' : ''; 172 | } 173 | }, 174 | { 175 | patterns: 'u', 176 | replacement: function (str, attrs, innerHTML) { 177 | return innerHTML ? '[underline]#' + innerHTML + '#' : ''; 178 | } 179 | }, 180 | { 181 | patterns: 'del', 182 | replacement: function (str, attrs, innerHTML) { 183 | return innerHTML ? '[line-through]#' + innerHTML + '#' : ''; 184 | } 185 | }, 186 | { 187 | patterns: 'code', 188 | replacement: function (str, attrs, innerHTML) { 189 | return innerHTML ? '``' + innerHTML + '``' : ''; 190 | } 191 | }, 192 | { 193 | patterns: 'pre', 194 | replacement: function (str, attrs, innerHTML) { 195 | return innerHTML ? '\n\n----\n' + innerHTML + '\n----\n' : ''; 196 | } 197 | }, 198 | { 199 | patterns: 'img', 200 | type: 'void', 201 | replacement: function (str, attrs, innerHTML) { 202 | var src = attrs.match(attrRegExp('src')), 203 | alt = attrs.match(attrRegExp('alt')), 204 | title = attrs.match(attrRegExp('title')); 205 | return src ? '\nimage::' + src[1] + '[' + (alt && alt[1] ? alt[1] : '') + ']\n' : ''; 206 | //return src ? '![' + (alt && alt[1] ? alt[1] : '') + ']' + '(' + src[1] + (title && title[1] ? 
' "' + title[1] + '"' : '') + ')' : ''; 207 | } 208 | } 209 | ]; 210 | 211 | for (var i = 0, len = ELEMENTS.length; i < len; i++) { 212 | if (typeof ELEMENTS[i].patterns === 'string') { 213 | string = replaceEls(string, { 214 | tag: ELEMENTS[i].patterns, 215 | replacement: ELEMENTS[i].replacement, 216 | type: ELEMENTS[i].type 217 | }); 218 | } 219 | else { 220 | for (var j = 0, pLen = ELEMENTS[i].patterns.length; j < pLen; j++) { 221 | string = replaceEls(string, { 222 | tag: ELEMENTS[i].patterns[j], 223 | replacement: ELEMENTS[i].replacement, 224 | type: ELEMENTS[i].type 225 | }); 226 | } 227 | } 228 | } 229 | /* replaceEls: builds a global, case-insensitive regex for elProperties.tag (a single tag when type is 'void', otherwise an opening tag, lazily matched content and the closing tag) and replaces every match in html with elProperties.replacement, which may be a string or a callback receiving the match and up to three capture groups. */ 230 | function replaceEls(html, elProperties) { 231 | var pattern = elProperties.type === 'void' ? '<' + elProperties.tag + '\\b([^>]*)\\/?>' : '<' + elProperties.tag + '\\b([^>]*)>([\\s\\S]*?)<\\/' + elProperties.tag + '>', 232 | regex = new RegExp(pattern, 'gi'), 233 | asciidoc = ''; 234 | if (typeof elProperties.replacement === 'string') { 235 | asciidoc = html.replace(regex, elProperties.replacement); 236 | } 237 | else { 238 | asciidoc = html.replace(regex, function (str, p1, p2, p3) { 239 | return elProperties.replacement.call(this, str, p1, p2, p3); 240 | }); 241 | } 242 | return asciidoc; 243 | } 244 | 245 | return string; 246 | } 247 | /* strip: removes opening and closing meta, span, div, section, i, html and body tags, then replaces U+2014 with "--" and U+2009 with a space. NOTE(review): the three replacements for >, < and & map each character to itself as written in this copy - presumably entity decoding in the original file; confirm. */ 248 | function strip(html) { 249 | html = html.replace(/<[\/]?(meta)[^><]*>/ig, ""); 250 | html = html.replace(/<[\/]?(span)[^><]*>/ig, ""); 251 | html = html.replace(/<[\/]?(div)[^><]*>/ig, ""); 252 | html = html.replace(/<[\/]?(section)[^><]*>/ig, ""); 253 | html = html.replace(/<[\/]?(i)[^><]*>/ig, ""); 254 | html = html.replace(/<[\/]?(html)[^><]*>/ig, ""); 255 | html = html.replace(/<[\/]?(body)[^><]*>/ig, ""); 256 | html = html.replace(/(>)/ig, ">"); 257 | html = html.replace(/(<)/ig, "<"); 258 | html = html.replace(/(&)/ig, "&"); 259 | html = html.replace(/(\u2014)/ig, "--"); 260 | html = html.replace(/(\u2009)/ig, " "); 261 | return html; 262 | } 263 | /* attrRegExp: returns a case-insensitive regex whose first capture group is the value of the attribute named attr, with optional single or double quotes around it. */ 264 | function attrRegExp(attr) { 265 | return new 
RegExp(attr + '\\s*=\\s*["\']?([^"\']*)["\']?', 'i'); 266 | } 267 | 268 | // Pre code blocks 269 | 270 | string = string.replace(/]*>`([\s\S]*?)`<\/pre>/gi, function (str, innerHTML) { 271 | var text = innerHTML; 272 | text = text.replace(/^\t+/g, ' '); // convert tabs to spaces (you know it makes sense) 273 | text = text.replace(/\n/g, '\n '); 274 | return '\n\n ' + text + '\n'; 275 | }); 276 | 277 | // Lists 278 | 279 | // Escape numbers that could trigger an ol 280 | // If there are more than three spaces before the code, it would be in a pre tag 281 | // Make sure we are escaping the period not matching any character 282 | string = string.replace(/^(\s{0,3}\d+)\. /g, '$1\\. '); 283 | 284 | // Converts lists that have no child lists (of same type) first, then works its way up 285 | var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!/gi; 286 | while (string.match(noChildrenRegex)) { 287 | string = string.replace(noChildrenRegex, function (str) { 288 | return replaceLists(str); 289 | }); 290 | } 291 | /* replaceLists: rewrites each ul/ol element in html as prefixed lines ("N. " for ol items, "* " for ul items), joins the items with newlines, and returns the result preceded by two newlines with trailing whitespace removed. */ 292 | function replaceLists(html) { 293 | 294 | html = html.replace(/<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi, function (str, listType, innerHTML) { 295 | var lis = innerHTML.split(''); 296 | lis.splice(lis.length - 1, 1); 297 | 298 | for (i = 0, len = lis.length; i < len; i++) { /* NOTE(review): i and len are assigned without a var in this function - confirm which scope they resolve to */ 299 | if (lis[i]) { 300 | var prefix = (listType === 'ol') ? (i + 1) + ". " : "* "; 301 | lis[i] = lis[i].replace(/\s*]*>([\s\S]*)/i, function (str, innerHTML) { 302 | 303 | innerHTML = innerHTML.replace(/^\s+/, ''); 304 | innerHTML = innerHTML.replace(/\n\n/g, '\n\n '); 305 | // indent nested lists 306 | innerHTML = innerHTML.replace(/\n([ ]*)+(\*|\d+\.) /g, '\n$1 $2 '); 307 | return prefix + innerHTML; 308 | }); 309 | } 310 | lis[i] = lis[i].replace(/(.) 
+$/m, '$1'); 311 | } 312 | return lis.join('\n'); 313 | }); 314 | 315 | return '\n\n' + html.replace(/[ \t]+\n|\s+$/g, ''); 316 | } 317 | 318 | // Blockquotes 319 | var deepest = /]*>((?:(?!/gi; 320 | while (string.match(deepest)) { 321 | string = string.replace(deepest, function (str) { 322 | return replaceBlockquotes(str); 323 | }); 324 | } 325 | /* replaceBlockquotes: for each matched blockquote, trims the inner text, runs cleanUp() on it, prefixes every line with "> " and collapses runs of nested markers to "> >". */ 326 | function replaceBlockquotes(html) { 327 | html = html.replace(/]*>([\s\S]*?)<\/blockquote>/gi, function (str, inner) { 328 | inner = inner.replace(/^\s+|\s+$/g, ''); 329 | inner = cleanUp(inner); 330 | inner = inner.replace(/^/gm, '> '); 331 | inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, '> >'); 332 | return inner; 333 | }); 334 | return html; 335 | } 336 | /* cleanUp: final normalisation - strip(), trim leading and trailing tabs and line breaks, turn whitespace-only gaps between newlines into a single blank line, cap consecutive line breaks at two, then strip() once more. */ 337 | function cleanUp(string) { 338 | string = strip(string); 339 | string = string.replace(/^[\t\r\n]+|[\t\r\n]+$/g, ''); // trim leading/trailing whitespace 340 | string = string.replace(/\n\s+\n/g, '\n\n'); 341 | string = string.replace(/\n{3,}/g, '\n\n'); // limit consecutive linebreaks to 2 342 | string = strip(string); 343 | return string; 344 | } 345 | 346 | return cleanUp(string); 347 | }; 348 | --------------------------------------------------------------------------------