├── .gitignore
├── README.asc
├── bower.json
├── to-asciidoc.html
└── to-asciidoc.js
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | bower_components
3 | .bower.json
--------------------------------------------------------------------------------
/README.asc:
--------------------------------------------------------------------------------
1 | = to-asciidoc
2 |
3 | An HTML to AsciiDoc converter written in JavaScript. Inspired by https://github.com/domchristie/to-markdown[to-markdown].
4 |
5 | == Installation
6 |
7 | Use `to-asciidoc.js` directly or install it via `bower`:
8 |
9 | [source,javascript]
10 | ----
11 | bower install to-asciidoc
12 | ----
13 |
14 | == Usage
15 |
16 | [source,javascript]
17 | ----
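18 | // In a browser page that has loaded jQuery and to-asciidoc.js:
19 | var asciidoc = toAsciidoc("<h1>Title</h1><p>Some <b>bold</b> text</p>");
20 | console.log(asciidoc);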
21 | ----
22 |
23 | == State
24 |
25 | This project is currently developed and used as part of the https://github.com/asciidocfx/AsciidocFX[AsciidocFX] project.
26 |
27 | == Licence
28 |
29 | MIT
30 |
--------------------------------------------------------------------------------
/bower.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "to-asciidoc",
3 | "main": "to-asciidoc.js",
4 | "version": "1.0.1",
5 | "homepage": "https://github.com/asciidocfx/to-asciidoc",
6 | "authors": [
7 | "rahmanusta@kodcu.com",
8 | "ozler.hakan@gmail.com"
9 | ],
10 | "description": "An HTML to Asciidoc converter written in JavaScript",
11 | "moduleType": [
12 | "globals",
13 | "node"
14 | ],
15 | "keywords": [
16 | "asciidoc",
17 | "html",
18 | "converter",
19 | "javascript"
20 | ],
21 | "license": "MIT",
22 | "ignore": [
23 | "**/.*",
24 | "node_modules",
25 | "bower_components",
26 | "test",
27 | "tests"
28 | ],
29 | "dependencies": {
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/to-asciidoc.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | HTML to Asciidoc Converter
6 |
7 |
8 |
13 |
14 |
15 |
16 |
17 | Convert
18 |
26 |
27 |
--------------------------------------------------------------------------------
/to-asciidoc.js:
--------------------------------------------------------------------------------
1 | /*
2 | * to-asciidoc - an HTML to Asciidoc converter
3 | *
4 | * Copyright 2015, asciidocfx
5 | * Licenced under the MIT licence
6 | *
7 | */
8 |
9 | var nbspRegex = new RegExp(String.fromCharCode(160), "g");
10 |
11 | var toAsciidoc = function (string) {
12 |
13 | string = string.replace(nbspRegex, " ");
14 |
15 | var all = document.createElement("div");
16 | all.innerHTML = string;
17 |
18 | // fix for apple converted space
19 | var spans = all.querySelectorAll("span.Apple-converted-space");
20 | for (var i = 0; i < spans.length; i++) {
21 | var parentNode = spans[i].parentNode || "";
22 | if (parentNode)
23 | parentNode.replaceChild(document.createTextNode(" "), spans[i]);
24 | }
25 |
26 | // crayon-syntax higlighter fix
27 | $(all).find("div[class*='crayon-syntax']").each(function () {
28 | var elem = $(this);
29 | elem.find(".crayon-line").append("\n");
30 | elem.find(".crayon-num").remove();
31 | var code = $("");
32 | code.append(elem.text());
33 | elem.replaceWith(code);
34 | });
35 |
36 |
37 | // google syntax higlighter fix
38 | $(all).find("div.syntaxhighlighter").each(function () {
39 | var elem = $(this);
40 | elem.find(".line").append("\n");
41 | elem.find(".number,.toolbar").remove();
42 | console.log(elem.text());
43 | var code = $("");
44 | code.append(elem.text());
45 | elem.replaceWith(code);
46 | });
47 |
48 | // table converter
49 | var tables = all.querySelectorAll("table");
50 | for (var i = 0; i < tables.length; i++) {
51 | var tableBoundary = "|====\n";
52 | var tableText = "";
53 | var table = tables[i];
54 | var trs = table.querySelectorAll("tr");
55 | var caption = table.querySelector("caption");
56 |
57 | tableText += "\n\n";
58 | if (caption)
59 | tableText += "." + caption.innerText.replace(/Table \d+\. /, "") + "\n";
60 | tableText += tableBoundary;
61 |
62 | for (var j = 0; j < trs.length; j++) {
63 | var tr = trs[j];
64 | var columns = tr.querySelectorAll("td");
65 | if (columns.length == 0)
66 | columns = tr.querySelectorAll("th");
67 | var row = [].slice.call(columns).map(function (e) {
68 | return "|" + (e.innerHTML ? traverse(e.innerHTML) : "");
69 | }).join(" ");
70 | tableText += row + "\n";
71 | }
72 |
73 | tableText += tableBoundary;
74 |
75 | if (table.parentNode)
76 | table.parentNode.replaceChild(document.createTextNode(tableText), table);
77 | }
78 |
79 |
80 | // fix pre > code block
81 | var codes = all.querySelectorAll("pre,code,pre>code");
82 | for (var i = 0; i < codes.length; i++) {
83 | var code = codes[i];
84 | if (code.innerHTML.split(/\n|\r| |<\/br>/).length > 1) {
85 | if (code.parentNode)
86 | code.parentNode.replaceChild(document.createTextNode("\n[source,java]\n----\n" + code.innerText + "\n----\n"), code);
87 | }
88 | }
89 |
90 | // remove anchor surrounding an img
91 | var images = all.querySelectorAll("img");
92 | for (var i = 0; i < images.length; i++) {
93 | var parentNode = images[i].parentNode || "";
94 | if (parentNode.parentNode)
95 | if (parentNode.constructor == HTMLAnchorElement)
96 | parentNode.parentNode.replaceChild(images[i], parentNode);
97 | }
98 | string = traverse(all.innerHTML);
99 | // Converts an HTML string to AsciiDoc text by applying the ELEMENTS rules below, in order, one regex pass per tag name.
100 | function traverse(string) { // string: HTML markup; the converted text is returned
101 | var ELEMENTS = [ // rule order matters: an earlier rule consumes its tags before later rules run
102 | {
103 | patterns: ["script", "iframe", "meta","embed"], // dropped with their content; matched as paired tags only, so a lone <meta ...> is not caught here
104 | replacement: function (str, attrs, innerHTML) {
105 | return "";
106 | }
107 | },
108 | {
109 | patterns: ["div", "span", "body", "i", "section", "html"], // wrappers: tags removed, content kept
110 | replacement: function (str, attrs, innerHTML) { // NOTE(review): "i" is unwrapped here, so the ['i', 'em'] rule below never sees an <i> - confirm this is intended
111 | return innerHTML ? innerHTML : '';
112 | }
113 | },
114 | {
115 | patterns: 'p', // paragraph content is framed by newlines; empty paragraphs vanish
116 | replacement: function (str, attrs, innerHTML) {
117 | return innerHTML ? "\n" + innerHTML + "\n" : '';
118 | }
119 | },
120 | {
121 | patterns: 'br',
122 | type: 'void', // void: matched as a single tag, no closing tag expected
123 | replacement: ' \n' // string replacements are passed straight to String.replace
124 | },
125 | {
126 | patterns: 'h([1-6])', // the capture group in the tag name shifts the callback arguments: the level comes first
127 | replacement: function (str, hLevel, attrs, innerHTML) {
128 | var hPrefix = '';
129 | for (var i = 0; i < hLevel; i++) { // hLevel is the captured digit (a string); one "=" per level
130 | hPrefix += '=';
131 | }
132 | return '\n\n' + hPrefix + ' ' + innerHTML + '\n';
133 | }
134 | },
135 | {
136 | patterns: 'hr',
137 | type: 'void',
138 | replacement: "\n\n'''\n" // emitted as a line holding three single quotes
139 | },
140 | {
141 | patterns: 'a', // link: href followed by the link text in square brackets
142 | replacement: function (str, attrs, innerHTML) {
143 | var href = attrs.match(attrRegExp('href')),
144 | title = attrs.match(attrRegExp('title'));
145 | // title is parsed but not used in the output.
146 | return href ? href[1] + '[' + innerHTML + ']' : '';
147 | // An anchor without an href yields '' - its inner text is dropped as well.
148 | }
149 | },
150 | {
151 | patterns: ['b', 'strong'], // bold: wrapped in **
152 | replacement: function (str, attrs, innerHTML) {
153 | return innerHTML ? '**' + innerHTML + '**' : '';
154 | }
155 | },
156 | {
157 | patterns: ['i', 'em'], // italic: wrapped in __
158 | replacement: function (str, attrs, innerHTML) {
159 | return innerHTML ? '__' + innerHTML + '__' : '';
160 | }
161 | },
162 | {
163 | patterns: 'sub', // subscript: wrapped in ~
164 | replacement: function (str, attrs, innerHTML) {
165 | return innerHTML ? '~' + innerHTML + '~' : '';
166 | }
167 | },
168 | {
169 | patterns: 'sup', // superscript: wrapped in ^
170 | replacement: function (str, attrs, innerHTML) {
171 | return innerHTML ? '^' + innerHTML + '^' : '';
172 | }
173 | },
174 | {
175 | patterns: 'u', // underline: [underline]#...#
176 | replacement: function (str, attrs, innerHTML) {
177 | return innerHTML ? '[underline]#' + innerHTML + '#' : '';
178 | }
179 | },
180 | {
181 | patterns: 'del', // deleted text: [line-through]#...#
182 | replacement: function (str, attrs, innerHTML) {
183 | return innerHTML ? '[line-through]#' + innerHTML + '#' : '';
184 | }
185 | },
186 | {
187 | patterns: 'code', // inline code: wrapped in double backticks
188 | replacement: function (str, attrs, innerHTML) {
189 | return innerHTML ? '``' + innerHTML + '``' : '';
190 | }
191 | },
192 | {
193 | patterns: 'pre', // preformatted text: placed between ---- delimiter lines
194 | replacement: function (str, attrs, innerHTML) {
195 | return innerHTML ? '\n\n----\n' + innerHTML + '\n----\n' : '';
196 | }
197 | },
198 | {
199 | patterns: 'img', // image: image::src[alt] on its own line
200 | type: 'void',
201 | replacement: function (str, attrs, innerHTML) { // for void rules the third argument is not inner HTML (there is only one capture group)
202 | var src = attrs.match(attrRegExp('src')),
203 | alt = attrs.match(attrRegExp('alt')),
204 | title = attrs.match(attrRegExp('title'));
205 | return src ? '\nimage::' + src[1] + '[' + (alt && alt[1] ? alt[1] : '') + ']\n' : '';
206 | // title is parsed but unused; an <img> without a src yields ''.
207 | }
208 | }
209 | ];
210 | // Apply every rule in order; `patterns` is either one tag name or a list of tag names.
211 | for (var i = 0, len = ELEMENTS.length; i < len; i++) {
212 | if (typeof ELEMENTS[i].patterns === 'string') {
213 | string = replaceEls(string, {
214 | tag: ELEMENTS[i].patterns,
215 | replacement: ELEMENTS[i].replacement,
216 | type: ELEMENTS[i].type
217 | });
218 | }
219 | else {
220 | for (var j = 0, pLen = ELEMENTS[i].patterns.length; j < pLen; j++) {
221 | string = replaceEls(string, {
222 | tag: ELEMENTS[i].patterns[j],
223 | replacement: ELEMENTS[i].replacement,
224 | type: ELEMENTS[i].type
225 | });
226 | }
227 | }
228 | }
229 | // Replaces every match of one tag in `html`; paired tags are matched lazily up to the first closing tag, so same-name nesting is not handled.
230 | function replaceEls(html, elProperties) { // elProperties: { tag, replacement (string or function), type ('void' or undefined) }
231 | var pattern = elProperties.type === 'void' ? '<' + elProperties.tag + '\\b([^>]*)\\/?>' : '<' + elProperties.tag + '\\b([^>]*)>([\\s\\S]*?)<\\/' + elProperties.tag + '>',
232 | regex = new RegExp(pattern, 'gi'), // global and case-insensitive
233 | asciidoc = '';
234 | if (typeof elProperties.replacement === 'string') {
235 | asciidoc = html.replace(regex, elProperties.replacement);
236 | }
237 | else {
238 | asciidoc = html.replace(regex, function (str, p1, p2, p3) { // p1..p3: whatever String.replace supplies after the match, forwarded unchanged
239 | return elProperties.replacement.call(this, str, p1, p2, p3);
240 | });
241 | }
242 | return asciidoc;
243 | }
244 |
245 | return string;
246 | }
247 |
248 | function strip(html) {
249 | html = html.replace(/<[\/]?(meta)[^><]*>/ig, "");
250 | html = html.replace(/<[\/]?(span)[^><]*>/ig, "");
251 | html = html.replace(/<[\/]?(div)[^><]*>/ig, "");
252 | html = html.replace(/<[\/]?(section)[^><]*>/ig, "");
253 | html = html.replace(/<[\/]?(i)[^><]*>/ig, "");
254 | html = html.replace(/<[\/]?(html)[^><]*>/ig, "");
255 | html = html.replace(/<[\/]?(body)[^><]*>/ig, "");
256 | html = html.replace(/(>)/ig, ">");
257 | html = html.replace(/(<)/ig, "<");
258 | html = html.replace(/(&)/ig, "&");
259 | html = html.replace(/(\u2014)/ig, "--");
260 | html = html.replace(/(\u2009)/ig, " ");
261 | return html;
262 | }
263 |
264 | function attrRegExp(attr) {
265 | return new RegExp(attr + '\\s*=\\s*["\']?([^"\']*)["\']?', 'i');
266 | }
267 |
268 | // Pre code blocks
269 |
270 | string = string.replace(/]*>`([\s\S]*?)`<\/pre>/gi, function (str, innerHTML) {
271 | var text = innerHTML;
272 | text = text.replace(/^\t+/g, ' '); // convert tabs to spaces (you know it makes sense)
273 | text = text.replace(/\n/g, '\n ');
274 | return '\n\n ' + text + '\n';
275 | });
276 |
277 | // Lists
278 |
279 | // Escape numbers that could trigger an ol
280 | // If there are more than three spaces before the code, it would be in a pre tag
281 | // Make sure we are escaping the period not matching any character
282 | string = string.replace(/^(\s{0,3}\d+)\. /g, '$1\\. ');
283 |
284 | // Converts lists that have no child lists (of same type) first, then works its way up
285 | var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?! /gi;
286 | while (string.match(noChildrenRegex)) {
287 | string = string.replace(noChildrenRegex, function (str) {
288 | return replaceLists(str);
289 | });
290 | }
291 |
292 | function replaceLists(html) {
293 |
294 | html = html.replace(/<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi, function (str, listType, innerHTML) {
295 | var lis = innerHTML.split('');
296 | lis.splice(lis.length - 1, 1);
297 |
298 | for (i = 0, len = lis.length; i < len; i++) {
299 | if (lis[i]) {
300 | var prefix = (listType === 'ol') ? (i + 1) + ". " : "* ";
301 | lis[i] = lis[i].replace(/\s*]*>([\s\S]*)/i, function (str, innerHTML) {
302 |
303 | innerHTML = innerHTML.replace(/^\s+/, '');
304 | innerHTML = innerHTML.replace(/\n\n/g, '\n\n ');
305 | // indent nested lists
306 | innerHTML = innerHTML.replace(/\n([ ]*)+(\*|\d+\.) /g, '\n$1 $2 ');
307 | return prefix + innerHTML;
308 | });
309 | }
310 | lis[i] = lis[i].replace(/(.) +$/m, '$1');
311 | }
312 | return lis.join('\n');
313 | });
314 |
315 | return '\n\n' + html.replace(/[ \t]+\n|\s+$/g, '');
316 | }
317 |
318 | // Blockquotes
319 | var deepest = /]*>((?:(?!/gi;
320 | while (string.match(deepest)) {
321 | string = string.replace(deepest, function (str) {
322 | return replaceBlockquotes(str);
323 | });
324 | }
325 |
326 | function replaceBlockquotes(html) {
327 | html = html.replace(/]*>([\s\S]*?)<\/blockquote>/gi, function (str, inner) {
328 | inner = inner.replace(/^\s+|\s+$/g, '');
329 | inner = cleanUp(inner);
330 | inner = inner.replace(/^/gm, '> ');
331 | inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, '> >');
332 | return inner;
333 | });
334 | return html;
335 | }
336 |
337 | function cleanUp(string) {
338 | string = strip(string);
339 | string = string.replace(/^[\t\r\n]+|[\t\r\n]+$/g, ''); // trim leading/trailing whitespace
340 | string = string.replace(/\n\s+\n/g, '\n\n');
341 | string = string.replace(/\n{3,}/g, '\n\n'); // limit consecutive linebreaks to 2
342 | string = strip(string);
343 | return string;
344 | }
345 |
346 | return cleanUp(string); // the converted text goes through cleanUp() once more before it is returned
347 | };
348 |
--------------------------------------------------------------------------------