// ├── .gitignore
// ├── README.md
// └── src
//     ├── htmlParser.js
//     └── index.js
//
// /.gitignore:
// --------------------------------------------------------------------------------
// 1 | node_modules
// 2 | npm*.lock
// 3 | yarn.lock
// 4 |
// --------------------------------------------------------------------------------
// /README.md:
// --------------------------------------------------------------------------------
// 1 | tiny browser
// --------------------------------------------------------------------------------
// /src/htmlParser.js:
// --------------------------------------------------------------------------------

// A complete start tag: name, zero or more attributes, optional "/" and ">".
// Attribute values may be double-quoted, single-quoted or unquoted; an
// attribute may also have no value at all (boolean attribute).
const startTagReg = /^<([a-zA-Z0-9\-]+)(?:\s+[^\s"'<>\/=]+(?:=(?:"[^"]*"|'[^']*'|[^\s>]+))?)*\s*\/?>/;
// One attribute (with its leading whitespace) at the head of the tag tail.
const attributeReg = /^\s+([^\s"'<>\/=]+(?:=(?:"[^"]*"|'[^']*'|[^\s>]+))?)/;
const endTagReg = /^<\/([a-zA-Z0-9\-]+)\s*>/;
// Non-greedy so that the first "-->" terminates the comment.
const commentReg = /^<!--[\s\S]*?-->/;
const docTypeReg = /^<!doctype[^>]*>/i;
// Elements that never have content or an end tag (HTML "void elements").
const voidElements = new Set([
  'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
  'input', 'link', 'meta', 'source', 'track', 'wbr',
]);

/**
 * Tokenizes an HTML string and reports each token through callbacks.
 *
 * Tokens are objects of the form `{ type, value }`:
 *   - onComment:   type 'comment',   value = the whole comment markup
 *   - onDoctype:   type 'docType',   value = the whole doctype markup
 *   - onStartTag:  type 'tagStart',  value = tag name
 *   - onAttribute: type 'attribute', value = raw `name=value` (or bare `name`)
 *   - onEndTag:    type 'tagEnd',    value = tag name
 *   - onText:      type 'text',      value = the text run
 *
 * Void elements and tags written as `<x/>` additionally produce an `onEndTag`
 * call right after their attributes, so consumers see balanced start/end pairs.
 *
 * @param {string} html - Markup to tokenize; a falsy value produces no tokens.
 * @param {object} options - Object holding the six callbacks listed above.
 * @returns {undefined}
 */
function parse(html, options) {
  let remaining = html;

  function advance(num) {
    remaining = remaining.slice(num);
  }

  while (remaining) {
    if (remaining.startsWith('<')) {
      const commentMatch = remaining.match(commentReg);
      if (commentMatch) {
        options.onComment({ type: 'comment', value: commentMatch[0] });
        advance(commentMatch[0].length);
        continue;
      }

      const docTypeMatch = remaining.match(docTypeReg);
      if (docTypeMatch) {
        options.onDoctype({ type: 'docType', value: docTypeMatch[0] });
        advance(docTypeMatch[0].length);
        continue;
      }

      const endTagMatch = remaining.match(endTagReg);
      if (endTagMatch) {
        options.onEndTag({ type: 'tagEnd', value: endTagMatch[1] });
        advance(endTagMatch[0].length);
        continue;
      }

      const startTagMatch = remaining.match(startTagReg);
      if (startTagMatch) {
        const tagName = startTagMatch[1];
        options.onStartTag({ type: 'tagStart', value: tagName });

        // Walk the attributes inside the already-validated tag text only, so
        // a malformed tail can never make the outer loop lose its position.
        let tagTail = startTagMatch[0].slice(tagName.length + 1);
        let attributeMatch = tagTail.match(attributeReg);
        while (attributeMatch) {
          options.onAttribute({ type: 'attribute', value: attributeMatch[1] });
          tagTail = tagTail.slice(attributeMatch[0].length);
          attributeMatch = tagTail.match(attributeReg);
        }

        const isSelfClosing = tagTail.trim().startsWith('/');
        if (isSelfClosing || voidElements.has(tagName.toLowerCase())) {
          options.onEndTag({ type: 'tagEnd', value: tagName });
        }

        advance(startTagMatch[0].length);
        continue;
      }
      // A "<" that starts no recognizable markup falls through and is
      // emitted as literal text; this guarantees the loop always advances.
    }

    // Text runs up to the next "<". When the current "<" is literal text the
    // search starts after it, so at least one character is always consumed.
    const nextTagIndex = remaining.indexOf('<', remaining.startsWith('<') ? 1 : 0);
    const textEndIndex = nextTagIndex === -1 ? remaining.length : nextTagIndex;
    options.onText({ type: 'text', value: remaining.slice(0, textEndIndex) });
    advance(textEndIndex);
  }
}

/**
 * Builds a simple element tree from an HTML string.
 *
 * Each element node has the shape
 * `{ tagName, attributes: [{ name, value }], text, children }`.
 * Comments and doctypes are skipped; text is appended to the `text` of the
 * element that is open when it is encountered.
 *
 * @param {string} str - HTML markup.
 * @returns {object|undefined} The first top-level element, or `undefined`
 *   when the input contains no element.
 */
function htmlParser(str) {
  const root = { children: [] };
  // Stack of currently open elements; the synthetic root is never popped.
  const openElements = [root];
  const currentParent = () => openElements[openElements.length - 1];

  parse(str, {
    onComment() {},
    onDoctype() {},
    onStartTag(token) {
      const tag = {
        tagName: token.value,
        attributes: [],
        text: '',
        children: [],
      };
      currentParent().children.push(tag);
      openElements.push(tag);
    },
    onAttribute(token) {
      // Split on the first "=" only: values such as URLs may contain "=".
      const separatorIndex = token.value.indexOf('=');
      const hasValue = separatorIndex !== -1;
      const name = hasValue ? token.value.slice(0, separatorIndex) : token.value;
      const rawValue = hasValue ? token.value.slice(separatorIndex + 1) : '';
      currentParent().attributes.push({
        name,
        value: rawValue.replace(/^['"]/, '').replace(/['"]$/, ''),
      });
    },
    onEndTag(token) {
      // Close the nearest open element with this name (and anything left
      // open inside it). An end tag that matches nothing is ignored.
      const wanted = token.value.toLowerCase();
      for (let i = openElements.length - 1; i > 0; i -= 1) {
        if (openElements[i].tagName.toLowerCase() === wanted) {
          openElements.length = i;
          return;
        }
      }
    },
    onText(token) {
      const parent = currentParent();
      // Text outside every element has no node to live on and is dropped.
      if (parent !== root) {
        parent.text += token.value;
      }
    },
  });

  return root.children[0];
}

// CommonJS export; the guard keeps the file loadable where `module` is absent.
if (typeof module !== 'undefined') {
  module.exports = htmlParser;
}

// --------------------------------------------------------------------------------
// /src/index.js:
// --------------------------------------------------------------------------------
// 1 | const htmlParser = require('./htmlParser'); 2 | 3 | const domTree = htmlParser(` 4 | 5 | 6 |

7 | 8 | 9 |

10 |

11 |

box1 box1 box1

12 |

13 |

14 |

box2 box2 box2

15 |

16 |

17 |

18 | 19 | `); 20 | 21 | console.log(JSON.stringify(domTree, null, 4)); 22 | --------------------------------------------------------------------------------