├── .gitignore ├── Readme.md ├── index.js ├── package.json └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Blocktree 2 | 3 | Back to the basics, Hickey-inspired, generic text parser that spits out an [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) that you can operate on. 4 | 5 | ## Example 6 | 7 | ```js 8 | let parse = require('blocktree') 9 | let str = 'a<1,2,3>4>hi' 10 | 11 | let ast = parse(str, { 12 | marker: ',', 13 | open: '<', 14 | close: '>', 15 | }) 16 | 17 | { 18 | type: 'document', 19 | children: [ 20 | { type: 'text', value: 'a' }, 21 | { 22 | type: 'block', 23 | children: [ 24 | { type: 'text', value: '1' }, 25 | ... 26 | ] 27 | } 28 | ] 29 | } 30 | ``` 31 | 32 | ## Installation 33 | 34 | ```sh 35 | npm install blocktree 36 | ``` 37 | 38 | ## Concepts 39 | 40 | All languages can be boiled down to 2 concepts: **markers** & **blocks**. Here are some examples: 41 | 42 | - Loops and conditionals are all just **blocks** 43 | - Variables and binary operators are just **markers** 44 | - Arrays and objects are just a group of **markers** 45 | 46 | Another analogy: the position of your cursor in a document is a **marker**, and a selection you make is a **block**. 47 | 48 | Given **markers** and **blocks**, you can implement pretty much anything. 49 | 50 | Here's a basic HTML parser: 51 | 52 | ```js 53 | let html = '

hiok

' 54 | ast = Tree(html, { 55 | marker: /<(\w+)\/>/, 56 | open: /<(\w+)>/, 57 | close: /<\/(\w+)>/, 58 | }) 59 | ``` 60 | 61 | For complex languages, you may want a more expressive AST, but for DSLs and micro-languages, this is a nice constraint. 62 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Module Dependencies 3 | */ 4 | 5 | let esc = require('escape-regexp') 6 | 7 | /** 8 | * Export 9 | */ 10 | 11 | module.exports = ast 12 | 13 | /** 14 | * AST 15 | */ 16 | 17 | function ast (str, rules) { 18 | rules = prepare(rules) 19 | let toks = tokens(str, rules) 20 | return { 21 | type: 'document', 22 | children: children(toks) 23 | } 24 | } 25 | 26 | /** 27 | * Children 28 | */ 29 | 30 | function children (tokens) { 31 | let in_tag = false 32 | let token = null 33 | let ast = [] 34 | 35 | while (tokens.length) { 36 | token = tokens.shift() 37 | if (token.type === 'text') { 38 | ast.push(text(token)) 39 | } else if (token.type === 'open') { 40 | ast.push(block(tokens, token)) 41 | } else if (token.type === 'marker') { 42 | ast.push(marker(token)) 43 | } else { 44 | throw SyntaxError(`unexpected token "${token.value}", expected opening tag, opening block or marker`) 45 | } 46 | } 47 | return ast 48 | } 49 | 50 | /** 51 | * Marker node 52 | */ 53 | 54 | function marker (token) { 55 | return token 56 | } 57 | 58 | /** 59 | * Text node 60 | */ 61 | 62 | function text (token) { 63 | return token 64 | } 65 | 66 | /** 67 | * Block node 68 | */ 69 | 70 | function block (tokens, open) { 71 | let token = null 72 | 73 | let out = { 74 | type: 'block', 75 | children: [] 76 | } 77 | 78 | if (open.params) { 79 | out.open = open.params 80 | } 81 | 82 | while (tokens.length) { 83 | token = tokens.shift() 84 | if (token.type === 'text') { 85 | out.children.push(text(token)) 86 | } else if (token.type === 'marker') { 87 | 
out.children.push(marker(token)) 88 | } else if (token.type === 'open') { 89 | out.children.push(block(tokens, token)) 90 | } else if (token.type === 'close') { 91 | if (token.params) out.close = token.params 92 | return out 93 | } else { 94 | throw new SyntaxError(`unexpected token "${token.value}"`) 95 | } 96 | } 97 | 98 | return out 99 | } 100 | 101 | /** 102 | * Tokenizer 103 | */ 104 | 105 | function tokens (str, rules) { 106 | let match = null 107 | let offset = 0 108 | let toks = [] 109 | let buf = [] 110 | 111 | while (str.length) { 112 | let match = null 113 | for (let rule of rules) { 114 | match = str.match(rule.pattern) 115 | if (match) { 116 | if (buf.length) { 117 | toks.push({ type: 'text', value: buf.join('') }) 118 | buf = [] 119 | } 120 | 121 | if (match.length > 1) { 122 | toks.push({ type: rule.name, value: match[0], params: match.slice(1) }) 123 | } else { 124 | toks.push({ type: rule.name, value: match[0] }) 125 | } 126 | 127 | str = str.slice(match[0].length) 128 | break 129 | } 130 | } 131 | 132 | if (!match) { 133 | buf.push(str[0]) 134 | str = str.slice(1) 135 | } 136 | } 137 | 138 | // push the last remaining 139 | if (buf.length) { 140 | toks.push({ type: 'text', value: buf.join('') }) 141 | } 142 | 143 | return toks 144 | } 145 | 146 | /** 147 | * Prepare the rules 148 | */ 149 | 150 | function prepare (rules) { 151 | return Object.keys(rules).map(function (label) { 152 | let rule = rules[label] 153 | if (rule.source) { 154 | return { 155 | name: label, 156 | pattern: new RegExp('^' + rule.source, rule.flags.replace('g', '')) 157 | } 158 | } else if (typeof rule === 'string') { 159 | return { 160 | name: label, 161 | pattern: new RegExp('^' + esc(rule)) 162 | } 163 | } else if (Array.isArray(rule)) { 164 | return { 165 | name: label, 166 | pattern: new RegExp(`^(${rule.map(r => esc(r)).join('|')})`) 167 | } 168 | } else { 169 | throw new Error(`rule must be either a string, regexp or an array of strings`) 170 | } 171 | }) 172 | } 173 | 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "blocktree", 3 | "version": "1.0.0", 4 | "description": "Back to the basics, Clojure-inspired, generic text parser", 5 | "main": "index.js", 6 | "dependencies": { 7 | "escape-regexp": "0.0.1" 8 | }, 9 | "scripts": { 10 | "test": "make test" 11 | }, 12 | "keywords": [ 13 | "generic", 14 | "ast", 15 | "text", 16 | "transform" 17 | ], 18 | "author": "Matthew Mueller", 19 | "license": "MIT" 20 | } 21 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | let Tree = require('./index') 2 | 3 | let str = 'a<1,2,3>4>hi' 4 | 5 | let ast = Tree(str, { 6 | marker: ',', 7 | open: '<', 8 | close: '>', 9 | }) 10 | 11 | console.dir(ast, { colors: true, depth: Infinity }) 12 | 13 | let html = '

hiok

' 14 | ast = Tree(html, { 15 | marker: /<(\w+)\/>/, 16 | open: /<(\w+)>/, 17 | close: /<\/(\w+)>/, 18 | }) 19 | 20 | console.dir(ast, { colors: true, depth: Infinity }) 21 | --------------------------------------------------------------------------------