├── .gitignore ├── .eslintrc ├── .travis.yml ├── LICENSE ├── package.json ├── test └── Parser.test.js ├── lib └── Parser.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | coverage -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "node": true 4 | }, 5 | "rules": { 6 | "strict": 0, 7 | "curly": 0, 8 | "quotes": 0 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: node_js 3 | node_js: 4 | - "0.10" 5 | - "0.12" 6 | - "iojs" 7 | script: npm run travis 8 | 9 | before_install: 10 | - '[ "${TRAVIS_NODE_VERSION}" != "0.10" ] || npm install -g npm' 11 | 12 | after_success: cat ./coverage/lcov.info | node_modules/.bin/coveralls --verbose && rm -rf ./coverage 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Tobias Koppers 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fastparse", 3 | "version": "1.1.2", 4 | "description": "A very simple and stupid parser, based on a statemachine and regular expressions.", 5 | "main": "lib/Parser.js", 6 | "scripts": { 7 | "pretest": "npm run lint", 8 | "test": "mocha", 9 | "travis": "npm run cover -- --report lcovonly", 10 | "lint": "eslint lib", 11 | "precover": "npm run lint", 12 | "cover": "istanbul cover node_modules/mocha/bin/_mocha", 13 | "publish-patch": "mocha && npm version patch && git push && git push --tags && npm publish" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "https://github.com/webpack/fastparse.git" 18 | }, 19 | "keywords": [ 20 | "parser", 21 | "regexp" 22 | ], 23 | "files": [ 24 | "lib" 25 | ], 26 | "author": "Tobias Koppers @sokra", 27 | "license": "MIT", 28 | "bugs": { 29 | "url": "https://github.com/webpack/fastparse/issues" 30 | }, 31 | "homepage": "https://github.com/webpack/fastparse", 32 | "devDependencies": { 33 | "coveralls": "^2.11.2", 34 | "eslint": "^0.21.2", 35 | "istanbul": "^0.3.14", 36 | "mocha": "^2.2.5", 37 | "should": "^6.0.3" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /test/Parser.test.js: -------------------------------------------------------------------------------- 1 | /*globals describe it */ 2 | 3 | require("should"); 4 
| var Parser = require("../"); 5 | 6 | var testdata = [ 7 | { 8 | name: "simple string", 9 | states: { 10 | "start": { 11 | "[d-gm-rv]+": function(match, index) { 12 | if(!this.data) this.data = []; 13 | this.data.push({ 14 | match: match, 15 | index: index 16 | }); 17 | } 18 | } 19 | }, 20 | string: "abcdefghijklmnopqrstuvwxyz", 21 | expected: { 22 | data: [ 23 | { match: "defg", index: 3 }, 24 | { match: "mnopqr", index: 12 }, 25 | { match: "v", index: 21 } 26 | ] 27 | } 28 | }, 29 | { 30 | name: "state switing", 31 | states: { 32 | "number": { 33 | "([0-9]+)": function(match, number) { 34 | if(!this.data) this.data = {}; 35 | this.data[this.ident] = +number; 36 | delete this.ident; 37 | return "start"; 38 | }, 39 | "-\\?": true, 40 | "\\?": "start" 41 | }, 42 | "start": { 43 | "([a-z]+)": function(match, name) { 44 | this.ident = name; 45 | return "number"; 46 | } 47 | } 48 | }, 49 | string: "a 1 b 2 c f 3 d ? e -? 4", 50 | expected: { 51 | data: { 52 | a: 1, b: 2, c: 3, e: 4 53 | } 54 | } 55 | }, 56 | { 57 | name: "state array", 58 | states: { 59 | "start": [ 60 | { "a": function() { this.a = true; } }, 61 | { 62 | "b": function() { this.b = true; }, 63 | "c": function() { this.c = true; } 64 | } 65 | ] 66 | }, 67 | string: "hello abc", 68 | expected: { 69 | a: true, b: true, c: true 70 | } 71 | }, 72 | { 73 | name: "reference other states", 74 | states: { 75 | "start": [ 76 | { "a": function() { this.a = true; } }, 77 | "bc" 78 | ], 79 | "bc": { 80 | "b": function() { this.b = true; }, 81 | "c": function() { this.c = true; } 82 | } 83 | }, 84 | string: "hello abc", 85 | expected: { 86 | a: true, b: true, c: true 87 | } 88 | } 89 | ]; 90 | 91 | describe("Parser", function() { 92 | testdata.forEach(function(testcase) { 93 | it("should parse " + testcase.name, function() { 94 | var parser = new Parser(testcase.states); 95 | var actual = parser.parse("start", testcase.string, {}); 96 | actual.should.be.eql(testcase.expected); 97 | }); 98 | }); 99 | 100 | 
/*
	MIT License http://www.opensource.org/licenses/mit-license.php
	Author Tobias Koppers @sokra
*/

var hasOwn = Object.prototype.hasOwnProperty;

/**
 * Action used for transitions whose value is neither a function nor a
 * string (e.g. `true`): does nothing, so the parser stays in its state.
 */
function ignoreFunction() {}

/**
 * Creates an action that always returns `value`. Used for transitions
 * declared as a plain string (the name of the state to switch to).
 *
 * @param {string} value state name to return
 * @returns {function(): string} action returning `value`
 */
function createReturningFunction(value) {
	return function() {
		return value;
	};
}

/**
 * A small state-machine parser. Every state is compiled into one combined
 * regular expression; see `compileState` for the description format.
 *
 * @constructor
 * @param {Object} states map of state name -> state description
 * @throws {Error} if a state description has an unsupported format
 */
function Parser(states) {
	this.states = this.compileStates(states);
}

/**
 * Compiles every state of the description.
 *
 * @param {Object} states map of state name -> state description
 * @returns {Object} map of state name -> `{ regExp, actions }`
 */
Parser.prototype.compileStates = function(states) {
	var result = {};
	Object.keys(states).forEach(function(name) {
		result[name] = this.compileState(states[name], states);
	}, this);
	return result;
};

/**
 * Compiles one state into a single global regular expression plus the list
 * of actions belonging to its alternatives.
 *
 * A state may be
 *  - an object mapping regular expression sources to a function, a state
 *    name (string) or any other value (meaning "stay in this state"),
 *  - a string naming another state whose transitions are included,
 *  - an array of any of the above.
 *
 * @param {Object|Array|string} state description of the state
 * @param {Object} states the complete description (to resolve references)
 * @returns {{regExp: RegExp, actions: Array}} compiled state
 * @throws {Error} on an unsupported format, a reference to a state that
 *   doesn't exist, or a circular reference between states
 */
Parser.prototype.compileState = function(state, states) {
	var regExps = [];
	// Names of the referenced states currently being expanded. Used to detect
	// reference cycles, which would otherwise recurse until the stack overflows.
	var expanding = [];
	function iterator(str, value) {
		regExps.push({
			groups: Parser.getGroupCount(str),
			regExp: str,
			value: value
		});
	}
	function processState(statePart) {
		if(Array.isArray(statePart)) {
			statePart.forEach(processState);
		} else if(statePart !== null && typeof statePart === "object") {
			// the explicit null check is needed because typeof null === "object"
			Object.keys(statePart).forEach(function(key) {
				iterator(key, statePart[key]);
			});
		} else if(typeof statePart === "string") {
			if(!hasOwn.call(states, statePart))
				throw new Error("Referenced state '" + statePart + "' doesn't exist");
			if(expanding.indexOf(statePart) >= 0)
				throw new Error("Circular state reference '" + statePart + "'");
			expanding.push(statePart);
			processState(states[statePart]);
			expanding.pop();
		} else {
			throw new Error("Unexpected 'state' format");
		}
	}
	processState(state);
	// Every transition becomes one alternative wrapped in its own group, so the
	// alternative that matched can be identified by its group index.
	var total = regExps.map(function(r) {
		return "(" + r.regExp + ")";
	}).join("|");
	var actions = [];
	var pos = 1;
	regExps.forEach(function(r) {
		var fn;
		if(typeof r.value === "function") {
			fn = r.value;
		} else if(typeof r.value === "string") {
			fn = createReturningFunction(r.value);
		} else {
			fn = ignoreFunction;
		}
		actions.push({
			name: r.regExp,
			fn: fn,
			// [pos, pos2) is the slice of the match array belonging to this
			// alternative: its wrapping group followed by its own groups
			pos: pos,
			pos2: pos + r.groups + 1
		});
		pos += r.groups + 1;
	});
	return {
		regExp: new RegExp(total, "g"),
		actions: actions
	};
};

/**
 * Counts the capturing groups of a regular expression source.
 *
 * @param {string} regExpStr regular expression source
 * @returns {number} number of capturing groups in `regExpStr`
 */
Parser.getGroupCount = function(regExpStr) {
	// "|^$" guarantees a match on "", so exec() always returns an array with
	// one entry per group (+ the full match + the wrapping group added here).
	return new RegExp("(" + regExpStr + ")|^$").exec("").length - 2;
};

/**
 * Runs the state machine over `string`.
 *
 * Each action is called with `this === context` and the arguments
 * `(match, ...groups, index, matchLength)`. If it returns a truthy value,
 * that value is the name of the state to switch to.
 *
 * An empty match (e.g. from `"a*"` or a lookahead) still triggers its action;
 * afterwards the parser moves on by one character, so parsing always
 * terminates.
 *
 * @param {string} initialState name of the state to start in
 * @param {string} string the text to parse
 * @param {Object} [context] object available as `this` in actions (default `{}`)
 * @returns {Object} the context
 * @throws {Error} if the initial state or a state returned by an action
 *   doesn't exist
 */
Parser.prototype.parse = function(initialState, string, context) {
	context = context || {};
	var states = this.states;
	// own-property checks only: names like "constructor" are not states
	if(!hasOwn.call(states, initialState))
		throw new Error("State '" + initialState + "' doesn't exist");
	var currentState = initialState;
	var currentIndex = 0;
	for(;;) {
		var state = states[currentState];
		var regExp = state.regExp;
		regExp.lastIndex = currentIndex;
		var match = regExp.exec(string);
		if(!match) return context;
		var matchIndex = match.index;
		var matchLength = match[0].length;
		// Read lastIndex before running the action: the action may re-enter this
		// parser and reuse the same (stateful) RegExp object. An empty match
		// leaves lastIndex untouched, so step forward manually to make progress.
		currentIndex = matchLength === 0 ? regExp.lastIndex + 1 : regExp.lastIndex;
		var actions = state.actions;
		for(var i = 0; i < actions.length; i++) {
			var action = actions[i];
			// a group that did not participate is undefined; "" is a real match
			if(match[action.pos] !== undefined) {
				var args = Array.prototype.slice.call(match, action.pos, action.pos2);
				args.push(matchIndex, matchLength);
				var ret = action.fn.apply(context, args);
				if(ret) {
					if(!hasOwn.call(states, ret))
						throw new Error("State '" + ret + "' doesn't exist");
					currentState = ret;
				}
				break;
			}
		}
	}
};

// Guarded so the file can also be evaluated where no CommonJS `module` exists.
if(typeof module !== "undefined" && module.exports) {
	module.exports = Parser;
}
-------------------------------------------------------------------------------- 1 | # fastparse 2 | 3 | A very simple and stupid parser, based on a statemachine and regular expressions. 4 | 5 | It's not intended for complex languages. It's intended to make it easy to write a simple parser for a simple language. 6 | 7 | 8 | 9 | ## Usage 10 | 11 | Pass a description of the statemachine to the constructor. The description must be in this form: 12 | 13 | ``` javascript 14 | new Parser(description) 15 | 16 | description is { 17 | // The key is the name of the state 18 | // The value is an object containing possible transitions 19 | "state-name": { 20 | // The key is a regular expression 21 | // If the regular expression matches, the transition is executed 22 | // The value can be "true", another state name or a function 23 | 24 | "a": true, 25 | // true will make the parser stay in the current state 26 | 27 | "b": "other-state-name", 28 | // a string will make the parser transit to a new state 29 | 30 | "[cde]": function(match, index, matchLength) { 31 | // "match" will be the matched string 32 | // "index" will be the position in the complete string 33 | // "matchLength" will be "match.length" 34 | 35 | // "this" will be the "context" passed to the "parse" method 36 | 37 | // A new state name (string) can be returned 38 | return "other-state-name"; 39 | }, 40 | 41 | "([0-9]+)(\\.[0-9]+)?": function(match, first, second, index, matchLength) { 42 | // groups can be used in the regular expression 43 | // they will match to arguments "first", "second" 44 | }, 45 | 46 | // the parser stops when it cannot match the string anymore 47 | 48 | // order of keys is the order in which regular expressions are matched 49 | // if the javascript runtime preserves the order of keys in an object 50 | // (this is not standardized, but it's a de-facto standard) 51 | } 52 | } 53 | ``` 54 | 55 | The statemachine is compiled down to a single regular expression per state. 
So basically the parsing work is delegated to the (native) regular expression logic of the javascript runtime. 56 | 57 | 58 | ``` javascript 59 | Parser.prototype.parse(initialState: String, parsedString: String, context: Object) 60 | ``` 61 | 62 | `initialState`: state where the parser starts to parse. 63 | 64 | `parsedString`: the string which should be parsed. 65 | 66 | `context`: an object which can be used to save state and results. Available as `this` in transition functions. 67 | 68 | returns `context` 69 | 70 | 71 | 72 | 73 | ## Example 74 | 75 | ``` javascript 76 | var Parser = require("fastparse"); 77 | 78 | // A simple parser that extracts @licence ... from comments in a JS file 79 | var parser = new Parser({ 80 | // The "source" state 81 | "source": { 82 | // matches comment start 83 | "/\\*": "comment", 84 | "//": "linecomment", 85 | 86 | // this would be necessary for a complex language like JS 87 | // but omitted here for simplicity 88 | // "\"": "string1", 89 | // "\'": "string2", 90 | // "\/": "regexp" 91 | 92 | }, 93 | // The "comment" state 94 | "comment": { 95 | "\\*/": "source", 96 | "@licen[cs]e\\s((?:[^*\n]|\\*+[^*/\n])*)": function(match, licenseText) { 97 | this.licences.push(licenseText.trim()); 98 | } 99 | }, 100 | // The "linecomment" state 101 | "linecomment": { 102 | "\n": "source", 103 | "@licen[cs]e\\s(.*)": function(match, licenseText) { 104 | this.licences.push(licenseText.trim()); 105 | } 106 | } 107 | }); 108 | 109 | var licences = parser.parse("source", sourceCode, { licences: [] }).licences; 110 | 111 | console.log(licences); 112 | ``` 113 | 114 | 115 | 116 | ## License 117 | 118 | MIT (http://www.opensource.org/licenses/mit-license.php) 119 | --------------------------------------------------------------------------------