├── .gitignore
├── .eslintrc
├── .travis.yml
├── LICENSE
├── package.json
├── test
    └── Parser.test.js
├── lib
    └── Parser.js
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | coverage


--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"env": {
 3 | 		"node": true
 4 | 	},
 5 | 	"rules": {
 6 | 		"strict": 0,
 7 | 		"curly": 0,
 8 | 		"quotes": 0
 9 | 	}
10 | }
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: node_js
 3 | node_js:
 4 |   - "0.10"
 5 |   - "0.12"
 6 |   - "iojs"
 7 | script: npm run travis
 8 | 
 9 | before_install:
10 |   - '[ "${TRAVIS_NODE_VERSION}" != "0.10" ] || npm install -g npm'
11 | 
12 | after_success: cat ./coverage/lcov.info | node_modules/.bin/coveralls --verbose && rm -rf ./coverage
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2018 Tobias Koppers
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "fastparse",
 3 |   "version": "1.1.2",
 4 |   "description": "A very simple and stupid parser, based on a statemachine and regular expressions.",
 5 |   "main": "lib/Parser.js",
 6 |   "scripts": {
 7 |     "pretest": "npm run lint",
 8 |     "test": "mocha",
 9 |     "travis": "npm run cover -- --report lcovonly",
10 |     "lint": "eslint lib",
11 |     "precover": "npm run lint",
12 |     "cover": "istanbul cover node_modules/mocha/bin/_mocha",
13 |     "publish-patch": "mocha && npm version patch && git push && git push --tags && npm publish"
14 |   },
15 |   "repository": {
16 |     "type": "git",
17 |     "url": "https://github.com/webpack/fastparse.git"
18 |   },
19 |   "keywords": [
20 |     "parser",
21 |     "regexp"
22 |   ],
23 |   "files": [
24 |     "lib"
25 |   ],
26 |   "author": "Tobias Koppers @sokra",
27 |   "license": "MIT",
28 |   "bugs": {
29 |     "url": "https://github.com/webpack/fastparse/issues"
30 |   },
31 |   "homepage": "https://github.com/webpack/fastparse",
32 |   "devDependencies": {
33 |     "coveralls": "^2.11.2",
34 |     "eslint": "^0.21.2",
35 |     "istanbul": "^0.3.14",
36 |     "mocha": "^2.2.5",
37 |     "should": "^6.0.3"
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/test/Parser.test.js:
--------------------------------------------------------------------------------
  1 | /*globals describe it */
  2 | 
  3 | require("should");
  4 | var Parser = require("../");
  5 | 
  6 | var testdata = [
  7 | 	{
  8 | 		name: "simple string",
  9 | 		states: {
 10 | 			"start": {
 11 | 				"[d-gm-rv]+": function(match, index) {
 12 | 					if(!this.data) this.data = [];
 13 | 					this.data.push({
 14 | 						match: match,
 15 | 						index: index
 16 | 					});
 17 | 				}
 18 | 			}
 19 | 		},
 20 | 		string: "abcdefghijklmnopqrstuvwxyz",
 21 | 		expected: {
 22 | 			data: [
 23 | 				{ match: "defg", index: 3 },
 24 | 				{ match: "mnopqr", index: 12 },
 25 | 				{ match: "v", index: 21 }
 26 | 			]
 27 | 		}
 28 | 	},
 29 | 	{
 30 | 		name: "state switing",
 31 | 		states: {
 32 | 			"number": {
 33 | 				"([0-9]+)": function(match, number) {
 34 | 					if(!this.data) this.data = {};
 35 | 					this.data[this.ident] = +number;
 36 | 					delete this.ident;
 37 | 					return "start";
 38 | 				},
 39 | 				"-\\?": true,
 40 | 				"\\?": "start"
 41 | 			},
 42 | 			"start": {
 43 | 				"([a-z]+)": function(match, name) {
 44 | 					this.ident = name;
 45 | 					return "number";
 46 | 				}
 47 | 			}
 48 | 		},
 49 | 		string: "a 1 b 2 c f 3 d ? e -? 4",
 50 | 		expected: {
 51 | 			data: {
 52 | 				a: 1, b: 2, c: 3, e: 4
 53 | 			}
 54 | 		}
 55 | 	},
 56 | 	{
 57 | 		name: "state array",
 58 | 		states: {
 59 | 			"start": [
 60 | 				{ "a": function() { this.a = true; } },
 61 | 				{
 62 | 					"b": function() { this.b = true; },
 63 | 					"c": function() { this.c = true; }
 64 | 				}
 65 | 			]
 66 | 		},
 67 | 		string: "hello abc",
 68 | 		expected: {
 69 | 			a: true, b: true, c: true
 70 | 		}
 71 | 	},
 72 | 	{
 73 | 		name: "reference other states",
 74 | 		states: {
 75 | 			"start": [
 76 | 				{ "a": function() { this.a = true; } },
 77 | 				"bc"
 78 | 			],
 79 | 			"bc": {
 80 | 				"b": function() { this.b = true; },
 81 | 				"c": function() { this.c = true; }
 82 | 			}
 83 | 		},
 84 | 		string: "hello abc",
 85 | 		expected: {
 86 | 			a: true, b: true, c: true
 87 | 		}
 88 | 	}
 89 | ];
 90 | 
 91 | describe("Parser", function() {
 92 | 	testdata.forEach(function(testcase) {
 93 | 		it("should parse " + testcase.name, function() {
 94 | 			var parser = new Parser(testcase.states);
 95 | 			var actual = parser.parse("start", testcase.string, {});
 96 | 			actual.should.be.eql(testcase.expected);
 97 | 		});
 98 | 	});
 99 | 
100 | 	it("should default context to empty object", function() {
101 | 		var parser = new Parser({
102 | 			"a": {
103 | 				"a": function() {
104 | 					this.should.be.eql({});
105 | 				}
106 | 			}
107 | 		});
108 | 		var result = parser.parse("a", "a");
109 | 		result.should.be.eql({});
110 | 	});
111 | 
112 | 	it("should error for unexpected format", function() {
113 | 		(function() {
114 | 			var parser = new Parser({
115 | 				"a": 123
116 | 			});
117 | 			return parser;
118 | 		}).should.throw();
119 | 	});
120 | 
121 | 	it("should error for not existing state", function() {
122 | 		var parser = new Parser({
123 | 			"a": {
124 | 				"a": "b"
125 | 			}
126 | 		});
127 | 		(function() {
128 | 			return parser.parse("a", "a");
129 | 		}).should.throw();
130 | 	});
131 | });
132 | 


--------------------------------------------------------------------------------
/lib/Parser.js:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	MIT License http://www.opensource.org/licenses/mit-license.php
  3 | 	Author Tobias Koppers @sokra
  4 | */
  5 | 
  6 | function ignoreFunction() {}
  7 | 
  8 | function createReturningFunction(value) {
  9 | 	return function() {
 10 | 		return value;
 11 | 	};
 12 | }
 13 | 
 14 | function Parser(states) {
 15 | 	this.states = this.compileStates(states);
 16 | }
 17 | 
 18 | Parser.prototype.compileStates = function(states) {
 19 | 	var result = {};
 20 | 	Object.keys(states).forEach(function(name) {
 21 | 		result[name] = this.compileState(states[name], states);
 22 | 	}, this);
 23 | 	return result;
 24 | };
 25 | 
 26 | Parser.prototype.compileState = function(state, states) {
 27 | 	var regExps = [];
 28 | 	function iterator(str, value) {
 29 | 		regExps.push({
 30 | 			groups: Parser.getGroupCount(str),
 31 | 			regExp: str,
 32 | 			value: value
 33 | 		});
 34 | 	}
 35 | 	function processState(statePart) {
 36 | 		if(Array.isArray(statePart)) {
 37 | 			statePart.forEach(processState);
 38 | 		} else if(typeof statePart === "object") {
 39 | 			Object.keys(statePart).forEach(function(key) {
 40 | 				iterator(key, statePart[key]);
 41 | 			});
 42 | 		} else if(typeof statePart === "string") {
 43 | 			processState(states[statePart]);
 44 | 		} else {
 45 | 			throw new Error("Unexpected 'state' format");
 46 | 		}
 47 | 	}
 48 | 	processState(state);
 49 | 	var total = regExps.map(function(r) {
 50 | 		return "(" + r.regExp + ")";
 51 | 	}).join("|");
 52 | 	var actions = [];
 53 | 	var pos = 1;
 54 | 	regExps.forEach(function(r) {
 55 | 		var fn;
 56 | 		if(typeof r.value === "function") {
 57 | 			fn = r.value;
 58 | 		} else if(typeof r.value === "string") {
 59 | 			fn = createReturningFunction(r.value);
 60 | 		} else {
 61 | 			fn = ignoreFunction;
 62 | 		}
 63 | 		actions.push({
 64 | 			name: r.regExp,
 65 | 			fn: fn,
 66 | 			pos: pos,
 67 | 			pos2: pos + r.groups + 1
 68 | 		});
 69 | 		pos += r.groups + 1;
 70 | 	});
 71 | 	return {
 72 | 		regExp: new RegExp(total, "g"),
 73 | 		actions: actions
 74 | 	};
 75 | };
 76 | 
 77 | Parser.getGroupCount = function(regExpStr) {
 78 | 	return new RegExp("(" + regExpStr + ")|^$").exec("").length - 2;
 79 | };
 80 | 
 81 | Parser.prototype.parse = function(initialState, string, context) {
 82 | 	context = context || {};
 83 | 	var currentState = initialState;
 84 | 	var currentIndex = 0;
 85 | 	for(;;) {
 86 | 		var state = this.states[currentState];
 87 | 		var regExp = state.regExp;
 88 | 		regExp.lastIndex = currentIndex;
 89 | 		var match = regExp.exec(string);
 90 | 		if(!match) return context;
 91 | 		var actions = state.actions;
 92 | 		currentIndex = state.regExp.lastIndex;
 93 | 		for(var i = 0; i < actions.length; i++) {
 94 | 			var action = actions[i];
 95 | 			if(match[action.pos]) {
 96 | 				var ret = action.fn.apply(context, Array.prototype.slice.call(match, action.pos, action.pos2).concat([state.regExp.lastIndex - match[0].length, match[0].length]));
 97 | 				if(ret) {
 98 | 					if(!(ret in this.states))
 99 | 						throw new Error("State '" + ret + "' doesn't exist");
100 | 					currentState = ret;
101 | 				}
102 | 				break;
103 | 			}
104 | 		}
105 | 	}
106 | };
107 | 
108 | module.exports = Parser;
109 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # fastparse
  2 | 
  3 | A very simple and stupid parser, based on a statemachine and regular expressions.
  4 | 
  5 | It's not intended for complex languages. It's intended to easily write a simple parser for a simple language.
  6 | 
  7 | 
  8 | 
  9 | ## Usage
 10 | 
 11 | Pass a description of the state machine to the constructor. The description must be in this form:
 12 | 
 13 | ``` javascript
 14 | new Parser(description)
 15 | 
 16 | description is {
 17 | 	// The key is the name of the state
 18 | 	// The value is an object containing possible transitions
 19 | 	"state-name": {
 20 | 		// The key is a regular expression
 21 | 		// If the regular expression matches the transition is executed
 22 | 		// The value can be "true", another state name or a function
 23 | 
 24 | 		"a": true,
 25 | 		// true will make the parser stay in the current state
 26 | 		
 27 | 		"b": "other-state-name",
 28 | 		// a string will make the parser transit to a new state
 29 | 		
 30 | 		"[cde]": function(match, index, matchLength) {
 31 | 			// "match" will be the matched string
 32 | 			// "index" will be the position in the complete string
 33 | 			// "matchLength" will be "match.length"
 34 | 			
 35 | 			// "this" will be the "context" passed to the "parse" method
 36 | 			
 37 | 			// A new state name (string) can be returned
 38 | 			return "other-state-name";
 39 | 		},
 40 | 		
 41 | 		"([0-9]+)(\\.[0-9]+)?": function(match, first, second, index, matchLength) {
 42 | 			// groups can be used in the regular expression
 43 | 			// they will match to arguments "first", "second"
 44 | 		},
 45 | 		
 46 | 		// the parser stops when it cannot match the string anymore
 47 | 		
 48 | 		// order of keys is the order in which regular expressions are matched
 49 | 		// if the javascript runtime preserves the order of keys in an object
 50 | 		// (this is not standardized, but it's a de-facto standard)
 51 | 	}
 52 | }
 53 | ```
 54 | 
 55 | The state machine is compiled down to a single regular expression per state. So basically the parsing work is delegated to the (native) regular expression logic of the JavaScript runtime.
 56 | 
 57 | 
 58 | ``` javascript
 59 | Parser.prototype.parse(initialState: String, parsedString: String, context: Object)
 60 | ```
 61 | 
 62 | `initialState`: state where the parser starts to parse.
 63 | 
 64 | `parsedString`: the string which should be parsed.
 65 | 
 66 | `context`: an object which can be used to save state and results. Available as `this` in transition functions.
 67 | 
 68 | returns `context`
 69 | 
 70 | 
 71 | 
 72 | 
 73 | ## Example
 74 | 
 75 | ``` javascript
 76 | var Parser = require("fastparse");
 77 | 
 78 | // A simple parser that extracts @licence ... from comments in a JS file
 79 | var parser = new Parser({
 80 | 	// The "source" state
 81 | 	"source": {
 82 | 		// matches comment start
 83 | 		"/\\*": "comment",
 84 | 		"//": "linecomment",
 85 | 		
 86 | 		// this would be necessary for a complex language like JS
 87 | 		// but omitted here for simplicity
 88 | 		// "\"": "string1",
 89 | 		// "\'": "string2",
 90 | 		// "\/": "regexp"
 91 | 		
 92 | 	},
 93 | 	// The "comment" state
 94 | 	"comment": {
 95 | 		"\\*/": "source",
 96 | 		"@licen[cs]e\\s((?:[^*\n]|\\*+[^*/\n])*)": function(match, licenseText) {
 97 | 			this.licences.push(licenseText.trim());
 98 | 		}
 99 | 	},
100 | 	// The "linecomment" state
101 | 	"linecomment": {
102 | 		"\n": "source",
103 | 		"@licen[cs]e\\s(.*)": function(match, licenseText) {
104 | 			this.licences.push(licenseText.trim());
105 | 		}
106 | 	}
107 | });
108 | 
109 | var licences = parser.parse("source", sourceCode, { licences: [] }).licences;
110 | 
111 | console.log(licences);
112 | ```
113 | 
114 | 
115 | 
116 | ## License
117 | 
118 | MIT (http://www.opensource.org/licenses/mit-license.php)
119 | 


--------------------------------------------------------------------------------