├── test ├── .gitkeep ├── mocha.opts ├── cpp_unit │ ├── cases │ │ ├── case_1.c │ │ ├── case_1.c.pp │ │ ├── case_2.c.pp │ │ ├── case_2.c │ │ ├── case_4.c.pp │ │ ├── case_4.c │ │ ├── case_3.c.pp │ │ └── case_3.c │ └── cpp_unit.js └── lex_unit │ ├── cases │ ├── case_1.c.pp │ ├── case_2.c.pp │ ├── case_3.c.pp │ └── case_4.c.pp │ └── lex_unit.js ├── .gitignore ├── index.js ├── lib ├── node-c-lexer.js ├── cpp-unit.js ├── token-rules.js └── lex-unit.js ├── package.json └── README.md /test/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /test/mocha.opts: -------------------------------------------------------------------------------- 1 | --recursive 2 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./lib/node-c-lexer'); 2 | -------------------------------------------------------------------------------- /test/cpp_unit/cases/case_1.c: -------------------------------------------------------------------------------- 1 | int main(){ 2 | return 0; 3 | } 4 | -------------------------------------------------------------------------------- /test/cpp_unit/cases/case_1.c.pp: -------------------------------------------------------------------------------- 1 | int main(){ 2 | return 0; 3 | } 4 | -------------------------------------------------------------------------------- /test/lex_unit/cases/case_1.c.pp: -------------------------------------------------------------------------------- 1 | int main(){ 2 | return 0; 3 | } 4 | -------------------------------------------------------------------------------- 
/test/cpp_unit/cases/case_2.c.pp: -------------------------------------------------------------------------------- 1 | 2 | 3 | int main(){ 4 | int a[100]; 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /test/lex_unit/cases/case_2.c.pp: -------------------------------------------------------------------------------- 1 | 2 | 3 | int main(){ 4 | int a[100]; 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /test/cpp_unit/cases/case_2.c: -------------------------------------------------------------------------------- 1 | #define MAX 100 2 | 3 | int main(){ 4 | int a[MAX]; 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /lib/node-c-lexer.js: -------------------------------------------------------------------------------- 1 | var lexUnit = require("./lex-unit"); 2 | module.exports.lexUnit = lexUnit; 3 | 4 | var cppUnit = require("./cpp-unit"); 5 | module.exports.cppUnit = cppUnit; 6 | -------------------------------------------------------------------------------- /test/cpp_unit/cases/case_4.c.pp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | int main() 5 | { 6 | int i; 7 | clock_t time1, time2; 8 | time1= clock(); 9 | for (i=1; i<=100000; i++); 10 | time2=clock(); 11 | printf("%f seconds\n",(float)(time2-time1)/((clock_t) 1000000)); 12 | 13 | } 14 | -------------------------------------------------------------------------------- /test/cpp_unit/cases/case_4.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | int i; 7 | clock_t time1, time2; 8 | time1= clock(); 9 | for (i=1; i<=100000; i++); 10 | time2=clock(); 11 | printf("%f seconds\n",(float)(time2-time1)/CLOCKS_PER_SEC); 12 | 13 | } 14 | 
-------------------------------------------------------------------------------- /test/cpp_unit/cases/case_3.c.pp: -------------------------------------------------------------------------------- 1 | 2 | 3 | int factorial(int n){ 4 | if(n == 1) return 1; 5 | else return n*factorial(n-1); 6 | } 7 | 8 | int main(){ 9 | int a; 10 | printf("Input number:\n"); 11 | scanf("%d", &a); 12 | printf("Factorial of given number is: %d\n", factorial(a)); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/lex_unit/cases/case_3.c.pp: -------------------------------------------------------------------------------- 1 | 2 | 3 | int factorial(int n){ 4 | if(n == 1) return 1; 5 | else return n*factorial(n-1); 6 | } 7 | 8 | int main(){ 9 | int a; 10 | printf("Input number:\n"); 11 | scanf("%d", &a); 12 | printf("Factorial of given number is: %d\n", factorial(a)); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/cpp_unit/cases/case_3.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int factorial(int n){ 4 | if(n == 1) return 1; 5 | else return n*factorial(n-1); 6 | } 7 | 8 | int main(){ 9 | int a; 10 | printf("Input number:\n"); 11 | scanf("%d", &a); 12 | printf("Factorial of given number is: %d\n", factorial(a)); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/lex_unit/cases/case_4.c.pp: -------------------------------------------------------------------------------- 1 | 2 | 3 | int factorial(int n){ 4 | // Terminal condition of recursion. 5 | /* 6 | If n is equal to 1 then no need to go any deeper level of recursion. 7 | Rather return 1. Because factorial of 1 is 1. 
8 | */ 9 | if(n == 1) return 1; 10 | else return n*factorial(n-1); 11 | } 12 | 13 | int main(){ 14 | int a; 15 | /*** Print a message 16 | * A message to inform user what to provide 17 | **/ 18 | printf("Input number:\n"); 19 | /** 20 | * Take input 21 | */ 22 | scanf("%d", &a); 23 | printf("Factorial of given number is: %d\n", factorial(a)); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-c-lexer", 3 | "version": "1.0.5", 4 | "description": "NodeJS library to get token stream from C code.", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "node_modules/.bin/mocha" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/taufique71/node-c-lexer.git" 12 | }, 13 | "keywords": [ 14 | "node", 15 | "c", 16 | "lexer" 17 | ], 18 | "author": "Md. Taufique Hussain", 19 | "license": "ISC", 20 | "bugs": { 21 | "url": "https://github.com/taufique71/node-c-lexer/issues" 22 | }, 23 | "homepage": "https://github.com/taufique71/node-c-lexer", 24 | "dependencies": { 25 | "async": "^1.5.1", 26 | "jison": "^0.4.15", 27 | "lex": "^1.7.9", 28 | "uuid": "^2.0.1" 29 | }, 30 | "devDependencies": { 31 | "assert": "^1.4.1", 32 | "async": "^1.5.2", 33 | "chai": "^3.5.0", 34 | "diff": "^3.0.1", 35 | "mocha": "^2.3.4" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /test/lex_unit/lex_unit.js: -------------------------------------------------------------------------------- 1 | var fs = require("fs"); 2 | var diff = require("diff"); 3 | var async = require("async"); 4 | var assert = require("chai").assert; 5 | var expect = require("chai").expect; 6 | var should = require("chai").should(); 7 | 8 | describe("Tests for lexing unit", function(){ 9 | it("Should be able to require tokenize as function", function(){ 10 | var tokenize = 
require("../../lib/lex-unit.js").tokenize; 11 | assert(tokenize); 12 | assert(typeof(tokenize), "function"); 13 | }); 14 | it("case_1 should have 9 tokens", function(done){ 15 | var input_file = __dirname + "/cases/case_1.c.pp"; 16 | var tokenize = require("../../lib/lex-unit.js").tokenize; 17 | fs.readFile(input_file, "utf-8", function(err, code_text){ 18 | if(err) done(err); 19 | else{ 20 | var tokens = tokenize(code_text); 21 | assert.equal(tokens.length, 9); 22 | done(); 23 | } 24 | }); 25 | }); 26 | it("case_2 should have 15 tokens", function(done){ 27 | var input_file = __dirname + "/cases/case_2.c.pp"; 28 | var tokenize = require("../../lib/lex-unit.js").tokenize; 29 | fs.readFile(input_file, "utf-8", function(err, code_text){ 30 | if(err) done(err); 31 | else{ 32 | var tokens = tokenize(code_text); 33 | assert.equal(tokens.length, 15); 34 | done(); 35 | } 36 | }); 37 | }); 38 | it("case_3 should have 63 tokens", function(done){ 39 | var input_file = __dirname + "/cases/case_3.c.pp"; 40 | var tokenize = require("../../lib/lex-unit.js").tokenize; 41 | fs.readFile(input_file, "utf-8", function(err, code_text){ 42 | if(err) done(err); 43 | else{ 44 | var tokens = tokenize(code_text); 45 | assert.equal(tokens.length, 63); 46 | done(); 47 | } 48 | }); 49 | }); 50 | it("case_4 should have 63 tokens", function(done){ 51 | var input_file = __dirname + "/cases/case_4.c.pp"; 52 | var tokenize = require("../../lib/lex-unit.js").tokenize; 53 | fs.readFile(input_file, "utf-8", function(err, code_text){ 54 | if(err) done(err); 55 | else{ 56 | var tokens = tokenize(code_text); 57 | assert.equal(tokens.length, 63); 58 | done(); 59 | } 60 | }); 61 | }); 62 | }); 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # node-c-lexer 2 | Lexical analyzer library for C programming language in NodeJS 3 | 4 | ### Usage 5 | There are two units in the 
library - a preprocessing unit and a lexical analysis 6 | unit. These two units can be used independently. The purpose of including 7 | the preprocessing unit within the library is so that the user can remove the 8 | preprocessor directives with it and then feed the result to the scanning unit to get a token stream. 9 | 10 | ##### Preprocessing Unit 11 | To use the preprocessing unit, the C code needs to reside in a file. 12 | ```js 13 | var lexer = require("node-c-lexer"); 14 | lexer.cppUnit.clearPreprocessors("./a.c", function(err, codeText){ 15 | if(err){ 16 | /* Some error occurred */ 17 | } 18 | else{ 19 | /* Do what you want to do with preprocessor free code text */ 20 | } 21 | }); 22 | ``` 23 | The clearPreprocessors method by default invokes `cpp` on the first argument, 24 | producing an intermediate preprocessed file. It then finalizes the output by stripping 25 | the output of the preprocessor from the intermediate file and passing it to your callback 26 | as codeText. 27 | 28 | If you already have preprocessed files (.ii) at hand, you can skip the preprocessing step 29 | by passing the path to your preprocessed file as your last argument. 30 | 31 | A reason why you would want to do something like this is if you, for instance, have a 32 | separate environment on which your .ii(s) are generated (e.g. preprocessed files are produced 33 | within your Windows bash environment). Or if your preprocessing pass diverges from 34 | the execution of a simple 'cpp' command in general. 35 | ```js 36 | var lexer = require("node-c-lexer"); 37 | lexer.cppUnit.clearPreprocessors("./a.c", function(err, codeText){ 38 | if(err){ 39 | /* Some error occurred */ 40 | } 41 | else{ 42 | /* Do what you want to do with preprocessor free code text */ 43 | } 44 | }, "./a.ii"); 45 | ``` 46 | 47 | ##### Scanning Unit 48 | ```js 49 | var lexer = require("node-c-lexer"); 50 | var tokenStream = lexer.lexUnit.tokenize(codeText); 51 | /* Now do what you want with token stream */ 52 | ``` 53 | The token stream is actually an array of tokens. 
A single token is a single JavaScript 54 | object. The format of a single token is as follows: 55 | ```js 56 | { 57 | "lexeme": "func", 58 | "row": 5, 59 | "col": 3, 60 | "tokenClass": "IDENTIFIER", 61 | "keyword": false, 62 | "parent": null, 63 | "children": null 64 | } 65 | ``` 66 | ```parent``` and ```children``` 67 | are kept so that a parse tree can be built using the tokens as nodes. 68 | -------------------------------------------------------------------------------- /lib/cpp-unit.js: -------------------------------------------------------------------------------- 1 | var exec = require("child_process").exec; 2 | var uuid = require("uuid"); 3 | var fs = require("fs"); 4 | 5 | /** 6 | * Extracts preprocessor directives from a file 7 | * with the 'cpp' command that's in your PATH. 8 | * @param {string} fileName - Absolute path to the input file. 9 | * @param {outputCallback} cb - function called with the output (codeText) 10 | * @param {string} outFile - An optional parameter that is assumed 11 | * to be the corresponding preprocessed file; when provided, 12 | * automatic preprocessing with 'cpp' is omitted. 13 | */ 14 | var clearPreprocessors = function(fileName, cb, outFile){ 15 | var cppFileName = uuid.v1(); 16 | var commandToExecute = "cpp" + " " + fileName + " " + cppFileName; 17 | if (outFile != null){ 18 | clear_pp_portion(fileName, outFile, cb, false); 19 | } 20 | else{ 21 | exec(commandToExecute, function(err, stdout, stderr){ 22 | if(!err){ 23 | clear_pp_portion(fileName, cppFileName, cb, true); 24 | } 25 | else{ 26 | cb(err); 27 | } 28 | }); 29 | } 30 | 31 | /** 32 | * Extracts preprocessor directives from an already 33 | * preprocessed file 34 | * @param {string} original - Absolute path to the unpreprocessed file. 35 | * @param {string} preprocessed - Absolute path to the corresponding preprocessed file. 
36 | * @param {outputCallback} cb - function called with the output (codeText) 37 | * @param {bool} delPreprocessed - delete (preprocessed) if true 38 | */ 39 | function clear_pp_portion(original, preprocessed, cb, delPreprocessed){ 40 | var line_reader = require('readline').createInterface({ 41 | input: require('fs').createReadStream(preprocessed) 42 | }); 43 | 44 | var on_off_flag = false; 45 | var list_of_lines = []; 46 | var code_text = ""; 47 | var prev_line = ""; 48 | var target_line_no = 0; 49 | var file_name_to_match = '"' + original + '"'; 50 | 51 | line_reader.on('line', function (line) { 52 | var tokens = line.split(" "); 53 | if(tokens[0] === "#"){ 54 | var line_no = parseInt(tokens[1]); 55 | var file_name = tokens[2]; 56 | if(file_name === file_name_to_match){ 57 | on_off_flag = true; 58 | target_line_no = line_no; 59 | } 60 | else{ 61 | if(on_off_flag === true){ 62 | list_of_lines.push(""); 63 | } 64 | on_off_flag = false; 65 | } 66 | } 67 | else{ 68 | if(on_off_flag === true){ 69 | if(target_line_no <= list_of_lines.length){ 70 | line = line.trim(); 71 | list_of_lines[target_line_no - 1] = list_of_lines[target_line_no - 1] + line; 72 | target_line_no++; 73 | } 74 | else{ 75 | list_of_lines.push(line); 76 | target_line_no++; 77 | } 78 | } 79 | } 80 | 81 | }); 82 | 83 | line_reader.on('close', function() { 84 | if (delPreprocessed){ 85 | commandToExecute = "rm " + cppFileName; 86 | exec(commandToExecute, function(err, stdout, stderr){ 87 | cb(null, list_of_lines.join("\n")+"\n"); 88 | }); 89 | } 90 | else{ 91 | cb(null, list_of_lines.join("\n")+"\n"); 92 | } 93 | }); 94 | } 95 | } 96 | module.exports.clearPreprocessors = clearPreprocessors; 97 | -------------------------------------------------------------------------------- /lib/token-rules.js: -------------------------------------------------------------------------------- 1 | var tokenRules = { 2 | "singleLineComment": /\/\/.*/, 3 | "multiLineComment": 
/\/\*([^\*]|[\r\n]|(\*+([^\/\*]|[\n\r])))*\*+\//, 4 | "while": /while/, 5 | "volatile": /volatile/, 6 | "void": /void/, 7 | "unsigned": /unsigned/, 8 | "union": /union/, 9 | "typedef": /typedef/, 10 | "switch": /switch/, 11 | "struct": /struct/, 12 | "static": /static/, 13 | "sizeof": /sizeof/, 14 | "signed": /signed/, 15 | "short": /short/, 16 | "return": /return/, 17 | "register": /register/, 18 | "long": /long/, 19 | "int": /int/, 20 | "if": /if/, 21 | "goto": /goto/, 22 | "for": /for/, 23 | "float": /float/, 24 | "extern": /extern/, 25 | "enum": /enum/, 26 | "else": /else/, 27 | "double": /double/, 28 | "do": /do/, 29 | "default": /default/, 30 | "continue": /continue/, 31 | "const": /const/, 32 | "char": /char/, 33 | "case": /case/, 34 | "break": /break/, 35 | "auto": /auto/, 36 | "bool": /_Bool/, 37 | "complex": /_Complex/, 38 | "imaginary": /_Imaginary/, 39 | "inline": /inline/, 40 | "restrict": /restrict/, 41 | "identifier": /[A-Za-z_]\w*/, 42 | "hexadecimal": /0[xX][a-fA-F0-9]+((u|U)|((u|U)?(l|L|ll|LL))|((l|L|ll|LL)(u|U)))?/, 43 | "octal": /0[0-7]+((u|U)|((u|U)?(l|L|ll|LL))|((l|L|ll|LL)(u|U)))?/, 44 | "decimal": /[0-9]+((u|U)|((u|U)?(l|L|ll|LL))|((l|L|ll|LL)(u|U)))?/, 45 | "char_literal": /[a-zA-Z_]?\'(\\.|[^\\'\n])+\'/, 46 | "floatWithoutPoint": /[0-9]+([Ee][+-]?[0-9]+)(f|F|l|L)?/, 47 | "floatWithNothingBeforePoint": /[0-9]*\.[0-9]+([Ee][+-]?[0-9]+)?(f|F|l|L)?/, 48 | "floatWithNothingAfterPoint": /[0-9]+\.[0-9]*([Ee][+-]?[0-9]+)?(f|F|l|L)?/, 49 | "string_literal": /[a-zA-Z_]?\"(\\.|[^\\"\n])*\"/, 50 | "ellipsis": /\.\.\./, 51 | "right_assign": />>=/, 52 | "left_assign": /<<=/, 53 | "add_assign": /\+=/, 54 | "sub_assign": /\-=/, 55 | "mul_assign": /\*=/, 56 | "div_assign": /\/=/, 57 | "mod_assign": /%=/, 58 | "and_assign": /&=/, 59 | "xor_assign": /\^=/, 60 | "or_assign": /\|=/, 61 | "right_op": />>/, 62 | "left_op": /</, 66 | "and_op": /&&/, 67 | "or_op": /\|\|/, 68 | "le_op": /<=/, 69 | "ge_op": />=/, 70 | "eq_op": /==/, 71 | "ne_op": /!=/, 72 | ";": 
/;/, 73 | "{": /{|<%/, 74 | "}": /}|%>/, 75 | ",": /,/, 76 | ":": /:/, 77 | "=": /=/, 78 | "(": /\(/, 79 | ")": /\)/, 80 | "[": /\[|<:/, 81 | "]": /\]|:>/, 82 | ".": /\./, 83 | "&": /&/, 84 | "!": /!/, 85 | "~": /~/, 86 | "-": /\-/, 87 | "+": /\+/, 88 | "*": /\*/, 89 | "/": /\//, 90 | "%": /%/, 91 | "<": /": />/, 93 | "^": /\^/, 94 | "|": /\|/, 95 | "?": /\?/, 96 | "whitespace": /[ \t\v\r\n\f]/, 97 | "unmatched": /./ 98 | }; 99 | module.exports.tokenRules = tokenRules; 100 | -------------------------------------------------------------------------------- /test/cpp_unit/cpp_unit.js: -------------------------------------------------------------------------------- 1 | var fs = require("fs"); 2 | var diff = require("diff"); 3 | var async = require("async"); 4 | var assert = require("chai").assert; 5 | var expect = require("chai").expect; 6 | var should = require("chai").should(); 7 | 8 | describe("Tests for preprocessor removal unit", function(){ 9 | it("Should be able to require clearPreprocessors as function", function(){ 10 | var clearPreprocessors = require("../../lib/cpp-unit.js").clearPreprocessors; 11 | assert(clearPreprocessors); 12 | assert(typeof(clearPreprocessors), "function"); 13 | }); 14 | it("Should successfully remove preprocessor from case_1.c file", function(done){ 15 | var input_file = __dirname + "/cases/case_1.c"; 16 | var output_file = __dirname + "/cases/case_1.c.pp"; 17 | var clearPreprocessors = require("../../lib/cpp-unit.js").clearPreprocessors; 18 | async.parallel([ 19 | function(read_output_file){ 20 | fs.readFile(output_file, "utf-8", function(err, data){ 21 | if(err) read_output_file(err); 22 | else read_output_file(null, data); 23 | }); 24 | }, 25 | function(perform_operation){ 26 | clearPreprocessors(input_file, function(err, data){ 27 | if(err) perform_operation(err); 28 | else perform_operation(null, data); 29 | }); 30 | } 31 | ], function(err, results){ 32 | if(err) done(err); 33 | else{ 34 | var d = diff.diffChars(results[0], 
results[1]); 35 | assert.equal(d.length, 1); 36 | done(null); 37 | } 38 | }); 39 | }); 40 | it("Should successfully remove preprocessor from case_2.c file", function(done){ 41 | var input_file = __dirname + "/cases/case_2.c"; 42 | var output_file = __dirname + "/cases/case_2.c.pp"; 43 | var clearPreprocessors = require("../../lib/cpp-unit.js").clearPreprocessors; 44 | async.parallel([ 45 | function(read_output_file){ 46 | fs.readFile(output_file, "utf-8", function(err, data){ 47 | if(err) read_output_file(err); 48 | else read_output_file(null, data); 49 | }); 50 | }, 51 | function(perform_operation){ 52 | clearPreprocessors(input_file, function(err, data){ 53 | if(err) perform_operation(err); 54 | else perform_operation(null, data); 55 | }); 56 | } 57 | ], function(err, results){ 58 | if(err) done(err); 59 | else{ 60 | var d = diff.diffChars(results[0], results[1]); 61 | assert.equal(d.length, 1); 62 | done(null); 63 | } 64 | }); 65 | }); 66 | it("Should successfully remove preprocessor from case_3.c file", function(done){ 67 | var input_file = __dirname + "/cases/case_3.c"; 68 | var output_file = __dirname + "/cases/case_3.c.pp"; 69 | var clearPreprocessors = require("../../lib/cpp-unit.js").clearPreprocessors; 70 | async.parallel([ 71 | function(read_output_file){ 72 | fs.readFile(output_file, "utf-8", function(err, data){ 73 | if(err) read_output_file(err); 74 | else read_output_file(null, data); 75 | }); 76 | }, 77 | function(perform_operation){ 78 | clearPreprocessors(input_file, function(err, data){ 79 | if(err) perform_operation(err); 80 | else perform_operation(null, data); 81 | }); 82 | } 83 | ], function(err, results){ 84 | if(err) done(err); 85 | else{ 86 | var d = diff.diffChars(results[0], results[1]); 87 | assert.equal(d.length, 1); 88 | done(null); 89 | } 90 | }); 91 | }); 92 | it("Should successfully remove preprocessor from case_4.c file", function(done){ 93 | var input_file = __dirname + "/cases/case_4.c"; 94 | var output_file = __dirname + 
"/cases/case_4.c.pp"; 95 | var clearPreprocessors = require("../../lib/cpp-unit.js").clearPreprocessors; 96 | async.parallel([ 97 | function(read_output_file){ 98 | fs.readFile(output_file, "utf-8", function(err, data){ 99 | if(err) read_output_file(err); 100 | else read_output_file(null, data); 101 | }); 102 | }, 103 | function(perform_operation){ 104 | clearPreprocessors(input_file, function(err, data){ 105 | if(err) perform_operation(err); 106 | else perform_operation(null, data); 107 | }); 108 | } 109 | ], function(err, results){ 110 | if(err) done(err); 111 | else{ 112 | var d = diff.diffChars(results[0], results[1]); 113 | assert.equal(d.length, 1); 114 | done(null); 115 | } 116 | }); 117 | }); 118 | }); 119 | -------------------------------------------------------------------------------- /lib/lex-unit.js: -------------------------------------------------------------------------------- 1 | var tokenRules = require("./token-rules")["tokenRules"]; 2 | var Lexer = require("lex"); 3 | var lexer = new Lexer; 4 | 5 | var row = 1, col = 1; 6 | 7 | var count = function(lexeme){ 8 | for(var i = 0; i"], function(lexeme){ 1068 | var token = {}; 1069 | token["lexeme"] = lexeme; 1070 | token["row"] = row; 1071 | token["col"] = col; 1072 | token["tokenClass"] = ">"; 1073 | token["parent"] = null; 1074 | token["children"] = null; 1075 | count(lexeme); 1076 | return token; 1077 | }); 1078 | lexer.addRule(tokenRules["^"], function(lexeme){ 1079 | var token = {}; 1080 | token["lexeme"] = lexeme; 1081 | token["row"] = row; 1082 | token["col"] = col; 1083 | token["tokenClass"] = "^"; 1084 | token["parent"] = null; 1085 | token["children"] = null; 1086 | count(lexeme); 1087 | return token; 1088 | }); 1089 | lexer.addRule(tokenRules["|"], function(lexeme){ 1090 | var token = {}; 1091 | token["lexeme"] = lexeme; 1092 | token["row"] = row; 1093 | token["col"] = col; 1094 | token["tokenClass"] = "|"; 1095 | token["parent"] = null; 1096 | token["children"] = null; 1097 | 
count(lexeme); 1098 | return token; 1099 | }); 1100 | lexer.addRule(tokenRules["?"], function(lexeme){ 1101 | var token = {}; 1102 | token["lexeme"] = lexeme; 1103 | token["row"] = row; 1104 | token["col"] = col; 1105 | token["tokenClass"] = "?"; 1106 | token["parent"] = null; 1107 | token["children"] = null; 1108 | count(lexeme); 1109 | return token; 1110 | }); 1111 | lexer.addRule(tokenRules["whitespace"], function(lexeme){ 1112 | var token = {}; 1113 | token["lexeme"] = lexeme; 1114 | token["row"] = row; 1115 | token["col"] = col; 1116 | token["tokenClass"] = "WHITESPACE"; 1117 | token["parent"] = null; 1118 | token["children"] = null; 1119 | count(lexeme); 1120 | return token; 1121 | }); 1122 | lexer.addRule(tokenRules["unmatched"], function(lexeme){ 1123 | var token = {}; 1124 | token["lexeme"] = lexeme; 1125 | token["row"] = row; 1126 | token["col"] = col; 1127 | token["tokenClass"] = "UNMATCHED"; 1128 | token["parent"] = null; 1129 | token["children"] = null; 1130 | count(lexeme); 1131 | return token; 1132 | }); 1133 | 1134 | lexer.setInput(streamOfText); 1135 | var x = lexer.lex(); 1136 | while(x != undefined){ 1137 | if((x.tokenClass != "UNMATCHED") && (x.tokenClass != "WHITESPACE") && (x.tokenClass != "COMMENT")){ 1138 | streamOfTokens.push(x); 1139 | } 1140 | x = lexer.lex(); 1141 | }; 1142 | return streamOfTokens; 1143 | } 1144 | module.exports.tokenize = tokenize; 1145 | --------------------------------------------------------------------------------