├── .gitignore ├── src ├── StringStream.js ├── StringStream.d.ts ├── ast.ts ├── index.ts ├── editor.ts ├── position.ts ├── tokenstream.ts ├── mode.ts ├── parselet.ts ├── lexer.ts └── parser.ts ├── tsconfig.json ├── dist └── index.html ├── fuse.js ├── README.md ├── package.json └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .fusebox 3 | 4 | -------------------------------------------------------------------------------- /src/StringStream.js: -------------------------------------------------------------------------------- 1 | import StringStream from '../node_modules/codemirror/src/util/StringStream' 2 | 3 | export default StringStream 4 | -------------------------------------------------------------------------------- /src/StringStream.d.ts: -------------------------------------------------------------------------------- 1 | import {StringStream as StringStreamI} from 'codemirror' 2 | 3 | // interface merging to expose StringStream as a class 4 | // https://github.com/Microsoft/TypeScript/issues/340 5 | interface StringStream extends StringStreamI {} 6 | declare class StringStream { 7 | constructor(line: string) 8 | } 9 | 10 | export default StringStream 11 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es6", 4 | "module": "commonjs", 5 | "outDir": "./build", 6 | "noEmit": true, 7 | "strict": true, 8 | 9 | "noUnusedLocals": true, 10 | "noUnusedParameters": true, 11 | "noImplicitReturns": true, 12 | "noFallthroughCasesInSwitch": true, 13 | 14 | "moduleResolution": "node", 15 | "esModuleInterop": true 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /dist/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 
| 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /fuse.js: -------------------------------------------------------------------------------- 1 | const { 2 | FuseBox, 3 | WebIndexPlugin, 4 | CSSPlugin, 5 | CSSResourcePlugin, 6 | } = require('fuse-box'); 7 | const fuse = FuseBox.init({ 8 | homeDir: '.', 9 | target: 'browser@es6', 10 | output: 'dist/$name.js', 11 | plugins: [ 12 | [ 13 | CSSResourcePlugin({ 14 | dist: 'dist/css-resources', 15 | resolve: f => `/css-resources/${f}`, 16 | }), 17 | CSSPlugin(), 18 | ], 19 | WebIndexPlugin({ 20 | path: "." 21 | }), 22 | ], 23 | }); 24 | fuse.dev(); // launch http server 25 | fuse 26 | .bundle('app') 27 | .instructions(' > src/index.ts') 28 | .hmr() 29 | .watch(); 30 | fuse.run(); 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pratt-parser-blog-code 2 | 3 | This project implements a lexer and Pratt parser for a simple language. 4 | 5 | It also creates a CodeMirror mode, `myMode`, that provides syntax highlighting based on the lexer, and linting for parsing errors. 6 | 7 | For more details, see this [blog post on the Desmos engineering blog](https://engineering.desmos.com/articles/pratt-parser). 8 | 9 | Hopefully this will serve as a nice starting point for anyone interested in building a web-based language. Enjoy! 
10 | 11 | # Online 12 | 13 | You can play with the parser online [on the github page](https://desmosinc.github.io/pratt-parser-blog-code/) 14 | 15 | # Setup 16 | 17 | Clone the repo, then run 18 | 19 | ``` 20 | npm install 21 | node fuse.js 22 | ``` 23 | 24 | Then open http://localhost:4444/ 25 |
--------------------------------------------------------------------------------
/src/ast.ts:
--------------------------------------------------------------------------------
import {Position} from './position';
import {BinaryOperationTokenType} from './lexer';

/**
 * Names of AST node kinds. Only 'Number', 'Boolean' and 'BinaryOperation'
 * have a node shape declared in this file.
 */
export type NodeType =
  | 'SinkAssignment'
  | 'VariableAssignment'
  | 'Number'
  | 'Boolean'
  | 'String'
  | 'BinaryOperation'
  | 'Choose'
  | 'Identifier'
  | 'Function'
  | 'CalculatorReference';

/** A numeric literal together with the source range it was read from. */
export type NumberNode = {
  type: 'Number';
  value: number;
  pos: Position;
};

/** A boolean literal together with the source range it was read from. */
export type BooleanNode = {
  type: 'Boolean';
  value: boolean;
  pos: Position;
};

/** `left <operator> right`; `pos` spans from the left operand to the right one. */
export type BinaryOperationNode = {
  type: 'BinaryOperation';
  operator: BinaryOperationTokenType;
  left: Node;
  right: Node;
  pos: Position;
};

/** Any node the parser can produce, discriminated by `type`. */
export type Node = BooleanNode | NumberNode | BinaryOperationNode;

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
import {getTokens} from './lexer';
import {create} from './editor';
import {parse} from './parser';

const cmContainer = document.createElement('div');
cmContainer.className = 'cm-container';
document.body.appendChild(cmContainer);
const cm = create(cmContainer);

const outputContainer = document.createElement('pre');
outputContainer.className = 'output-container';
document.body.appendChild(outputContainer);

/**
 * Re-parses the current editor contents, hands the parse errors to the
 * editor through its 'script-errors' option (read back by the lint helper
 * in editor.ts), and prints the AST and the token list below the editor.
 */
function updateOutput() {
  // Read the document once so the AST and the token dump describe the same text.
  const source = cm.getDoc().getValue();

  const ast = parse(source);
  cm.setOption('script-errors', ast.errors);

  const tokens = getTokens(source);

  // textContent rather than innerHTML: the dump contains raw editor text
  // (for example the body of a `#` comment), which must be shown literally
  // and never interpreted as markup.
  outputContainer.textContent =
    `ast: ${JSON.stringify(ast, null, 2)}\n` +
    `tokens: ${JSON.stringify(tokens, null, 2)}`;
}

cm.on('change', updateOutput);
updateOutput();

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "pratt-parser-blog-code",
  "version": "1.0.0",
  "description": "The code to illustrate the pratt parser blog post for the desmos engineering blog.",
  "main": "index.js",
  "scripts": {
    "test": "npx ts-node src/index.ts"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/desmosinc/pratt-parser-blog-code.git"
  },
  "author": "",
  "license": "MIT",
  "bugs": {
    "url": "https://github.com/desmosinc/pratt-parser-blog-code/issues"
  },
  "homepage": "https://github.com/desmosinc/pratt-parser-blog-code#readme",
  "devDependencies": {
    "@types/codemirror": "0.0.70",
    "fuse-box": "^3.6.0",
    "ts-node": "^7.0.1",
    "typescript": "^3.2.2"
  },
  "dependencies": {
    "codemirror": "^5.42.0"
  }
}

--------------------------------------------------------------------------------
/src/editor.ts:
--------------------------------------------------------------------------------
import CM from 'codemirror';

import 'codemirror/lib/codemirror.css';
import 'codemirror/addon/lint/lint';
import 'codemirror/addon/lint/lint.css';

import {ParseError} from './position';
import './mode';

/**
 * Creates a CodeMirror editor inside `node`, configured for 'myMode' with
 * the lint gutter enabled, and registers the lint helper for that mode.
 *
 * @param node Element that will host the editor.
 * @returns The created editor instance.
 */
export function create(node: HTMLElement) {
  const editor = CM(node, {
    value: '1 + (2 + 3) / 4',
    mode: 'myMode',
    gutters: ['CodeMirror-lint-markers'],
    lint: true,
    lineWrapping: true,
  });

  // The helper does not parse anything itself: it converts whatever
  // ParseErrors were last stored on this editor's 'script-errors' option.
  CM.registerHelper('lint', 'myMode', () => {
    const parseErrors: ParseError[] = editor.getOption('script-errors') || [];
    return parseErrors.map(e => ({
      // The lexer numbers lines from 1 (see getTokens); subtract 1 for CM.Pos.
      from: CM.Pos(e.position.first_line - 1, e.position.first_column),
      to: CM.Pos(e.position.last_line - 1, e.position.last_column),
      message: e.message,
      severity: 'error',
    }));
  });

  return editor;
}

--------------------------------------------------------------------------------
/src/position.ts:
--------------------------------------------------------------------------------
import { Token } from './lexer';

/** A source range expressed as line and column of its first and last end. */
export type Position = {
  first_line: number;
  first_column: number;
  last_line: number;
  last_column: number;
};

/** Returns the range covered by a single token (tokens never span lines). */
export function token2pos(token: Token): Position {
  return {
    first_line: token.line,
    last_line: token.line,
    first_column: token.first_column,
    last_column: token.last_column,
  };
}

/** Returns the range that begins where `start` begins and ends where `end` ends. */
export function join(start: Position, end: Position): Position {
  return {
    first_line: start.first_line,
    last_line: end.last_line,
    first_column: start.first_column,
    last_column: end.last_column,
  };
}

// note, extending Error in the browser is problematic
// https://stackoverflow.com/questions/33870684/why-doesnt-instanceof-work-on-instances-of-error-subclasses-under-babel-node
/** A parse failure: a message plus the source range it refers to. */
export class ParseError {
  constructor(public message: string, public position: Position) {}
}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License 2 | 3 | Copyright (c) 2018 Desmos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
--------------------------------------------------------------------------------
/src/tokenstream.ts:
--------------------------------------------------------------------------------
import {Token, TokenType, getTokens} from './lexer';
import {ParseError, token2pos} from './position';

/**
 * A cursor over the tokens of a source text, with comments filtered out.
 * `pos` is the index of the next token to be consumed.
 */
export class TokenStream {
  tokens: Token[];
  pos: number = 0;

  /** Lexes `text` eagerly and drops every COMMENT token. */
  constructor(text: string) {
    this.tokens = getTokens(text).filter(t => t.type !== 'COMMENT');
  }

  /** Returns the next token and advances past it, or undefined at the end. */
  consume(): Token | undefined {
    const token = this.tokens[this.pos];
    if (token) {
      this.pos += 1;
    }
    return token;
  }

  /** Returns the next token without advancing, or undefined at the end. */
  peek(): Token | undefined {
    return this.tokens[this.pos];
  }

  /**
   * Returns the most recently consumed token. Only meaningful after at
   * least one successful consume(); before that there is no such token.
   */
  last(): Token {
    return this.tokens[this.pos - 1];
  }

  /**
   * Consumes the next token and checks that it has the expected type.
   *
   * @param expectedType Token type that must come next.
   * @returns The consumed token, narrowed to `expectedType`.
   * @throws ParseError when the stream is exhausted (positioned at the last
   *   consumed token, or at the start of input if nothing was consumed) or
   *   when the next token has a different type (positioned at that token).
   */
  expectToken<T extends TokenType>(expectedType: T): Token & {type: T} {
    const actual = this.consume();

    if (!actual) {
      // Nothing may have been consumed yet, in which case there is no
      // previous token to anchor the error to; fall back to the start of
      // the input so callers still get a ParseError, not a TypeError.
      const previous: Token | undefined = this.tokens[this.pos - 1];
      throw new ParseError(
        `Expected "${expectedType}" token but found none.`,
        previous
          ? token2pos(previous)
          : {first_line: 1, last_line: 1, first_column: 0, last_column: 0},
      );
    }

    if (actual.type !== expectedType) {
      throw new ParseError(
        `Expected "${expectedType}" token type but found "${actual.type}".`,
        token2pos(actual),
      );
    }

    return actual as Token & {type: T};
  }
}

--------------------------------------------------------------------------------
/src/mode.ts:
--------------------------------------------------------------------------------
// CodeMirror syntax-highlighting mode
import { getToken, State } from './lexer';

import * as CM from 'codemirror';

/** Style names this mode may return for a token. */
type TokenType =
  | 'operator'
  | 'bracket'
  | 'keyword'
  | 'variable'
  | 'number'
  | 'comment'
  | 'string'
  | 'error'


/**
 * Mode factory registered as 'myMode'. Highlighting is driven by the same
 * getToken() the parser's lexer uses, so both agree on token boundaries.
 * Neither argument is read.
 */
export function MakeMode(_config: CM.EditorConfiguration, _modeOptions?: unknown): CM.Mode<State> {
  return {
    token: (
      stream: CM.StringStream,
      state: State
    ): TokenType | null => {
      const token = getToken(stream, state);
      if (!token) {
        // getToken returns undefined when it only skipped whitespace.
        return null;
      }

      const type = token.type;
      switch (type) {
        case 'NUMBER':
          return 'number';

        case '(':
        case ')':
          return 'bracket';

        case '+':
        case '-':
        case '*':
        case '/':
        case '^':
          return 'operator';

        case 'COMMENT':
          return 'comment';

        case 'ERROR':
          return 'error';

        default:
          // Compile-time exhaustiveness check: adding a lexer token type
          // without a case above makes this call fail to type-check.
          return assertUnreachable(type);
      }
    },
    // `line` starts at 0 here and nothing in this mode advances it; only
    // the token type is used for highlighting.
    startState: () => ({
      stack: ['default' as 'default'],
      line: 0
    })
  };
}

/** Always throws; its `never` parameter is what enforces exhaustive switches. */
function assertUnreachable(x: never): never {
  throw new Error(`Didn't expect to get here ${x}`);
}

CM.defineMode('myMode', MakeMode);

--------------------------------------------------------------------------------
/src/parselet.ts:
--------------------------------------------------------------------------------
import { TokenStream } from './tokenstream';
import { Token, TokenType, BinaryOperationTokenType} from './lexer';
import * as AST from './ast';
import { AbstractParser } from './parser';
import {token2pos, join} from
'./position';

/** Parses a construct that can start an expression, given its first token. */
export interface InitialParselet {
  parse(parser: AbstractParser, tokens: TokenStream, token: Token): AST.Node;
}

/** Turns a NUMBER token into a Number node using parseFloat on its text. */
export class NumberParselet implements InitialParselet {
  parse(_parser: AbstractParser, _tokens: TokenStream, token: Token) {
    return {
      type: 'Number' as 'Number',
      value: parseFloat(token.text),
      pos: token2pos(token)
    };
  }
}

/** Produces a Boolean node with the fixed value given at construction. */
export class BooleanParselet implements InitialParselet {
  constructor(private value: boolean) {}
  parse(_parser: AbstractParser, _tokens: TokenStream, token: Token) {
    return {
      type: 'Boolean' as 'Boolean',
      value: this.value,
      pos: token2pos(token)
    };
  }
}

/**
 * Handles a parenthesised expression. The '(' has already been consumed;
 * this parses the inner expression from binding power 0 and then requires
 * a ')'. The inner node is returned as-is (no wrapper node).
 */
export class ParenParselet implements InitialParselet {
  parse(parser: AbstractParser, tokens: TokenStream, _token: Token) {
    const exp = parser.parse(tokens, 0);
    tokens.expectToken(')');

    return exp;
  }
}

/**
 * Parses a construct that continues an expression: it receives the node
 * already parsed to its left and the token that triggered it.
 */
export abstract class ConsequentParselet {
  constructor(
    readonly tokenType: TokenType,
    readonly associativity: 'left' | 'right'
  ) {}
  abstract parse(
    parser: AbstractParser,
    tokens: TokenStream,
    left: AST.Node,
    token: Token
  ): AST.Node;
}

/** Parses `left <op> right` for one binary operator token type. */
export class BinaryOperatorParselet extends ConsequentParselet {
  constructor(
    public tokenType: BinaryOperationTokenType,
    associativity: 'left' | 'right'
  ) {
    super(tokenType, associativity);
  }

  parse(
    parser: AbstractParser,
    tokens: TokenStream,
    left: AST.Node,
    token: Token
  ): AST.Node {
    const bindingPower = parser.bindingPower(token);

    // The parser loop stops when the current power is >= the next
    // operator's power. Passing our own power therefore leaves an operator
    // of equal power for our caller (left-associative); passing one less
    // lets the right operand absorb it (right-associative).
    const right = parser.parse(
      tokens,
      this.associativity === 'left' ? bindingPower : bindingPower - 1
    );

    return {
      type: 'BinaryOperation' as 'BinaryOperation',
      operator: this.tokenType,
      left,
      right,
      // tokens.last() is the final token consumed for the right operand.
      pos: join(left.pos, token2pos(tokens.last()))
    };
  }
}

--------------------------------------------------------------------------------
/src/lexer.ts:
--------------------------------------------------------------------------------
import StringStream from './StringStream';

/**
 * Lexes a whole text into tokens, line by line. Line numbers start at 1.
 * Whitespace produces no token; comments and unrecognised characters are
 * returned as COMMENT and ERROR tokens.
 *
 * @throws Error if getToken ever returns without advancing the stream,
 *   which would otherwise loop forever.
 */
export function getTokens(text: string): Token[] {
  const tokens: Token[] = [];
  const state: State = {line: 1, stack: ['default']};

  for (const line of text.split('\n')) {
    const stream = new StringStream(line);
    while (!stream.eol()) {
      const token = getToken(stream, state);
      if (token !== undefined) {
        tokens.push(token);
      }

      if (stream.start === stream.pos) {
        throw new Error(
          `getToken failed to advance stream at position ${
            stream.pos
          } in string ${stream.string}`,
        );
      }
      // The next token begins where this one ended.
      stream.start = stream.pos;
    }

    state.line += 1;
  }

  return tokens;
}

/**
 * Reads one token from the stream, or returns undefined when only
 * whitespace was skipped. Shaped for CodeMirror's streaming mode API;
 * dispatches on the top of the state stack (only 'default' exists).
 */
export function getToken(
  stream: StringStream,
  state: State,
): Token | undefined {
  //Built for codeMirror streams API
  //State is a stack of states
  switch (state.stack[state.stack.length - 1]) {
    default:
      return getDefaultToken(stream, state);
  }
}

/**
 * Returns a function that builds a token of a given type from whatever the
 * stream has consumed since `stream.start`, on the current state line.
 */
function makeEmit(stream: StringStream, state: State) {
  return function emitToken(type: TokenType): Token {
    return {
      type,
      first_column: stream.start,
      last_column: stream.pos,
      line: state.line,
      text: stream.current(),
    };
  };
}

/** Token types whose text is exactly their own one-character name. */
const SINGLE_CHAR_TOKENS: TokenType[] = ['+', '-', '*', '/', '^', '(', ')'];

/** Lexer rules for the 'default' state. Always advances a non-empty stream. */
function getDefaultToken(
  stream: StringStream,
  state: State,
): Token | undefined {
  const emitToken = makeEmit(stream, state);
  if (stream.eatSpace()) {
    // skip whitespace
    return undefined;
  }

  for (const type of SINGLE_CHAR_TOKENS) {
    if (stream.match(type)) {
      return emitToken(type);
    }
  }

  // No leading '-' here: a '-' is always lexed as an operator above, so a
  // number can never begin with one.
  if (stream.match(/[0-9]+(\.[0-9]+)?/)) {
    return emitToken('NUMBER');
  }

  if (stream.match(/#/)) {
    if (!stream.match(/\n/)) {
      // comment lasts till end of line
      stream.match(/.*/); // if no eol encountered, comment lasts till end of file
    }
    return emitToken('COMMENT');
  }

  // Anything else: consume one character so the stream advances, and
  // report it as an error token.
  stream.next();
  return emitToken('ERROR');
}

export type BinaryOperationTokenType =
  | '+'
  | '-'
  | '*'
  | '/'
  | '^'

export type TokenType =
  | BinaryOperationTokenType
  | 'NUMBER'
  | '('
  | ')'
  | 'COMMENT'
  | 'ERROR';

/**
 * A lexed token. `T` narrows `type` when the token type is known; plain
 * `Token` means a token of any type.
 */
export interface Token<T extends TokenType = TokenType> {
  type: T;
  text: string;
  line: number;
  first_column: number;
  last_column: number;
}

type Mode = 'default';

/** Lexer state carried between getToken calls. */
export interface State {
  stack: Mode[];
  line: number;
}

--------------------------------------------------------------------------------
/src/parser.ts:
--------------------------------------------------------------------------------
import * as Parselet from './parselet';
import {Token, TokenType} from './lexer';
import {TokenStream} from './tokenstream';
import {ParseError, token2pos} from './position';
import * as AST from './ast';

/**
 * Parses `text` into a list of top-level expressions.
 *
 * Parsing stops at the first ParseError; the nodes completed before it are
 * returned together with that single error. Anything thrown that is not a
 * ParseError is an internal failure and is rethrown rather than reported
 * as a parse error.
 */
export function parse(text: string): {nodes: AST.Node[]; errors: ParseError[]} {
  const nodes: AST.Node[] = [];

  const tokens = new TokenStream(text);
  const parser = new Parser();
  while (tokens.peek()) {
    try {
      nodes.push(parser.parse(tokens, 0));
    } catch (e) {
      if (!(e instanceof ParseError)) {
        throw e;
      }
      return {
        nodes,
        errors: [e],
      };
    }
  }

  return {nodes, errors: []};
}

/**
 * Pratt parser core. Subclasses supply the grammar through three tables:
 * initialMap (what may start an expression), consequentMap (what may
 * continue one) and bindingClasses (operator precedence, lowest first).
 */
export abstract class AbstractParser {
  /** Binding power per token type; only types listed in bindingClasses are set. */
  public bindingPowers: {[tokenType in TokenType]: number};

  protected abstract initialMap(): Partial<
    {[K in TokenType]: Parselet.InitialParselet}
  >;
  protected abstract consequentMap(): Partial<
    {[K in TokenType]: Parselet.ConsequentParselet}
  >;
  protected abstract bindingClasses(): TokenType[][];

  /**
   * Builds the binding-power table and verifies the grammar tables agree.
   *
   * @throws Error if a token type in consequentMap has no binding power.
   */
  constructor() {
    // Filled in below; entries for unlisted token types stay undefined.
    this.bindingPowers = {} as {[tokenType in TokenType]: number};

    const bindingClasses = this.bindingClasses();
    for (let i = 0; i < bindingClasses.length; i++) {
      for (const tokenType of bindingClasses[i]) {
        // Classes are 10 apart so that "power - 1" (used for right
        // associativity) still sits above the previous class.
        this.bindingPowers[tokenType] = 10 * i + 9;
      }
    }

    // consequentMap must be *called*: Object.keys on the method itself is
    // always empty, which would silently skip this check.
    for (const tokenType of Object.keys(this.consequentMap()) as TokenType[]) {
      if (this.bindingPowers[tokenType] === undefined) {
        throw new Error(
          `Token ${tokenType} defined in consequentMap has no associated binding power.
Make sure it is also listed in bindingClasses.`,
        );
      }
    }
  }

  /**
   * Returns the binding power of `token`.
   *
   * @throws ParseError if the token's type has no binding power.
   */
  bindingPower(token: Token): number {
    const power = this.bindingPowers[token.type];
    if (power === undefined) {
      throw new ParseError(
        `Unexpected token type ${token.type}.`,
        token2pos(token),
      );
    }
    return power;
  }

  /**
   * Parses one expression, absorbing following operators only while their
   * binding power is strictly greater than `currentBindingPower`.
   *
   * @throws ParseError on end of input or on a token that cannot start an
   *   expression.
   */
  parse(tokens: TokenStream, currentBindingPower: number): AST.Node {
    const token = tokens.consume();
    if (!token) {
      throw new ParseError(
        `Unexpected end of tokens.`,
        token2pos(tokens.last()),
      );
    }

    const initialParselet = this.initialMap()[token.type];

    if (!initialParselet) {
      throw new ParseError(
        `Unexpected token type ${token.type}`,
        token2pos(token),
      );
    }

    let left = initialParselet.parse(this, tokens, token);

    while (true) {
      const next = tokens.peek();
      if (!next) {
        break;
      }

      const consequentParselet = this.consequentMap()[next.type];

      if (!consequentParselet) {
        // Not an operator we know how to continue with (e.g. ')'): leave
        // it for the caller.
        break;
      }

      if (currentBindingPower >= this.bindingPower(next)) {
        break;
      }

      tokens.consume();
      left = consequentParselet.parse(this, tokens, left, next);
    }

    return left;
  }
}

/** The concrete grammar: numbers, parentheses and the five binary operators. */
export class Parser extends AbstractParser {
  initialMap() {
    return {
      NUMBER: new Parselet.NumberParselet(),
      '(': new Parselet.ParenParselet(),
    };
  }

  consequentMap() {
    return {
      '+': new Parselet.BinaryOperatorParselet('+', 'left'),
      '-': new Parselet.BinaryOperatorParselet('-', 'left'),
      '*': new Parselet.BinaryOperatorParselet('*', 'left'),
      '/': new Parselet.BinaryOperatorParselet('/', 'left'),
      '^': new Parselet.BinaryOperatorParselet('^', 'right'),
    };
  }

  /** Precedence classes from loosest to tightest binding. */
  bindingClasses() {
    const classes: TokenType[][] = [['+', '-'], ['*', '/'], ['^']];
    return classes;
  }
}

--------------------------------------------------------------------------------