├── .gitignore
├── src
├── StringStream.js
├── StringStream.d.ts
├── ast.ts
├── index.ts
├── editor.ts
├── position.ts
├── tokenstream.ts
├── mode.ts
├── parselet.ts
├── lexer.ts
└── parser.ts
├── tsconfig.json
├── dist
└── index.html
├── fuse.js
├── README.md
├── package.json
└── LICENSE
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .fusebox
3 |
4 |
--------------------------------------------------------------------------------
/src/StringStream.js:
--------------------------------------------------------------------------------
1 | import StringStream from '../node_modules/codemirror/src/util/StringStream'
2 |
3 | export default StringStream
4 |
--------------------------------------------------------------------------------
/src/StringStream.d.ts:
--------------------------------------------------------------------------------
1 | import {StringStream as StringStreamI} from 'codemirror'
2 |
3 | // interface merging to expose StringStream as a class
4 | // https://github.com/Microsoft/TypeScript/issues/340
5 | interface StringStream extends StringStreamI {} // merged with the class below so instances expose the codemirror StringStream members
6 | declare class StringStream { // type declaration for the class that StringStream.js re-exports
7 | constructor(line: string) // lexer.ts constructs one stream per line of source text
8 | }
9 |
10 | export default StringStream
11 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es6",
4 | "module": "commonjs",
5 | "outDir": "./build",
6 | "noEmit": true,
7 | "strict": true,
8 |
9 | "noUnusedLocals": true,
10 | "noUnusedParameters": true,
11 | "noImplicitReturns": true,
12 | "noFallthroughCasesInSwitch": true,
13 |
14 | "moduleResolution": "node",
15 | "esModuleInterop": true
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/dist/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/fuse.js:
--------------------------------------------------------------------------------
1 | const { // build script for local development; the README runs it with `node fuse.js`
2 | FuseBox,
3 | WebIndexPlugin,
4 | CSSPlugin,
5 | CSSResourcePlugin,
6 | } = require('fuse-box');
7 | const fuse = FuseBox.init({ // bundler configuration
8 | homeDir: '.',
9 | target: 'browser@es6',
10 | output: 'dist/$name.js', // NOTE(review): presumably $name is replaced by the bundle name ('app') — confirm
11 | plugins: [
12 | [ // the two CSS plugins are grouped in a nested array
13 | CSSResourcePlugin({
14 | dist: 'dist/css-resources',
15 | resolve: f => `/css-resources/${f}`, // maps a resource file name to the URL path used for it
16 | }),
17 | CSSPlugin(),
18 | ],
19 | WebIndexPlugin({ // NOTE(review): presumably generates the HTML page that loads the bundle — confirm
20 | path: "."
21 | }),
22 | ],
23 | });
24 | fuse.dev(); // launch http server
25 | fuse
26 | .bundle('app') // bundle named 'app'
27 | .instructions(' > src/index.ts') // entry point of the bundle
28 | .hmr() // NOTE(review): presumably enables hot module reloading — confirm
29 | .watch(); // NOTE(review): presumably rebuilds when source files change — confirm
30 | fuse.run();
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pratt-parser-blog-code
2 |
3 | This project implements a lexer and Pratt parser for a simple language.
4 |
5 | It also creates a CodeMirror mode, `myMode`, that provides syntax highlighting based on the lexer, and linting for parsing errors.
6 |
7 | For more details, see this [blog post on the Desmos engineering blog](https://engineering.desmos.com/articles/pratt-parser).
8 |
9 | Hopefully this will serve as a nice starting point for anyone interested in building a web-based language. Enjoy!
10 |
11 | # Online
12 |
13 | You can play with the parser online [on the GitHub page](https://desmosinc.github.io/pratt-parser-blog-code/).
14 |
15 | # Setup
16 |
17 | Clone the repo, then run
18 |
19 | ```
20 | npm install
21 | node fuse.js
22 | ```
23 |
24 | Then open http://localhost:4444/
25 |
--------------------------------------------------------------------------------
/src/ast.ts:
--------------------------------------------------------------------------------
1 | import {Position} from './position';
2 | import {BinaryOperationTokenType} from './lexer';
3 |
4 | export type NodeType = // node kind names; only 'Number', 'Boolean' and 'BinaryOperation' have a node type in this file
5 | | 'SinkAssignment'
6 | | 'VariableAssignment'
7 | | 'Number'
8 | | 'Boolean'
9 | | 'String'
10 | | 'BinaryOperation'
11 | | 'Choose'
12 | | 'Identifier'
13 | | 'Function'
14 | | 'CalculatorReference';
15 | // Numeric literal; NumberParselet fills `value` with parseFloat of the token text.
16 | export type NumberNode = {
17 | type: 'Number';
18 | value: number;
19 | pos: Position;
20 | };
21 | // Boolean literal.
22 | export type BooleanNode = {
23 | type: 'Boolean';
24 | value: boolean;
25 | pos: Position;
26 | };
27 | // Binary operator applied to two sub-expressions.
28 | export type BinaryOperationNode = {
29 | type: 'BinaryOperation';
30 | operator: BinaryOperationTokenType; // the operator's token type: '+', '-', '*', '/' or '^'
31 | left: Node;
32 | right: Node;
33 | pos: Position; // from the start of `left` to the last token consumed for the operation
34 | };
35 | // Union of the node types defined above; discriminated by `type`.
36 | export type Node = BooleanNode | NumberNode | BinaryOperationNode;
37 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | import {getTokens} from './lexer';
2 | import {create} from './editor';
3 | import {parse} from './parser';
4 |
5 | // Host element for the CodeMirror editor.
6 | const cmContainer = document.createElement('div');
7 | cmContainer.className = 'cm-container';
8 | document.body.appendChild(cmContainer);
9 | const cm = create(cmContainer);
10 | // <pre> element that displays the parse result and the token list as JSON.
11 | const outputContainer = document.createElement('pre');
12 | outputContainer.className = 'output-container';
13 | document.body.appendChild(outputContainer);
14 | /** Re-parses the editor text, stores the parse errors for the lint helper, and refreshes the output. */
15 | function updateOutput() {
16 |   const text = cm.getDoc().getValue();
17 |   const ast = parse(text);
18 |   cm.setOption('script-errors', ast.errors);
19 |   const tokens = getTokens(text);
20 |   // textContent rather than innerHTML: token text comes from user input and must not be parsed as HTML.
21 |   const report = `ast: ${JSON.stringify(ast, null, 2)}\ntokens: ${JSON.stringify(tokens, null, 2)}`;
22 |   outputContainer.textContent = report;
23 | }
24 | cm.on('change', updateOutput);
25 | updateOutput();
26 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pratt-parser-blog-code",
3 | "version": "1.0.0",
4 | "description": "The code to illustrate the pratt parser blog post for the desmos engineering blog.",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "npx ts-node src/index.ts"
8 | },
9 | "repository": {
10 | "type": "git",
11 | "url": "git+https://github.com/desmosinc/pratt-parser-blog-code.git"
12 | },
13 | "author": "",
14 | "license": "MIT",
15 | "bugs": {
16 | "url": "https://github.com/desmosinc/pratt-parser-blog-code/issues"
17 | },
18 | "homepage": "https://github.com/desmosinc/pratt-parser-blog-code#readme",
19 | "devDependencies": {
20 | "@types/codemirror": "0.0.70",
21 | "fuse-box": "^3.6.0",
22 | "ts-node": "^7.0.1",
23 | "typescript": "^3.2.2"
24 | },
25 | "dependencies": {
26 | "codemirror": "^5.42.0"
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/editor.ts:
--------------------------------------------------------------------------------
1 | import CM from 'codemirror';
2 |
3 | import 'codemirror/lib/codemirror.css';
4 | import 'codemirror/addon/lint/lint'
5 | import 'codemirror/addon/lint/lint.css'
6 |
7 | import {ParseError} from './position'
8 | import './mode'
9 |
10 | /** Creates the CodeMirror editor inside `node` and registers the lint helper for 'myMode'. */
11 | export function create(node: HTMLElement) {
12 |   const editor = CM(node, {
13 |     value: '1 + (2 + 3) / 4',
14 |     mode: 'myMode',
15 |     gutters: ['CodeMirror-lint-markers'],
16 |     lint: true,
17 |     lineWrapping: true
18 |   });
19 |
20 |   // ParseError line numbers are shifted down by one to get the line passed to CM.Pos.
21 |   const toAnnotation = ({message, position}: ParseError) => {
22 |     const from = CM.Pos(position.first_line - 1, position.first_column);
23 |     const to = CM.Pos(position.last_line - 1, position.last_column);
24 |     return {from, to, message, severity: 'error'};
25 |   };
26 |   // The helper reads the current errors from the custom 'script-errors' editor option.
27 |   CM.registerHelper('lint', 'myMode', () => {
28 |     const parseErrors: ParseError[] = editor.getOption('script-errors') || [];
29 |     return parseErrors.map(toAnnotation);
30 |   });
31 |   return editor;
32 | }
33 |
--------------------------------------------------------------------------------
/src/position.ts:
--------------------------------------------------------------------------------
1 | import { Token } from './lexer';
2 |
3 | export type Position = { // source span of a token or AST node
4 | first_line: number; // line on which the span starts; getTokens in lexer.ts numbers lines from 1
5 | first_column: number; // column where the span starts (token2pos copies the token's first_column)
6 | last_line: number; // line on which the span ends
7 | last_column: number; // column where the span ends (token2pos copies the token's last_column)
8 | };
9 |
10 | /**
11 |  * Converts a token into the Position it covers. A token records a single
12 |  * `line`, so that value is used for both first_line and last_line.
13 |  */
14 | export function token2pos(token: Token): Position {
15 |   const {line, first_column, last_column} = token;
16 |   return {first_line: line, last_line: line, first_column, last_column};
17 | }
18 |
19 | /** Returns the span that begins where `start` begins and finishes where `end` finishes. */
20 | export function join(start: Position, end: Position) {
21 |   const {first_line, first_column} = start;
22 |   const {last_line, last_column} = end;
23 |   // Keys are listed in the same order that token2pos uses.
24 |   const span = {first_line, last_line, first_column, last_column};
25 |   return span;
26 | }
27 |
28 | // ParseError is a plain class on purpose: extending Error in the browser is problematic, see
29 | // https://stackoverflow.com/questions/33870684/why-doesnt-instanceof-work-on-instances-of-error-subclasses-under-babel-node
30 | export class ParseError { // thrown by the parser and returned in the `errors` array of parse()
31 | constructor(public message: string, public position: Position) {} // both parameters become public fields
32 | }
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Desmos
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/tokenstream.ts:
--------------------------------------------------------------------------------
1 | import {Token, TokenType, getTokens} from './lexer';
2 | import {ParseError, token2pos} from './position';
3 |
4 | /** Cursor over the tokens of a source text, with COMMENT tokens removed. */
5 | export class TokenStream {
6 |   tokens: Token[];
7 |   pos: number = 0;
8 |
9 |   /** Lexes `text` up front and drops COMMENT tokens so the parser never sees them. */
10 |   constructor(text: string) {
11 |     this.tokens = getTokens(text).filter(t => t.type !== 'COMMENT');
12 |   }
13 |
14 |   /** Returns the current token and moves past it; returns undefined, without moving, at the end. */
15 |   consume(): Token | undefined {
16 |     const token = this.tokens[this.pos];
17 |     if (token) {
18 |       this.pos += 1;
19 |     }
20 |     return token;
21 |   }
22 |
23 |   /** Returns the current token without moving, or undefined at the end. */
24 |   peek(): Token | undefined {
25 |     return this.tokens[this.pos];
26 |   }
27 |
28 |   /** Returns the most recently consumed token; only meaningful after a successful consume(). */
29 |   last(): Token {
30 |     return this.tokens[this.pos - 1];
31 |   }
32 |
33 |   /**
34 |    * Consumes the next token and returns it when its type is `expectedType`.
35 |    * @throws ParseError if the input is exhausted or the token has another type.
36 |    */
37 |   expectToken<T extends TokenType>(expectedType: T): Token {
38 |     const actual = this.consume();
39 |     if (!actual) {
40 |       throw new ParseError(`Expected "${expectedType}" token but found none.`, token2pos(this.last()));
41 |     }
42 |     if (actual.type !== expectedType) {
43 |       throw new ParseError(`Expected "${expectedType}" token type but found "${actual.type}".`, token2pos(actual));
44 |     }
45 |     return actual;
46 |   }
47 | }
48 |
--------------------------------------------------------------------------------
/src/mode.ts:
--------------------------------------------------------------------------------
1 | // CodeMirror syntax-highlighting mode
2 | import { getToken, State } from './lexer';
3 |
4 | import * as CM from 'codemirror';
5 |
6 | type TokenType = // style names token() may return; only 'number', 'bracket', 'operator', 'comment' and 'error' are produced below
7 | | 'operator'
8 | | 'bracket'
9 | | 'keyword'
10 | | 'variable'
11 | | 'number'
12 | | 'comment'
13 | | 'string'
14 | | 'error'
15 |
16 |
17 | /**
18 |  * Builds the CodeMirror mode object for 'myMode'. Each lexer token type is
19 |  * translated to the highlighting style name returned from `token`.
20 |  */
21 | export function MakeMode(_config: CodeMirror.EditorConfiguration, _modeOptions?: any): CM.Mode {
22 |   // Styles one token read from `stream`; null when the lexer produced no token.
23 |   const styleNextToken = (stream: CM.StringStream, state: State): TokenType | null => {
24 |     const token = getToken(stream, state);
25 |     if (!token) {
26 |       return null;
27 |     }
28 |
29 |     const type = token.type;
30 |     if (type === 'NUMBER') {
31 |       return 'number';
32 |     }
33 |     if (type === '(' || type === ')') {
34 |       return 'bracket';
35 |     }
36 |     if (type === '+' || type === '-' || type === '*' || type === '/' || type === '^') {
37 |       return 'operator';
38 |     }
39 |     if (type === 'COMMENT') {
40 |       return 'comment';
41 |     }
42 |     if (type === 'ERROR') {
43 |       return 'error';
44 |     }
45 |     // Every known token type is handled above, so `type` is `never` here.
46 |     return assertUnreachable(type);
47 |   };
48 |
49 |   // Initial lexer state handed to `token`.
50 |   const initialState = (): State => ({
51 |     stack: ['default' as 'default'],
52 |     line: 0
53 |   });
54 |
55 |   return {
56 |     token: styleNextToken,
57 |     startState: initialState
58 |   };
59 | }
60 |
61 | function assertUnreachable(x: never): never { // exhaustiveness guard: the `never` parameter type-checks only if every case was handled
62 | throw new Error(`Didn't expect to get here ${x}`);
63 | }
64 | // Register the mode under the name 'myMode', which editor.ts passes as its `mode` option.
65 | CM.defineMode('myMode', MakeMode);
66 |
--------------------------------------------------------------------------------
/src/parselet.ts:
--------------------------------------------------------------------------------
1 | import { TokenStream } from './tokenstream';
2 | import { Token, TokenType, BinaryOperationTokenType} from './lexer';
3 | import * as AST from './ast';
4 | import { AbstractParser } from './parser';
5 | import {token2pos, join} from './position'
6 |
7 | export interface InitialParselet { // parselet selected by the type of the token that starts an expression
8 | parse(parser: AbstractParser, tokens: TokenStream, token: Token): AST.Node; // `token` has already been consumed by the parser
9 | }
10 |
11 | /** Initial parselet that turns a NUMBER token into a Number node. */
12 | export class NumberParselet implements InitialParselet {
13 |   parse(_parser: AbstractParser, _tokens: TokenStream, token: Token) {
14 |     // parseFloat reads the numeric value from the token's source text.
15 |     const value = parseFloat(token.text);
16 |     const pos = token2pos(token);
17 |     return {type: 'Number' as 'Number', value, pos};
18 |   }
19 | }
20 |
21 | /** Initial parselet producing a Boolean node whose value is fixed at construction time. */
22 | export class BooleanParselet implements InitialParselet {
23 |   constructor(private value: boolean) {}
24 |
25 |   parse(_parser: AbstractParser, _tokens: TokenStream, token: Token) {
26 |     const pos = token2pos(token);
27 |     // `type` is asserted to its literal type so the result matches AST.BooleanNode.
28 |     return {type: 'Boolean' as 'Boolean', value: this.value, pos};
29 |   }
30 | }
31 |
32 | /** Initial parselet for '(': parses the enclosed expression and requires the matching ')'. */
33 | export class ParenParselet implements InitialParselet {
34 |   parse(parser: AbstractParser, tokens: TokenStream, _token: Token) {
35 |     const inner = parser.parse(tokens, 0);
36 |     tokens.expectToken(')');
37 |     return inner; // the inner node is returned as is; no node is created for the parentheses
38 |   }
39 | }
40 |
41 | /** Base class for parselets that extend an already parsed left-hand expression. */
42 | export abstract class ConsequentParselet {
43 |   readonly tokenType: TokenType;
44 |   readonly associativity: 'left' | 'right';
45 |
46 |   constructor(tokenType: TokenType, associativity: 'left' | 'right') {
47 |     this.tokenType = tokenType;
48 |     this.associativity = associativity;
49 |   }
50 |   /** Builds the node for `left` followed by `token`; the parser has already consumed `token`. */
51 |   abstract parse(parser: AbstractParser, tokens: TokenStream, left: AST.Node, token: Token): AST.Node;
52 | }
53 |
54 | /**
55 |  * Consequent parselet for the binary operators. After the operator token has
56 |  * been consumed it parses the right operand and builds a BinaryOperation node.
57 |  */
58 | export class BinaryOperatorParselet extends ConsequentParselet {
59 |   constructor(
60 |     public tokenType: BinaryOperationTokenType,
61 |     associativity: 'left' | 'right'
62 |   ) {
63 |     super(tokenType, associativity);
64 |   }
65 |
66 |   parse(parser: AbstractParser, tokens: TokenStream, left: AST.Node, token: Token): AST.Node {
67 |     const ownPower = parser.bindingPower(token);
68 |
69 |     // A right-associative operator parses its right operand with a binding
70 |     // power one lower than its own, so that another operator of the same
71 |     // class is absorbed into the right operand instead of ending it.
72 |     let rightPower = ownPower;
73 |     if (this.associativity != 'left') {
74 |       rightPower = ownPower - 1;
75 |     }
76 |     const right = parser.parse(tokens, rightPower);
77 |
78 |     // The node spans from the start of `left` to the last token consumed.
79 |     const lastConsumed = tokens.last();
80 |     const pos = join(left.pos, token2pos(lastConsumed));
81 |     return {type: 'BinaryOperation' as 'BinaryOperation', operator: this.tokenType, left, right, pos};
82 |   }
83 | }
84 |
--------------------------------------------------------------------------------
/src/lexer.ts:
--------------------------------------------------------------------------------
1 | import StringStream from './StringStream';
2 |
3 | /**
4 |  * Lexes `text` line by line and returns every token in source order.
5 |  * Whitespace yields no token; lines are numbered starting at 1.
6 |  * @throws Error if getToken returns without advancing the stream.
7 |  */
8 | export function getTokens(text: string): Token[] {
9 |   const state: State = {line: 1, stack: ['default']};
10 |   const collected: Token[] = [];
11 |
12 |   text.split('\n').forEach((lineText) => {
13 |     const stream = new StringStream(lineText);
14 |     for (; !stream.eol(); stream.start = stream.pos) {
15 |       const token = getToken(stream, state);
16 |       if (token != undefined) {
17 |         collected.push(token);
18 |       }
19 |       // Guard against an endless loop should a lexer branch consume nothing.
20 |       if (stream.pos == stream.start) {
21 |         const where = `position ${stream.pos} in string ${stream.string}`;
22 |         throw new Error(`getToken failed to advance stream at ${where}`);
23 |       }
24 |     }
25 |     state.line += 1;
26 |   });
27 |
28 |   return collected;
29 | }
30 |
31 | /**
32 |  * Lexes one token from `stream`, dispatching on the mode on top of `state.stack`.
33 |  * Built for the CodeMirror stream API. Every mode currently uses the default lexer.
34 |  */
35 | export function getToken(stream: StringStream, state: State): Token | undefined {
36 |   const activeMode = state.stack[state.stack.length - 1];
37 |   switch (activeMode) {
38 |     default:
39 |       return getDefaultToken(stream, state);
40 |   }
41 | }
42 |
43 | /**
44 |  * Returns a function that builds a Token of the given type from the text the
45 |  * stream currently spans (stream.start to stream.pos) on line state.line.
46 |  */
47 | function makeEmit(stream: StringStream, state: State) {
48 |   const emitToken = (type: TokenType): Token => {
49 |     const {start: first_column, pos: last_column} = stream;
50 |     return {type, first_column, last_column, line: state.line, text: stream.current()};
51 |   };
52 |   return emitToken;
53 | }
54 |
55 | /**
56 |  * Lexes one token in the default mode.
57 |  *
58 |  * Returns undefined when whitespace was skipped. Otherwise the first rule
59 |  * that `stream.match` accepts decides the token type, '#' starts a comment,
60 |  * and anything else is consumed one character at a time as ERROR.
61 |  *
62 |  * @param stream stream to read from
63 |  * @param state lexer state; its `line` is copied into the emitted token
64 |  * @returns the token, or undefined if only whitespace was consumed
65 |  */
66 | function getDefaultToken(
67 |   stream: StringStream,
68 |   state: State,
69 | ): Token | undefined {
70 |   if (stream.eatSpace()) {
71 |     // skip whitespace
72 |     return undefined;
73 |   }
74 |
75 |   const emitToken = makeEmit(stream, state);
76 |
77 |   // Rules are tried in this order. '-' comes before NUMBER, so a minus sign
78 |   // in front of digits is emitted as its own '-' token.
79 |   const rules: Array<[RegExp, TokenType]> = [
80 |     [/\+/, '+'],
81 |     [/\-/, '-'],
82 |     [/\*/, '*'],
83 |     [/\//, '/'],
84 |     [/\^/, '^'],
85 |     [/\(/, '('],
86 |     [/\)/, ')'],
87 |     [/-?[0-9]+(\.[0-9]+)?/, 'NUMBER'],
88 |   ];
89 |   for (const [pattern, type] of rules) {
90 |     if (stream.match(pattern)) {
91 |       return emitToken(type);
92 |     }
93 |   }
94 |
95 |   // '#' starts a comment.
96 |   if (stream.match(/#/)) {
97 |     if (!stream.match(/\n/)) {
98 |       // comment lasts till end of line
99 |       stream.match(/.*/); // if no eol encountered, comment lasts till end of file
100 |     }
101 |     return emitToken('COMMENT');
102 |   }
103 |
104 |   // No rule matched: consume a single character and report it as an error.
105 |   stream.next();
106 |   return emitToken('ERROR');
107 | }
108 |
109 | export type BinaryOperationTokenType = // token types of the binary operators; also stored as BinaryOperationNode.operator
110 | | '+'
111 | | '-'
112 | | '*'
113 | | '/'
114 | | '^'
115 | // Every token type that getDefaultToken can emit.
116 | export type TokenType =
117 | | BinaryOperationTokenType
118 | | 'NUMBER'
119 | | '('
120 | | ')'
121 | | 'COMMENT' // from '#' onwards; TokenStream filters these out
122 | | 'ERROR'; // a single character that matched no lexer rule
123 |
124 | /** One lexed token. `T` narrows `type`; by default it is any TokenType. */
125 | export interface Token<T extends TokenType = TokenType> {
126 |   type: T;
127 |   text: string; // source text covered by the token (stream.current() when emitted)
128 |   line: number; // line counter copied from the lexer State
129 |   first_column: number; last_column: number; // stream.start / stream.pos when emitted
130 | }
131 |
132 | type Mode = 'default'; // the only lexer mode defined
133 | // Lexer state carried from one getToken call to the next.
134 | export interface State {
135 | stack: Mode[]; // mode stack; getToken dispatches on its last element
136 | line: number; // current line number; getTokens starts at 1 and increments it after each line
137 | }
138 |
--------------------------------------------------------------------------------
/src/parser.ts:
--------------------------------------------------------------------------------
1 | import * as Parselet from './parselet';
2 | import {Token, TokenType} from './lexer';
3 | import {TokenStream} from './tokenstream';
4 | import {ParseError, token2pos} from './position';
5 | import * as AST from './ast';
6 |
7 | /** Parses `text` into top-level expression nodes; stops at the first error and reports it in `errors`. */
8 | export function parse(text: string): {nodes: AST.Node[]; errors: ParseError[]} {
9 |   const tokens = new TokenStream(text);
10 |   const parser = new Parser();
11 |   const nodes: AST.Node[] = [];
12 |   const errors: ParseError[] = [];
13 |
14 |   // Anything thrown while parsing ends the loop and is returned as the single error.
15 |   while (errors.length === 0 && tokens.peek()) {
16 |     try {
17 |       nodes.push(parser.parse(tokens, 0));
18 |     } catch (e) {
19 |       errors.push(e);
20 |     }
21 |   }
22 |
23 |   return {nodes, errors};
24 | }
25 |
26 | /**
27 |  * Generic Pratt parser. Subclasses supply the parselets and the precedence
28 |  * classes; this class derives the binding powers and runs the parse loop.
29 |  */
30 | export abstract class AbstractParser {
31 |   public bindingPowers: {[tokenType in TokenType]: number};
32 |
33 |   /** Parselets for tokens that can start an expression, keyed by token type. */
34 |   protected abstract initialMap(): Partial<{[K in TokenType]: Parselet.InitialParselet}>;
35 |   /** Parselets for tokens that can continue an expression, keyed by token type. */
36 |   protected abstract consequentMap(): Partial<{[K in TokenType]: Parselet.ConsequentParselet}>;
37 |   /** Token types grouped into precedence classes; later classes bind more tightly. */
38 |   protected abstract bindingClasses(): TokenType[][];
39 |
40 |   /** @throws Error if a token type returned by consequentMap() is in no binding class. */
41 |   constructor() {
42 |     this.bindingPowers = {} as any;
43 |
44 |     // Class i gets binding power 10 * i + 9.
45 |     const bindingClasses = this.bindingClasses();
46 |     for (let i = 0; i < bindingClasses.length; i++) {
47 |       for (const tokenType of bindingClasses[i]) {
48 |         this.bindingPowers[tokenType] = 10 * i + 9;
49 |       }
50 |     }
51 |
52 |     // consequentMap must be called here: Object.keys of the method itself is
53 |     // always empty, which would skip this consistency check entirely.
54 |     for (const tokenType of Object.keys(this.consequentMap()) as TokenType[]) {
55 |       if (this.bindingPowers[tokenType] == undefined) {
56 |         throw new Error(
57 |           `Token ${tokenType} defined in consequentMap has no associated binding power.\n` +
58 |             'Make sure it is also listed in bindingClasses.',
59 |         );
60 |       }
61 |     }
62 |   }
63 |
64 |   /**
65 |    * Returns the binding power of `token`.
66 |    * @throws ParseError if its type is in none of the binding classes.
67 |    */
68 |   bindingPower(token: Token): number {
69 |     const power = this.bindingPowers[token.type];
70 |     if (power == undefined) {
71 |       throw new ParseError(`Unexpected token type ${token.type}.`, token2pos(token));
72 |     }
73 |     return power;
74 |   }
75 |
76 |   /**
77 |    * Parses one expression, absorbing following operators for as long as they
78 |    * bind more tightly than `currentBindingPower`.
79 |    * @throws ParseError at end of input or on a token that cannot start an expression.
80 |    */
81 |   parse(tokens: TokenStream, currentBindingPower: number): AST.Node {
82 |     const token = tokens.consume();
83 |     if (!token) {
84 |       throw new ParseError(`Unexpected end of tokens.`, token2pos(tokens.last()));
85 |     }
86 |
87 |     const initialParselet = this.initialMap()[token.type];
88 |     if (!initialParselet) {
89 |       throw new ParseError(`Unexpected token type ${token.type}`, token2pos(token));
90 |     }
91 |
92 |     let left = initialParselet.parse(this, tokens, token);
93 |
94 |     while (true) {
95 |       const next = tokens.peek();
96 |       if (!next) {
97 |         break;
98 |       }
99 |       const consequentParselet = this.consequentMap()[next.type];
100 |       // Stop at a token that cannot continue an expression or binds too loosely.
101 |       if (!consequentParselet || currentBindingPower >= this.bindingPower(next)) {
102 |         break;
103 |       }
104 |       tokens.consume();
105 |       left = consequentParselet.parse(this, tokens, left, next);
106 |     }
107 |
108 |     return left;
109 |   }
110 | }
111 |
112 | /** Concrete parser for the arithmetic language: numbers, parentheses and + - * / ^. */
113 | export class Parser extends AbstractParser {
114 |   initialMap() {
115 |     const number = new Parselet.NumberParselet();
116 |     const paren = new Parselet.ParenParselet();
117 |     return {NUMBER: number, '(': paren};
118 |   }
119 |
120 |   consequentMap() {
121 |     const {BinaryOperatorParselet: Binary} = Parselet;
122 |     return {
123 |       '+': new Binary('+', 'left'),
124 |       '-': new Binary('-', 'left'),
125 |       '*': new Binary('*', 'left'),
126 |       '/': new Binary('/', 'left'),
127 |       '^': new Binary('^', 'right'), // the only right-associative operator
128 |     };
129 |   }
130 |   /** Precedence classes from loosest to tightest: additive, multiplicative, power. */
131 |   bindingClasses(): TokenType[][] {
132 |     return [['+', '-'], ['*', '/'], ['^']];
133 |   }
134 | }
135 |
--------------------------------------------------------------------------------