├── LICENSE ├── README.md └── expr_parser ├── __init__.py ├── lexer.py └── parser.py /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-2017 Percolate 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pratt-parser 2 | A Pratt Parser implementation in Python. 3 | 4 | This code accompanies a talk given by Percolate, at the April 2017 SF Python Meetup. The presentation slides are [also available for download](https://www.slideshare.net/percolate/pratt-parser-in-python). 
# /expr_parser/__init__.py:
# https://raw.githubusercontent.com/percolate/pratt-parser/ac92d22ec023bfa1cf936d8c1f6486cead4dc13e/expr_parser/__init__.py
#
# /expr_parser/lexer.py:
"""
A simple demo lexer for a pratt parser
"""
from __future__ import absolute_import, unicode_literals

import re

# Ordered (group name, regex) pairs.  Each pair becomes one named group of
# the combined pattern below; `lex` reports the name of the group that
# matched as the token type.
TOKENS = (
    ('ws', r'\s+'),
    ('name', r'[a-z]\w*'),
    ('infix', r'[+\-*/\^]'),
    ('punct', r'[\(\),]'),
    # Optional "digits-then-dot" prefix followed by digits.  The prefix is a
    # NON-capturing group `(?:...)`; the previous spelling `(:?...)` was a
    # typo that accepted an optional literal colon (":.5" lexed as a number).
    ('number', r'(?:\d*\.)?\d+'),
)


TOKEN_RE = '|'.join(
    "(?P<%s>%s)" % t for t in TOKENS
)

LEX_RE = re.compile(TOKEN_RE, re.UNICODE | re.VERBOSE | re.IGNORECASE)


class LexerException(Exception):
    """Raised by `lex` when the source contains text no token matches."""
    pass


class Token(object):
    """A single lexeme.

    Attributes are plain values set by the constructor:
    token_type -- the token category, e.g. "<number>"
    value      -- the matched source text
    pos        -- offset of the match within the source
    """

    def __init__(self, token_type, value, pos):
        self.token_type = token_type
        self.value = value
        self.pos = pos

    def __repr__(self):
        return "%s('%s', %d)" % (self.token_type, self.value, self.pos)

    def __str__(self):
        return repr(self)


def lex(source, pat=LEX_RE):
    """Generate `Token` objects for every non-whitespace match in `source`.

    `pat` is a compiled pattern made of named groups; the name of the group
    that matched, wrapped in angle brackets (e.g. "<name>"), becomes the
    token type.  Matches of the group named "ws" are skipped.

    Raises `LexerException` as soon as a stretch of `source` is not covered
    by a match, either between two matches or after the last one.
    """
    # Offset just past the last character consumed so far.
    consumed = 0

    def error():
        raise LexerException(
            "Unexpected character at position %d: `%s`" % (
                consumed, source[consumed])
        )

    for m in pat.finditer(source):
        pos = m.start()
        # finditer silently skips unmatched text; a gap means bad input.
        if pos > consumed:
            error()
        consumed = m.end()
        name = m.lastgroup
        if name == "ws":
            continue
        yield Token("<%s>" % name, m.group(0), pos)

    # Anything left after the final match is unlexable as well.
    if consumed < len(source):
        error()

# /expr_parser/parser.py:
"""
a Pratt parser (and interpreter) for simple arithmetic expressions
"""
from __future__ import unicode_literals, absolute_import

import operator
import math
from . import lexer

# Maps an operator symbol or function name to the callable implementing it.
OP_REGISTRY = {
    "+": operator.add,
    "-": operator.sub,
    "*": operator.mul,
    # `operator.div` exists on Python 2 only; `truediv` is available on both
    # major versions and keeps the module importable on Python 3.
    "/": operator.truediv,
    "^": operator.pow,
    "sqrt": math.sqrt,
    "log": math.log,
    "log2": lambda x: math.log(x, 2)
}


class ParserError(Exception):
    """Raised for both syntax errors and evaluation errors."""
    pass


class Symbol(object):
    """Base class for all nodes"""
    # Key of this symbol in the parser's symbol table (set by Parser.define).
    id = None
    # Left binding power; 0 means the symbol never binds as an infix.
    lbp = 0

    def __init__(self, parser, value=None):
        self.parser = parser
        # Fall back to the symbol id when the token carries no text.
        self.value = value or self.id
        self.first = None
        self.second = None

    def nud(self):
        """Handle the symbol at the start of an expression (prefix role)."""
        raise ParserError("Symbol action undefined for `%s'" % self.value)

    def led(self, left):
        """Handle the symbol after the sub-expression `left` (infix role)."""
        raise ParserError("Infix action undefined for `%s'" % self.value)

    def eval(self, doc):
        """Evaluate the node; `doc` is the mapping used to resolve names."""
        raise ParserError("Unimplemented")

    def __repr__(self):
        return "<'%s'>" % self.value


class Literal(Symbol):
    """Simple literal (a number or a variable/function name)
    just produces itself"""
    def nud(self):
        return self


class Infix(Symbol):
    """Infix operator"""
    rightAssoc = False

    def led(self, left):
        self.first = left
        # Lowering the right binding power by one lets an operator of the
        # same precedence bind to the right-hand side (right associativity).
        rbp = self.lbp - int(self.rightAssoc)
        self.second = self.parser.expression(rbp)
        return self

    def eval(self, doc):
        return OP_REGISTRY[self.value](
            self.first.eval(doc),
            self.second.eval(doc)
        )

    def __repr__(self):
        return "<'%s'>(%s, %s)" % (
            self.value, repr(self.first), repr(self.second)
        )


class InfixR(Infix):
    """Infix (right associative) operator"""
    rightAssoc = True


class Prefix(Symbol):
    """Prefix operator.
    For the sake of simplicity has fixed right binding power"""
    def nud(self):
        self.first = self.parser.expression(80)
        return self

    def eval(self, doc):
        # Evaluate the operand first; the registry callables work on values,
        # not on parse-tree nodes.
        return OP_REGISTRY[self.value](self.first.eval(doc))

    def __repr__(self):
        return "<'%s'>(%s)" % (
            self.value, repr(self.first)
        )


class Parser(object):
    """
    Main parser class. Contains both the grammar definition
    and a pointer to the current token stream
    """
    def __init__(self, lex=lexer.lex):
        self.lex = lex
        self.symbol_table = {}
        # Sentinel produced once the token stream is exhausted.  Its id must
        # differ from every token type/value the lexer can produce.
        self.define("<end>")

        self.tokens = iter(())
        self.token = None

    def define(self, sid, bp=0, symbol_class=Symbol):
        """Register a symbol under `sid` with left binding power `bp`.

        Called plainly, a fresh subclass of `symbol_class` is registered.
        The return value can also be used as a class decorator, in which
        case the decorated class replaces that subclass in the table and
        receives the same `id` and `lbp`.
        """
        symbol_table = self.symbol_table
        sym = symbol_table[sid] = type(
            symbol_class.__name__,
            (symbol_class,),
            {'id': sid, 'lbp': bp}
        )

        def wrapper(val):
            val.id = sid
            val.lbp = sym.lbp
            symbol_table[sid] = val
            return val

        return wrapper

    def expression(self, rbp):
        """Parse and return an expression whose operators bind tighter
        than `rbp` (the caller's right binding power)."""
        tok = self.token
        self.advance()
        left = tok.nud()
        while rbp < self.token.lbp:
            tok = self.token
            self.advance()
            left = tok.led(left)
        return left

    def advance(self, value=None):
        """Move to the next token and return it.

        When `value` is given, the current token's value or id must equal
        it, otherwise `ParserError` is raised.  At the end of the stream the
        "<end>" sentinel becomes the current token.
        """
        tok = self.token
        if value and value not in (tok.value, tok.id):
            raise ParserError(
                "Expected `%s'; got `%s' instead" % (value, tok.value))
        try:
            # Builtin next() works on Python 2.6+ and 3; `.next()` is 2-only.
            tok = next(self.tokens)
        except StopIteration:
            self.token = self.symbol_table["<end>"](self)
            return self.token

        symbol_table = self.symbol_table
        # first look up symbol's value
        if tok.value in symbol_table:
            sym = symbol_table[tok.value]
        elif tok.token_type in symbol_table:
            # then symbol's type
            sym = symbol_table[tok.token_type]
        else:
            raise ParserError("Undefined token %s" % repr(tok))
        self.token = sym(self, tok.value)
        return self.token

    def parse(self, source):
        """Parse `source` and return the root node of the expression tree.

        Raises `ParserError` on malformed input, including tokens left over
        after a complete expression.  The token stream is reset afterwards
        whether or not parsing succeeded.
        """
        try:
            self.tokens = self.lex(source)
            self.advance()
            tree = self.expression(0)
            # A complete expression must consume the whole input; anything
            # other than the end sentinel here is trailing garbage.
            if self.token.id != "<end>":
                raise ParserError(
                    "Unexpected token `%s'" % self.token.value)
            return tree
        finally:
            self.tokens = iter(())
            self.token = None


"""
Grammar definition:

expression ::= mul-expr ( ( '+' | '-' ) mul-expr )*
mul-expr ::= pow-expr ( ( '*' | '/' ) pow-expr )*
pow-expr ::= prefix-expr ['^' pow-expr]
prefix-expr ::= [ '-' ] primary
primary ::= '(' expression ')' | number
          | name [ '(' expression ( ',' expression )* ')' ]
"""

expr = Parser()
# just to leave ourselves some space, start with 50
expr.define("+", 50, Infix)
expr.define("*", 60, Infix)
expr.define("/", 60, Infix)
expr.define("^", 70, InfixR)


# The ids below are token *types*; they must match the "<%s>" strings the
# lexer builds from its group names.
@expr.define("<number>")
class Number(Literal):
    """Only defined for the sake of eval"""
    def eval(self, doc):
        return float(self.value)


@expr.define("<name>")
class Reference(Literal):
    """Only defined for the sake of eval"""
    def eval(self, doc):
        try:
            return doc[self.value]
        except KeyError:
            raise ParserError("Missing reference '%s'" % self.value)


@expr.define("-", 50)
class Minus(Infix, Prefix):
    """This combines both Prefix' nud and Infix' led"""
    def eval(self, doc):
        # `second` is only filled in by led(), i.e. in the binary form.
        if self.second is None:
            return operator.neg(self.first.eval(doc))
        return super(Minus, self).eval(doc)


expr.define(",")
expr.define(")")


@expr.define("(", 90)
class FunctionCall(Symbol):
    """Defining both function application and parenthesized expression"""
    def nud(self):
        # "(" at the start of an expression: plain grouping.
        p = self.parser
        e = p.expression(0)
        p.advance(")")
        return e

    def led(self, left):
        # "(" after an expression: `left` is the callee, then a
        # comma-separated argument list up to the closing parenthesis.
        self.first = left
        args = self.second = []
        p = self.parser
        while p.token.value != ")":
            args.append(p.expression(0))
            if p.token.value != ",":
                break
            p.advance(",")
        p.advance(")")
        return self

    def __repr__(self):
        return "<Call '%s'(%s)>" % (
            self.first.value,
            ', '.join(map(repr, self.second))
        )

    def eval(self, doc):
        name = self.first.value
        # Look the function up on its own so that a KeyError raised while
        # evaluating or applying the arguments is not misreported.
        try:
            func = OP_REGISTRY[name]
        except KeyError:
            raise ParserError("Invalid function '%s'" % name)
        return func(*[arg.eval(doc) for arg in self.second])