├── LICENSE
├── README.md
└── expr_parser
    ├── __init__.py
    ├── lexer.py
    └── parser.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2016-2017 Percolate
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pratt-parser
2 | A Pratt Parser implementation in Python.
3 | 
4 | This code accompanies a talk given by Percolate at the April 2017 SF Python Meetup. The presentation slides are [also available for download](https://www.slideshare.net/percolate/pratt-parser-in-python).
5 | 


--------------------------------------------------------------------------------
/expr_parser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/percolate/pratt-parser/ac92d22ec023bfa1cf936d8c1f6486cead4dc13e/expr_parser/__init__.py


--------------------------------------------------------------------------------
/expr_parser/lexer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | A simple demo lexer for a pratt parser
 3 | """
 4 | from __future__ import absolute_import, unicode_literals
 5 | 
 6 | import re
 7 | 
 8 | TOKENS = (
 9 |     ('ws', r'\s+'),
10 |     ('name', r'[a-z][\w_]*'),
11 |     ('infix', r'[+\-*/\^]'),
12 |     ('punct', r'[\(\),]'),
13 |     ('number', r'(:?\d*\.)?\d+'),
14 | )
15 | 
16 | 
17 | TOKEN_RE = '|'.join(
18 |     "(?P<%s>%s)" % t for t in TOKENS
19 | )
20 | 
21 | LEX_RE = re.compile(TOKEN_RE, re.UNICODE | re.VERBOSE | re.IGNORECASE)
22 | 
23 | 
class LexerException(Exception):
    """Error type for failures that occur while tokenizing source text."""
26 | 
27 | 
class Token(object):
    """A single lexed token: its type tag, its text and its source offset."""

    def __init__(self, token_type, value, pos):
        """Store the token's type tag, matched text and start position."""
        self.token_type, self.value, self.pos = token_type, value, pos

    def __str__(self):
        """Same text as ``repr()``."""
        return repr(self)

    def __repr__(self):
        """Render as ``type('text', position)``."""
        fields = (self.token_type, self.value, self.pos)
        return "%s('%s', %d)" % fields
39 | 
40 | 
def lex(source, pat=LEX_RE):
    """Lazily yield ``Token`` objects for `source`, skipping whitespace.

    source -- the text to tokenize
    pat    -- compiled regex whose named groups identify the token kinds

    Raises ``LexerException`` as soon as a stretch of input that no token
    pattern matches is reached (between two matches or at the very end).
    """
    def fail(at):
        raise LexerException(
            "Unexpected character at position %d: `%s`" % (at, source[at])
        )

    # offset just past the last matched token; everything before it is lexed
    consumed = 0
    for match in pat.finditer(source):
        start = match.start()
        if start > consumed:
            # finditer skipped over text that matched no token pattern
            fail(consumed)
        consumed = match.end()
        kind = match.lastgroup
        if kind != "ws":
            yield Token("<%s>" % kind, match.group(0), start)

    if consumed < len(source):
        fail(consumed)
63 | 


--------------------------------------------------------------------------------
/expr_parser/parser.py:
--------------------------------------------------------------------------------
  1 | """
  2 | a Pratt parser (and interpreter) for simple arithmetic expressions
  3 | """
  4 | from __future__ import unicode_literals, absolute_import
  5 | 
  6 | import operator
  7 | import math
  8 | from . import lexer
  9 | 
 10 | OP_REGISTRY = {
 11 |     "+": operator.add,
 12 |     "-": operator.sub,
 13 |     "*": operator.mul,
 14 |     "/": operator.div,
 15 |     "^": operator.pow,
 16 |     "sqrt": math.sqrt,
 17 |     "log": math.log,
 18 |     "log2": lambda x: math.log(x, 2)
 19 | }
 20 | 
 21 | 
 22 | class ParserError(Exception):
 23 |     pass
 24 | 
 25 | 
 26 | class Symbol(object):
 27 |     """Base class for all nodes"""
 28 |     id = None
 29 |     lbp = 0
 30 | 
 31 |     def __init__(self, parser, value=None):
 32 |         self.parser = parser
 33 |         self.value = value or self.id
 34 |         self.first = None
 35 |         self.second = None
 36 | 
 37 |     def nud(self):
 38 |         raise ParserError("Symbol action undefined for `%s'" % self.value)
 39 | 
 40 |     def led(self, left):
 41 |         raise ParserError("Infix action undefined for `%s'" % self.value)
 42 | 
 43 |     def eval(self, doc):
 44 |         raise ParserError("Unimplemented")
 45 | 
 46 |     def __repr__(self):
 47 |         return "<'%s'>" % self.value
 48 | 
 49 | 
 50 | class Literal(Symbol):
 51 |     """Simple literal (a number or a variable/function name)
 52 |        just produces itself"""
 53 |     def nud(self):
 54 |         return self
 55 | 
 56 | 
 57 | class Infix(Symbol):
 58 |     """Infix operator"""
 59 |     rightAssoc = False
 60 | 
 61 |     def led(self, left):
 62 |         self.first = left
 63 |         rbp = self.lbp - int(self.rightAssoc)
 64 |         self.second = self.parser.expression(rbp)
 65 |         return self
 66 | 
 67 |     def eval(self, doc):
 68 |         return OP_REGISTRY[self.value](
 69 |             self.first.eval(doc),
 70 |             self.second.eval(doc)
 71 |         )
 72 | 
 73 |     def __repr__(self):
 74 |         return "<'%s'>(%s, %s)" % (
 75 |             self.value, repr(self.first), repr(self.second)
 76 |         )
 77 | 
 78 | 
 79 | class InfixR(Infix):
 80 |     """Infix (right associative) operator"""
 81 |     rightAssoc = True
 82 | 
 83 | 
 84 | class Prefix(Symbol):
 85 |     """Prefix operator.
 86 |        For the sake of simplicity has fixed right binding power"""
 87 |     def nud(self):
 88 |         self.first = self.parser.expression(80)
 89 |         return self
 90 | 
 91 |     def eval(self, doc):
 92 |         return OP_REGISTRY[self.value](self.first)
 93 | 
 94 |     def __repr__(self):
 95 |         return "<'%s'>(%s)" % (
 96 |             self.value, repr(self.first)
 97 |         )
 98 | 
 99 | 
class Parser(object):
    """
    Main parser class. Contains both the grammar definition
    and a pointer to the current token stream
    """
    def __init__(self, lex=lexer.lex):
        """Create a parser whose grammar holds only the `<end>` symbol.

        lex -- callable that turns a source string into an iterable of
               tokens exposing `value` and `token_type` attributes
        """
        self.lex = lex
        self.symbol_table = {}
        self.define("<end>")

        self.tokens = iter(())
        self.token = None

    def define(self, sid, bp=0, symbol_class=Symbol):
        """Register a symbol under `sid` with left binding power `bp`.

        A fresh subclass of `symbol_class` is created and stored right away,
        so a plain call is enough for symbols needing no custom behaviour.
        The returned function can be used as a class decorator: it stamps
        `sid` and the binding power on the decorated class and stores that
        class in place of the generated one.
        """
        symbol_table = self.symbol_table
        sym = symbol_table[sid] = type(
            symbol_class.__name__,
            (symbol_class,),
            {'id': sid, 'lbp': bp}
        )

        def wrapper(val):
            val.id = sid
            val.lbp = sym.lbp
            symbol_table[sid] = val
            return val

        return wrapper

    def expression(self, rbp):
        """Parse and return one expression.

        Starts with the current token's `nud`, then keeps folding in
        following tokens through their `led` for as long as their left
        binding power is greater than `rbp`.
        """
        tok = self.token
        self.advance()
        left = tok.nud()
        while rbp < self.token.lbp:
            tok = self.token
            self.advance()
            left = tok.led(left)
        return left

    def advance(self, value=None):
        """Move to the next token and return its symbol instance.

        If `value` is given, the current token must match it (by value or
        by symbol id) or ParserError is raised.  When the token stream is
        exhausted the current token becomes an `<end>` symbol.  Raises
        ParserError for a token that has no symbol defined.
        """
        tok = self.token
        if value and value not in (tok.value, tok.id):
            raise ParserError(
                "Expected `%s'; got `%s' instead" % (value, tok.value))
        try:
            # Builtin next() works on Python 2.6+ and Python 3, whereas the
            # `.next()` method only exists on Python 2 iterators.
            tok = next(self.tokens)
        except StopIteration:
            self.token = self.symbol_table["<end>"](self)
            return self.token

        symbol_table = self.symbol_table
        # first look up symbol's value
        if tok.value in symbol_table:
            sym = symbol_table[tok.value]
        elif tok.token_type in symbol_table:
            # then symbol's type
            sym = symbol_table[tok.token_type]
        else:
            raise ParserError("Undefined token %s" % repr(tok))
        self.token = sym(self, tok.value)
        return self.token

    def parse(self, source):
        """Parse `source` and return the root node of the syntax tree.

        Raises ParserError if the text is not one complete expression,
        including when tokens are left over after the expression.  The
        token stream is always reset afterwards, even on failure.
        """
        try:
            # iter() lets `lex` return any iterable, not only an iterator
            self.tokens = iter(self.lex(source))
            self.advance()
            tree = self.expression(0)
            # expression(0) stops at the first token it cannot fold in;
            # anything other than the end marker is unconsumed input.
            if self.token.id != "<end>":
                raise ParserError(
                    "Unexpected token `%s'" % self.token.value)
            return tree
        finally:
            self.tokens = iter(())
            self.token = None
169 | 
170 | 
171 | """
172 | Grammar definition:
173 | 
174 | expression ::= mul-expr ( ( '+' | '-' ) mul-expr )*
175 | mul-expr ::= pow-expr ( ( '*' | '/' ) pow-expr )*
176 | pow-expr ::= prefix-expr ['^' pow-expr]
177 | prefix-expr ::= [ '-' ] primary
178 | primary ::= '(' expression ')' | number | name [ '(' [ expression ( ',' expression )* ] ')' ]
179 | """
180 | 
# Module-level parser instance that the grammar below is registered on.
expr = Parser()
# just to leave ourselves some space, start with 50
# Binary operators and their left binding powers: "+" at 50, "*" and "/"
# at 60, and "^" at 70 using the right-associative InfixR class.
expr.define("+", 50, Infix)
expr.define("*", 60, Infix)
expr.define("/", 60, Infix)
expr.define("^", 70, InfixR)
187 | 
188 | 
@expr.define("<number>")
class Number(Literal):
    """Node class registered for `<number>` tokens; adds evaluation."""
    def eval(self, doc):
        """Return the literal's text converted to a float."""
        text = self.value
        return float(text)
194 | 
195 | 
@expr.define("<name>")
class Reference(Literal):
    """Node class registered for `<name>` tokens; adds evaluation."""
    def eval(self, doc):
        """Look the name up in `doc`; ParserError if it is not present."""
        try:
            resolved = doc[self.value]
        except KeyError:
            raise ParserError("Missing reference '%s'" % self.value)
        return resolved
204 | 
205 | 
@expr.define("-", 50)
class Minus(Infix, Prefix):
    """Minus sign: binary via Infix' led, unary via Prefix' nud"""
    def eval(self, doc):
        """Subtract when a second operand exists, otherwise negate."""
        if self.second is not None:
            return super(Minus, self).eval(doc)
        # only `first` was filled in, so this is the unary form
        return operator.neg(self.first.eval(doc))
213 | 
214 | expr.define(",")
215 | expr.define(")")
216 | 
217 | 
@expr.define("(", 90)
class FunctionCall(Symbol):
    """Defining both function application and parenthesized expression"""
    def nud(self):
        """Parenthesized expression: parse the inside, then require `)`.

        Returns the inner expression itself, not this node.
        """
        p = self.parser
        e = p.expression(0)
        p.advance(")")
        return e

    def led(self, left):
        """Function application: `left` is the callee, then the arguments.

        Parses comma-separated argument expressions up to the closing `)`
        (an empty argument list is accepted) and returns this node with
        the callee in `first` and the argument list in `second`.
        """
        self.first = left
        args = self.second = []
        p = self.parser
        while p.token.value != ")":
            args.append(p.expression(0))
            if p.token.value != ",":
                break
            p.advance(",")
        p.advance(")")
        return self

    def __repr__(self):
        return "<Call:'%s'>(%s)>" % (
            self.first.value,
            ', '.join(map(repr, self.second))
        )

    def eval(self, doc):
        """Call the registered function with the evaluated arguments.

        Raises ParserError if the callee is not a plain name or if no
        function is registered under that name.
        """
        callee = self.first
        # Only a name may be called.  Without this check an operator node
        # such as the "+" in `(1+2)(3)` would be found in the registry and
        # called with the wrong number of arguments.
        func = OP_REGISTRY.get(callee.value) if callee.id == "<name>" else None
        if func is None:
            raise ParserError("Invalid function '%s'" % callee.value)
        # The lookup is kept separate from the call so that a KeyError
        # raised while evaluating arguments or inside the function is not
        # misreported as an invalid function.
        return func(*[arg.eval(doc) for arg in self.second])
252 | 
253 | 
254 | 


--------------------------------------------------------------------------------