├── .gitignore ├── CHANGES.md ├── LICENSE ├── README.md ├── examples ├── allowing_compiler.py └── sqlalchemy_compiler.py ├── expressions ├── __init__.py ├── compat.py ├── compiler.py ├── grammar.ebnf └── grammar.py ├── generate_parser.sh ├── setup.py └── tests ├── __init__.py └── test_expressions.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ++++++++++++++++++++++ 2 | Changes in Expressions 3 | ++++++++++++++++++++++ 4 | 5 | Version 0.1.2 6 | ============= 7 | 8 | * added chance to finalize the compilation object 9 | * back-ported to Python 2.x 10 | 11 | Version 0.1.1 12 | ============= 13 | 14 | New features 15 | ------------ 16 | 17 | * new base class `Dialect` for syntax dialects – will contain list of 18 | operators and other dialect properties 19 | * added `register_dialect`, `get_dialect` and `unregister_dialect` 20 | 21 | 22 | Changes 23 | ------- 24 | 25 | * pass dialect by name, not by structure 26 | 27 | Fixes 28 | ----- 29 | 30 | * fixed compilation of function calls 31 | 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Stefan Urbanek 4 | 5 | Permission is hereby granted, free of charge, to 
any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Expressions 2 | =========== 3 | 4 | Arithmetic expression parser library. Embed customized expression evaluation 5 | into your application or library. Example uses: 6 | 7 | * Safely process an expression entered through a web application, 8 | for example some formula to be plotted. 
The library allows safe translation 9 | of such expression without exposing any application's internals 10 | * precompiler that checks for allowed and denied identifiers in an expression 11 | * have a common expression language through your application regardless of the 12 | backend languages 13 | * compile arithmetic expression to any other expression tree (semantic), for 14 | example [SQLAlchemy](http://docs.sqlalchemy.org/en/rel_0_7/core/expression_api.html) expression objects 15 | 16 | 17 | Part of the [Data Brewery](http://databrewery.org) 18 | 19 | Installation 20 | ------------ 21 | 22 | Install using pip: 23 | 24 | pip install expressions 25 | 26 | Expressions sources are available at [Github](https://github.com/DataBrewery/expressions) 27 | 28 | Works with Python 2.7 and Python 3.3. Uses [Grako](https://bitbucket.org/apalala/grako). 29 | 30 | Quick Start 31 | ----------- 32 | 33 | ```python 34 | from expressions import Compiler 35 | 36 | compiler = Compiler() 37 | result = compiler.compile("min(a, b) * 2") 38 | ``` 39 | 40 | Result from the default (non-extended) compiler will be abstract semantic 41 | graph containing nodes *Literal*, *Variable*, *Function*, *Binary* and *Unary* 42 | operators. Subclasses of `Compiler` can yield different outputs by 43 | implementing just few simple methods which represent semantic graph nodes 44 | (same as the objects). 45 | 46 | Example 47 | ------- 48 | 49 | Here is an example compiler that allows only certain variables. 
The list of 50 | allowed variables is provided in the compilation context: 51 | 52 | ```python 53 | from expressions import Compiler 54 | 55 | class AllowingCompiler(Compiler): 56 | def compile_literal(self, context, literal): 57 | return repr(literal) 58 | 59 | def compile_variable(self, context, variable): 60 | if context and variable.name not in context: 61 | raise Exception("Variable %s is not allowed" % variable) 62 | 63 | return variable 64 | 65 | def compile_binary(self, context, operator, op1, op2): 66 | return "(%s %s %s)" % (op1, operator, op2) 67 | 68 | def compile_function(self, context, function, args): 69 | arglist = ", ".join(str(arg) for arg in args) 70 | return "%s(%s)" % (function, arglist) 71 | ``` 72 | 73 | Allow only `a` and `b` variables: 74 | 75 | compiler = AllowingCompiler() 76 | 77 | allowed_variables = ["a", "b"] 78 | 79 | Try to compile and execute the expression: 80 | 81 | result = compiler.compile("a + b", allowed_variables) 82 | 83 | a = 1 84 | b = 1 85 | print(eval(result)) 86 | 87 | This will fail, because only `a` and `b` are allowed, `c` is not: 88 | 89 | result = compiler.compile("a + c", allowed_variables) 90 | 91 | 92 | See the examples source directory for more examples such as very simplified 93 | expression-to-SQLAlchemy compiler. 
94 | 95 | Syntax 96 | ====== 97 | 98 | Example expressions: 99 | 100 | ```sql 101 | 1 + 1 102 | (a + b) ^ 2 103 | sum(amount) / count() 104 | date.year = 2010 and amount > 10 105 | ``` 106 | 107 | * Binary arithmetic operators: `+`, `-`, `*`, `/`, `%` (modulo), `^` (power) 108 | * Binary comparison operators: `<`, `<=`, `=`, `>=`, `>`, `in`, `is` 109 | * Binary bit-wise operators: `|` (or), `&` (and), `<<` (shift left), `>>` (shift right) 110 | * Binary logical operators: `and`, `or` 111 | * Unary operators: `+`, `-`, `~` (bit-wise not) 112 | 113 | * Function call: `function_name(arg1, arg2, ...)` 114 | 115 | *Variable* and *function* names are either regular identifiers or identifiers 116 | separated by `.`. There is no value dereference and the dot `.` is just 117 | a namespace composition operator for variable names. Example variable names: 118 | `amount`, `date.year`, `product.name`. 119 | 120 | The suggested meaning of the operators is based mostly on the 121 | [PostgreSQL operators](http://www.postgresql.org/docs/9.0/static/functions.html) 122 | 123 | Writing a compiler 124 | ================== 125 | 126 | To write a custom compiler, subclass the `Compiler` class and implement all or 127 | some of the following methods: 128 | 129 | * *compile_function(context, reference, args)* – compile a function call. The 130 | `reference` is the same kind of object as passed to the 131 | *compile_variable()*, `args` is a list of function arguments. Default 132 | implementation returns an object with attributes `reference` and `args` 133 | * *compile_binary(context, operator, left, right)* – compile a binary 134 | operator `operator` with two operands `left` and `right`. Default 135 | implementation returns an object with attributes `operator`, `left` and `right` 136 | * *compile_unary(context, operator, operand)* – compile a unary `operator` with 137 | a single `operand`. Default implementation returns an object with attributes 138 | `operator` and `operand`. 
139 | * *compile_variable(context, variable)* – compile a variable reference 140 | `variable` which is an object with properties `name` and `reference`. `name` 141 | is the full variable name (joined with `.`), `reference` is a list of 142 | variable name components. Return value should be either evaluated variable 143 | as a constant or some other useful variable reference. 144 | * *compile_literal(context, literal)* – compile an integer, float or a string 145 | object `literal`. Default implementation just passes the argument. You 146 | rarely need to override this method. 147 | * *finalize(context, object)* – return the final compilation result. 148 | 149 | 150 | When compiling function arguments or operator operands you should check 151 | whether they are literals or instances of a `Variable`. For example: 152 | 153 | ```python 154 | def compile_function(self, context, reference, args): 155 | # Assume that the context is a dictionary with variables and functions 156 | 157 | values = [] 158 | for arg in args: 159 | if isinstance(arg, Variable): 160 | value = context[arg.name] 161 | else: 162 | value = arg 163 | values.append(value) 164 | 165 | function = context[reference.name] 166 | 167 | return function(*values) 168 | ``` 169 | 170 | Example compiler: Identifier Preprocessor 171 | 172 | The following compiler is included in the library: 173 | 174 | ```python 175 | class IdentifierPreprocessor(Compiler): 176 | def __init__(self): 177 | super(IdentifierPreprocessor, self).__init__() 178 | 179 | self.variables = set() 180 | self.functions = set() 181 | 182 | def compile_variable(self, context, variable): 183 | self.variables.add(variable) 184 | return variable 185 | 186 | def compile_function(self, context, function, args): 187 | self.functions.add(function) 188 | return function 189 | ``` 190 | 191 | Use: 192 | 193 | ```python 194 | >>> preproc = IdentifierPreprocessor() 195 | >>> preproc.compile("a + b") 196 | >>> preproc.compile("sum(amount)") 197 | ``` 198 | 199 | 
The `preproc.variables` will contain *Variable* objects for `a`, `b` and 200 | `amount`, the `preproc.functions` will contain one *Variable* object `sum`: 201 | 202 | ```python 203 | >>> print(preproc.variables) 204 | {Variable(amount), Variable(b), Variable(a)} 205 | >>> print(preproc.functions) 206 | {Variable(sum)} 207 | ``` 208 | 209 | Note that the *Variable* object represents any named object reference – both 210 | variables and functions. 211 | 212 | Classes 213 | ======= 214 | 215 | The following classes are provided by the library: 216 | 217 | * *Compiler* – core compiler class that generates default structure, 218 | `compile_*` methods can be overridden to generate custom results 219 | * *IdentifierPreprocessor* – a *Compiler* subclass with two attributes 220 | `variables` and `functions` containing list of `Variable` objects collected 221 | from the compiled expression. Can be used for validation or preparation of 222 | variables 223 | 224 | 225 | What Expressions is *not* 226 | ------------------------- 227 | 228 | * This is *not* a Python expression compiler. The grammar is based on very 229 | basic SQL grammar and few other simple SQL grammar features might be added in 230 | the future. There is no SQL compatibility guaranteed though. It is not meant 231 | to be a rich expression language, but a small subset of quite common expressions to 232 | allow easy translation to other languages or object structures. Main use is 233 | arithmetic expression support for a modular application with different 234 | backends 235 | 236 | * It is *not* an expression of an object-oriented language – it does not have 237 | access to object attributes – the '.' dot operator is just an attribute name 238 | concatenation. The compiler receives full object reference as a string and 239 | as a list of reference components. 240 | 241 | 242 | License 243 | ------- 244 | 245 | Expressions framework is licensed under the MIT license. 246 | 247 | For more information see the LICENSE file. 
248 | 249 | 250 | Author 251 | ------ 252 | 253 | Stefan Urbanek, stefan.urbanek@gmail.com, Twitter: @Stiivi 254 | 255 | 256 | -------------------------------------------------------------------------------- /examples/allowing_compiler.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf8 -*- 2 | from expressions import Compiler 3 | 4 | class AllowingCompiler(Compiler): 5 | def compile_literal(self, context, literal): 6 | return repr(literal) 7 | 8 | def compile_variable(self, context, variable): 9 | if context and variable.name not in context: 10 | raise Exception("Variable '%s' is not allowed" % variable) 11 | 12 | return variable 13 | 14 | def compile_binary(self, context, operator, op1, op2): 15 | return "(%s %s %s)" % (op1, operator, op2) 16 | 17 | def compile_function(self, context, function, args): 18 | arglist = ", " % args 19 | return "%s(%s)" % (function, arglist) 20 | 21 | 22 | allowed_variables = ["a", "b"] 23 | 24 | compiler = AllowingCompiler() 25 | 26 | result = compiler.compile("a + b", allowed_variables) 27 | 28 | a = 1 29 | b = 1 30 | print("Result is {}".format(eval(result))) 31 | 32 | # This will fail, because only a and b are allowed 33 | try: 34 | result = compiler.compile("a + c", allowed_variables) 35 | except Exception as e: 36 | print("Compiler raised an exception (as expected): {}".format(e)) 37 | -------------------------------------------------------------------------------- /examples/sqlalchemy_compiler.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf8 -*- 2 | 3 | from expressions import Compiler 4 | from sqlalchemy import create_engine, MetaData, Table, Integer, Column 5 | from sqlalchemy import sql 6 | 7 | # Contents: 8 | # 9 | # 1. Expression compiler .............. line 18 10 | # 2. Load example data ................ line 52 11 | # 3. Compile and use the expression ... 
line 70 12 | 13 | 14 | # A simple compiler that generates SQL Alchemy object structures from an 15 | # arithmetic expression. 16 | # 17 | # Compilation context is a statement. Variables in the expression refer to the 18 | # column names. 19 | 20 | class SQLAlchemyExpressionCompiler(Compiler): 21 | def compile_literal(self, context, literal): 22 | """Compile a literal object – we just pass it along and let the 23 | SQLAlchemy functions deal with it""" 24 | return literal 25 | 26 | def compile_variable(self, context, variable): 27 | """Compile a variable – in our case it refers to a column of a 28 | SQL table or a SQL statement. The statement is our context of 29 | compilation.""" 30 | return context.c[variable.name] 31 | 32 | def compile_binary(self, context, operator, op1, op2): 33 | """Return SQLAlchemy object construct using an operator.""" 34 | 35 | if operator == "+": 36 | return op1 + op2 37 | elif operator == "-": 38 | return op1 - op2 39 | elif operator == "*": 40 | return op1 * op2 41 | elif operator == "/": 42 | return op1 / op2 43 | else: 44 | raise SyntaxError("Unknown operator '%s'" % operator) 45 | 46 | # Some data: 47 | data = [ 48 | # id, transaction, amount 49 | [ 1, 10, 100], 50 | [ 2, 20, 150], 51 | [ 3, 30, 200] 52 | ] 53 | 54 | # 2. Create the example data table 55 | 56 | engine = create_engine("sqlite:///") 57 | metadata = MetaData(engine) 58 | 59 | table = Table("Data", metadata, 60 | Column("id", Integer), 61 | Column("transactions", Integer), 62 | Column("amount", Integer) 63 | ) 64 | 65 | table.create() 66 | 67 | # ... and load it with data 68 | for row in data: 69 | engine.execute(table.insert().values(row)) 70 | 71 | # 72 | # 3. 
The Expression 73 | # 74 | compiler = SQLAlchemyExpressionCompiler() 75 | 76 | # Compile the expression within a context of the created table 77 | # 78 | selection = compiler.compile("(amount / transactions) * 2", table) 79 | print("compiled selection type: %s" % type(selection)) 80 | print("compiled selection content: %s" % selection) 81 | 82 | statement = sql.expression.select([selection], table) 83 | print("SQL statement: %s" % statement) 84 | 85 | result = statement.execute() 86 | print("result: %s" % list(result)) 87 | 88 | -------------------------------------------------------------------------------- /expressions/__init__.py: -------------------------------------------------------------------------------- 1 | from .compiler import * 2 | 3 | __version__ = '0.2.2' 4 | 5 | -------------------------------------------------------------------------------- /expressions/compat.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | """Python compatibility utilities""" 3 | 4 | from __future__ import absolute_import 5 | 6 | import sys 7 | 8 | py3k = sys.version_info >= (3, 0) 9 | 10 | if py3k: 11 | string_type = str 12 | text_type = str 13 | 14 | def unicode_escape(s): 15 | return bytes(s, "utf-8").decode("unicode_escape") 16 | 17 | else: 18 | string_type = basestring 19 | text_type = unicode 20 | 21 | def unicode_escape(s): 22 | return s.decode("string-escape") 23 | 24 | -------------------------------------------------------------------------------- /expressions/compiler.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | from typing import List, Any, Union, Optional, Set, Tuple 7 | 8 | from .grammar import ExpressionParser, ExpressionSemantics 9 | from grako.exceptions import FailedSemantics 10 | from . 
import compat 11 | 12 | __all__ = [ 13 | "Compiler", 14 | "ExpressionInspector", 15 | "Variable", 16 | "Function", 17 | "BinaryOperator", 18 | "UnaryOperator", 19 | "Node", 20 | "inspect_variables" 21 | ] 22 | 23 | 24 | class Node(object): 25 | pass 26 | 27 | class Function(Node): 28 | def __init__(self, variable, args): 29 | # type: (Variable, List[str]) -> None 30 | self.reference = variable.reference # type: str 31 | self.name = variable.name # type: str 32 | self.args = args # type: List[str] 33 | 34 | def __str__(self): 35 | # type: () -> str 36 | return "{}({})".format(self.name, ", ".join(str(a) for a in self.args)) 37 | 38 | def __repr__(self): 39 | # type: () -> str 40 | return "{}({})".format(self.name, ", ".join(repr(a) for a in self.args)) 41 | 42 | 43 | class Variable(Node): 44 | def __init__(self, reference): 45 | # type: (str) -> None 46 | """Creates a variable reference. Attributes: `reference` – variable 47 | reference as a list of variable parts and `name` as a full variable 48 | name. 
This object is passed to the `compile_variable()` and 49 | `compile_function()`""" 50 | 51 | self.reference = reference 52 | self.name = ".".join(self.reference) 53 | 54 | def __str__(self): 55 | # type: () -> str 56 | return self.name 57 | 58 | def __repr__(self): 59 | # type: () -> str 60 | return "Variable({.name})".format(self) 61 | 62 | def __eq__(self, other): 63 | # type: (Any) -> bool 64 | if not isinstance(other, Variable): 65 | return NotImplemented 66 | else: 67 | return self.name == other.name \ 68 | and self.reference == other.reference 69 | 70 | def __hash__(self): 71 | # type: () -> int 72 | return hash(self.name) 73 | 74 | 75 | class UnaryOperator(Node): 76 | def __init__(self, operator, operand): 77 | # type: (str, str) -> None 78 | self.operator = operator 79 | self.operand = operand 80 | 81 | def __str__(self): 82 | # type: () -> str 83 | return "({0.operator} {0.operand})".format(self) 84 | 85 | def __repr__(self): 86 | # type: () -> str 87 | return "Unary({0.operator!r}, {0.operand!r})".format(self) 88 | 89 | 90 | class BinaryOperator(Node): 91 | def __init__(self, operator, left, right): 92 | # type: (str, str, str) -> None 93 | self.operator = operator 94 | self.left = left 95 | self.right = right 96 | 97 | def __str__(self): 98 | # type: () -> str 99 | return "({0.left} {0.operator} {0.right})".format(self) 100 | 101 | def __repr__(self): 102 | # type: () -> str 103 | return "Binary({0.left!r}, {0.operator!r}, {0.right!r})".format(self) 104 | 105 | 106 | class _Result(object): 107 | """Wrapper class for compilation result. 
We need this to properly 108 | distinguish between our result and delegated results.""" 109 | 110 | def __init__(self, value): 111 | # type: (Any) -> None 112 | self.value = value 113 | def __str__(self): 114 | # type: () -> str 115 | return str(self.value) 116 | def __repr__(self): 117 | # type: () -> str 118 | return "_Result({})".format(repr(self.value)) 119 | 120 | 121 | class _ExpressionSemantics(object): 122 | keywords = ['in', 'not', 'is', 'and', 'or'] 123 | def __init__(self, compiler, context): 124 | # type: (Any, Any) -> None 125 | self.compiler = compiler 126 | self.context = context 127 | 128 | def _default(self, ast, node_type=None, *args): 129 | # type: (Any, str, Any) -> _Result 130 | 131 | if isinstance(ast, _Result): 132 | return ast 133 | 134 | if not node_type: 135 | return ast 136 | 137 | elif node_type == "unary": 138 | operator, operand = ast 139 | result = self.compiler.compile_unary(self.context, 140 | operator, 141 | operand.value) 142 | elif node_type == "binary": 143 | left, rest = ast 144 | left = left.value 145 | 146 | for op, right in rest: 147 | # Get the object's value 148 | right = right.value 149 | 150 | left = self.compiler.compile_binary(self.context, op, 151 | left, right) 152 | result = left 153 | 154 | elif node_type == "binarynr": 155 | left, operator, right = ast 156 | result = self.compiler.compile_binary(self.context, operator, 157 | left.value, right.value) 158 | else: 159 | raise Exception("Unknown node type '{}'".format(node_type)) 160 | 161 | if isinstance(result, _Result): 162 | raise Exception("Internal compiler error - " 163 | "unexpected _Result() object") 164 | # Variable is already wrapped 165 | 166 | return _Result(result) 167 | 168 | def variable(self, ast): 169 | # type: (Any) -> _Result 170 | # Note: ast is expected to be a _Result() from the `reference` rule 171 | value = ast.value 172 | if not isinstance(ast, _Result): 173 | import pdb; pdb.set_trace() 174 | result = 
self.compiler.compile_variable(self.context, value) 175 | return _Result(result) 176 | 177 | def reference(self, ast): 178 | # type: (Any) -> _Result 179 | return _Result(Variable(ast)) 180 | 181 | def function(self, ast): 182 | # type: (Any) -> _Result 183 | ref = ast.ref.value 184 | args = [arg.value for arg in ast.args or []] 185 | result = self.compiler.compile_function(self.context, ref, args) 186 | 187 | return _Result(result) 188 | 189 | def NUMBER(self, ast): 190 | # type: (Any) -> _Result 191 | 192 | try: 193 | value = int(ast) # type: Union[int, float] 194 | except ValueError: 195 | value = float(ast) 196 | 197 | result = self.compiler.compile_literal(self.context, value) 198 | 199 | return _Result(result) 200 | 201 | def STRING(self, ast): 202 | # type: (Any) -> _Result 203 | # Strip the surrounding quotes 204 | value = compat.unicode_escape(compat.text_type(ast[1:-1])) 205 | 206 | result = self.compiler.compile_literal(self.context, value) 207 | return _Result(result) 208 | 209 | def NAME(self, ast): 210 | # type: (Any) -> _Result 211 | if ast.lower() in self.keywords: 212 | raise FailedSemantics("'{}' is a keyword.".format(ast)) 213 | return ast 214 | 215 | 216 | class Compiler(object): 217 | def __init__(self, context=None): 218 | # type: (Any) -> None 219 | """Creates an expression compiler with a `context` object. The context 220 | object is a custom object that subclasses might use during the 221 | compilation process for example to get variables by name, function 222 | objects. Context can be also used store information while compiling 223 | multiple expressions such as list of used attributes for analyzing 224 | requirements for query construction.""" 225 | self.context = context 226 | 227 | def compile(self, text, context=None): 228 | # type: (str, Optional[Any]) -> Any 229 | """Compiles the `text` expression, returns a finalized object. 
""" 230 | 231 | if context is None: 232 | context = self.context 233 | 234 | parser = ExpressionParser() 235 | 236 | result = parser.parse(text, 237 | rule_name="arithmetic_expression", 238 | comments_re="#.*", 239 | ignorecase=False, 240 | semantics=_ExpressionSemantics(self, context)) 241 | 242 | # Result is of type _Result 243 | 244 | return self.finalize(context, result.value) 245 | 246 | def compile_literal(self, context, literal): 247 | # type: (Any, Any) -> Any 248 | """Compile a literal object such as number or a string. Default 249 | implementation returns a string or numeric object.""" 250 | return literal 251 | 252 | def compile_variable(self, context, reference): 253 | # type: (Any, Any) -> Any 254 | """Compile variable `reference`. Default implementation returns 255 | `Variable` object.""" 256 | return reference 257 | 258 | def compile_binary(self, context, operator, left, right): 259 | # type: (Any, Any, Any, Any) -> Any 260 | """Compile `operator` with operands `left` and `right`. Default 261 | implementation returns `BinaryOperator` object with attributes 262 | `operator`, `left` and `right`.""" 263 | return BinaryOperator(operator, left, right) 264 | 265 | def compile_unary(self, context, operator, operand): 266 | # type: (Any, Any, Any) -> Any 267 | """Called when an unary `operator` is encountered. Default 268 | implementation returns `UnaryOperator` object with attributes 269 | `operator` and `operand`""" 270 | return UnaryOperator(operator, operand) 271 | 272 | def compile_function(self, conext, function, args): 273 | # type: (Any, Any, List[Any]) -> Any 274 | """Called when a function call is encountered in the expression. 275 | `function` is a `Variable` object (you can use 276 | `str(function)` to get the full function name reference as string), 277 | `args` is a list of function arguments. 
278 | """ 279 | return Function(function, args) 280 | 281 | def finalize(self, context, obj): 282 | # type: (Any, Any) -> Any 283 | """Return final object as a result of expression compilation. By 284 | default returns the object returned by the last executed compilation 285 | method. 286 | 287 | Subclasses can override this method if they want to wrap the result 288 | object in another object or to finalize collected statement analysis.""" 289 | return obj 290 | 291 | 292 | class ExpressionInspector(Compiler): 293 | """Preprocesses an expression. Returns tuple of sets (`variables`, 294 | `functions`)""" 295 | def __init__(self): 296 | # type: () -> None 297 | super(ExpressionInspector, self).__init__() 298 | 299 | self.variables = set() # type: Set[str] 300 | self.functions = set() # type: Set[str] 301 | 302 | def compile_variable(self, context, variable): 303 | # type: (Any, Any) -> Any 304 | self.variables.add(variable.name) 305 | return variable 306 | 307 | def compile_function(self, context, function, args): 308 | # type: (Any, Any, Any) -> Any 309 | self.functions.add(function.name) 310 | return function 311 | 312 | def finalize(self, context, obj): 313 | # type: (Any, Any) -> Tuple[Any, Any] 314 | return (self.variables, self.functions) 315 | 316 | def inspect_variables(text): 317 | # type: (str) -> Set[str] 318 | """Return set of variables in expression `text`""" 319 | inspector = ExpressionInspector() 320 | inspector.compile(text) 321 | return inspector.variables 322 | -------------------------------------------------------------------------------- /expressions/grammar.ebnf: -------------------------------------------------------------------------------- 1 | arithmetic_expression = test $; 2 | test = or_test; 3 | or_test(binary) = and_test {'or' and_test }; 4 | and_test(binary) = not_test {'and' not_test }; 5 | not_test(unary) = 'not' not_test | comparison; 6 | comparison(binary) = or_expr { comparison_operator or_expr }; 7 | 8 | or_expr(binary) = and_expr 
{ '|' and_expr }; 9 | and_expr(binary) = shift_expr { '&' shift_expr }; 10 | shift_expr(binary) = arith_expr { ('<<' | '>>') arith_expr }; 11 | 12 | arith_expr(binary) = term {('+' | '-') term} ; 13 | term(binary) = factor {('*' | '/' | '%') factor} ; 14 | factor(unary) = ('+' | '-' | '~') factor | power ; 15 | power(binarynr) = atom ['^' factor] ; 16 | 17 | atom = NUMBER 18 | | STRING 19 | | function 20 | | variable 21 | | ( '(' @:test ')' ); 22 | 23 | variable = reference; 24 | function = ref:reference '(' [ args:arglist ] ')'; 25 | arglist = { @+:argument ','} @+:argument; 26 | argument = test; 27 | 28 | reference = { @+:NAME '.'} @+:NAME; 29 | 30 | NUMBER = ?/[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?/? ; 31 | STRING = ?/'[^'\\\r\n]*(?:\\.[^'\\\r\n]*)*'/? ; 32 | 33 | comparison_operator = ('=' | '!=' | '<=' | '<' | '>=' | '>' 34 | | 'in' | 'is' ); (* choice is ordered: '<=' and '>=' must come before '<' and '>' *) 35 | 36 | (* Allow any unicode character to be an identifier *) 37 | NAME = ?/\w+/?; 38 | 39 | (*TODO: Missing operators and features: 40 | 41 | – array subscript '[' low [ ':' high ] ']' 42 | – not in, is not 43 | – || string concatenation operator 44 | *) 45 | -------------------------------------------------------------------------------- /expressions/grammar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # CAVEAT UTILITOR 5 | # 6 | # This file was automatically generated by Grako. 7 | # 8 | # https://pypi.python.org/pypi/grako/ 9 | # 10 | # Any changes you make to it will be overwritten the next time 11 | # the file is generated. 
12 | 13 | 14 | from __future__ import print_function, division, absolute_import, unicode_literals 15 | from grako.parsing import graken, Parser 16 | 17 | 18 | __version__ = (2014, 9, 19, 16, 34, 11, 4) 19 | 20 | __all__ = [ 21 | 'ExpressionParser', 22 | 'ExpressionSemantics', 23 | 'main' 24 | ] 25 | 26 | 27 | class ExpressionParser(Parser): 28 | def __init__(self, whitespace=None, nameguard=True, **kwargs): 29 | super(ExpressionParser, self).__init__( 30 | whitespace=whitespace, 31 | nameguard=nameguard, 32 | **kwargs 33 | ) 34 | 35 | @graken() 36 | def _arithmetic_expression_(self): 37 | self._test_() 38 | self._check_eof() 39 | 40 | @graken() 41 | def _test_(self): 42 | self._or_test_() 43 | 44 | @graken('binary') 45 | def _or_test_(self): 46 | self._and_test_() 47 | 48 | def block0(): 49 | self._token('or') 50 | self._and_test_() 51 | self._closure(block0) 52 | 53 | @graken('binary') 54 | def _and_test_(self): 55 | self._not_test_() 56 | 57 | def block0(): 58 | self._token('and') 59 | self._not_test_() 60 | self._closure(block0) 61 | 62 | @graken('unary') 63 | def _not_test_(self): 64 | with self._choice(): 65 | with self._option(): 66 | self._token('not') 67 | self._not_test_() 68 | with self._option(): 69 | self._comparison_() 70 | self._error('no available options') 71 | 72 | @graken('binary') 73 | def _comparison_(self): 74 | self._or_expr_() 75 | 76 | def block0(): 77 | self._comparison_operator_() 78 | self._or_expr_() 79 | self._closure(block0) 80 | 81 | @graken('binary') 82 | def _or_expr_(self): 83 | self._and_expr_() 84 | 85 | def block0(): 86 | self._token('|') 87 | self._and_expr_() 88 | self._closure(block0) 89 | 90 | @graken('binary') 91 | def _and_expr_(self): 92 | self._shift_expr_() 93 | 94 | def block0(): 95 | self._token('&') 96 | self._shift_expr_() 97 | self._closure(block0) 98 | 99 | @graken('binary') 100 | def _shift_expr_(self): 101 | self._arith_expr_() 102 | 103 | def block0(): 104 | with self._group(): 105 | with self._choice(): 106 | 
with self._option(): 107 | self._token('<<') 108 | with self._option(): 109 | self._token('>>') 110 | self._error('expecting one of: << >>') 111 | self._arith_expr_() 112 | self._closure(block0) 113 | 114 | @graken('binary') 115 | def _arith_expr_(self): 116 | self._term_() 117 | 118 | def block0(): 119 | with self._group(): 120 | with self._choice(): 121 | with self._option(): 122 | self._token('+') 123 | with self._option(): 124 | self._token('-') 125 | self._error('expecting one of: + -') 126 | self._term_() 127 | self._closure(block0) 128 | 129 | @graken('binary') 130 | def _term_(self): 131 | self._factor_() 132 | 133 | def block0(): 134 | with self._group(): 135 | with self._choice(): 136 | with self._option(): 137 | self._token('*') 138 | with self._option(): 139 | self._token('/') 140 | with self._option(): 141 | self._token('%') 142 | with self._option(): 143 | self._token('//') 144 | self._error('expecting one of: % * / //') 145 | self._factor_() 146 | self._closure(block0) 147 | 148 | @graken('unary') 149 | def _factor_(self): 150 | with self._choice(): 151 | with self._option(): 152 | with self._group(): 153 | with self._choice(): 154 | with self._option(): 155 | self._token('+') 156 | with self._option(): 157 | self._token('-') 158 | with self._option(): 159 | self._token('~') 160 | self._error('expecting one of: + - ~') 161 | self._factor_() 162 | with self._option(): 163 | self._power_() 164 | self._error('no available options') 165 | 166 | @graken() 167 | def _power_(self): 168 | self._atom_() 169 | with self._optional(): 170 | self._token('^') 171 | self._factor_() 172 | 173 | @graken() 174 | def _atom_(self): 175 | with self._choice(): 176 | with self._option(): 177 | self._NUMBER_() 178 | with self._option(): 179 | self._STRING_() 180 | with self._option(): 181 | self._function_() 182 | with self._option(): 183 | self._variable_() 184 | with self._option(): 185 | with self._group(): 186 | self._token('(') 187 | self._test_() 188 | self.ast['@'] = 
@graken()
def _comparison_operator_(self):
    """Match one comparison operator token.

    Accepted tokens: ``==``, ``!=``, ``<=``, ``<``, ``>=``, ``>``, ``in``
    and ``is``. Alternatives are tried in the order written, so every
    operator that has another operator as its prefix is listed first.
    """
    with self._group():
        with self._choice():
            with self._option():
                self._token('==')
            with self._option():
                self._token('!=')
            # '<=' before '<' and '>=' before '>': with the shorter token
            # first, '<' would match the start of '<=' and the remaining
            # '=' would make the right-hand operand fail, so '<=' and
            # '>=' could never be recognised.
            with self._option():
                self._token('<=')
            with self._option():
                self._token('<')
            with self._option():
                self._token('>=')
            with self._option():
                self._token('>')
            with self._option():
                self._token('in')
            with self._option():
                self._token('is')
            self._error('expecting one of: != < <= == > >= in is')
def main(filename, startrule, trace=False, whitespace=None):
    """Parse the contents of *filename* and print the resulting AST.

    The file is read in full and handed to a fresh ``ExpressionParser``
    (created with ``parseinfo=False``), starting at rule *startrule*.
    *trace* and *whitespace* are forwarded to the parser unchanged. The
    AST is printed first as-is and then as indented JSON, each section
    followed by an empty ``print()``.
    """
    import json

    with open(filename) as source_file:
        source = source_file.read()

    tree = ExpressionParser(parseinfo=False).parse(
        source,
        startrule,
        filename=filename,
        trace=trace,
        whitespace=whitespace,
    )

    # Renderers are callables so the JSON dump is only attempted after the
    # plain AST section has already been printed.
    sections = (
        ('AST:', lambda: tree),
        ('JSON:', lambda: json.dumps(tree, indent=2)),
    )
    for heading, render in sections:
        print(heading)
        print(render())
        print()
# Generate grammar parser
#
# Runs the grako module on the EBNF grammar and writes the resulting Python
# parser module. All paths below are relative, so this presumably has to be
# run from the repository root -- confirm before using from elsewhere.

# Input grammar handed to grako as its positional argument.
GRAMMAR=expressions/grammar.ebnf
# Destination file, passed to grako with -o.
PARSER=expressions/grammar.py
# Name passed to grako with -m; presumably the prefix of the generated
# ExpressionParser / ExpressionSemantics classes -- TODO confirm.
NAME=Expression

python -m grako -o $PARSER -m $NAME $GRAMMAR
class ValidatingCompiler(Compiler):
    """Compiler used by the tests that only checks names against a context.

    Variables and functions missing from *context* raise
    ``ExpressionError``; literals and operators are accepted silently.
    Every hook returns ``None``.

    NOTE(review): ``ExpressionError`` is not among the names this test
    module imports, so reaching either ``raise`` would currently surface
    as a ``NameError`` -- confirm where the exception should come from.
    """

    def compile_variable(self, context, variable):
        # Known variable: nothing to do.
        if variable in context:
            return None
        raise ExpressionError(variable)

    def compile_function(self, context, function, args):
        # Known function: nothing to do; arguments are not inspected.
        if function in context:
            return None
        raise ExpressionError(function)

    def compile_literal(self, context, literal):
        return None

    def compile_operator(self, context, operator, op1, op2):
        return None
def test_variable(self):
    # Plain and dotted names must both compile to a Variable whose name is
    # the source text and whose reference is the list of dotted parts.
    compiler = Compiler()
    expectations = (
        ("foo", ["foo"]),
        ("foo.bar.baz", ["foo", "bar", "baz"]),
    )
    for source, parts in expectations:
        compiled = compiler.compile(source)
        self.assertIsInstance(compiled, Variable)
        self.assertEqual(compiled.name, source)
        self.assertEqual(compiled.reference, parts)
class CustomCompilersTestCase(unittest.TestCase):
    """Checks the function and variable names gathered by ExpressionInspector."""

    def test_preprocessor(self):
        # Names from both operands of the product must be collected.
        inspector = ExpressionInspector()
        inspector.compile("foo(a + b) * bar(b + c)")

        seen_functions = set(inspector.functions)
        seen_variables = set(inspector.variables)

        self.assertEqual(seen_functions, {"foo", "bar"})
        self.assertEqual(seen_variables, {"a", "b", "c"})

    def test_preprocessor_unique(self):
        # Repeated names must be reported once each; comparing sorted lists
        # (not sets) makes duplicates fail the assertion.
        inspector = ExpressionInspector()
        inspector.compile("foo(a,a,b,b,c,c,c,c) + foo(a,b,c)")

        ordered_functions = sorted(inspector.functions)
        ordered_variables = sorted(inspector.variables)

        self.assertEqual(ordered_functions, ["foo"])
        self.assertEqual(ordered_variables, ["a", "b", "c"])