├── tests ├── __init__.py ├── test_yaml_security.py ├── test_complex_parsing.py ├── test_sqlalchemy_querying.py └── test_basic_parsing.py ├── requirements.txt ├── MANIFEST.in ├── setup.cfg ├── scripts ├── translate_readme.sh └── pypi.sh ├── .travis.yml ├── mlalchemy ├── __init__.py ├── errors.py ├── constants.py ├── utils.py ├── testing.py ├── parser.py └── structures.py ├── LICENSE.md ├── .gitignore ├── setup.py ├── README.md └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sqlalchemy>=1.1 2 | future>=0.16.0 3 | PyYAML>=3.11 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE.md 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.rst 3 | 4 | [bdist_wheel] 5 | universal=1 6 | 7 | -------------------------------------------------------------------------------- /scripts/translate_readme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pandoc --from=markdown --to=rst --output=README.rst README.md 3 | 4 | -------------------------------------------------------------------------------- /scripts/pypi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm dist/* 3 | python setup.py sdist bdist_wheel 4 | rm dist/*.egg 5 | twine upload dist/* 6 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.5" 5 | - "3.6" 6 | install: 7 | - "pip install -r requirements.txt" 8 | script: 9 | - "python -m unittest discover" 10 | -------------------------------------------------------------------------------- /mlalchemy/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from mlalchemy.constants import * 4 | from mlalchemy.errors import * 5 | from mlalchemy.structures import * 6 | from mlalchemy.parser import * 7 | 8 | 9 | __version__ = "0.2.2" 10 | 11 | -------------------------------------------------------------------------------- /tests/test_yaml_security.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | import unittest 6 | import yaml 7 | 8 | from mlalchemy import * 9 | from mlalchemy.testing import MLAlchemyTestCase 10 | 11 | 12 | class TestYamlSecurity(MLAlchemyTestCase): 13 | 14 | def test_basic_yaml_security(self): 15 | with self.assertRaises(yaml.constructor.ConstructorError): 16 | parse_yaml_query('!!python/object/apply:os.system ["echo Hello"]') 17 | 18 | 19 | if __name__ == "__main__": 20 | unittest.main() 21 | 22 | -------------------------------------------------------------------------------- /mlalchemy/errors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | __all__ = [ 6 | "MLAlchemyError", 7 | "InvalidOperatorError", 8 | "InvalidComparatorError", 9 | "QuerySyntaxError", 10 | "InvalidTableError", 11 | "InvalidFieldError" 12 | ] 13 | 14 | 15 | class MLAlchemyError(Exception): 16 | pass 17 | 18 | 19 | class 
InvalidOperatorError(MLAlchemyError): 20 | pass 21 | 22 | 23 | class InvalidComparatorError(MLAlchemyError): 24 | pass 25 | 26 | 27 | class QuerySyntaxError(MLAlchemyError): 28 | pass 29 | 30 | 31 | class InvalidTableError(MLAlchemyError): 32 | pass 33 | 34 | 35 | class InvalidFieldError(MLAlchemyError): 36 | pass 37 | -------------------------------------------------------------------------------- /mlalchemy/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | __all__ = [ 6 | "OP_AND", 7 | "OP_OR", 8 | "OP_NOT", 9 | "OPERATORS", 10 | "COMP_EQ", 11 | "COMP_GT", 12 | "COMP_GTE", 13 | "COMP_LT", 14 | "COMP_LTE", 15 | "COMP_NEQ", 16 | "COMP_LIKE", 17 | "COMP_IN", 18 | "COMP_NIN", 19 | "COMP_IS", 20 | "COMPARATORS", 21 | "ORDER_ASC", 22 | "ORDER_DESC", 23 | "QUERY_ORDERS" 24 | ] 25 | 26 | 27 | # Supported logical SQL query operators 28 | OP_AND = "$and" 29 | OP_OR = "$or" 30 | OP_NOT = "$not" 31 | OPERATORS = {OP_AND, OP_OR, OP_NOT} 32 | 33 | # Supported SQL query comparators 34 | COMP_EQ = "$eq" 35 | COMP_GT = "$gt" 36 | COMP_GTE = "$gte" 37 | COMP_LT = "$lt" 38 | COMP_LTE = "$lte" 39 | COMP_NEQ = "$neq" 40 | COMP_LIKE = "$like" 41 | COMP_IN = "$in" 42 | COMP_NIN = "$nin" 43 | COMP_IS = "$is" 44 | COMPARATORS = {COMP_EQ, COMP_GT, COMP_GTE, COMP_LT, COMP_LTE, COMP_NEQ, COMP_LIKE, COMP_IN, COMP_NIN, 45 | COMP_IS} 46 | 47 | ORDER_ASC = "asc" 48 | ORDER_DESC = "desc" 49 | QUERY_ORDERS = {ORDER_ASC, ORDER_DESC} 50 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Thane Thomson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without 
restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mlalchemy/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from datetime import date, datetime 6 | import json 7 | import re 8 | 9 | __all__ = [ 10 | "is_camelcase_string", 11 | "is_kebabcase_string", 12 | "camelcase_to_snakecase", 13 | "kebabcase_to_snakecase", 14 | "json_date_serializer", 15 | "json_dumps" 16 | ] 17 | 18 | KEBABCASE_DETECT_RE = re.compile(r"^(([a-z][a-z0-9]+)\-)*([a-z][a-z0-9]+)$") 19 | KEBABCASE_REPLACE_RE = re.compile(r"([a-z]+)\-") 20 | 21 | CAMELCASE_DETECT_RE = re.compile(r"^([a-zA-Z][a-z0-9]*)([A-Z][a-z0-9]*)*$") 22 | CAMELCASE_FIRST_CAP_RE = re.compile(r"(.)([A-Z][a-z]+)") 23 | CAMELCASE_ALL_CAP_RE = re.compile(r"([a-z0-9])([A-Z])") 24 | 25 | 26 | def is_camelcase_string(s): 27 | """Checks whether or not the given string is supplied in camelCase.""" 28 | return CAMELCASE_DETECT_RE.match(s) is not None 29 | 30 | 31 | def 
# A hyphen is rewritten when it directly follows a lowercase letter OR a digit.
# The detection pattern (KEBABCASE_DETECT_RE) accepts segments such as
# "address1", so the conversion has to accept a trailing digit as well.
_KEBABCASE_HYPHEN_RE = re.compile(r"([a-z0-9])-")


def kebabcase_to_snakecase(s):
    """Convert a kebab-case identifier to snake_case.

    Every hyphen that immediately follows a lowercase letter or a digit is
    replaced by an underscore. Hyphens in any other position (for example a
    leading "-") are left untouched, exactly as before.

    Args:
        s: The string to convert, e.g. "date-of-birth".

    Returns:
        The converted string, e.g. "date_of_birth".
    """
    return _KEBABCASE_HYPHEN_RE.sub(r"\1_", s)
self.assertQueryFragmentEquals( 39 | src.sub_fragments[i], 40 | qf.sub_fragments[i] 41 | ) 42 | 43 | def assertClauseEquals(self, src, clause): 44 | self.assertIsInstance(clause, MLClause) 45 | self.assertEqual(src.field, clause.field) 46 | self.assertEqual(src.comp, clause.comp) 47 | self.assertEqual(src.value, clause.value) 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
def get_version():
    """Return the package version declared in ``mlalchemy/__init__.py``.

    The file is scanned line by line for an assignment of the form
    ``__version__ = "<version>"`` and the quoted value of the first matching
    line is returned.

    Returns:
        The version string, e.g. "0.2.2".

    Raises:
        ValueError: If no line of the file declares ``__version__``.
    """
    # The capture group has to be named "version": it is read back by that
    # name below, and a "(?P" group without a "<name>" is rejected by
    # re.compile().
    pattern = re.compile(r"__version__ = \"(?P<version>[0-9.a-zA-Z-]+)\"")
    for line in read_file(os.path.join("mlalchemy", "__init__.py")):
        m = pattern.match(line)
        if m is not None:
            return m.group('version')
    raise ValueError("Cannot extract version number for MLAlchemy")
name="mlalchemy", 28 | version=get_version(), 29 | description="Library for converting YAML/JSON to SQLAlchemy SELECT queries", 30 | long_description="".join(read_file("README.rst")), 31 | author="Thane Thomson", 32 | author_email="connect@thanethomson.com", 33 | url="https://github.com/thanethomson/MLAlchemy", 34 | install_requires=[r.strip() for r in read_file("requirements.txt") if len(r.strip()) > 0], 35 | license='MIT', 36 | packages=["mlalchemy"], 37 | include_package_data=True, 38 | classifiers=[ 39 | "Development Status :: 4 - Beta", 40 | "Intended Audience :: Developers", 41 | "License :: OSI Approved :: MIT License", 42 | "Natural Language :: English", 43 | "Operating System :: POSIX", 44 | "Operating System :: MacOS", 45 | "Programming Language :: Python :: 2.7", 46 | "Programming Language :: Python :: 3.5", 47 | "Programming Language :: Python :: 3.6", 48 | "Topic :: Database", 49 | "Topic :: Utilities", 50 | "Topic :: Software Development :: Libraries" 51 | ] 52 | ) 53 | -------------------------------------------------------------------------------- /tests/test_complex_parsing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | import unittest 6 | 7 | from mlalchemy import * 8 | from mlalchemy.testing import MLAlchemyTestCase 9 | 10 | 11 | class TestComplexMLAlchemyParsing(MLAlchemyTestCase): 12 | 13 | def test_parse_complex_query1(self): 14 | query = parse_query({ 15 | "from": "SomeTable", 16 | "where": [ 17 | { 18 | "$or": [ 19 | {"field1": 1}, 20 | {"field2": "something"} 21 | ] 22 | }, 23 | { 24 | "$not": {"field3": "else"} 25 | }, 26 | { 27 | "$and": [ 28 | { 29 | "$gt": {"field1": 5} 30 | }, 31 | { 32 | "$like": {"field3": "hello%"} 33 | } 34 | ] 35 | } 36 | ] 37 | }) 38 | self.assertQueryEquals( 39 | MLQuery( 40 | table="SomeTable", 41 | query_fragment=MLQueryFragment( 42 | OP_AND, 43 | sub_fragments=[ 44 | MLQueryFragment( 45 | 
def parse_yaml_query(yaml_content):
    """Parses the given YAML string to attempt to extract a query.

    The content is loaded with ``yaml.safe_load``, so documents relying on
    Python-specific tags (e.g. ``!!python/object/apply``) are rejected by the
    YAML loader rather than being executed.

    Args:
        yaml_content: A string containing YAML content.

    Returns:
        On success, the processed MLQuery object.
    """
    # Hand the content to the logger as an argument so that it is only
    # interpolated into the message when DEBUG output is actually enabled.
    logger.debug("Attempting to parse YAML content:\n%s", yaml_content)
    return parse_query(yaml.safe_load(yaml_content))
def parse_query(qd):
    """Parses the given query dictionary to produce an MLQuery object.

    Args:
        qd: A Python dictionary (pre-parsed from JSON/YAML) from which to
            extract the query. "from" is required; "where", "offset" and
            "limit" are optional. The ordering may be given as "orderBy",
            "order-by" or "order_by" (looked up in that order of precedence).

    Returns:
        On success, the processed MLQuery object.

    Raises:
        TypeError: If qd is not a dictionary.
        QuerySyntaxError: If qd has no "from" entry.
    """
    if not isinstance(qd, dict):
        raise TypeError("Argument for query parsing must be a Python dictionary")
    if 'from' not in qd:
        raise QuerySyntaxError("Missing \"from\" argument in query")

    # The JSON rendering exists purely for the debug log. Build it only when
    # DEBUG output is enabled, and never let a value that cannot be rendered
    # as JSON abort the parsing of an otherwise valid query.
    if logger.isEnabledFor(logging.DEBUG):
        try:
            rendered = json_dumps(qd, indent=2)
        except (TypeError, ValueError):
            rendered = repr(qd)
        logger.debug("Attempting to parse query dictionary:\n%s", rendered)

    qf = parse_query_fragment(qd['where']).simplify() if 'where' in qd else None
    # a lone clause is wrapped so that the query always carries a fragment
    if isinstance(qf, MLClause):
        qf = MLQueryFragment(OP_AND, clauses=[qf])

    return MLQuery(
        qd['from'],
        query_fragment=qf,
        order_by=qd.get('orderBy', qd.get('order-by', qd.get('order_by', None))),
        offset=qd.get('offset', None),
        limit=qd.get('limit', None)
    )
parse_query_fragment(v, op=op, comp=k).simplify() 104 | else: 105 | # it must be a clause 106 | s = MLClause(k, comp, v) 107 | 108 | if isinstance(s, MLQueryFragment): 109 | sub_fragments.append(s) 110 | elif isinstance(s, MLClause): 111 | clauses.append(s) 112 | 113 | return MLQueryFragment(op, clauses=clauses, sub_fragments=sub_fragments) 114 | -------------------------------------------------------------------------------- /tests/test_sqlalchemy_querying.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | from future.utils import iteritems 5 | 6 | import os 7 | import unittest 8 | import logging 9 | 10 | from sqlalchemy import create_engine, Column, Integer, String, Date 11 | from sqlalchemy.ext.declarative import declarative_base 12 | from sqlalchemy.orm import sessionmaker 13 | 14 | from datetime import date 15 | 16 | from mlalchemy import parse_yaml_query, parse_json_query 17 | from mlalchemy.constants import * 18 | 19 | Base = declarative_base() 20 | 21 | 22 | class User(Base): 23 | __tablename__ = "users" 24 | 25 | id = Column(Integer, primary_key=True) 26 | first_name = Column(String) 27 | last_name = Column(String) 28 | date_of_birth = Column(Date) 29 | children = Column(Integer) 30 | 31 | 32 | YAML_QUERY_ALL_USERS = "from: User" 33 | JSON_QUERY_ALL_USERS = """{"from": "User"}""" 34 | YAML_QUERY_YOUNG_USERS = """from: User 35 | where: 36 | $gt: 37 | date-of-birth: 1988-01-01 38 | """ 39 | JSON_QUERY_LASTNAME_MICHAEL = """{ 40 | "from": "User", 41 | "where": { 42 | "$like": { 43 | "lastName": "Mich%" 44 | } 45 | } 46 | }""" 47 | YAML_LIMIT_QUERY = """from: User 48 | limit: 2 49 | """ 50 | 51 | YAML_ORDERED_QUERY = """from: User 52 | order-by: "-date-of-birth" 53 | """ 54 | 55 | YAML_COMPLEX_ORDERED_QUERY = """from: User 56 | order-by: 57 | - last-name 58 | - "-date-of-birth" 59 | """ 60 | 61 | YAML_COMPARATOR_QUERIES = { 62 | COMP_EQ: ("from: 
User\n" 63 | "where:\n" 64 | " first-name: Michael", {1,}), 65 | COMP_GT: ("from: User\n" 66 | "where:\n" 67 | " $gt:\n" 68 | " children: 2", {3,}), 69 | COMP_GTE: ("from: User\n" 70 | "where:\n" 71 | " $gte:\n" 72 | " children: 2", {2, 3, 4}), 73 | COMP_LT: ("from: User\n" 74 | "where:\n" 75 | " $lt:\n" 76 | " children: 2", {1,}), 77 | COMP_LTE: ("from: User\n" 78 | "where:\n" 79 | " $lte:\n" 80 | " children: 2", {1, 2, 4}), 81 | COMP_NEQ: ("from: User\n" 82 | "where:\n" 83 | " $neq:\n" 84 | " last-name: Michaels", {1,}), 85 | COMP_LIKE: ("from: User\n" 86 | "where:\n" 87 | " $like:\n" 88 | " first-name: Mich%", {1,}), 89 | COMP_IN: ("from: User\n" 90 | "where:\n" 91 | " $in:\n" 92 | " last-name:\n" 93 | " - Anderson\n" 94 | " - Michaels", {1, 2, 3}), 95 | COMP_NIN: ("from: User\n" 96 | "where:\n" 97 | " $nin:\n" 98 | " children:\n" 99 | " - 2\n" 100 | " - 3", {1,}), 101 | COMP_IS: ("from: User\n" 102 | "where:\n" 103 | " $is:\n" 104 | " last-name: null", {4,}) 105 | } 106 | 107 | 108 | DEBUG_LOGGING = (os.environ.get("DEBUG", False) == "True") 109 | 110 | 111 | class TestSqlAlchemyQuerying(unittest.TestCase): 112 | 113 | engine = None 114 | session = None 115 | tables = None 116 | data = None 117 | 118 | def setUp(self): 119 | self.engine = create_engine("sqlite:///:memory:", echo=DEBUG_LOGGING) 120 | Base.metadata.create_all(self.engine) 121 | Session = sessionmaker(bind=self.engine) 122 | self.session = Session() 123 | # add some initial data 124 | user1 = User(first_name="Michael", last_name="Anderson", date_of_birth=date(1980, 1, 1), children=0) 125 | user2 = User(first_name="James", last_name="Michaels", date_of_birth=date(1976, 10, 23), children=2) 126 | user3 = User(first_name="Andrew", last_name="Michaels", date_of_birth=date(1988, 8, 12), children=3) 127 | self.session.add_all([user1, user2, user3]) 128 | self.session.commit() 129 | 130 | self.tables = { 131 | "User": User 132 | } 133 | self.data = { 134 | "User": { 135 | user1.id: user1, 136 | user2.id: 
user2, 137 | user3.id: user3 138 | } 139 | } 140 | if DEBUG_LOGGING: 141 | logging.basicConfig( 142 | level=logging.DEBUG, 143 | format='%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s', 144 | ) 145 | 146 | def tearDown(self): 147 | self.session.close() 148 | 149 | def test_all_comparators(self): 150 | user4 = User(first_name="Gary", last_name=None, date_of_birth=date(1985, 2, 3), children=2) 151 | self.session.add(user4) 152 | self.session.commit() 153 | for comp, crit in iteritems(YAML_COMPARATOR_QUERIES): 154 | qs, expected_ids = crit 155 | results = parse_yaml_query(qs).to_sqlalchemy(self.session, self.tables).all() 156 | seen_ids = set([result.id for result in results]) 157 | self.assertEqual(expected_ids, seen_ids, "Failed with comparator: %s (expected %s, got %s)" % ( 158 | comp, expected_ids, seen_ids 159 | )) 160 | 161 | def test_basic_querying(self): 162 | self.assertAllUsers(parse_yaml_query(YAML_QUERY_ALL_USERS)) 163 | self.assertAllUsers(parse_json_query(JSON_QUERY_ALL_USERS)) 164 | self.assertYoungUsers(parse_yaml_query(YAML_QUERY_YOUNG_USERS)) 165 | self.assertMichaelsUsers(parse_json_query(JSON_QUERY_LASTNAME_MICHAEL)) 166 | self.assertLimits(parse_yaml_query(YAML_LIMIT_QUERY)) 167 | self.assertOrderedSimple(parse_yaml_query(YAML_ORDERED_QUERY)) 168 | self.assertOrderedComplex(parse_yaml_query(YAML_COMPLEX_ORDERED_QUERY)) 169 | 170 | def assertAllUsers(self, mlquery): 171 | seen_users = self.query_seen_users(mlquery) 172 | self.assertEqual(3, len(seen_users)) 173 | for user_id, user in iteritems(self.data["User"]): 174 | self.assertIn(user_id, seen_users) 175 | 176 | def assertYoungUsers(self, mlquery): 177 | seen_users = self.query_seen_users(mlquery) 178 | self.assertEqual(1, len(seen_users)) 179 | # ID 3 180 | self.assertIn(3, seen_users) 181 | 182 | def assertMichaelsUsers(self, mlquery): 183 | seen_users = self.query_seen_users(mlquery) 184 | self.assertEqual(2, len(seen_users)) 185 | self.assertIn(2, seen_users) 186 | self.assertIn(3, 
seen_users) 187 | 188 | def assertLimits(self, mlquery): 189 | seen_users = self.query_seen_users(mlquery) 190 | self.assertEqual(2, len(seen_users)) 191 | self.assertIn(1, seen_users) 192 | self.assertIn(2, seen_users) 193 | 194 | def assertOrderedSimple(self, mlquery): 195 | results = mlquery.to_sqlalchemy(self.session, self.tables).all() 196 | self.assertEqual(3, results[0].id) 197 | self.assertEqual(1, results[1].id) 198 | self.assertEqual(2, results[2].id) 199 | 200 | def assertOrderedComplex(self, mlquery): 201 | results = mlquery.to_sqlalchemy(self.session, self.tables).all() 202 | self.assertEqual(1, results[0].id) 203 | self.assertEqual(3, results[1].id) 204 | self.assertEqual(2, results[2].id) 205 | 206 | def query_seen_users(self, mlquery): 207 | results = mlquery.to_sqlalchemy(self.session, self.tables).all() 208 | seen_users = set() 209 | for result in results: 210 | seen_users.add(result.id) 211 | return seen_users 212 | 213 | 214 | if __name__ == "__main__": 215 | unittest.main() 216 | -------------------------------------------------------------------------------- /tests/test_basic_parsing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | import unittest 6 | 7 | from mlalchemy import * 8 | from mlalchemy.testing import MLAlchemyTestCase 9 | 10 | 11 | class TestBasicMLAlchemyParsing(MLAlchemyTestCase): 12 | 13 | def test_parse_basic_query1(self): 14 | query = parse_query({ 15 | "from": "SomeTable", 16 | "where": { 17 | "field": "value" 18 | } 19 | }) 20 | self.assertQueryEquals( 21 | MLQuery( 22 | "SomeTable", 23 | query_fragment=MLQueryFragment( 24 | OP_AND, 25 | clauses=[ 26 | MLClause( 27 | "field", 28 | COMP_EQ, 29 | "value" 30 | ) 31 | ] 32 | ) 33 | ), 34 | query 35 | ) 36 | 37 | def test_parse_basic_query2(self): 38 | query = parse_query({ 39 | "from": "SomeTable", 40 | "where": [ 41 | {"field1": "value"}, 42 | {"field2": 1} 43 
| ] 44 | }) 45 | self.assertQueryEquals( 46 | MLQuery( 47 | "SomeTable", 48 | query_fragment=MLQueryFragment( 49 | OP_AND, 50 | clauses=[ 51 | MLClause( 52 | "field1", 53 | COMP_EQ, 54 | "value" 55 | ), 56 | MLClause( 57 | "field2", 58 | COMP_EQ, 59 | 1 60 | ) 61 | ] 62 | ) 63 | ), 64 | query 65 | ) 66 | 67 | def test_parse_basic_query3(self): 68 | query = parse_query({ 69 | "from": "SomeTable", 70 | "where": { 71 | "$or": [ 72 | {"field1": "value"}, 73 | {"field2": 1} 74 | ] 75 | } 76 | }) 77 | self.assertQueryEquals( 78 | MLQuery( 79 | "SomeTable", 80 | query_fragment=MLQueryFragment( 81 | OP_OR, 82 | clauses=[ 83 | MLClause( 84 | "field1", 85 | COMP_EQ, 86 | "value" 87 | ), 88 | MLClause( 89 | "field2", 90 | COMP_EQ, 91 | 1 92 | ) 93 | ] 94 | ) 95 | ), 96 | query 97 | ) 98 | 99 | def test_parse_basic_query4(self): 100 | query = parse_query({ 101 | "from": "SomeTable", 102 | "where": [ 103 | { 104 | "$not": { 105 | "field1": 1 106 | } 107 | } 108 | ], 109 | "orderBy": "field1", 110 | "offset": 10, 111 | "limit": 50 112 | }) 113 | self.assertQueryEquals( 114 | MLQuery( 115 | "SomeTable", 116 | query_fragment=MLQueryFragment( 117 | OP_NOT, 118 | clauses=[ 119 | MLClause("field1", COMP_EQ, 1) 120 | ] 121 | ), 122 | order_by=["field1"], 123 | offset=10, 124 | limit=50 125 | ), 126 | query 127 | ) 128 | 129 | def test_parse_basic_query5(self): 130 | query = parse_query({ 131 | "from": "SomeTable", 132 | "where": { 133 | "$gt": { 134 | "field1": 5 135 | } 136 | } 137 | }) 138 | self.assertQueryEquals( 139 | MLQuery( 140 | "SomeTable", 141 | query_fragment=MLQueryFragment( 142 | OP_AND, 143 | clauses=[ 144 | MLClause("field1", COMP_GT, 5) 145 | ] 146 | ) 147 | ), 148 | query 149 | ) 150 | 151 | def test_parse_basic_query6(self): 152 | query = parse_query({ 153 | "from": "SomeTable", 154 | "where": [ 155 | { 156 | "$gt": { 157 | "field1": 5 158 | } 159 | }, 160 | { 161 | "$lt": { 162 | "field2": 10 163 | } 164 | } 165 | ] 166 | }) 167 | self.assertQueryEquals( 168 | 
MLQuery( 169 | "SomeTable", 170 | query_fragment=MLQueryFragment( 171 | OP_AND, 172 | clauses=[ 173 | MLClause("field1", COMP_GT, 5), 174 | MLClause("field2", COMP_LT, 10) 175 | ] 176 | ) 177 | ), 178 | query 179 | ) 180 | 181 | def test_parse_basic_query7(self): 182 | query = parse_query({ 183 | "from": "SomeTable", 184 | "where": {"field1": 1}, 185 | "orderBy": "-field1" 186 | }) 187 | self.assertQueryEquals( 188 | MLQuery( 189 | "SomeTable", 190 | query_fragment=MLQueryFragment( 191 | OP_AND, 192 | clauses=[ 193 | MLClause("field1", COMP_EQ, 1) 194 | ] 195 | ), 196 | order_by=["-field1"] 197 | ), 198 | query 199 | ) 200 | 201 | def test_parse_basic_query8(self): 202 | query = parse_query({ 203 | "from": "SomeTable", 204 | "where": {"field1": 1}, 205 | "orderBy": ["-field1", "field2", "-field3"] 206 | }) 207 | self.assertQueryEquals( 208 | MLQuery( 209 | "SomeTable", 210 | query_fragment=MLQueryFragment( 211 | OP_AND, 212 | clauses=[ 213 | MLClause("field1", COMP_EQ, 1) 214 | ] 215 | ), 216 | order_by=["-field1", "field2", "-field3"] 217 | ), 218 | query 219 | ) 220 | 221 | def test_parse_invalid_basic_queries(self): 222 | with self.assertRaises(TypeError): 223 | parse_query([]) 224 | with self.assertRaises(QuerySyntaxError): 225 | parse_query(dict()) 226 | 227 | def test_parse_invalid_basic_not_op(self): 228 | with self.assertRaises(QuerySyntaxError): 229 | parse_query({ 230 | "from": "SomeTable", 231 | "where": { 232 | "$not": [ 233 | {"field1": 1}, 234 | {"field2": 2} 235 | ] 236 | } 237 | }) 238 | 239 | def test_parse_invalid_field_name(self): 240 | with self.assertRaises(TypeError): 241 | parse_query({ 242 | "from": "SomeTable", 243 | "where": {1: 1} 244 | }) 245 | 246 | def test_parse_invalid_sub_fragment(self): 247 | with self.assertRaises(TypeError): 248 | parse_query({ 249 | "from": "SomeTable", 250 | "where": 1 251 | }) 252 | 253 | 254 | if __name__ == "__main__": 255 | unittest.main() 256 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLAlchemy 2 | 3 | [![Build Status](https://travis-ci.org/thanethomson/MLAlchemy.svg?branch=master)](https://travis-ci.org/thanethomson/MLAlchemy) 4 | [![PyPI](https://img.shields.io/pypi/v/mlalchemy.svg)](https://pypi.python.org/pypi/mlalchemy) 5 | [![PyPI](https://img.shields.io/pypi/pyversions/mlalchemy.svg)](https://pypi.python.org/pypi/mlalchemy) 6 | 7 | ## Overview 8 | MLAlchemy is a Python-based utility library aimed at allowing relatively safe 9 | conversion from YAML/JSON to SQLAlchemy read-only queries. One use case here is 10 | to allow RESTful web applications (written in Python) to receive YAML- or 11 | JSON-based queries for data, e.g. from a front-end JavaScript-based application. 12 | 13 | The name "MLAlchemy" is an abbreviation for "Markup Language for 14 | SQLAlchemy". 15 | 16 | ## Installation 17 | Installation via PyPI: 18 | 19 | ```bash 20 | > pip install mlalchemy 21 | ``` 22 | 23 | ## Query Examples 24 | To get a feel for what MLAlchemy queries look like, take a look at the 25 | following. **Note**: All field names are converted from `camelCase` or `kebab-case` 26 | to `snake_case` prior to query execution. 
27 | 28 | ### Example YAML Queries 29 | Fetching all the entries from a table called `Users`: 30 | 31 | ```yaml 32 | from: Users 33 | ``` 34 | 35 | Limiting the users to only those with the last name "Michaels": 36 | 37 | ```yaml 38 | from: Users 39 | where: 40 | last-name: Michaels 41 | ``` 42 | 43 | A more complex YAML query: 44 | 45 | ```yaml 46 | from: Users 47 | where: 48 | $or: 49 | last-name: Michaels 50 | first-name: Michael 51 | $gt: 52 | date-of-birth: 1984-01-01 53 | ``` 54 | 55 | The raw SQL query for the above would look like: 56 | 57 | ```sql 58 | SELECT * FROM users WHERE 59 | (last_name = "Michaels" OR first_name = "Michael") AND 60 | (date_of_birth > "1984-01-01") 61 | ``` 62 | 63 | ### Example JSON Queries 64 | The same queries as above, but in JSON format. To fetch all entries 65 | in the `Users` table: 66 | 67 | ```json 68 | { 69 | "from": "Users" 70 | } 71 | ``` 72 | 73 | Limiting the users to only those with the last name "Michaels": 74 | 75 | ```json 76 | { 77 | "from": "Users", 78 | "where": { 79 | "lastName": "Michaels" 80 | } 81 | } 82 | ``` 83 | 84 | And finally, the more complex query: 85 | 86 | ```json 87 | { 88 | "from": "Users", 89 | "where": { 90 | "$or": { 91 | "lastName": "Michaels", 92 | "firstName": "Michael" 93 | }, 94 | "$gt": { 95 | "dateOfBirth": "1984-01-01" 96 | } 97 | } 98 | } 99 | ``` 100 | 101 | ## Usage 102 | A simple example of how to use MLAlchemy: 103 | 104 | ```python 105 | from sqlalchemy import create_engine, Column, Integer, String, Date 106 | from sqlalchemy.ext.declarative import declarative_base 107 | from sqlalchemy.orm import sessionmaker 108 | from datetime import date 109 | from mlalchemy import parse_yaml_query, parse_json_query 110 | 111 | Base = declarative_base() 112 | 113 | 114 | class User(Base): 115 | __tablename__ = "users" 116 | 117 | id = Column(Integer, primary_key=True) 118 | first_name = Column(String) 119 | last_name = Column(String) 120 | date_of_birth = Column(Date) 121 | 122 | 123 | # use an in-memory SQLite
database for this example 124 | engine = create_engine("sqlite:///:memory:") 125 | Base.metadata.create_all(engine) 126 | Session = sessionmaker(bind=engine) 127 | session = Session() 128 | 129 | # add a couple of dummy users 130 | user1 = User(first_name="Michael", last_name="Anderson", date_of_birth=date(1980, 1, 1)) 131 | user2 = User(first_name="James", last_name="Michaels", date_of_birth=date(1976, 10, 23)) 132 | user3 = User(first_name="Andrew", last_name="Michaels", date_of_birth=date(1988, 8, 12)) 133 | session.add_all([user1, user2, user3]) 134 | session.commit() 135 | 136 | # we need a lookup table for MLAlchemy 137 | tables = { 138 | "User": User 139 | } 140 | 141 | # try a simple YAML-based query first 142 | all_users = parse_yaml_query("from: User").to_sqlalchemy(session, tables).all() 143 | print(all_users) 144 | 145 | # same query, but this time in JSON 146 | all_users = parse_json_query("""{"from": "User"}""").to_sqlalchemy(session, tables).all() 147 | print(all_users) 148 | 149 | # a slightly more complex query 150 | young_users = parse_yaml_query("""from: User 151 | where: 152 | $gt: 153 | date-of-birth: 1988-01-01 154 | """).to_sqlalchemy(session, tables).all() 155 | print(young_users) 156 | ``` 157 | 158 | ## Query Language Syntax 159 | As mentioned before, queries can either be supplied in YAML format or 160 | in JSON format to one of the respective parsers. 161 | 162 | ### `from` 163 | At present, MLAlchemy can only support selecting data from a single 164 | table (multi-table support is planned in future). Here, the `from` 165 | parameter allows you to specify the name of the table from which 166 | to select data. 167 | 168 | ### `where` 169 | The `where` parameter defines, in hierarchical fashion, the structure 170 | of the logical query to perform. There are 3 kinds of key types in 171 | the JSON/YAML structures, as described in the following table. 
172 | 173 | | Kind | Description | Options | 174 | | --------------- | ----------------------------------------------------- | -------------------------------------------------------------------------- | 175 | | **Operators** | Logical (boolean) operators for combining sub-clauses | `$and`, `$or`, `$not` | 176 | | **Comparators** | Comparative operators for comparing fields to values | `$eq`, `$gt`, `$gte`, `$lt`, `$lte`, `$like`, `$neq`, `$in`, `$nin`, `$is` | 177 | | **Field Names** | The name of a field in the `from` table | (Depends on table) | 178 | 179 | ### `order-by` (YAML) or `orderBy` (JSON) 180 | Provides the ordering for the resulting query. Must either be a single 181 | field name or a list of field names, with the direction specifier in 182 | front of the field name. For example: 183 | 184 | ```yaml 185 | # Order by "field2" in ascending order 186 | order-by: field2 187 | ``` 188 | 189 | Another example: 190 | 191 | ```yaml 192 | # Order by "field2" in *descending* order 193 | order-by: "-field2" 194 | ``` 195 | 196 | A more complex example: 197 | 198 | ```yaml 199 | # Order first by "field1" in ascending order, then by "field2" in 200 | # descending order 201 | order-by: 202 | - field1 203 | - "-field2" 204 | ``` 205 | 206 | ### `offset` 207 | Specifies the number of results to skip before providing results. If 208 | not specified, no results are skipped. 209 | 210 | ### `limit` 211 | Specifies the maximum number of results to return. If not specified, 212 | there will be no limit to the number of returned results. 
213 | 214 | ## Query Examples 215 | 216 | ### Example 1: Simple Query 217 | The following is an example of a relatively simple query in YAML format: 218 | 219 | ```yaml 220 | from: SomeTable 221 | where: 222 | - $gt: 223 | field1: 5 224 | - $lt: 225 | field2: 3 226 | order-by: 227 | - field1 228 | offset: 2 229 | limit: 10 230 | ``` 231 | 232 | This would translate into the following SQLAlchemy query: 233 | 234 | ```python 235 | from sqlalchemy.sql.expression import and_ 236 | 237 | session.query(SomeTable).filter( 238 | and_(SomeTable.field1 > 5, SomeTable.field2 < 3) 239 | ) \ 240 | .order_by(SomeTable.field1) \ 241 | .offset(2) \ 242 | .limit(10) 243 | ``` 244 | 245 | ### Example 2: Slightly More Complex Query 246 | The following is an example of a more complex query in YAML format: 247 | 248 | ```yaml 249 | from: SomeTable 250 | where: 251 | - $or: 252 | field1: 5 253 | field2: something 254 | - $not: 255 | $like: 256 | field3: "else%" 257 | ``` 258 | 259 | This would translate into the following SQLAlchemy query: 260 | 261 | ```python 262 | from sqlalchemy.sql.expression import and_, or_, not_ 263 | 264 | session.query(SomeTable) \ 265 | .filter( 266 | and_( 267 | or_( 268 | SomeTable.field1 == 5, 269 | SomeTable.field2 == "something" 270 | ), 271 | not_( 272 | SomeTable.field3.like("else%") 273 | ) 274 | ) 275 | ) 276 | ``` 277 | 278 | ### Example 3: Complex JSON Query 279 | The following is an example of a relatively complex query in 280 | JSON format: 281 | 282 | ```json 283 | { 284 | "from": "SomeTable", 285 | "where": [ 286 | { 287 | "$or": [ 288 | {"field1": 10}, 289 | { 290 | "$gt": { 291 | "field2": 5 292 | } 293 | } 294 | ], 295 | "$and": [ 296 | {"field3": "somevalue"}, 297 | {"field4": "othervalue"}, 298 | { 299 | "$or": { 300 | "field5": 5, 301 | "field6": 6 302 | } 303 | } 304 | ] 305 | } 306 | ], 307 | "orderBy": [ 308 | "field1", 309 | "-field2" 310 | ], 311 | "offset": 2, 312 | "limit": 10 313 | } 314 | ``` 315 | 316 | This query would be 
translated into the following SQLAlchemy code: 317 | 318 | ```python 319 | from sqlalchemy.sql.expression import and_, or_, not_ 320 | 321 | session.query(SomeTable) \ 322 | .filter( 323 | and_( 324 | or_( 325 | SomeTable.field1 == 10, 326 | SomeTable.field2 > 5 327 | ), 328 | and_( 329 | SomeTable.field3 == "somevalue", 330 | SomeTable.field4 == "othervalue", 331 | or_( 332 | SomeTable.field5 == 5, 333 | SomeTable.field6 == 6 334 | ) 335 | ) 336 | ) 337 | ) \ 338 | .order_by(SomeTable.field1, SomeTable.field2.desc()) \ 339 | .offset(2) \ 340 | .limit(10) 341 | ``` 342 | 343 | ## License 344 | **The MIT License (MIT)** 345 | 346 | Copyright (c) 2017 Thane Thomson 347 | 348 | Permission is hereby granted, free of charge, to any person obtaining a copy of 349 | this software and associated documentation files (the "Software"), to deal in 350 | the Software without restriction, including without limitation the rights to 351 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 352 | of the Software, and to permit persons to whom the Software is furnished to do 353 | so, subject to the following conditions: 354 | 355 | The above copyright notice and this permission notice shall be included in all 356 | copies or substantial portions of the Software. 357 | 358 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 359 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 360 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 361 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 362 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 363 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 364 | SOFTWARE. 
365 | 366 | -------------------------------------------------------------------------------- /mlalchemy/structures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | from past.builtins import basestring 5 | 6 | from sqlalchemy.sql.expression import and_, or_, not_ 7 | from sqlalchemy.orm.attributes import QueryableAttribute 8 | 9 | from mlalchemy.constants import * 10 | from mlalchemy.errors import * 11 | from mlalchemy.utils import * 12 | 13 | import logging 14 | logger = logging.getLogger(__name__) 15 | 16 | __all__ = [ 17 | "MLQuery", 18 | "MLQueryFragment", 19 | "MLClause" 20 | ] 21 | 22 | 23 | class MLQuery(object): 24 | """Broad data structure used to represent a selection query in its entirety.""" 25 | 26 | def __init__(self, table, query_fragment=None, order_by=None, offset=None, limit=None): 27 | """Constructor. 28 | 29 | Args: 30 | table: The name of the table being queried. 31 | query_fragment: A single instance of MLQueryFragment that represents the top of the hierarchy of 32 | query fragments making up this query. This can be None (i.e. no query criteria; all entries of 33 | the specified table will be selected). 34 | order_by: A string or list indicating the name of the field by which to order results, including 35 | the direction of ordering. For example, if a field's name is "title", specifying order_by as 36 | "title" will sort by "title" in the *ascending* direction. Specifying "-title" will sort in the 37 | descending direction. This can also be an ordered list of field names/directions. 38 | offset: The number of entries to skip. Set to None if no offset is required. 39 | limit: The maximum number of entries to return. Set to None to specify no limit. 
40 | """ 41 | if not isinstance(table, basestring): 42 | raise TypeError("The table name supplied to an MLQuery object must be a string") 43 | if query_fragment is not None and not isinstance(query_fragment, MLQueryFragment): 44 | raise TypeError("Primary query fragment for MLQuery must be of type MLQueryFragment") 45 | if order_by is not None and not isinstance(order_by, basestring) and not isinstance(order_by, list): 46 | raise TypeError("Query ordering parameter must be a string or a list") 47 | 48 | self.unique_field_names = set() 49 | self.table = table 50 | self.query_fragment = query_fragment 51 | 52 | if self.query_fragment is not None: 53 | self.unique_field_names = self.unique_field_names.union(self.query_fragment.unique_field_names) 54 | 55 | self.order_by = [] 56 | if order_by is not None: 57 | # make sure our order_by field is a list 58 | if not isinstance(order_by, list): 59 | order_by = [order_by] 60 | 61 | for ob in order_by: 62 | field_name = ob.strip("-") 63 | # make sure it's in snake_case 64 | if is_camelcase_string(field_name): 65 | field_name = camelcase_to_snakecase(field_name) 66 | elif is_kebabcase_string(field_name): 67 | field_name = kebabcase_to_snakecase(field_name) 68 | self.order_by.append({field_name: ORDER_DESC if ob[0] == "-" else ORDER_ASC}) 69 | self.unique_field_names.add(field_name) 70 | 71 | self.offset = offset 72 | self.limit = limit 73 | 74 | def as_dict(self): 75 | return { 76 | "table": self.table, 77 | "query_fragment": self.query_fragment.as_dict() if self.query_fragment is not None else None, 78 | "order_by": self.order_by, 79 | "offset": self.offset, 80 | "limit": self.limit 81 | } 82 | 83 | def unpack(self): 84 | return self.table, self.query_fragment, self.order_by, self.offset, self.limit 85 | 86 | def __repr__(self): 87 | return json_dumps(self.as_dict(), indent=2) 88 | 89 | def to_sqlalchemy(self, session, tables): 90 | if not isinstance(tables, dict): 91 | raise TypeError("Supplied tables structure for 
MLQuery-to-SQLAlchemy query conversion must be a dictionary") 92 | if self.table not in tables: 93 | raise InvalidTableError("Table does not exist in tables dictionary: %s" % self.table) 94 | 95 | table = tables[self.table] 96 | query = session.query(table) 97 | 98 | logger.debug("Attempting to build SQLAlchemy query for table \"%s\":\n%s" % (self.table, self)) 99 | 100 | if self.query_fragment is not None: 101 | query = query.filter(self.query_fragment.to_sqlalchemy(table)) 102 | 103 | if self.order_by is not None: 104 | criteria = [] 105 | for order_by in self.order_by: 106 | field, direction = [i for i in order_by.items()][0] 107 | criterion = getattr(table, field) 108 | if not isinstance(criterion, QueryableAttribute): 109 | raise InvalidFieldError("Invalid field for specified table: %s" % field) 110 | 111 | if direction == ORDER_ASC: 112 | criterion = criterion.asc() 113 | elif direction == ORDER_DESC: 114 | criterion = criterion.desc() 115 | criteria.append(criterion) 116 | query = query.order_by(*criteria) 117 | 118 | if self.offset is not None: 119 | query = query.offset(self.offset) 120 | 121 | if self.limit is not None: 122 | query = query.limit(self.limit) 123 | 124 | return query 125 | 126 | 127 | class MLQueryFragment(object): 128 | """Recursive object to allow for relatively complex data selection queries.""" 129 | 130 | def __init__(self, op, clauses=None, sub_fragments=None): 131 | """Constructor. 132 | 133 | Args: 134 | op: The operator to join clauses and sub-queries. 135 | clauses: A list of clauses (MLClause objects) for the query. Set to None if there are none. 136 | sub_fragments: A list of sub-queries (MLQueryFragment objects) within this query. Set to None if there are 137 | none. 
138 | """ 139 | if op not in OPERATORS: 140 | raise InvalidOperatorError("Invalid operator: %s" % op) 141 | 142 | if clauses is not None and not isinstance(clauses, list): 143 | raise TypeError("MLQueryFragment clauses must either be None or a list") 144 | if sub_fragments is not None and not isinstance(sub_fragments, list): 145 | raise TypeError("MLQueryFragment sub-fragments must either be None or a list") 146 | 147 | # ensure an empty list if no clauses or sub-queries 148 | if clauses is None: 149 | clauses = [] 150 | if sub_fragments is None: 151 | sub_fragments = [] 152 | 153 | self.unique_field_names = set() 154 | 155 | for clause in clauses: 156 | if not isinstance(clause, MLClause): 157 | raise TypeError("All clauses within an MLQueryFragment must be of type MLClause") 158 | self.unique_field_names.add(clause.field) 159 | 160 | for sub_frag in sub_fragments: 161 | if not isinstance(sub_frag, MLQueryFragment): 162 | raise TypeError("All sub-fragments within an MLQueryFragment must be of type MLQueryFragment") 163 | 164 | self.unique_field_names = self.unique_field_names.union(sub_frag.unique_field_names) 165 | 166 | if op == OP_NOT and (len(clauses) + len(sub_fragments)) > 1: 167 | raise QuerySyntaxError("NOT operations can only contain a single clause or sub-query fragment") 168 | 169 | self.op = op 170 | self.clauses = clauses 171 | self.sub_fragments = sub_fragments 172 | 173 | def unpack(self): 174 | return self.op, self.clauses, self.sub_fragments 175 | 176 | def as_dict(self): 177 | return { 178 | "op": self.op, 179 | "clauses": [clause.as_dict() for clause in self.clauses], 180 | "sub_fragments": [sub_fragment.as_dict() for sub_fragment in self.sub_fragments] 181 | } 182 | 183 | def __repr__(self): 184 | return json_dumps(self.as_dict(), indent=2) 185 | 186 | def simplify(self): 187 | op = self.op 188 | clauses = [clause for clause in self.clauses] 189 | sub_fragments = [] 190 | 191 | for sub_fragment in self.sub_fragments: 192 | s = 
sub_fragment.simplify() 193 | if isinstance(s, MLClause): 194 | clauses.append(s) 195 | elif isinstance(s, MLQueryFragment): 196 | sub_fragments.append(s) 197 | 198 | # if this query fragment is only made up of a single clause 199 | if len(clauses) == 1 and len(sub_fragments) == 0 and op == OP_AND: 200 | return clauses[0] 201 | 202 | # if this query fragment is just a single sub-fragment, collapse its properties into the simplified 203 | # fragment we're currently generating 204 | if len(clauses) == 0 and len(sub_fragments) == 1: 205 | op, clauses, sub_fragments = sub_fragments[0].unpack() 206 | 207 | return MLQueryFragment(op, clauses=clauses, sub_fragments=sub_fragments) 208 | 209 | def to_sqlalchemy(self, table): 210 | filter_criteria = [clause.to_sqlalchemy(table) for clause in self.clauses] 211 | filter_criteria.extend([sub_frag.to_sqlalchemy(table) for sub_frag in self.sub_fragments]) 212 | 213 | if self.op == OP_OR: 214 | return or_(*filter_criteria) 215 | elif self.op == OP_NOT: 216 | return not_(*filter_criteria) 217 | 218 | return and_(*filter_criteria) 219 | 220 | 221 | class MLClause(object): 222 | """A single clause in an MLQuery object.""" 223 | 224 | def __init__(self, field, comp, value): 225 | """Constructor. 226 | 227 | Args: 228 | field: The name of the field relevant to this clause. 229 | comp: The comparator when performing the comparison. 230 | value: The value to which this field's value is being compared. 
231 | """ 232 | if comp not in COMPARATORS: 233 | raise InvalidComparatorError("Invalid comparator: %s" % comp) 234 | 235 | if not isinstance(field, basestring): 236 | raise TypeError("Clause field names must be strings") 237 | 238 | # ensure field name is in snake_case 239 | if is_kebabcase_string(field): 240 | field = kebabcase_to_snakecase(field) 241 | elif is_camelcase_string(field): 242 | field = camelcase_to_snakecase(field) 243 | 244 | self.field = field 245 | self.comp = comp 246 | self.value = value 247 | 248 | def as_dict(self): 249 | return { 250 | "field": self.field, 251 | "comp": self.comp, 252 | "value": self.value 253 | } 254 | 255 | def unpack(self): 256 | return self.field, self.comp, self.value 257 | 258 | def __repr__(self): 259 | return json_dumps(self.as_dict(), indent=2) 260 | 261 | def to_sqlalchemy(self, table): 262 | col = getattr(table, self.field) 263 | # make sure it's the right kind of field 264 | if not isinstance(col, QueryableAttribute): 265 | raise InvalidFieldError("Invalid field for specified table: %s" % self.field) 266 | 267 | if self.comp == COMP_EQ: 268 | return col == self.value 269 | elif self.comp == COMP_GT: 270 | return col > self.value 271 | elif self.comp == COMP_GTE: 272 | return col >= self.value 273 | elif self.comp == COMP_LT: 274 | return col < self.value 275 | elif self.comp == COMP_LTE: 276 | return col <= self.value 277 | elif self.comp == COMP_NEQ: 278 | return col != self.value 279 | elif self.comp == COMP_LIKE: 280 | return col.like(self.value) 281 | elif self.comp == COMP_IN: 282 | return col.in_(self.value) 283 | elif self.comp == COMP_NIN: 284 | return ~col.in_(self.value) 285 | elif self.comp == COMP_IS: 286 | return col.is_(self.value) 287 | 288 | # default to equals 289 | return col == self.value 290 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | MLAlchemy 2 | ========= 
3 | 4 | Overview 5 | -------- 6 | 7 | MLAlchemy is a Python-based utility library aimed at allowing relatively 8 | safe conversion from YAML/JSON to SQLAlchemy read-only queries. One use 9 | case here is to allow RESTful web applications (written in Python) to 10 | receive YAML- or JSON-based queries for data, e.g. from a front-end 11 | JavaScript-based application. 12 | 13 | The name "MLAlchemy" is an abbreviation for "Markup Language for 14 | SQLAlchemy". 15 | 16 | Installation 17 | ------------ 18 | 19 | Installation via PyPI: 20 | 21 | .. code:: bash 22 | 23 | > pip install mlalchemy 24 | 25 | Query Examples 26 | -------------- 27 | 28 | To get a feel for what MLAlchemy queries look like, take a look at the 29 | following. **Note**: All field names are converted from ``camelCase`` or 30 | ``kebab-case`` to ``snake_case`` prior to query execution. 31 | 32 | Example YAML Queries 33 | ~~~~~~~~~~~~~~~~~~~~ 34 | 35 | Fetching all the entries from a table called ``Users``: 36 | 37 | .. code:: yaml 38 | 39 | from: Users 40 | 41 | Limiting the users to only those with the last name "Michaels": 42 | 43 | .. code:: yaml 44 | 45 | from: Users 46 | where: 47 | last-name: Michaels 48 | 49 | A more complex YAML query: 50 | 51 | .. code:: yaml 52 | 53 | from: Users 54 | where: 55 | $or: 56 | last-name: Michaels 57 | first-name: Michael 58 | $gt: 59 | date-of-birth: 1984-01-01 60 | 61 | The raw SQL query for the above would look like: 62 | 63 | .. code:: sql 64 | 65 | SELECT * FROM users WHERE 66 | (last_name = "Michaels" OR first_name = "Michael") AND 67 | (date_of_birth > "1984-01-01") 68 | 69 | Example JSON Queries 70 | ~~~~~~~~~~~~~~~~~~~~ 71 | 72 | The same queries as above, but in JSON format. To fetch all entries in 73 | the ``Users`` table: 74 | 75 | .. code:: json 76 | 77 | { 78 | "from": "Users" 79 | } 80 | 81 | Limiting the users to only those with the last name "Michaels": 82 | 83 | .. 
code:: json 84 | 85 | { 86 | "from": "Users", 87 | "where": { 88 | "lastName": "Michaels" 89 | } 90 | } 91 | 92 | And finally, the more complex query: 93 | 94 | .. code:: json 95 | 96 | { 97 | "from": "Users", 98 | "where": { 99 | "$or": { 100 | "lastName": "Michaels", 101 | "firstName": "Michael" 102 | }, 103 | "$gt": { 104 | "dateOfBirth": "1984-01-01" 105 | } 106 | } 107 | } 108 | 109 | Usage 110 | ----- 111 | 112 | A simple example of how to use MLAlchemy: 113 | 114 | .. code:: python 115 | 116 | from sqlalchemy import create_engine, Column, Integer, String, Date 117 | from sqlalchemy.ext.declarative import declarative_base 118 | from sqlalchemy.orm import sessionmaker 119 | from datetime import date 120 | from mlalchemy import parse_yaml_query, parse_json_query 121 | 122 | Base = declarative_base() 123 | 124 | 125 | class User(Base): 126 | __tablename__ = "users" 127 | 128 | id = Column(Integer, primary_key=True) 129 | first_name = Column(String) 130 | last_name = Column(String) 131 | date_of_birth = Column(Date) 132 | 133 | 134 | # use an in-memory SQLite database for this example 135 | engine = create_engine("sqlite:///:memory:") 136 | Base.metadata.create_all(engine) 137 | Session = sessionmaker(bind=engine) 138 | session = Session() 139 | 140 | # add a couple of dummy users 141 | user1 = User(first_name="Michael", last_name="Anderson", date_of_birth=date(1980, 1, 1)) 142 | user2 = User(first_name="James", last_name="Michaels", date_of_birth=date(1976, 10, 23)) 143 | user3 = User(first_name="Andrew", last_name="Michaels", date_of_birth=date(1988, 8, 12)) 144 | session.add_all([user1, user2, user3]) 145 | session.commit() 146 | 147 | # we need a lookup table for MLAlchemy 148 | tables = { 149 | "User": User 150 | } 151 | 152 | # try a simple YAML-based query first 153 | all_users = parse_yaml_query("from: User").to_sqlalchemy(session, tables).all() 154 | print(all_users) 155 | 156 | # same query, but this time in JSON 157 | all_users = parse_json_query("""{"from": 
"User"}""").to_sqlalchemy(session, tables).all() 158 | print(all_users) 159 | 160 | # a slightly more complex query 161 | young_users = parse_yaml_query("""from: User 162 | where: 163 | $gt: 164 | date-of-birth: 1988-01-01 165 | """).to_sqlalchemy(session, tables).all() 166 | print(young_users) 167 | 168 | Query Language Syntax 169 | --------------------- 170 | 171 | As mentioned before, queries can either be supplied in YAML format or in 172 | JSON format to one of the respective parsers. 173 | 174 | ``from`` 175 | ~~~~~~~~ 176 | 177 | At present, MLAlchemy can only support selecting data from a single 178 | table (multi-table support is planned in future). Here, the ``from`` 179 | parameter allows you to specify the name of the table from which to 180 | select data. 181 | 182 | ``where`` 183 | ~~~~~~~~~ 184 | 185 | The ``where`` parameter defines, in hierarchical fashion, the structure 186 | of the logical query to perform. There are 3 kinds of key types in the 187 | JSON/YAML structures, as described in the following table. 
188 | 189 | +-----------------+----------------------------+---------------------------------------+ 190 | | Kind | Description | Options | 191 | +=================+============================+=======================================+ 192 | | **Operators** | Logical (boolean) | ``$and``, ``$or``, ``$not`` | 193 | | | operators for combining | | 194 | | | sub-clauses | | 195 | +-----------------+----------------------------+---------------------------------------+ 196 | | **Comparators** | Comparative operators for | ``$eq``, ``$gt``, ``$gte``, ``$lt``, | 197 | | | comparing fields to values | ``$lte``, ``$like``, ``$neq``, | 198 | | | | ``$in``, ``$nin``, ``$is`` | 199 | +-----------------+----------------------------+---------------------------------------+ 200 | | **Field Names** | The name of a field in the | (Depends on table) | 201 | | | ``from`` table | | 202 | +-----------------+----------------------------+---------------------------------------+ 203 | 204 | ``order-by`` (YAML) or ``orderBy`` (JSON) 205 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 206 | 207 | Provides the ordering for the resulting query. Must either be a single 208 | field name or a list of field names, with the direction specifier in 209 | front of the field name. For example: 210 | 211 | .. code:: yaml 212 | 213 | # Order by "field2" in ascending order 214 | order-by: field2 215 | 216 | Another example: 217 | 218 | .. code:: yaml 219 | 220 | # Order by "field2" in *descending* order 221 | order-by: "-field2" 222 | 223 | A more complex example: 224 | 225 | .. code:: yaml 226 | 227 | # Order first by "field1" in ascending order, then by "field2" in 228 | # descending order 229 | order-by: 230 | - field1 231 | - "-field2" 232 | 233 | ``offset`` 234 | ~~~~~~~~~~ 235 | 236 | Specifies the number of results to skip before providing results. If not 237 | specified, no results are skipped. 238 | 239 | ``limit`` 240 | ~~~~~~~~~ 241 | 242 | Specifies the maximum number of results to return. 
If not specified, 243 | there will be no limit to the number of returned results. 244 | 245 | Query Examples 246 | -------------- 247 | 248 | Example 1: Simple Query 249 | ~~~~~~~~~~~~~~~~~~~~~~~ 250 | 251 | The following is an example of a relatively simple query in YAML format: 252 | 253 | .. code:: yaml 254 | 255 | from: SomeTable 256 | where: 257 | - $gt: 258 | field1: 5 259 | - $lt: 260 | field2: 3 261 | order-by: 262 | - field1 263 | offset: 2 264 | limit: 10 265 | 266 | This would translate into the following SQLAlchemy query: 267 | 268 | .. code:: python 269 | 270 | from sqlalchemy.sql.expression import and_ 271 | 272 | session.query(SomeTable).filter( 273 | and_(SomeTable.field1 > 5, SomeTable.field2 < 3) 274 | ) \ 275 | .order_by(SomeTable.field1) \ 276 | .offset(2) \ 277 | .limit(10) 278 | 279 | Example 2: Slightly More Complex Query 280 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 281 | 282 | The following is an example of a more complex query in YAML format: 283 | 284 | .. code:: yaml 285 | 286 | from: SomeTable 287 | where: 288 | - $or: 289 | field1: 5 290 | field2: something 291 | - $not: 292 | $like: 293 | field3: "else%" 294 | 295 | This would translate into the following SQLAlchemy query: 296 | 297 | .. code:: python 298 | 299 | from sqlalchemy.sql.expression import and_, or_, not_ 300 | 301 | session.query(SomeTable) \ 302 | .filter( 303 | and_( 304 | or_( 305 | SomeTable.field1 == 5, 306 | SomeTable.field2 == "something" 307 | ), 308 | not_( 309 | SomeTable.field3.like("else%") 310 | ) 311 | ) 312 | ) 313 | 314 | Example 3: Complex JSON Query 315 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 316 | 317 | The following is an example of a relatively complex query in JSON 318 | format: 319 | 320 | .. 
code:: json 321 | 322 | { 323 | "from": "SomeTable", 324 | "where": [ 325 | { 326 | "$or": [ 327 | {"field1": 10}, 328 | { 329 | "$gt": { 330 | "field2": 5 331 | } 332 | } 333 | ], 334 | "$and": [ 335 | {"field3": "somevalue"}, 336 | {"field4": "othervalue"}, 337 | { 338 | "$or": { 339 | "field5": 5, 340 | "field6": 6 341 | } 342 | } 343 | ] 344 | } 345 | ], 346 | "orderBy": [ 347 | "field1", 348 | "-field2" 349 | ], 350 | "offset": 2, 351 | "limit": 10 352 | } 353 | 354 | This query would be translated into the following SQLAlchemy code: 355 | 356 | .. code:: python 357 | 358 | from sqlalchemy.sql.expression import and_, or_, not_ 359 | 360 | session.query(SomeTable) \ 361 | .filter( 362 | and_( 363 | or_( 364 | SomeTable.field1 == 10, 365 | SomeTable.field2 > 5 366 | ), 367 | and_( 368 | SomeTable.field3 == "somevalue", 369 | SomeTable.field4 == "othervalue", 370 | or_( 371 | SomeTable.field5 == 5, 372 | SomeTable.field6 == 6 373 | ) 374 | ) 375 | ) 376 | ) \ 377 | .order_by(SomeTable.field1, SomeTable.field2.desc()) \ 378 | .offset(2) \ 379 | .limit(10) 380 | 381 | License 382 | ------- 383 | 384 | **The MIT License (MIT)** 385 | 386 | Copyright (c) 2017 Thane Thomson 387 | 388 | Permission is hereby granted, free of charge, to any person obtaining a 389 | copy of this software and associated documentation files (the 390 | "Software"), to deal in the Software without restriction, including 391 | without limitation the rights to use, copy, modify, merge, publish, 392 | distribute, sublicense, and/or sell copies of the Software, and to 393 | permit persons to whom the Software is furnished to do so, subject to 394 | the following conditions: 395 | 396 | The above copyright notice and this permission notice shall be included 397 | in all copies or substantial portions of the Software. 
398 | 399 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 400 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 401 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 402 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 403 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 404 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 405 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 406 | --------------------------------------------------------------------------------