├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── contribute.json ├── docs └── Student Project 2019.md ├── moz_sql_parser ├── __init__.py ├── debugs.py ├── formatting.py ├── keywords.py └── sql_parser.py ├── requirements.txt ├── setup.py ├── setuptools.json └── tests ├── README.md ├── __init__.py ├── test_errors.py ├── test_format_and_parse.py ├── test_formatting.py ├── test_meta.py ├── test_resources.py ├── test_simple.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.pyc 3 | *.tab 4 | out 5 | .idea 6 | .svn 7 | *.iml 8 | /moz_sql_parser.egg-info 9 | /build 10 | /dist 11 | /moz-sql-parser 12 | vendor/ 13 | ._* 14 | .DS_Store 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.6" 5 | 6 | install: 7 | - "pip install ." 8 | 9 | env: 10 | - PYTHONPATH=. 11 | 12 | # command to run tests 13 | script: 14 | - python -m unittest discover tests 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Community Participation Guidelines 2 | 3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines. 4 | For more details, please read the 5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 6 | 7 | ## How to Report 8 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. 9 | 10 | 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License, version 2.0 2 | 3 | 1. Definitions 4 | 5 | 1.1. "Contributor" 6 | 7 | means each individual or legal entity that creates, contributes to the 8 | creation of, or owns Covered Software. 9 | 10 | 1.2. "Contributor Version" 11 | 12 | means the combination of the Contributions of others (if any) used by a 13 | Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | 17 | means Covered Software of a particular Contributor. 18 | 19 | 1.4. "Covered Software" 20 | 21 | means Source Code Form to which the initial Contributor has attached the 22 | notice in Exhibit A, the Executable Form of such Source Code Form, and 23 | Modifications of such Source Code Form, in each case including portions 24 | thereof. 25 | 26 | 1.5. "Incompatible With Secondary Licenses" 27 | means 28 | 29 | a. that the initial Contributor has attached the notice described in 30 | Exhibit B to the Covered Software; or 31 | 32 | b. that the Covered Software was made available under the terms of 33 | version 1.1 or earlier of the License, but not also under the terms of 34 | a Secondary License. 35 | 36 | 1.6. "Executable Form" 37 | 38 | means any form of the work other than Source Code Form. 39 | 40 | 1.7. "Larger Work" 41 | 42 | means a work that combines Covered Software with other material, in a 43 | separate file or files, that is not Covered Software. 44 | 45 | 1.8. "License" 46 | 47 | means this document. 48 | 49 | 1.9. 
"Licensable" 50 | 51 | means having the right to grant, to the maximum extent possible, whether 52 | at the time of the initial grant or subsequently, any and all of the 53 | rights conveyed by this License. 54 | 55 | 1.10. "Modifications" 56 | 57 | means any of the following: 58 | 59 | a. any file in Source Code Form that results from an addition to, 60 | deletion from, or modification of the contents of Covered Software; or 61 | 62 | b. any new file in Source Code Form that contains any Covered Software. 63 | 64 | 1.11. "Patent Claims" of a Contributor 65 | 66 | means any patent claim(s), including without limitation, method, 67 | process, and apparatus claims, in any patent Licensable by such 68 | Contributor that would be infringed, but for the grant of the License, 69 | by the making, using, selling, offering for sale, having made, import, 70 | or transfer of either its Contributions or its Contributor Version. 71 | 72 | 1.12. "Secondary License" 73 | 74 | means either the GNU General Public License, Version 2.0, the GNU Lesser 75 | General Public License, Version 2.1, the GNU Affero General Public 76 | License, Version 3.0, or any later versions of those licenses. 77 | 78 | 1.13. "Source Code Form" 79 | 80 | means the form of the work preferred for making modifications. 81 | 82 | 1.14. "You" (or "Your") 83 | 84 | means an individual or a legal entity exercising rights under this 85 | License. For legal entities, "You" includes any entity that controls, is 86 | controlled by, or is under common control with You. For purposes of this 87 | definition, "control" means (a) the power, direct or indirect, to cause 88 | the direction or management of such entity, whether by contract or 89 | otherwise, or (b) ownership of more than fifty percent (50%) of the 90 | outstanding shares or beneficial ownership of such entity. 91 | 92 | 93 | 2. License Grants and Conditions 94 | 95 | 2.1. Grants 96 | 97 | Each Contributor hereby grants You a world-wide, royalty-free, 98 | non-exclusive license: 99 | 100 | a. under intellectual property rights (other than patent or trademark) 101 | Licensable by such Contributor to use, reproduce, make available, 102 | modify, display, perform, distribute, and otherwise exploit its 103 | Contributions, either on an unmodified basis, with Modifications, or 104 | as part of a Larger Work; and 105 | 106 | b. under Patent Claims of such Contributor to make, use, sell, offer for 107 | sale, have made, import, and otherwise transfer either its 108 | Contributions or its Contributor Version. 109 | 110 | 2.2. Effective Date 111 | 112 | The licenses granted in Section 2.1 with respect to any Contribution 113 | become effective for each Contribution on the date the Contributor first 114 | distributes such Contribution. 115 | 116 | 2.3. Limitations on Grant Scope 117 | 118 | The licenses granted in this Section 2 are the only rights granted under 119 | this License. No additional rights or licenses will be implied from the 120 | distribution or licensing of Covered Software under this License. 121 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 122 | Contributor: 123 | 124 | a. for any code that a Contributor has removed from Covered Software; or 125 | 126 | b. for infringements caused by: (i) Your and any other third party's 127 | modifications of Covered Software, or (ii) the combination of its 128 | Contributions with other software (except as part of its Contributor 129 | Version); or 130 | 131 | c. 
under Patent Claims infringed by Covered Software in the absence of 132 | its Contributions. 133 | 134 | This License does not grant any rights in the trademarks, service marks, 135 | or logos of any Contributor (except as may be necessary to comply with 136 | the notice requirements in Section 3.4). 137 | 138 | 2.4. Subsequent Licenses 139 | 140 | No Contributor makes additional grants as a result of Your choice to 141 | distribute the Covered Software under a subsequent version of this 142 | License (see Section 10.2) or under the terms of a Secondary License (if 143 | permitted under the terms of Section 3.3). 144 | 145 | 2.5. Representation 146 | 147 | Each Contributor represents that the Contributor believes its 148 | Contributions are its original creation(s) or it has sufficient rights to 149 | grant the rights to its Contributions conveyed by this License. 150 | 151 | 2.6. Fair Use 152 | 153 | This License is not intended to limit any rights You have under 154 | applicable copyright doctrines of fair use, fair dealing, or other 155 | equivalents. 156 | 157 | 2.7. Conditions 158 | 159 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in 160 | Section 2.1. 161 | 162 | 163 | 3. Responsibilities 164 | 165 | 3.1. Distribution of Source Form 166 | 167 | All distribution of Covered Software in Source Code Form, including any 168 | Modifications that You create or to which You contribute, must be under 169 | the terms of this License. You must inform recipients that the Source 170 | Code Form of the Covered Software is governed by the terms of this 171 | License, and how they can obtain a copy of this License. You may not 172 | attempt to alter or restrict the recipients' rights in the Source Code 173 | Form. 174 | 175 | 3.2. Distribution of Executable Form 176 | 177 | If You distribute Covered Software in Executable Form then: 178 | 179 | a. such Covered Software must also be made available in Source Code Form, 180 | as described in Section 3.1, and You must inform recipients of the 181 | Executable Form how they can obtain a copy of such Source Code Form by 182 | reasonable means in a timely manner, at a charge no more than the cost 183 | of distribution to the recipient; and 184 | 185 | b. You may distribute such Executable Form under the terms of this 186 | License, or sublicense it under different terms, provided that the 187 | license for the Executable Form does not attempt to limit or alter the 188 | recipients' rights in the Source Code Form under this License. 189 | 190 | 3.3. Distribution of a Larger Work 191 | 192 | You may create and distribute a Larger Work under terms of Your choice, 193 | provided that You also comply with the requirements of this License for 194 | the Covered Software. If the Larger Work is a combination of Covered 195 | Software with a work governed by one or more Secondary Licenses, and the 196 | Covered Software is not Incompatible With Secondary Licenses, this 197 | License permits You to additionally distribute such Covered Software 198 | under the terms of such Secondary License(s), so that the recipient of 199 | the Larger Work may, at their option, further distribute the Covered 200 | Software under the terms of either this License or such Secondary 201 | License(s). 202 | 203 | 3.4. 
Notices 204 | 205 | You may not remove or alter the substance of any license notices 206 | (including copyright notices, patent notices, disclaimers of warranty, or 207 | limitations of liability) contained within the Source Code Form of the 208 | Covered Software, except that You may alter any license notices to the 209 | extent required to remedy known factual inaccuracies. 210 | 211 | 3.5. Application of Additional Terms 212 | 213 | You may choose to offer, and to charge a fee for, warranty, support, 214 | indemnity or liability obligations to one or more recipients of Covered 215 | Software. However, You may do so only on Your own behalf, and not on 216 | behalf of any Contributor. You must make it absolutely clear that any 217 | such warranty, support, indemnity, or liability obligation is offered by 218 | You alone, and You hereby agree to indemnify every Contributor for any 219 | liability incurred by such Contributor as a result of warranty, support, 220 | indemnity or liability terms You offer. You may include additional 221 | disclaimers of warranty and limitations of liability specific to any 222 | jurisdiction. 223 | 224 | 4. Inability to Comply Due to Statute or Regulation 225 | 226 | If it is impossible for You to comply with any of the terms of this License 227 | with respect to some or all of the Covered Software due to statute, 228 | judicial order, or regulation then You must: (a) comply with the terms of 229 | this License to the maximum extent possible; and (b) describe the 230 | limitations and the code they affect. Such description must be placed in a 231 | text file included with all distributions of the Covered Software under 232 | this License. Except to the extent prohibited by statute or regulation, 233 | such description must be sufficiently detailed for a recipient of ordinary 234 | skill to be able to understand it. 235 | 236 | 5. Termination 237 | 238 | 5.1. The rights granted under this License will terminate automatically if You 239 | fail to comply with any of its terms. However, if You become compliant, 240 | then the rights granted under this License from a particular Contributor 241 | are reinstated (a) provisionally, unless and until such Contributor 242 | explicitly and finally terminates Your grants, and (b) on an ongoing 243 | basis, if such Contributor fails to notify You of the non-compliance by 244 | some reasonable means prior to 60 days after You have come back into 245 | compliance. Moreover, Your grants from a particular Contributor are 246 | reinstated on an ongoing basis if such Contributor notifies You of the 247 | non-compliance by some reasonable means, this is the first time You have 248 | received notice of non-compliance with this License from such 249 | Contributor, and You become compliant prior to 30 days after Your receipt 250 | of the notice. 251 | 252 | 5.2. If You initiate litigation against any entity by asserting a patent 253 | infringement claim (excluding declaratory judgment actions, 254 | counter-claims, and cross-claims) alleging that a Contributor Version 255 | directly or indirectly infringes any patent, then the rights granted to 256 | You by any and all Contributors for the Covered Software under Section 257 | 2.1 of this License shall terminate. 258 | 259 | 5.3. 
In the event of termination under Sections 5.1 or 5.2 above, all end user 260 | license agreements (excluding distributors and resellers) which have been 261 | validly granted by You or Your distributors under this License prior to 262 | termination shall survive termination. 263 | 264 | 6. Disclaimer of Warranty 265 | 266 | Covered Software is provided under this License on an "as is" basis, 267 | without warranty of any kind, either expressed, implied, or statutory, 268 | including, without limitation, warranties that the Covered Software is free 269 | of defects, merchantable, fit for a particular purpose or non-infringing. 270 | The entire risk as to the quality and performance of the Covered Software 271 | is with You. Should any Covered Software prove defective in any respect, 272 | You (not any Contributor) assume the cost of any necessary servicing, 273 | repair, or correction. This disclaimer of warranty constitutes an essential 274 | part of this License. No use of any Covered Software is authorized under 275 | this License except under this disclaimer. 276 | 277 | 7. Limitation of Liability 278 | 279 | Under no circumstances and under no legal theory, whether tort (including 280 | negligence), contract, or otherwise, shall any Contributor, or anyone who 281 | distributes Covered Software as permitted above, be liable to You for any 282 | direct, indirect, special, incidental, or consequential damages of any 283 | character including, without limitation, damages for lost profits, loss of 284 | goodwill, work stoppage, computer failure or malfunction, or any and all 285 | other commercial damages or losses, even if such party shall have been 286 | informed of the possibility of such damages. This limitation of liability 287 | shall not apply to liability for death or personal injury resulting from 288 | such party's negligence to the extent applicable law prohibits such 289 | limitation. Some jurisdictions do not allow the exclusion or limitation of 290 | incidental or consequential damages, so this exclusion and limitation may 291 | not apply to You. 292 | 293 | 8. Litigation 294 | 295 | Any litigation relating to this License may be brought only in the courts 296 | of a jurisdiction where the defendant maintains its principal place of 297 | business and such litigation shall be governed by laws of that 298 | jurisdiction, without reference to its conflict-of-law provisions. Nothing 299 | in this Section shall prevent a party's ability to bring cross-claims or 300 | counter-claims. 301 | 302 | 9. Miscellaneous 303 | 304 | This License represents the complete agreement concerning the subject 305 | matter hereof. If any provision of this License is held to be 306 | unenforceable, such provision shall be reformed only to the extent 307 | necessary to make it enforceable. Any law or regulation which provides that 308 | the language of a contract shall be construed against the drafter shall not 309 | be used to construe this License against a Contributor. 310 | 311 | 312 | 10. Versions of the License 313 | 314 | 10.1. New Versions 315 | 316 | Mozilla Foundation is the license steward. Except as provided in Section 317 | 10.3, no one other than the license steward has the right to modify or 318 | publish new versions of this License. Each version will be given a 319 | distinguishing version number. 320 | 321 | 10.2. 
Effect of New Versions 322 | 323 | You may distribute the Covered Software under the terms of the version 324 | of the License under which You originally received the Covered Software, 325 | or under the terms of any subsequent version published by the license 326 | steward. 327 | 328 | 10.3. Modified Versions 329 | 330 | If you create software not governed by this License, and you want to 331 | create a new license for such software, you may create and use a 332 | modified version of this License if you rename the license and remove 333 | any references to the name of the license steward (except to note that 334 | such modified license differs from this License). 335 | 336 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 337 | Licenses If You choose to distribute Source Code Form that is 338 | Incompatible With Secondary Licenses under the terms of this version of 339 | the License, the notice described in Exhibit B of this License must be 340 | attached. 341 | 342 | Exhibit A - Source Code Form License Notice 343 | 344 | This Source Code Form is subject to the 345 | terms of the Mozilla Public License, v. 346 | 2.0. If a copy of the MPL was not 347 | distributed with this file, You can 348 | obtain one at 349 | http://mozilla.org/MPL/2.0/. 350 | 351 | If it is not possible or desirable to put the notice in a particular file, 352 | then You may include the notice in a location (such as a LICENSE file in a 353 | relevant directory) where a recipient would be likely to look for such a 354 | notice. 355 | 356 | You may add additional accurate notices of copyright ownership. 357 | 358 | Exhibit B - "Incompatible With Secondary Licenses" Notice 359 | 360 | This Source Code Form is "Incompatible 361 | With Secondary Licenses", as defined by 362 | the Mozilla Public License, v. 2.0. 363 | 364 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## NOTICE - June 2021 2 | 3 | The Moz SQL Parser repository is now archived. But the good news is that a fork of this project, called More SQL Parsing!, remains under active development independent of Mozilla. That project can be found at https://github.com/klahnakoski/mo-sql-parsing. 4 | 5 | # Moz SQL Parser 6 | 7 | Let's make a SQL parser so we can provide a familiar interface to non-sql datastores! 8 | 9 | 10 | |Branch |Status | 11 | |------------|---------| 12 | |master | [![Build Status](https://travis-ci.org/mozilla/moz-sql-parser.svg?branch=master)](https://travis-ci.org/mozilla/moz-sql-parser) | 13 | |dev | [![Build Status](https://travis-ci.org/mozilla/moz-sql-parser.svg?branch=dev)](https://travis-ci.org/mozilla/moz-sql-parser) | 14 | 15 | 16 | ## Problem Statement 17 | 18 | SQL is a familiar language used to access databases. Although, each database vendor has its quirky implementation, the average developer does not know enough SQL to be concerned with those quirks. This familiar core SQL (lowest common denominator, if you will) is useful enough to explore data in primitive ways. It is hoped that, once programmers have reviewed a datastore with basic SQL queries, and they see the value of that data, they will be motivated to use the datastore's native query format. 19 | 20 | ## Objectives 21 | 22 | The primary objective of this library is to convert some subset of [SQL-92](https://en.wikipedia.org/wiki/SQL-92) queries to JSON-izable parse trees. 
A big enough subset to provide superficial data access via SQL, but not so much that we must deal with the document-relational impedance mismatch.
23 | 
24 | ## Non-Objectives
25 | 
26 | * No plans to provide update statements, like `update` or `insert`
27 | * No plans to expand the language to all of SQL:2011
28 | * No plans to provide data access tools
29 | 
30 | 
31 | ## Project Status
32 | 
33 | There are [over 400 tests](https://github.com/mozilla/moz-sql-parser/tree/dev/tests). This parser is good enough for basic usage, including inner queries.
34 | 
35 | You can see the parser in action at [https://sql.telemetry.mozilla.org/](https://sql.telemetry.mozilla.org/) while using the ActiveData datasource.
36 | 
37 | ## Install
38 | 
39 |     pip install moz-sql-parser
40 | 
41 | ## Parsing SQL
42 | 
43 |     >>> from moz_sql_parser import parse
44 |     >>> import json
45 |     >>> json.dumps(parse("select count(1) from jobs"))
46 |     '{"select": {"value": {"count": 1}}, "from": "jobs"}'
47 | 
48 | Each SQL query is parsed to an object: each clause is assigned to an object property of the same name.
49 | 
50 |     >>> json.dumps(parse("select a as hello, b as world from jobs"))
51 |     '{"select": [{"value": "a", "name": "hello"}, {"value": "b", "name": "world"}], "from": "jobs"}'
52 | 
53 | The `SELECT` clause is an array of objects containing `name` and `value` properties.
54 | 
55 | ### Recursion Limit
56 | 
57 | **WARNING!** There is a recursion limit of `1500`. This prevents parsing of complex expressions or deeply nested queries. You can increase the recursion limit *after* you have imported `moz_sql_parser`, and before you `parse`:
58 | 
59 |     >>> from moz_sql_parser import parse
60 |     >>> import sys
61 |     >>> sys.setrecursionlimit(3000)
62 |     >>> parse(complicated_sql)
63 | 
64 | ## Generating SQL
65 | 
66 | You may also generate SQL from a given JSON document. This is done by the formatter, which is still incomplete (Jan 2020).
67 | 
68 |     >>> from moz_sql_parser import format
69 |     >>> format({"from":"test", "select":["a.b", "c"]})
70 |     'SELECT a.b, c FROM test'
71 | 
72 | 
73 | ## Contributing
74 | 
75 | In the event that the parser is not working for you, you can help make this better by simply pasting your SQL (or JSON) into a new issue. Extra points if you describe the problem. Even more points if you submit a PR with a test. If you also submit a fix, then you also have my gratitude.
76 | 
77 | 
78 | ## Run Tests
79 | 
80 | See [the tests directory](https://github.com/mozilla/moz-sql-parser/tree/dev/tests) for instructions on running tests, or writing new ones.
81 | 
82 | ## More about implementation
83 | 
84 | SQL queries are translated to JSON objects: each clause is assigned to an object property of the same name.
85 | 
86 | 
87 |     # SELECT * FROM dual WHERE a>b ORDER BY a+b
88 |     {
89 |         "select": "*",
90 |         "from": "dual",
91 |         "where": {"gt": ["a", "b"]},
92 |         "orderby": {"value": {"add": ["a", "b"]}}
93 |     }
94 | 
95 | Expressions are also objects, with only one property: the name of the operation, whose value holds the (array of) parameters for that operation.
96 | 
97 |     {op: parameters}
98 | 
99 | You can see this pattern in the previous example:
100 | 
101 |     {"gt": ["a","b"]}
102 | 
103 | 
104 | ### Notes
105 | 
106 | * Uses the glorious `pyparsing` library (see https://github.com/pyparsing/pyparsing) to define the grammar, and define the shape of the tokens it generates.
107 | * [sqlparse](https://pypi.python.org/pypi/sqlparse) does not provide a tree, rather a list of tokens.
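
As a further, hedged illustration of the `{op: parameters}` pattern described above, parsing and then re-formatting a query with a few operators looks roughly like this (the exact tree and formatted string may differ slightly between versions):

    >>> from moz_sql_parser import parse, format
    >>> tree = parse("SELECT a + b AS total FROM t WHERE a > 0")
    >>> # tree is plain JSON-compatible data, approximately:
    >>> # {"select": {"value": {"add": ["a", "b"]}, "name": "total"},
    >>> #  "from": "t", "where": {"gt": ["a", 0]}}
    >>> format(tree)
    'SELECT a + b AS total FROM t WHERE a > 0'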
108 | -------------------------------------------------------------------------------- /contribute.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "moz-sql-parser", 3 | "description": "Extract Parse Tree from SQL", 4 | "repository": { 5 | "url": "https://github.com/mozilla/moz-sql-parser", 6 | "license": "MPL2", 7 | "type": "git", 8 | "tests": "https://github.com/mozilla/moz-sql-parser/tree/dev/tests", 9 | "clone": "https://github.com/mozilla/moz-sql-parser.git" 10 | }, 11 | "participate": { 12 | "home": "https://github.com/mozilla/moz-sql-parser", 13 | "docs": "https://github.com/mozilla/moz-sql-parser/docs", 14 | "irc-contacts": [ 15 | "ekyle" 16 | ] 17 | }, 18 | "bugs": { 19 | "list": "https://github.com/mozilla/moz-sql-parser/issues", 20 | "report": "https://github.com/mozilla/moz-sql-parser/issues/new" 21 | }, 22 | "urls": { 23 | "stage": "" 24 | }, 25 | "keywords": [ 26 | "python", 27 | "mozilla" 28 | ] 29 | } 30 | 31 | -------------------------------------------------------------------------------- /docs/Student Project 2019.md: -------------------------------------------------------------------------------- 1 | 2 | # PyParsing Student Project (GSOC 2019) 3 | 4 |
See Student Questions below
5 | 
6 | ## Background
7 | 
8 | `moz-sql-parser` uses the [pyparsing library](https://github.com/pyparsing/pyparsing): this library makes the language specification easy, much like a [PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar). Personally, I think it provides the best parser specification language I have seen anywhere: it takes advantage of Python's operator overloading and visual simplicity to provide a simple-yet-powerful domain-specific language.
9 | 
10 | ## Problem 1
11 | 
12 | Mozilla has a [simple SQL parser](https://github.com/mozilla/moz-sql-parser), but [it does not work for "complex" SQL](https://github.com/mozilla/moz-sql-parser/issues/41). Actually, we can hardly call the SQL "complex" when it breaks with so few tokens.
13 | 
14 | ## Solutions for Problem 1
15 | 
16 | 
17 | Depending on how deep you look, there are three ways this problem can be solved:
18 | 
19 | ### moz-sql-parser should better-define the grammar rules
20 | 
21 | The language specification for infix operators uses too much stack space. To reduce this stack space, the operators (and their operands) should be parsed as an alternating sequence of operators and operands, with some post-processing to assemble the parse tree in precedence order (a small sketch of this approach appears at the end of this section).
22 | 
23 | I do not like this solution because it works around `pyparsing` rather than with it. The grammar gets more complicated without doing anything more. Plus, this type of fix could be made to work in general, inside `pyparsing`, for the benefit of others.
24 | 
25 | 
26 | ### The pyparsing infixNotation is busted
27 | 
28 | The problem is caused by [infixNotation in the pyparsing library](https://github.com/pyparsing/pyparsing/issues/26). If we fix pyparsing's infixNotation method, much like we would have fixed `moz-sql-parser`, then we gain all the same benefits, while also benefiting others who use pyparsing.
29 | 
30 | This is more work, as the pyparsing library will require some refactoring to track more context on the alternating operators/operands for later tree assembly.
31 | 
32 | I *think* this is the most pragmatic solution, but it may just defer the inevitable O(2^N) parsing problems, which will show up in some other parsing sequence, leaving `moz-sql-parser` still slow on "complex" SQL.
33 | 
34 | ### Backtrack parsers have inherent O(2^n) issues
35 | 
36 | **This might be invalid: `infixNotation()` may already prevent backtracking, at the expense of consuming stack space.**
37 | 
38 | Fundamentally, the problem is caused by backtracking parsers, which run the risk of O(2^n) parsing times. `infixNotation()` generates a number of left-recursion parsing rules, which cause the O(2^n) parsing times. We could attempt to solve this: the known solution is to [remove the left recursion](https://en.wikipedia.org/wiki/Left_recursion#Removing_left_recursion). This will result in a significantly faster parser with much less stack usage.
39 | 
40 | But this solution is complicated. Removing left recursion changes the parser significantly, which results in a different parse tree. It is not clear how easy it is to implement "suitable bookkeeping" (see the Wikipedia link) to fix that problem. This is made more complicated by the pyparsing code, which may have a design that directly conflicts with this strategy: it may be easier to write a new parser generator.
41 | 
42 | Going with this solution requires a student with exceptional skills.
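
Both of the first two solutions boil down to the same idea: consume the expression as a flat, alternating sequence of operands and operators, then assemble the tree afterwards in precedence order. Here is a minimal, library-agnostic sketch of that idea; it is illustrative only, not moz-sql-parser or pyparsing code, and note that in this table a *larger* number binds tighter (the opposite of the table in `moz_sql_parser/keywords.py`):

    # Illustrative only: assemble {op: [left, right]} trees from a flat
    # [operand, op, operand, op, ...] sequence, in precedence order.
    PRECEDENCE = {"or": 1, "and": 2, "=": 3, "<": 4, ">": 4, "+": 5, "-": 5, "*": 6, "/": 6}

    def climb(tokens, min_prec=1):
        left = tokens.pop(0)                           # first operand
        while tokens and PRECEDENCE[tokens[0]] >= min_prec:
            op = tokens.pop(0)                         # next operator
            right = climb(tokens, PRECEDENCE[op] + 1)  # let tighter operators bind first
            left = {op: [left, right]}                 # left-associative accumulation
        return left

    print(climb(["a", "+", "b", "*", "c", ">", "d"]))
    # {'>': [{'+': ['a', {'*': ['b', 'c']}]}, 'd']}

The recursion depth here is bounded by the number of distinct precedence levels, not by the number of operators in the grammar or in the input, which is the point of the approach.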
43 | 
44 | 
45 | ## GSOC
46 | 
47 | The project steps would look something like:
48 | 
49 | * Fork the pyparsing code, and use it as a basis for your work
50 | * Write a prototype parser re-writer to remove left recursion, including the "bookkeeping" required to assemble the final parse tree (**this is the hard part**)
51 | * At this point we have a better parser.
52 | * Split the project up into a number of refactoring PRs for the pyparsing project, separating the many DSL features from the core parsing logic and merging the fork back into pyparsing.
53 | * One final PR to pyparsing that will replace the old parser with the new one
54 | 
55 | 
56 | ## Problem 2
57 | 
58 | The runtime of parsing an SQL statement with the Mozilla SQL Parser is relatively slow. When compared with the popular [sqlparse](https://github.com/andialbrecht/sqlparse) Python project, getting an initial result with Mozilla's parser is upwards of 10x slower. (However, the downside of sqlparse is that the output format is simply a list of tokens, which would require extensive post-processing to generate the desired tree structure that moz-sql-parser provides.)
59 | 
60 | ## Solutions for Problem 2
61 | 
62 | There are probably multiple areas where the runtime can be improved; here are a few options:
63 | 
64 | ### Solve problem 1
65 | The solutions for problem 1 may also improve the runtime for parsing, at least for simpler SQL statements.
66 | 
67 | ### Revise how pyparsing is used
68 | The pyparsing project recently included a list of [performance tips](https://github.com/pyparsing/pyparsing/wiki/Performance-Tips), and some of these can probably be used to speed up the Mozilla parser.
69 | 
70 | ### Improvements to pyparsing
71 | The pyparsing library could benefit from optimizations. This includes cleaning up the basic data structures it uses: using fewer attributes, not copying whole objects, using `__slots__`, etc. Additionally, some of the code could be ported to Cython or C extensions, where certain operations can be much faster in a lower-level language.
72 | 
73 | ## Student Questions
74 | 
75 | I will be adding questions here:
76 | 
77 | 
78 | **April 3rd, 2019**
79 | 
80 | **How can I make the parsing more efficient?**
81 | 
82 | Look at `infixNotation()` and notice how it handles a single operator. We will focus on just `opAssoc.LEFT` and `arity==2`, because the rest have a similar problem:
83 | 
84 | https://github.com/pyparsing/pyparsing/blob/0d88a303a7f7e574bfc0c06ad6f84ca8c9d4d248/pyparsing.py#L5661
85 | 
86 |     matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
87 | 
88 | I am not sure what the first term `_FB(lastExpr + opExpr + lastExpr)` is doing; I am assuming it optimizes. The second term does the parsing. Here, I have simplified it:
89 | 
90 |     matchExpr = Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
91 | 
92 | Notice that the `lastExpr` is from the last iteration of the loop:
93 | 
94 |     thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
95 |     lastExpr = thisExpr
96 | 
97 | which means `lastExpr` is recursive, and as deep as there are operators (moz-sql-parser has ~19 operators). Let me simplify the expression more, and put it in terms of `N` (`N` is the number of operators available in SQL expressions).
98 | 
99 |     lastExpr[N] = lastExpr[N-1] + OneOrMore( opExpr + lastExpr[N-1] )
100 | 
101 | For any operator, this parser will explore the 2^19 possible branches**.
If you also consider that each step consumes some amount of stack space to parse the base expressions, then you can imagine why the parser runs out of stack space easily.
102 | 
103 | ** Until now, I had thought the parser tried all 2^19 branches, but that may not be the case; the parser is depth-first, so logarithmic with respect to the search space. Therefore, the runtime may be a respectable `O(N*E)`, where `N` is the number of possible operators, and `E` is the number of observed operators. The main problem is still the heavy use of stack space to parse the possible expressions.
104 | 
105 | 
106 | **February 14th, 2019**
107 | 
108 | 
109 | **Any tasks you want me to complete?**
110 | 
111 | The first step in that project is to refactor the `pyparsing.py` code. By refactoring you will get a sense of the codebase. Your first couple of attempts may fail as you learn what the code is about. In those cases, go back to the master branch and start a new branch.
112 | 
113 | * A simple refactoring is splitting the single file into many, and ensuring it still works; this helps with understanding what parts depend on what other parts, and may reveal where the complicated logic is. Some things will move to new files easily; those are usually not the core of the program. As you split into files you will notice a certain number of methods are tangled up with each other: that is where the real logic of the program is.
114 | * Another refactoring is cleaning up the token attributes: right now they are dynamically assigned (because Python is awesome), but standardizing them and turning them into `__slots__` will make the program a bit faster (see the sketch at the end of this document).
115 | * There are a number of "examples"; they should be converted to tests so they are easy to run. Plus, the tests will help ensure your refactorings are good.
116 | 
117 | 
118 | **What will the selection be based on?**
119 | 
120 | Beyond the GSOC requirements, the student will be accepted if they can convince me they can make the parser faster. I expect you to show knowledge of *the theory of computation*, and demonstrate you can handle complicated code. If your plan points out details that I did not think of, then you will have proven you understand the domain.
121 | 
122 | 
123 | **Is there anything I can do now to improve my chances?**
124 | 
125 | Refactoring is probably best; it will give you a head start on `pyparsing` itself, and may reveal the details of implementation you can put in your proposal to impress me.
126 | 
127 | 
128 | **What are the very first steps?**
129 | 
130 | 1. fork the `pyparsing` repo,
131 | 2. clone it locally,
132 | 3. make a branch,
133 | 4. start a refactoring, and
134 | 5. make a pull request **on your own repo**, and ask me for a "review"
135 | 
136 | The GitHub pull request will allow us to discuss the code more easily.
137 | 
138 | 
139 | **What if I get the project done early?**
140 | 
141 | GSOC demands you work all summer. So, no matter how far you get, you still must put in summer hours. **If** you get accepted to GSOC, and **if** you are successful, then I will just give you more work.
142 | 
143 | 
144 | **Can you guide me, even before the application process?**
145 | 
146 | Yes, I am happy to provide feedback on any effort you put towards this project, or any others. So, even if you do not get admitted to GSOC, I can provide you with code reviews, suggestions, and evaluations.
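
As a small, self-contained illustration of the `__slots__` refactoring mentioned in the February 14th answer above: the class names here are made up (this is not pyparsing's actual token class), and the byte counts vary by Python version.

    import sys

    class DynamicToken:                      # attributes stored in a per-instance __dict__
        def __init__(self, name, value):
            self.name = name
            self.value = value

    class SlottedToken:                      # attributes stored in fixed slots, no __dict__
        __slots__ = ("name", "value")
        def __init__(self, name, value):
            self.name = name
            self.value = value

    d = DynamicToken("ident", "a")
    s = SlottedToken("ident", "a")
    print(sys.getsizeof(d) + sys.getsizeof(d.__dict__))  # instance plus its __dict__
    print(sys.getsizeof(s))                              # noticeably smaller
    print(hasattr(s, "__dict__"))                        # False

Attribute access on slotted instances also tends to be a little faster, which adds up when a parse creates a very large number of token objects.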
147 | 148 | 149 | -------------------------------------------------------------------------------- /moz_sql_parser/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 6 | # 7 | # Author: Kyle Lahnakoski (kyle@lahnakoski.com) 8 | # 9 | 10 | from __future__ import absolute_import, division, unicode_literals 11 | 12 | from collections import Mapping 13 | import json 14 | from threading import Lock 15 | 16 | from mo_future import binary_type, items, number_types, text 17 | from pyparsing import ParseException, ParseResults 18 | 19 | from moz_sql_parser.debugs import all_exceptions 20 | from moz_sql_parser.sql_parser import SQLParser 21 | 22 | 23 | def __deploy__(): 24 | # ONLY MEANT TO BE RUN FOR DEPLOYMENT 25 | from mo_files import File 26 | source_file = File("moz_sql_parser/sql_parser.py") 27 | lines = source_file.read().split("\n") 28 | lines = [ 29 | "sys.setrecursionlimit(1500)" if line.startswith("sys.setrecursionlimit") else line 30 | for line in lines 31 | ] 32 | source_file.write("\n".join(lines)) 33 | 34 | 35 | parseLocker = Lock() # ENSURE ONLY ONE PARSING AT A TIME 36 | 37 | 38 | def parse(sql): 39 | with parseLocker: 40 | try: 41 | all_exceptions.clear() 42 | sql = sql.rstrip().rstrip(";") 43 | parse_result = SQLParser.parseString(sql, parseAll=True) 44 | return _scrub(parse_result) 45 | except Exception as e: 46 | if isinstance(e, ParseException) and e.msg == "Expected end of text": 47 | problems = all_exceptions.get(e.loc, []) 48 | expecting = [ 49 | f 50 | for f in (set(p.msg.lstrip("Expected").strip() for p in problems)-{"Found unwanted token"}) 51 | if not f.startswith("{") 52 | ] 53 | raise ParseException(sql, e.loc, "Expecting one of (" + (", ".join(expecting)) + ")") 54 | raise 55 | 56 | 57 | def format(json, **kwargs): 58 | from moz_sql_parser.formatting import Formatter 59 | return Formatter(**kwargs).format(json) 60 | 61 | 62 | def _scrub(result): 63 | if isinstance(result, text): 64 | return result 65 | elif isinstance(result, binary_type): 66 | return result.decode('utf8') 67 | elif isinstance(result, number_types): 68 | return result 69 | elif not result: 70 | return {} 71 | elif isinstance(result, (list, ParseResults)): 72 | if not result: 73 | return None 74 | elif len(result) == 1: 75 | return _scrub(result[0]) 76 | else: 77 | output = [ 78 | rr 79 | for r in result 80 | for rr in [_scrub(r)] 81 | if rr != None 82 | ] 83 | # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL 84 | if all(isinstance(r, number_types) for r in output): 85 | pass 86 | elif all(isinstance(r, number_types) or (isinstance(r, Mapping) and "literal" in r.keys()) for r in output): 87 | output = {"literal": [r['literal'] if isinstance(r, Mapping) else r for r in output]} 88 | return output 89 | elif not items(result): 90 | return {} 91 | else: 92 | return { 93 | k: vv 94 | for k, v in result.items() 95 | for vv in [_scrub(v)] 96 | if vv != None 97 | } 98 | 99 | 100 | _ = json.dumps 101 | 102 | 103 | __all__ = [ 104 | 'parse', 105 | 'format' 106 | ] 107 | -------------------------------------------------------------------------------- /moz_sql_parser/debugs.py: -------------------------------------------------------------------------------- 1 | DEBUG = False 2 | 3 | all_exceptions = {} 4 | 5 | 6 | def 
record_exception(instring, loc, expr, exc): 7 | # if DEBUG: 8 | # print ("Exception raised:" + _ustr(exc)) 9 | es = all_exceptions.setdefault(loc, []) 10 | es.append(exc) 11 | 12 | 13 | def nothing(*args): 14 | pass 15 | 16 | 17 | if DEBUG: 18 | debug = (None, None, None) 19 | else: 20 | debug = (nothing, nothing, record_exception) 21 | -------------------------------------------------------------------------------- /moz_sql_parser/formatting.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 6 | # 7 | # Author: Beto Dealmeida (beto@dealmeida.net) 8 | # 9 | 10 | from __future__ import absolute_import 11 | from __future__ import division 12 | from __future__ import unicode_literals 13 | 14 | import re 15 | 16 | from mo_future import string_types, text, first, long, is_text 17 | 18 | from moz_sql_parser.keywords import RESERVED, reserved_keywords, join_keywords, precedence, binary_ops 19 | 20 | VALID = re.compile(r'^[a-zA-Z_]\w*$') 21 | 22 | 23 | def should_quote(identifier): 24 | """ 25 | Return true if a given identifier should be quoted. 26 | 27 | This is usually true when the identifier: 28 | 29 | - is a reserved word 30 | - contain spaces 31 | - does not match the regex `[a-zA-Z_]\\w*` 32 | 33 | """ 34 | return ( 35 | identifier != '*' and ( 36 | not VALID.match(identifier) or identifier in reserved_keywords)) 37 | 38 | 39 | def split_field(field): 40 | """ 41 | RETURN field AS ARRAY OF DOT-SEPARATED FIELDS 42 | """ 43 | if field == "." or field==None: 44 | return [] 45 | elif is_text(field) and "." in field: 46 | if field.startswith(".."): 47 | remainder = field.lstrip(".") 48 | back = len(field) - len(remainder) - 1 49 | return [-1]*back + [k.replace("\a", ".") for k in remainder.replace("\\.", "\a").split(".")] 50 | else: 51 | return [k.replace("\a", ".") for k in field.replace("\\.", "\a").split(".")] 52 | else: 53 | return [field] 54 | 55 | 56 | def join_field(path): 57 | """ 58 | RETURN field SEQUENCE AS STRING 59 | """ 60 | output = ".".join([f.replace(".", "\\.") for f in path if f != None]) 61 | return output if output else "." 62 | 63 | # potent = [f for f in path if f != "."] 64 | # if not potent: 65 | # return "." 66 | # return ".".join([f.replace(".", "\\.") for f in potent]) 67 | 68 | 69 | 70 | def escape(ident, ansi_quotes, should_quote): 71 | """ 72 | Escape identifiers. 73 | 74 | ANSI uses single quotes, but many databases use back quotes. 
75 | 76 | """ 77 | def esc(identifier): 78 | if not should_quote(identifier): 79 | return identifier 80 | 81 | quote = '"' if ansi_quotes else '`' 82 | identifier = identifier.replace(quote, 2*quote) 83 | return '{0}{1}{2}'.format(quote, identifier, quote) 84 | return join_field(esc(f) for f in split_field(ident)) 85 | 86 | 87 | def Operator(op): 88 | prec = precedence[binary_ops[op]] 89 | op = ' {0} '.format(op).upper() 90 | 91 | def func(self, json): 92 | acc = [] 93 | 94 | for v in json: 95 | sql = self.dispatch(v) 96 | if isinstance(v, (text, int, float, long)): 97 | acc.append(sql) 98 | continue 99 | 100 | p = precedence.get(first(v.keys())) 101 | if p is None: 102 | acc.append(sql) 103 | continue 104 | if p>=prec: 105 | acc.append("(" + sql + ")") 106 | else: 107 | acc.append(sql) 108 | return op.join(acc) 109 | return func 110 | 111 | 112 | class Formatter: 113 | 114 | clauses = [ 115 | 'with_', 116 | 'select', 117 | 'from_', 118 | 'where', 119 | 'groupby', 120 | 'having', 121 | 'orderby', 122 | 'limit', 123 | 'offset', 124 | ] 125 | 126 | # simple operators 127 | _concat = Operator('||') 128 | _mul = Operator('*') 129 | _div = Operator('/') 130 | _mod = Operator('%') 131 | _add = Operator('+') 132 | _sub = Operator('-') 133 | _neq = Operator('<>') 134 | _gt = Operator('>') 135 | _lt = Operator('<') 136 | _gte = Operator('>=') 137 | _lte = Operator('<=') 138 | _eq = Operator('=') 139 | _or = Operator('or') 140 | _and = Operator('and') 141 | _binary_and = Operator("&") 142 | _binary_or = Operator("|") 143 | 144 | def __init__(self, ansi_quotes=True, should_quote=should_quote): 145 | self.ansi_quotes = ansi_quotes 146 | self.should_quote = should_quote 147 | 148 | def format(self, json): 149 | if 'union' in json: 150 | return self.union(json['union']) 151 | elif 'union_all' in json: 152 | return self.union_all(json['union_all']) 153 | else: 154 | return self.query(json) 155 | 156 | def dispatch(self, json): 157 | if isinstance(json, list): 158 | return self.delimited_list(json) 159 | if isinstance(json, dict): 160 | if len(json) == 0: 161 | return '' 162 | elif 'value' in json: 163 | return self.value(json) 164 | elif 'from' in json: 165 | # Nested queries 166 | return '({})'.format(self.format(json)) 167 | elif 'select' in json: 168 | # Nested queries 169 | return '({})'.format(self.format(json)) 170 | else: 171 | return self.op(json) 172 | if isinstance(json, string_types): 173 | return escape(json, self.ansi_quotes, self.should_quote) 174 | 175 | return text(json) 176 | 177 | def delimited_list(self, json): 178 | return ', '.join(self.dispatch(element) for element in json) 179 | 180 | def value(self, json): 181 | parts = [self.dispatch(json['value'])] 182 | if 'name' in json: 183 | parts.extend(['AS', self.dispatch(json['name'])]) 184 | return ' '.join(parts) 185 | 186 | def op(self, json): 187 | if 'on' in json: 188 | return self._on(json) 189 | 190 | if len(json) > 1: 191 | raise Exception('Operators should have only one key!') 192 | key, value = list(json.items())[0] 193 | 194 | # check if the attribute exists, and call the corresponding method; 195 | # note that we disallow keys that start with `_` to avoid giving access 196 | # to magic methods 197 | attr = '_{0}'.format(key) 198 | if hasattr(self, attr) and not key.startswith('_'): 199 | method = getattr(self, attr) 200 | return method(value) 201 | 202 | # treat as regular function call 203 | if isinstance(value, dict) and len(value) == 0: 204 | return key.upper() + "()" # NOT SURE IF AN EMPTY dict SHOULD BE DELT WITH HERE, 
OR IN self.dispatch() 205 | else: 206 | return '{0}({1})'.format(key.upper(), self.dispatch(value)) 207 | 208 | def _binary_not(self, value): 209 | return '~{0}'.format(self.dispatch(value)) 210 | 211 | def _exists(self, value): 212 | return '{0} IS NOT NULL'.format(self.dispatch(value)) 213 | 214 | def _missing(self, value): 215 | return '{0} IS NULL'.format(self.dispatch(value)) 216 | 217 | def _like(self, pair): 218 | return '{0} LIKE {1}'.format(self.dispatch(pair[0]), self.dispatch(pair[1])) 219 | 220 | def _nlike(self, pair): 221 | return '{0} NOT LIKE {1}'.format(self.dispatch(pair[0]), self.dispatch(pair[1])) 222 | 223 | def _is(self, pair): 224 | return '{0} IS {1}'.format(self.dispatch(pair[0]), self.dispatch(pair[1])) 225 | 226 | def _in(self, json): 227 | valid = self.dispatch(json[1]) 228 | # `(10, 11, 12)` does not get parsed as literal, so it's formatted as 229 | # `10, 11, 12`. This fixes it. 230 | if not valid.startswith('('): 231 | valid = '({0})'.format(valid) 232 | 233 | return '{0} IN {1}'.format(json[0], valid) 234 | 235 | def _nin(self, json): 236 | valid = self.dispatch(json[1]) 237 | # `(10, 11, 12)` does not get parsed as literal, so it's formatted as 238 | # `10, 11, 12`. This fixes it. 239 | if not valid.startswith('('): 240 | valid = '({0})'.format(valid) 241 | 242 | return '{0} NOT IN {1}'.format(json[0], valid) 243 | 244 | def _case(self, checks): 245 | parts = ['CASE'] 246 | for check in checks: 247 | if isinstance(check, dict): 248 | if 'when' in check and 'then' in check: 249 | parts.extend(['WHEN', self.dispatch(check['when'])]) 250 | parts.extend(['THEN', self.dispatch(check['then'])]) 251 | else: 252 | parts.extend(['ELSE', self.dispatch(check)]) 253 | else: 254 | parts.extend(['ELSE', self.dispatch(check)]) 255 | parts.append('END') 256 | return ' '.join(parts) 257 | 258 | def _literal(self, json): 259 | if isinstance(json, list): 260 | return '({0})'.format(', '.join(self._literal(v) for v in json)) 261 | elif isinstance(json, string_types): 262 | return "'{0}'".format(json.replace("'", "''")) 263 | else: 264 | return str(json) 265 | 266 | def _between(self, json): 267 | return '{0} BETWEEN {1} AND {2}'.format(self.dispatch(json[0]), self.dispatch(json[1]), self.dispatch(json[2])) 268 | 269 | def _not_between(self, json): 270 | return '{0} NOT BETWEEN {1} AND {2}'.format(self.dispatch(json[0]), self.dispatch(json[1]), self.dispatch(json[2])) 271 | 272 | def _on(self, json): 273 | detected_join = join_keywords & set(json.keys()) 274 | if len(detected_join) == 0: 275 | raise Exception( 276 | 'Fail to detect join type! 
Detected: "{}" Except one of: "{}"'.format( 277 | [on_keyword for on_keyword in json if on_keyword != 'on'][0], 278 | '", "'.join(join_keywords) 279 | ) 280 | ) 281 | 282 | join_keyword = detected_join.pop() 283 | 284 | return '{0} {1} ON {2}'.format( 285 | join_keyword.upper(), self.dispatch(json[join_keyword]), self.dispatch(json['on']) 286 | ) 287 | 288 | def union(self, json): 289 | return ' UNION '.join(self.query(query) for query in json) 290 | 291 | def union_all(self, json): 292 | return ' UNION ALL '.join(self.query(query) for query in json) 293 | 294 | def query(self, json): 295 | return ' '.join( 296 | part 297 | for clause in self.clauses 298 | for part in [getattr(self, clause)(json)] 299 | if part 300 | ) 301 | 302 | def with_(self, json): 303 | if 'with' in json: 304 | with_ = json['with'] 305 | if not isinstance(with_, list): 306 | with_ = [with_] 307 | parts = ', '.join( 308 | '{0} AS {1}'.format(part['name'], self.dispatch(part['value'])) 309 | for part in with_ 310 | ) 311 | return 'WITH {0}'.format(parts) 312 | 313 | def select(self, json): 314 | if 'select' in json: 315 | return 'SELECT {0}'.format(self.dispatch(json['select'])) 316 | 317 | def from_(self, json): 318 | is_join = False 319 | if 'from' in json: 320 | from_ = json['from'] 321 | if 'union' in from_: 322 | return self.union(from_['union']) 323 | if not isinstance(from_, list): 324 | from_ = [from_] 325 | 326 | parts = [] 327 | for token in from_: 328 | if join_keywords & set(token): 329 | is_join = True 330 | parts.append(self.dispatch(token)) 331 | joiner = ' ' if is_join else ', ' 332 | rest = joiner.join(parts) 333 | return 'FROM {0}'.format(rest) 334 | 335 | def where(self, json): 336 | if 'where' in json: 337 | return 'WHERE {0}'.format(self.dispatch(json['where'])) 338 | 339 | def groupby(self, json): 340 | if 'groupby' in json: 341 | return 'GROUP BY {0}'.format(self.dispatch(json['groupby'])) 342 | 343 | def having(self, json): 344 | if 'having' in json: 345 | return 'HAVING {0}'.format(self.dispatch(json['having'])) 346 | 347 | def orderby(self, json): 348 | if 'orderby' in json: 349 | orderby = json['orderby'] 350 | if isinstance(orderby, dict): 351 | orderby = [orderby] 352 | return 'ORDER BY {0}'.format(','.join([ 353 | '{0} {1}'.format(self.dispatch(o), o.get('sort', '').upper()).strip() 354 | for o in orderby 355 | ])) 356 | 357 | def limit(self, json): 358 | if 'limit' in json: 359 | if json['limit']: 360 | return 'LIMIT {0}'.format(self.dispatch(json['limit'])) 361 | 362 | def offset(self, json): 363 | if 'offset' in json: 364 | return 'OFFSET {0}'.format(self.dispatch(json['offset'])) 365 | -------------------------------------------------------------------------------- /moz_sql_parser/keywords.py: -------------------------------------------------------------------------------- 1 | from pyparsing import Keyword, MatchFirst 2 | 3 | from moz_sql_parser.debugs import debug 4 | 5 | sql_reserved_words = [ 6 | "AND", 7 | "AS", 8 | "ASC", 9 | "BETWEEN", 10 | "CASE", 11 | "COLLATE_NOCASE", 12 | "CROSS_JOIN", 13 | "DESC", 14 | "END", 15 | "ELSE", 16 | "FROM", 17 | "FULL_JOIN", 18 | "FULL_OUTER_JOIN", 19 | "GROUP_BY", 20 | "HAVING", 21 | "IN", 22 | "INNER_JOIN", 23 | "IS", 24 | "IS_NOT", 25 | "JOIN", 26 | "LEFT_JOIN", 27 | "LEFT_OUTER_JOIN", 28 | "LIKE", 29 | "LIMIT", 30 | "NOT_BETWEEN", 31 | "NOT_IN", 32 | "NOT_LIKE", 33 | "OFFSET", 34 | "ON", 35 | "OR", 36 | "ORDER_BY", 37 | "RESERVED", 38 | "RIGHT_JOIN", 39 | "RIGHT_OUTER_JOIN", 40 | "SELECT", 41 | "THEN", 42 | "UNION", 43 | "UNION_ALL", 44 | 
"USING", 45 | "WITH", 46 | "WHEN", 47 | "WHERE", 48 | ] 49 | 50 | reserved_keywords = [] 51 | for name in sql_reserved_words: 52 | n = name.lower().replace("_", " ") 53 | value = locals()[name] = ( 54 | Keyword(n, caseless=True).setName(n).setDebugActions(*debug) 55 | ) 56 | reserved_keywords.append(value) 57 | RESERVED = MatchFirst(reserved_keywords) 58 | 59 | join_keywords = { 60 | "join", 61 | "full join", 62 | "cross join", 63 | "inner join", 64 | "left join", 65 | "right join", 66 | "full outer join", 67 | "right outer join", 68 | "left outer join", 69 | } 70 | 71 | unary_ops = {"-": "neg", "~": "binary_not"} 72 | 73 | binary_ops = { 74 | "||": "concat", 75 | "*": "mul", 76 | "/": "div", 77 | "%": "mod", 78 | "+": "add", 79 | "-": "sub", 80 | "&": "binary_and", 81 | "|": "binary_or", 82 | "<": "lt", 83 | "<=": "lte", 84 | ">": "gt", 85 | ">=": "gte", 86 | "=": "eq", 87 | "==": "eq", 88 | "!=": "neq", 89 | "<>": "neq", 90 | "not in": "nin", 91 | "is not": "neq", 92 | "is": "eq", 93 | "not like": "nlike", 94 | "not between": "not_between", 95 | "or": "or", 96 | "and": "and", 97 | } 98 | 99 | precedence = { 100 | "concat": 1, 101 | "mul": 2, 102 | "div": 2, 103 | "mod": 2, 104 | "add": 3, 105 | "sub": 3, 106 | "binary_and": 4, 107 | "binary_or": 4, 108 | "gte": 5, 109 | "lte": 5, 110 | "lt": 5, 111 | "gt": 6, 112 | "eq": 7, 113 | "neq": 7, 114 | "between": 8, 115 | "not_between": 8, 116 | "in": 8, 117 | "nin": 8, 118 | "is": 8, 119 | "like": 8, 120 | "nlike": 8, 121 | "and": 10, 122 | "or": 11, 123 | } 124 | 125 | durations = [ 126 | "milliseconds", 127 | "seconds", 128 | "minutes", 129 | "hours", 130 | "days", 131 | "weeks", 132 | "months", 133 | "years", 134 | ] 135 | -------------------------------------------------------------------------------- /moz_sql_parser/sql_parser.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
6 | # 7 | # Author: Kyle Lahnakoski (kyle@lahnakoski.com) 8 | # 9 | 10 | from __future__ import absolute_import, division, unicode_literals 11 | 12 | import ast 13 | import operator 14 | import sys 15 | 16 | from mo_future import reduce, text 17 | from pyparsing import Combine, Forward, Group, Keyword, Literal, Optional, ParserElement, Regex, Word, ZeroOrMore, \ 18 | alphanums, alphas, delimitedList, infixNotation, opAssoc, restOfLine 19 | 20 | from moz_sql_parser.debugs import debug 21 | from moz_sql_parser.keywords import AND, AS, ASC, BETWEEN, CASE, COLLATE_NOCASE, CROSS_JOIN, DESC, ELSE, END, FROM, \ 22 | FULL_JOIN, FULL_OUTER_JOIN, GROUP_BY, HAVING, IN, INNER_JOIN, IS, IS_NOT, JOIN, LEFT_JOIN, LEFT_OUTER_JOIN, LIKE, \ 23 | LIMIT, NOT_BETWEEN, NOT_IN, NOT_LIKE, OFFSET, ON, OR, ORDER_BY, RESERVED, RIGHT_JOIN, RIGHT_OUTER_JOIN, SELECT, \ 24 | THEN, UNION, UNION_ALL, USING, WHEN, WHERE, binary_ops, unary_ops, WITH, durations 25 | 26 | ParserElement.enablePackrat() 27 | 28 | # PYPARSING USES A LOT OF STACK SPACE 29 | sys.setrecursionlimit(3000) 30 | 31 | IDENT_CHAR = alphanums + "@_$" 32 | 33 | KNOWN_OPS = [ 34 | # https://www.sqlite.org/lang_expr.html 35 | Literal("||").setName("concat").setDebugActions(*debug), 36 | ( 37 | Literal("*").setName("mul") | 38 | Literal("/").setName("div") | 39 | Literal("%").setName("mod") 40 | ).setDebugActions(*debug), 41 | ( 42 | Literal("+").setName("add") | 43 | Literal("-").setName("sub") 44 | ).setDebugActions(*debug), 45 | Literal("&").setName("binary_and").setDebugActions(*debug), 46 | Literal("|").setName("binary_or").setDebugActions(*debug), 47 | ( 48 | Literal(">=").setName("gte") | 49 | Literal("<=").setName("lte") | 50 | Literal("<").setName("lt") | 51 | Literal(">").setName("gt") 52 | ).setDebugActions(*debug), 53 | ( 54 | Literal("==").setName("eq") | 55 | Literal("!=").setName("neq") | 56 | Literal("<>").setName("neq") | 57 | Literal("=").setName("eq") 58 | ).setDebugActions(*debug), 59 | 60 | (BETWEEN.setName("between").setDebugActions(*debug), AND), 61 | (NOT_BETWEEN.setName("not_between").setDebugActions(*debug), AND), 62 | IN.setName("in").setDebugActions(*debug), 63 | NOT_IN.setName("nin").setDebugActions(*debug), 64 | IS_NOT.setName("neq").setDebugActions(*debug), 65 | IS.setName("is").setDebugActions(*debug), 66 | LIKE.setName("like").setDebugActions(*debug), 67 | NOT_LIKE.setName("nlike").setDebugActions(*debug), 68 | AND.setName("and").setDebugActions(*debug), 69 | OR.setName("or").setDebugActions(*debug) 70 | ] 71 | 72 | def to_json_operator(instring, tokensStart, retTokens): 73 | # ARRANGE INTO {op: params} FORMAT 74 | tok = retTokens[0] 75 | op = tok[1] 76 | clean_op = op.lower() 77 | clean_op = binary_ops.get(clean_op, clean_op) 78 | 79 | for o in KNOWN_OPS: 80 | if isinstance(o, tuple): 81 | # TRINARY OPS 82 | if o[0].matches(op): 83 | return {clean_op: [tok[0], tok[2], tok[4]]} 84 | elif o.matches(op): 85 | break 86 | else: 87 | if op == COLLATE_NOCASE.match: 88 | op = COLLATE_NOCASE.name 89 | return {op: tok[0]} 90 | else: 91 | raise Exception("not found") 92 | 93 | if clean_op == "eq": 94 | if tok[2] == "null": 95 | return {"missing": tok[0]} 96 | elif tok[0] == "null": 97 | return {"missing": tok[2]} 98 | elif clean_op == "neq": 99 | if tok[2] == "null": 100 | return {"exists": tok[0]} 101 | elif tok[0] == "null": 102 | return {"exists": tok[2]} 103 | elif clean_op == "is": 104 | if tok[2] == 'null': 105 | return {"missing": tok[0]} 106 | else: 107 | return {"exists": tok[0]} 108 | 109 | 110 | operands = [tok[0], tok[2]] 
111 | simple = {clean_op: operands} 112 | if len(tok) <= 3: 113 | return simple 114 | 115 | if clean_op in {"add", "mul", "and", "or"}: 116 | # ACCUMULATE SUBSEQUENT, IDENTICAL OPS 117 | for i in range(3, len(tok), 2): 118 | if tok[i] != op: 119 | return to_json_operator(None, None, [[simple] + tok[i:]]) 120 | else: 121 | operands.append(tok[i+1]) 122 | return simple 123 | else: 124 | # SIMPLE BINARY 125 | return to_json_operator(None, None, [[simple] + tok[3:]]) 126 | 127 | 128 | def to_json_call(instring, tokensStart, retTokens): 129 | # ARRANGE INTO {op: params} FORMAT 130 | tok = retTokens 131 | op = tok.op.lower() 132 | op = unary_ops.get(op, op) 133 | 134 | params = tok.params 135 | if not params: 136 | params = None 137 | elif len(params) == 1: 138 | params = params[0] 139 | return {op: params} 140 | 141 | 142 | def to_case_call(instring, tokensStart, retTokens): 143 | tok = retTokens 144 | cases = list(tok.case) 145 | elze = getattr(tok, "else", None) 146 | if elze: 147 | cases.append(elze) 148 | return {"case": cases} 149 | 150 | 151 | def to_date_call(instring, tokensStart, retTokens): 152 | return {"date": retTokens.params} 153 | 154 | 155 | def to_interval_call(instring, tokensStart, retTokens): 156 | # ARRANGE INTO {interval: params} FORMAT 157 | return {"interval": [retTokens['count'], retTokens['duration'][:-1]]} 158 | 159 | 160 | def to_when_call(instring, tokensStart, retTokens): 161 | tok = retTokens 162 | return {"when": tok.when, "then":tok.then} 163 | 164 | 165 | def to_join_call(instring, tokensStart, retTokens): 166 | tok = retTokens 167 | 168 | if tok.join.name: 169 | output = {tok.op: {"name": tok.join.name, "value": tok.join.value}} 170 | else: 171 | output = {tok.op: tok.join} 172 | 173 | if tok.on: 174 | output['on'] = tok.on 175 | 176 | if tok.using: 177 | output['using'] = tok.using 178 | return output 179 | 180 | 181 | def to_select_call(instring, tokensStart, retTokens): 182 | tok = retTokens[0].asDict() 183 | 184 | if tok.get('value')[0][0] == '*': 185 | return '*' 186 | else: 187 | return tok 188 | 189 | 190 | def to_union_call(instring, tokensStart, retTokens): 191 | tok = retTokens[0].asDict() 192 | unions = tok['from']['union'] 193 | if len(unions) == 1: 194 | output = unions[0] 195 | else: 196 | sources = [unions[i] for i in range(0, len(unions), 2)] 197 | operators = [unions[i] for i in range(1, len(unions), 2)] 198 | op = operators[0].lower().replace(" ", "_") 199 | if any(o.lower().replace(" ", "_") != op for o in operators[1:]): 200 | raise Exception("Expecting all \"union all\" or all \"union\", not some combination") 201 | 202 | if not tok.get('orderby') and not tok.get('limit'): 203 | return {op: sources} 204 | else: 205 | output = {"from": {op: sources}} 206 | 207 | if tok.get('orderby'): 208 | output["orderby"] = tok.get('orderby') 209 | if tok.get('limit'): 210 | output["limit"] = tok.get('limit') 211 | return output 212 | 213 | 214 | def to_with_clause(instring, tokensStart, retTokens): 215 | tok = retTokens[0] 216 | query = tok['query'][0] 217 | if tok['with']: 218 | assignments = [ 219 | {"name": w.name, "value": w.value[0]} 220 | for w in tok['with'] 221 | ] 222 | query['with'] = assignments 223 | return query 224 | 225 | 226 | def unquote(instring, tokensStart, retTokens): 227 | val = retTokens[0] 228 | if val.startswith("'") and val.endswith("'"): 229 | val = "'"+val[1:-1].replace("''", "\\'")+"'" 230 | # val = val.replace(".", "\\.") 231 | elif val.startswith('"') and val.endswith('"'): 232 | val = '"'+val[1:-1].replace('""', 
'\\"')+'"' 233 | # val = val.replace(".", "\\.") 234 | elif val.startswith('`') and val.endswith('`'): 235 | val = '"' + val[1:-1].replace("``","`") + '"' 236 | elif val.startswith("+"): 237 | val = val[1:] 238 | un = ast.literal_eval(val) 239 | return un 240 | 241 | 242 | def to_string(instring, tokensStart, retTokens): 243 | val = retTokens[0] 244 | val = "'"+val[1:-1].replace("''", "\\'")+"'" 245 | return {"literal": ast.literal_eval(val)} 246 | 247 | # NUMBERS 248 | realNum = Regex(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction(unquote) 249 | intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote) 250 | 251 | # STRINGS, NUMBERS, VARIABLES 252 | sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string) 253 | identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote) 254 | mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote) 255 | ident = Combine(~RESERVED + (delimitedList(Literal("*") | identString | mysqlidentString | Word(IDENT_CHAR), delim=".", combine=True))).setName("identifier") 256 | 257 | # EXPRESSIONS 258 | expr = Forward() 259 | 260 | # CASE 261 | case = ( 262 | CASE + 263 | Group(ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + 264 | Optional(ELSE + expr("else")) + 265 | END 266 | ).addParseAction(to_case_call) 267 | 268 | ordered_sql = Forward() 269 | 270 | 271 | call_function = ( 272 | ident.copy()("op").setName("function name").setDebugActions(*debug) + 273 | Literal("(").suppress() + 274 | Optional(ordered_sql | Group(delimitedList(expr)))("params") + 275 | Literal(")").suppress() 276 | ).addParseAction(to_json_call).setDebugActions(*debug) 277 | 278 | 279 | def _or(values): 280 | output = values[0] 281 | for v in values[1:]: 282 | output |= v 283 | return output 284 | 285 | 286 | interval = ( 287 | Keyword("interval", caseless=True).suppress().setDebugActions(*debug) + 288 | (realNum | intNum)("count").setDebugActions(*debug) + 289 | _or([Keyword(d, caseless=True)("duration") for d in durations]) 290 | ).addParseAction(to_interval_call).setDebugActions(*debug) 291 | 292 | compound = ( 293 | Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | 294 | (Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | 295 | (Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | 296 | (Keyword("date", caseless=True).setDebugActions(*debug) + sqlString("params")).addParseAction(to_date_call) | 297 | interval | 298 | case | 299 | (Literal("(").suppress() + ordered_sql + Literal(")").suppress()) | 300 | (Literal("(").suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | 301 | realNum.setName("float").setDebugActions(*debug) | 302 | intNum.setName("int").setDebugActions(*debug) | 303 | (Literal("~")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | 304 | (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | 305 | sqlString.setName("string").setDebugActions(*debug) | 306 | call_function | 307 | ident.copy().setName("variable").setDebugActions(*debug) 308 | ) 309 | expr << Group(infixNotation( 310 | compound, 311 | [ 312 | ( 313 | o, 314 | 3 if isinstance(o, tuple) else 2, 315 | opAssoc.LEFT, 316 | to_json_operator 317 | ) 318 | for o in KNOWN_OPS 319 | ]+[ 320 | ( 321 | COLLATE_NOCASE, 322 | 1, 323 | opAssoc.LEFT, 324 | to_json_operator 325 | ) 326 | ] 327 | 
).setName("expression").setDebugActions(*debug)) 328 | 329 | # SQL STATEMENT 330 | selectColumn = Group( 331 | Group(expr).setName("expression1")("value").setDebugActions(*debug) + Optional(Optional(AS) + ident.copy().setName("column_name1")("name").setDebugActions(*debug)) | 332 | Literal('*')("value").setDebugActions(*debug) 333 | ).setName("column").addParseAction(to_select_call) 334 | 335 | table_source = ( 336 | ( 337 | (Literal("(").suppress() + ordered_sql + Literal(")").suppress()).setDebugActions(*debug) | 338 | call_function 339 | )("value").setName("table source").setDebugActions(*debug) + 340 | Optional( 341 | Optional(AS) + 342 | ident("name").setName("table alias").setDebugActions(*debug) 343 | ) 344 | | 345 | ( 346 | ident("value").setName("table name").setDebugActions(*debug) + 347 | Optional(AS) + 348 | ident("name").setName("table alias").setDebugActions(*debug) 349 | ) 350 | | 351 | ident.setName("table name").setDebugActions(*debug) 352 | ) 353 | 354 | join = ( 355 | (CROSS_JOIN | FULL_JOIN | FULL_OUTER_JOIN | INNER_JOIN | JOIN | LEFT_JOIN | LEFT_OUTER_JOIN | RIGHT_JOIN | RIGHT_OUTER_JOIN)("op") + 356 | Group(table_source)("join") + 357 | Optional((ON + expr("on")) | (USING + expr("using"))) 358 | ).addParseAction(to_join_call) 359 | 360 | sortColumn = expr("value").setName("sort1").setDebugActions(*debug) + Optional(DESC("sort") | ASC("sort")) | \ 361 | expr("value").setName("sort2").setDebugActions(*debug) 362 | 363 | unordered_sql = Group( 364 | SELECT.suppress().setDebugActions(*debug) + delimitedList(selectColumn)("select") + 365 | Optional( 366 | (FROM.suppress().setDebugActions(*debug) + delimitedList(Group(table_source)) + ZeroOrMore(join))("from") + 367 | Optional(WHERE.suppress().setDebugActions(*debug) + expr.setName("where"))("where") + 368 | Optional(GROUP_BY.suppress().setDebugActions(*debug) + delimitedList(Group(selectColumn))("groupby").setName("groupby")) + 369 | Optional(HAVING.suppress().setDebugActions(*debug) + expr("having").setName("having")) + 370 | Optional(LIMIT.suppress().setDebugActions(*debug) + expr("limit")) + 371 | Optional(OFFSET.suppress().setDebugActions(*debug) + expr("offset")) 372 | ) 373 | ) 374 | 375 | ordered_sql << Group( 376 | Group(Group( 377 | unordered_sql + 378 | ZeroOrMore((UNION_ALL | UNION) + unordered_sql) 379 | )("union"))("from") + 380 | Optional(ORDER_BY.suppress().setDebugActions(*debug) + delimitedList(Group(sortColumn))("orderby").setName("orderby")) + 381 | Optional(LIMIT.suppress().setDebugActions(*debug) + expr("limit")) + 382 | Optional(OFFSET.suppress().setDebugActions(*debug) + expr("offset")) 383 | ).addParseAction(to_union_call) 384 | 385 | statement = Group(Group(Optional( 386 | WITH.suppress().setDebugActions(*debug) + 387 | delimitedList( 388 | Group( 389 | ident("name").setDebugActions(*debug) + 390 | AS.suppress().setDebugActions(*debug) + 391 | Literal("(").suppress().setDebugActions(*debug) + 392 | ordered_sql("value").setDebugActions(*debug) + 393 | Literal(")").suppress().setDebugActions(*debug) 394 | ) 395 | ) 396 | ))("with") + ordered_sql("query")).addParseAction(to_with_clause) 397 | 398 | SQLParser = statement 399 | 400 | # IGNORE SOME COMMENTS 401 | oracleSqlComment = Literal("--") + restOfLine 402 | mySqlComment = Literal("#") + restOfLine 403 | SQLParser.ignore(oracleSqlComment | mySqlComment) 404 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 
mo-future>=3 2 | pyparsing==2.3.1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # THIS FILE IS AUTOGENERATED! 3 | from __future__ import unicode_literals 4 | from setuptools import setup 5 | setup( 6 | description=u'Extract Parse Tree from SQL', 7 | license=u'MPL 2.0', 8 | author=u'Kyle Lahnakoski', 9 | author_email=u'kyle@lahnakoski.com', 10 | long_description_content_type=u'text/markdown', 11 | include_package_data=True, 12 | classifiers=["Development Status :: 3 - Alpha","Topic :: Software Development :: Libraries","Topic :: Software Development :: Libraries :: Python Modules","Programming Language :: SQL","Programming Language :: Python :: 2.7","Programming Language :: Python :: 3.6","License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"], 13 | install_requires=["mo-future>=3.31.20024","pyparsing==2.3.1"], 14 | version=u'3.32.20026', 15 | url=u'https://github.com/mozilla/moz-sql-parser', 16 | zip_safe=True, 17 | packages=["moz_sql_parser"], 18 | long_description=u'# Moz SQL Parser\n\nLet\'s make a SQL parser so we can provide a familiar interface to non-sql datastores!\n\n\n|Branch |Status |\n|------------|---------|\n|master | [![Build Status](https://travis-ci.org/mozilla/moz-sql-parser.svg?branch=master)](https://travis-ci.org/mozilla/moz-sql-parser) |\n|dev | [![Build Status](https://travis-ci.org/mozilla/moz-sql-parser.svg?branch=dev)](https://travis-ci.org/mozilla/moz-sql-parser) |\n\n\n## Problem Statement\n\nSQL is a familiar language used to access databases. Although, each database vendor has its quirky implementation, the average developer does not know enough SQL to be concerned with those quirks. This familiar core SQL (lowest common denominator, if you will) is useful enough to explore data in primitive ways. It is hoped that, once programmers have reviewed a datastore with basic SQL queries, and they see the value of that data, they will be motivated to use the datastore\'s native query format.\n\n## Objectives\n\nThe primary objective of this library is to convert some subset of [SQL-92](https://en.wikipedia.org/wiki/SQL-92) queries to JSON-izable parse trees. A big enough subset to provide superficial data access via SQL, but not so much as we must deal with the document-relational impedance mismatch.\n\n## Non-Objectives \n\n* No plans to provide update statements, like `update` or `insert`\n* No plans to expand the language to all of SQL:2011\n* No plans to provide data access tools \n\n\n## Project Status\n\nThere are [over 400 tests](https://github.com/mozilla/moz-sql-parser/tree/dev/tests). This parser is good enough for basic usage, including inner queries.\n\nYou can see the parser in action at [https://sql.telemetry.mozilla.org/](https://sql.telemetry.mozilla.org/) while using the ActiveData datasource\n\n## Install\n\n pip install moz-sql-parser\n\n## Parsing SQL\n\n >>> from moz_sql_parser import parse\n >>> import json\n >>> json.dumps(parse("select count(1) from jobs"))\n \'{"select": {"value": {"count": 1}}, "from": "jobs"}\'\n \nEach SQL query is parsed to an object: Each clause is assigned to an object property of the same name. \n\n >>> json.dumps(parse("select a as hello, b as world from jobs"))\n \'{"select": [{"value": "a", "name": "hello"}, {"value": "b", "name": "world"}], "from": "jobs"}\'\n\nThe `SELECT` clause is an array of objects containing `name` and `value` properties. 
\n\n### Recursion Limit \n\n**WARNING!** There is a recursion limit of `1500`. This prevents parsing of complex expressions or deeply nested nested queries. You can increase the recursion limit *after* you have imported `moz_sql_parser`, and before you `parse`:\n\n >>> from moz_sql_parser import parse\n >>> sys.setrecursionlimit(3000)\n >>> parse(complicated_sql)\n\n\n## Generating SQL\n\nYou may also generate SQL from the a given JSON document. This is done by the formatter, which is still incomplete (Jan2020).\n\n >>> from moz_sql_parser import format\n >>> format({"from":"test", "select":["a.b", "c"]})\n \'SELECT a.b, c FROM test\'\n\n\n## Contributing\n\nIn the event that the parser is not working for you, you can help make this better but simply pasting your sql (or JSON) into a new issue. Extra points if you describe the problem. Even more points if you submit a PR with a test. If you also submit a fix, then you also have my gratitude. \n\n\n## Run Tests\n\nSee [the tests directory](https://github.com/mozilla/moz-sql-parser/tree/dev/tests) for instructions running tests, or writing new ones.\n\n## More about implementation\n\nSQL queries are translated to JSON objects: Each clause is assigned to an object property of the same name.\n\n \n # SELECT * FROM dual WHERE a>b ORDER BY a+b\n {\n "select": "*", \n "from": "dual", \n "where": {"gt": ["a", "b"]}, \n "orderby": {"value": {"add": ["a", "b"]}}\n }\n \nExpressions are also objects, but with only one property: The name of the operation, and the value holding (an array of) parameters for that operation. \n\n {op: parameters}\n\nand you can see this pattern in the previous example:\n\n {"gt": ["a","b"]}\n\n\n### Notes\n\n* Uses the glorious `pyparsing` library (see https://github.com/pyparsing/pyparsing) to define the grammar, and define the shape of the tokens it generates. \n* [sqlparse](https://pypi.python.org/pypi/sqlparse) does not provide a tree, rather a list of tokens. \n', 19 | name=u'moz-sql-parser' 20 | ) -------------------------------------------------------------------------------- /setuptools.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Kyle Lahnakoski", 3 | "author_email": "kyle@lahnakoski.com", 4 | "classifiers": [ 5 | "Development Status :: 3 - Alpha", 6 | "Topic :: Software Development :: Libraries", 7 | "Topic :: Software Development :: Libraries :: Python Modules", 8 | "Programming Language :: SQL", 9 | "Programming Language :: Python :: 2.7", 10 | "Programming Language :: Python :: 3.6", 11 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)" 12 | ], 13 | "description": "Extract Parse Tree from SQL", 14 | "include_package_data": true, 15 | "install_requires": ["mo-future>=3.31.20024", "pyparsing==2.3.1"], 16 | "license": "MPL 2.0", 17 | "long_description": { 18 | "$concat": [ 19 | "# Moz SQL Parser", 20 | "", 21 | "Let's make a SQL parser so we can provide a familiar interface to non-sql datastores!", 22 | "", 23 | "", 24 | "|Branch |Status |", 25 | "|------------|---------|", 26 | "|master | [![Build Status](https://travis-ci.org/mozilla/moz-sql-parser.svg?branch=master)](https://travis-ci.org/mozilla/moz-sql-parser) |", 27 | "|dev | [![Build Status](https://travis-ci.org/mozilla/moz-sql-parser.svg?branch=dev)](https://travis-ci.org/mozilla/moz-sql-parser) |", 28 | "", 29 | "", 30 | "## Problem Statement", 31 | "", 32 | "SQL is a familiar language used to access databases. 
Although, each database vendor has its quirky implementation, the average developer does not know enough SQL to be concerned with those quirks. This familiar core SQL (lowest common denominator, if you will) is useful enough to explore data in primitive ways. It is hoped that, once programmers have reviewed a datastore with basic SQL queries, and they see the value of that data, they will be motivated to use the datastore's native query format.", 33 | "", 34 | "## Objectives", 35 | "", 36 | "The primary objective of this library is to convert some subset of [SQL-92](https://en.wikipedia.org/wiki/SQL-92) queries to JSON-izable parse trees. A big enough subset to provide superficial data access via SQL, but not so much as we must deal with the document-relational impedance mismatch.", 37 | "", 38 | "## Non-Objectives ", 39 | "", 40 | "* No plans to provide update statements, like `update` or `insert`", 41 | "* No plans to expand the language to all of SQL:2011", 42 | "* No plans to provide data access tools ", 43 | "", 44 | "", 45 | "## Project Status", 46 | "", 47 | "There are [over 400 tests](https://github.com/mozilla/moz-sql-parser/tree/dev/tests). This parser is good enough for basic usage, including inner queries.", 48 | "", 49 | "You can see the parser in action at [https://sql.telemetry.mozilla.org/](https://sql.telemetry.mozilla.org/) while using the ActiveData datasource", 50 | "", 51 | "## Install", 52 | "", 53 | " pip install moz-sql-parser", 54 | "", 55 | "## Parsing SQL", 56 | "", 57 | " >>> from moz_sql_parser import parse", 58 | " >>> import json", 59 | " >>> json.dumps(parse(\"select count(1) from jobs\"))", 60 | " '{\"select\": {\"value\": {\"count\": 1}}, \"from\": \"jobs\"}'", 61 | " ", 62 | "Each SQL query is parsed to an object: Each clause is assigned to an object property of the same name. ", 63 | "", 64 | " >>> json.dumps(parse(\"select a as hello, b as world from jobs\"))", 65 | " '{\"select\": [{\"value\": \"a\", \"name\": \"hello\"}, {\"value\": \"b\", \"name\": \"world\"}], \"from\": \"jobs\"}'", 66 | "", 67 | "The `SELECT` clause is an array of objects containing `name` and `value` properties. ", 68 | "", 69 | "### Recursion Limit ", 70 | "", 71 | "**WARNING!** There is a recursion limit of `1500`. This prevents parsing of complex expressions or deeply nested nested queries. You can increase the recursion limit *after* you have imported `moz_sql_parser`, and before you `parse`:", 72 | "", 73 | " >>> from moz_sql_parser import parse", 74 | " >>> sys.setrecursionlimit(3000)", 75 | " >>> parse(complicated_sql)", 76 | "", 77 | "", 78 | "## Generating SQL", 79 | "", 80 | "You may also generate SQL from the a given JSON document. This is done by the formatter, which is still incomplete (Jan2020).", 81 | "", 82 | " >>> from moz_sql_parser import format", 83 | " >>> format({\"from\":\"test\", \"select\":[\"a.b\", \"c\"]})", 84 | " 'SELECT a.b, c FROM test'", 85 | "", 86 | "", 87 | "## Contributing", 88 | "", 89 | "In the event that the parser is not working for you, you can help make this better but simply pasting your sql (or JSON) into a new issue. Extra points if you describe the problem. Even more points if you submit a PR with a test. If you also submit a fix, then you also have my gratitude. 
", 90 | "", 91 | "", 92 | "## Run Tests", 93 | "", 94 | "See [the tests directory](https://github.com/mozilla/moz-sql-parser/tree/dev/tests) for instructions running tests, or writing new ones.", 95 | "", 96 | "## More about implementation", 97 | "", 98 | "SQL queries are translated to JSON objects: Each clause is assigned to an object property of the same name.", 99 | "", 100 | " ", 101 | " # SELECT * FROM dual WHERE a>b ORDER BY a+b", 102 | " {", 103 | " \"select\": \"*\", ", 104 | " \"from\": \"dual\", ", 105 | " \"where\": {\"gt\": [\"a\", \"b\"]}, ", 106 | " \"orderby\": {\"value\": {\"add\": [\"a\", \"b\"]}}", 107 | " }", 108 | " ", 109 | "Expressions are also objects, but with only one property: The name of the operation, and the value holding (an array of) parameters for that operation. ", 110 | "", 111 | " {op: parameters}", 112 | "", 113 | "and you can see this pattern in the previous example:", 114 | "", 115 | " {\"gt\": [\"a\",\"b\"]}", 116 | "", 117 | "", 118 | "### Notes", 119 | "", 120 | "* Uses the glorious `pyparsing` library (see https://github.com/pyparsing/pyparsing) to define the grammar, and define the shape of the tokens it generates. ", 121 | "* [sqlparse](https://pypi.python.org/pypi/sqlparse) does not provide a tree, rather a list of tokens. ", 122 | "" 123 | ], 124 | "separator": "\n" 125 | }, 126 | "long_description_content_type": "text/markdown", 127 | "name": "moz-sql-parser", 128 | "packages": ["moz_sql_parser"], 129 | "url": "https://github.com/mozilla/moz-sql-parser", 130 | "version": "3.32.20026", 131 | "zip_safe": true 132 | } -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Moz SQL Parser Tests 2 | 3 | The test suite has over 400 tests. 4 | 5 | ## Running Tests 6 | 7 | For __Linux__: 8 | 9 | git clone https://github.com/mozilla/moz-sql-parser.git 10 | cd moz-sql-parser 11 | pip install -r requirements.txt 12 | set PYTHONPATH=. 13 | python -m unittest discover tests 14 | 15 | For __Windows__: 16 | 17 | git clone https://github.com/mozilla/moz-sql-parser.git 18 | cd moz-sql-parser 19 | pip install -r requirements.txt 20 | set PYTHONPATH=. 21 | python.exe -m unittest discover tests 22 | 23 | ### Debugging Suggestions 24 | 25 | * Once you have written a failing test, you can `DEBUG=True` in the 26 | `sql_parser.py` to print out a trace of matching attempts. 27 | * If you added more `ParserElement`s, you may want to add `.setDebugActions(*debug)` 28 | to each, so they print out thier matching attempts too. 29 | * Even though you can use Python strings for literals, they will not be 30 | attempted in all cases; wrap as a `Literal` or `Keyword`. This problem 31 | is known to lurk while matching `infixNotation`. 32 | * Feel free to leave in rulers, if you use them 33 | 34 | 35 | ## History 36 | 37 | * **January 2018** - fixes for Python3 38 | * **July 2017** - Add suite to start testing error messages, with hope of improving them 39 | * **April 2017** - All tests pass 40 | * **March 2017** - Added a number of test cases, most of which are missing the expected JSON parse tree, so they fail. 
41 | 42 | 43 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/moz-sql-parser/273e470a854be6701278946defc1163f52998384/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 6 | # 7 | # Author: Kyle Lahnakoski (kyle@lahnakoski.com) 8 | # 9 | 10 | from __future__ import absolute_import, division, unicode_literals 11 | 12 | from unittest import TestCase 13 | 14 | from moz_sql_parser import parse, format 15 | try: 16 | from tests.util import assertRaises 17 | except ImportError: 18 | from .util import assertRaises # RELATIVE IMPORT SO WE CAN RUN IN pyLibrary 19 | 20 | 21 | class TestErrors(TestCase): 22 | 23 | def test_dash_in_tablename(self): 24 | assertRaises( 25 | ["group by", "order by", "having", "limit", "where"], 26 | # 012345678901234567890123456789012345678901234567890123456789 27 | lambda: parse("select * from coverage-summary.source.file.covered limit 20") 28 | ) 29 | 30 | def test_join_on_using_together(self): 31 | assertRaises( 32 | "Expecting one of", 33 | lambda: parse("SELECT * FROM t1 JOIN t2 ON t1.id=t2.id USING (id)") 34 | ) 35 | 36 | def test_bad_join_name(self): 37 | bad_json = {'select': {'value': 't1.field1'}, 38 | 'from': ['t1', {'left intro join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}]} 39 | assertRaises( 40 | ["Fail to detect join type", 'left intro join'], 41 | lambda: format(bad_json) 42 | ) 43 | 44 | def test_order_by_must_follow_union(self): 45 | assertRaises( 46 | ["(at char 27)"], 47 | lambda: parse("select a from b order by a union select 2") 48 | ) 49 | 50 | -------------------------------------------------------------------------------- /tests/test_formatting.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
6 | # 7 | # Author: Beto Dealmeida (beto@dealmeida.net) 8 | # 9 | 10 | from __future__ import absolute_import, division, unicode_literals 11 | 12 | from unittest import TestCase 13 | 14 | from moz_sql_parser import format 15 | 16 | 17 | class TestSimple(TestCase): 18 | 19 | def test_two_tables(self): 20 | result = format({ 21 | "select": "*", 22 | "from": ["XYZZY", "ABC"] 23 | }) 24 | expected = "SELECT * FROM XYZZY, ABC" 25 | self.assertEqual(result, expected) 26 | 27 | def test_dot_table_name(self): 28 | result = format({ 29 | "select": "*", 30 | "from": "SYS.XYZZY", 31 | }) 32 | expected = "SELECT * FROM SYS.XYZZY" 33 | self.assertEqual(result, expected) 34 | 35 | def select_one_column(self): 36 | result = format({ 37 | "select": [{"value": "A"}], 38 | "from": ["dual"], 39 | }) 40 | expected = "SELECT A FROM dual" 41 | self.assertEqual(result, expected) 42 | 43 | def test_select_quote(self): 44 | result = format({ 45 | "select": {"value": {"literal": "'"}}, 46 | "from": "dual", 47 | }) 48 | expected = "SELECT '''' FROM dual" 49 | self.assertEqual(result, expected) 50 | 51 | def test_select_quoted_name(self): 52 | result = format({ 53 | "select": [ 54 | {"name": "@*#&", "value": "a"}, 55 | {"name": "test.g.g.c", "value": "b"} 56 | ], 57 | "from": "dual", 58 | }) 59 | expected = 'SELECT a AS "@*#&", b AS test.g.g.c FROM dual' 60 | self.assertEqual(result, expected) 61 | 62 | def test_select_expression(self): 63 | result = format({ 64 | "select": {"value": {"add": [ 65 | "a", 66 | {"div": ["b", 2]}, 67 | {"mul": [45, "c"]}, 68 | {"div": [2, "d"]}, 69 | ]}}, 70 | "from": "dual", 71 | }) 72 | expected = "SELECT a + b / 2 + 45 * c + 2 / d FROM dual" 73 | self.assertEqual(result, expected) 74 | 75 | def test_select_underscore_name(self): 76 | result = format({ 77 | "select": {"value": "_id"}, 78 | "from": "dual", 79 | }) 80 | expected = "SELECT _id FROM dual" 81 | self.assertEqual(result, expected) 82 | 83 | def test_select_dots_names(self): 84 | result = format({ 85 | "select": {"value": "a.b.c._d"}, 86 | "from": "dual", 87 | }) 88 | expected = "SELECT a.b.c._d FROM dual" 89 | self.assertEqual(result, expected) 90 | 91 | def select_many_column(self): 92 | result = format({ 93 | "select": [ 94 | {"value": "a"}, 95 | {"value": "b"}, 96 | {"value": "c"}, 97 | ], 98 | "from": ["dual"], 99 | }) 100 | expected = "SELECT a, b, c FROM dual" 101 | self.assertEqual(result, expected) 102 | 103 | def test_where_neq(self): 104 | result = format({ 105 | "select": "*", 106 | "from": "dual", 107 | "where": {"neq": ["a", {"literal": "test"}]}, 108 | }) 109 | expected = "SELECT * FROM dual WHERE a <> 'test'" 110 | self.assertEqual(result, expected) 111 | 112 | def test_where_in(self): 113 | result = format({ 114 | "select": {"value": "a"}, 115 | "from": "dual", 116 | "where": {"in": [ 117 | "a", 118 | {"literal": ["r", "g", "b"]}, 119 | ]}, 120 | }) 121 | expected = "SELECT a FROM dual WHERE a IN ('r', 'g', 'b')" 122 | self.assertEqual(result, expected) 123 | 124 | def test_where_in_and_in(self): 125 | result = format({ 126 | "select": {"value": "a"}, 127 | "from": "dual", 128 | "where": {"and": [ 129 | {"in": [ 130 | "a", 131 | {"literal": ["r", "g", "b"]}, 132 | ]}, 133 | {"in": [ 134 | "b", 135 | [10, 11, 12], 136 | ]}, 137 | ]}, 138 | }) 139 | expected = "SELECT a FROM dual WHERE a IN ('r', 'g', 'b') AND b IN (10, 11, 12)" 140 | self.assertEqual(result, expected) 141 | 142 | def test_eq(self): 143 | result = format({ 144 | "select": [ 145 | {"value": "a"}, 146 | {"value": "b"}, 147 | ], 148 | "from": 
["t1", "t2"], 149 | "where": {"eq": ["t1.a", "t2.b"]}, 150 | }) 151 | expected = "SELECT a, b FROM t1, t2 WHERE t1.a = t2.b" 152 | self.assertEqual(result, expected) 153 | 154 | def test_is_null(self): 155 | result = format({ 156 | "select": [ 157 | {"value": "a"}, 158 | {"value": "b"}, 159 | ], 160 | "from": "t1", 161 | "where": {"missing": "t1.a"}, 162 | }) 163 | expected = "SELECT a, b FROM t1 WHERE t1.a IS NULL" 164 | self.assertEqual(result, expected) 165 | 166 | def test_is_not_null(self): 167 | result = format({ 168 | "select": [ 169 | {"value": "a"}, 170 | {"value": "b"}, 171 | ], 172 | "from": "t1", 173 | "where": {"exists": "t1.a"}, 174 | }) 175 | expected = "SELECT a, b FROM t1 WHERE t1.a IS NOT NULL" 176 | self.assertEqual(result, expected) 177 | 178 | def test_groupby(self): 179 | result = format({ 180 | "select": [ 181 | {"value": "a"}, 182 | {"name": "b", "value": {"count": 1}}, 183 | ], 184 | "from": "mytable", 185 | "groupby": {"value": "a"}, 186 | }) 187 | expected = "SELECT a, COUNT(1) AS b FROM mytable GROUP BY a" 188 | self.assertEqual(result, expected) 189 | 190 | def test_function(self): 191 | result = format({ 192 | "select": {"value": {"count": 1}}, 193 | "from": "mytable", 194 | }) 195 | expected = "SELECT COUNT(1) FROM mytable" 196 | self.assertEqual(result, expected) 197 | 198 | def test_order_by(self): 199 | result = format({ 200 | "select": {"value": {"count": 1}}, 201 | "from": "dual", 202 | "orderby": {"value": "a"}, 203 | }) 204 | expected = "SELECT COUNT(1) FROM dual ORDER BY a" 205 | self.assertEqual(result, expected) 206 | 207 | def test_order_by_asc(self): 208 | result = format({ 209 | "select": {"value": {"count": 1}}, 210 | "from": "dual", 211 | "orderby": {"value": "a", "sort": "asc"}, 212 | }) 213 | expected = "SELECT COUNT(1) FROM dual ORDER BY a ASC" 214 | self.assertEqual(result, expected) 215 | 216 | def test_order_by_desc(self): 217 | result = format({ 218 | "select": {"value": {"count": 1}}, 219 | "from": "dual", 220 | "orderby": {"value": "a", "sort": "desc"}, 221 | }) 222 | expected = "SELECT COUNT(1) FROM dual ORDER BY a DESC" 223 | self.assertEqual(result, expected) 224 | 225 | def test_neg_or_precedence(self): 226 | result = format({ 227 | 'from': 'table1', 228 | 'where': {'or': [{'eq': ['A', -900]}, {'eq': ['B', 100]}]}, 229 | 'select': [{'value': 'B'}, {'value': 'C'}], 230 | }) 231 | expected = "SELECT B, C FROM table1 WHERE A = -900 OR B = 100" 232 | self.assertEqual(result, expected) 233 | 234 | def test_negative_number(self): 235 | result = format({ 236 | 'from': 'table1', 237 | 'where': {'eq': ['A', -900]}, 238 | 'select': {'value': 'a'}, 239 | }) 240 | expected = "SELECT a FROM table1 WHERE A = -900" 241 | self.assertEqual(result, expected) 242 | 243 | def test_like_in_where(self): 244 | result = format({ 245 | 'from': 'table1', 246 | 'where': {'like': ['A', {"literal": "%20%"}]}, 247 | 'select': {'value': 'a'}, 248 | }) 249 | expected = "SELECT a FROM table1 WHERE A LIKE '%20%'" 250 | self.assertEqual(result, expected) 251 | 252 | def test_like_in_select(self): 253 | result = format({ 254 | 'from': 'table1', 255 | 'select': {'name': 'bb', 'value': {"case": [{"when": {"like": ["A", {"literal": "bb%"}]}, "then": 1}, 0]}}, 256 | }) 257 | expected = "SELECT CASE WHEN A LIKE 'bb%' THEN 1 ELSE 0 END AS bb FROM table1" 258 | self.assertEqual(result, expected) 259 | 260 | def test_like_from_pr16(self): 261 | result = format({ 262 | 'from': 'trade', 263 | 'where': {"and": [ 264 | {"like": ["school", {"literal": "%shool"}]}, 265 | {"eq": 
["name", {"literal": "abc"}]}, 266 | {"in": ["id", {"literal": ["1", "2"]}]}, 267 | ]}, 268 | 'select': "*", 269 | }) 270 | expected = "SELECT * FROM trade WHERE school LIKE '%shool' AND name = 'abc' AND id IN ('1', '2')" 271 | self.assertEqual(result, expected) 272 | 273 | def test_in_expression(self): 274 | result = format({ 275 | 'from': 'task', 276 | 'select': "*", 277 | "where": {"in": [ 278 | "repo.branch.name", 279 | {"literal": ["try", "mozilla-central"]}, 280 | ]}, 281 | }) 282 | expected = "SELECT * FROM task WHERE repo.branch.name IN ('try', 'mozilla-central')" 283 | self.assertEqual(result, expected) 284 | 285 | def test_joined_table_name(self): 286 | result = format({ 287 | 'from': [ 288 | {'name': 't1', 'value': 'table1'}, 289 | {'on': {'eq': ['t1.id', 't3.id']}, 'join': {'name': 't3', 'value': 'table3'}}, 290 | ], 291 | 'select': '*', 292 | }) 293 | expected = "SELECT * FROM table1 AS t1 JOIN table3 AS t3 ON t1.id = t3.id" 294 | self.assertEqual(result, expected) 295 | 296 | def test_not_equal(self): 297 | result = format({ 298 | 'select': '*', 299 | 'from': "task", 300 | "where": {"and": [ 301 | {"exists": "build.product"}, 302 | {"neq": ["build.product", {"literal": "firefox"}]}, 303 | ]}, 304 | }) 305 | expected = "SELECT * FROM task WHERE build.product IS NOT NULL AND build.product <> 'firefox'" 306 | self.assertEqual(result, expected) 307 | 308 | def test_union(self): 309 | result = format({ 310 | 'union': [ 311 | {'select': '*', 'from': 'a'}, 312 | {'select': '*', 'from': 'b'}, 313 | ], 314 | }) 315 | expected = "SELECT * FROM a UNION SELECT * FROM b" 316 | self.assertEqual(result, expected) 317 | 318 | def test_limit(self): 319 | result = format({'select': '*', 'from': 'a', 'limit': 10}) 320 | expected = "SELECT * FROM a LIMIT 10" 321 | self.assertEqual(result, expected) 322 | 323 | def test_offset(self): 324 | result = format({'select': '*', 'from': 'a', 'limit': 10, 'offset': 10}) 325 | expected = "SELECT * FROM a LIMIT 10 OFFSET 10" 326 | self.assertEqual(result, expected) 327 | 328 | def test_count_literal(self): 329 | result = format({'select': {'value': {'count': {'literal': 'literal'}}}, 'from': 'a'}) 330 | expected = "SELECT COUNT('literal') FROM a" 331 | self.assertEqual(result, expected) 332 | 333 | def test_no_arguments(self): 334 | result = format({'select': {'value': {'now': {}}}}) 335 | expected = "SELECT NOW()" 336 | self.assertEqual(result, expected) 337 | 338 | def test_between(self): 339 | result = format({ 340 | "select": [ 341 | {"value": "a"} 342 | ], 343 | "from": ["t1"], 344 | "where": {"between": ["t1.a", 10, {'literal': 'ABC'}]}, 345 | }) 346 | expected = "SELECT a FROM t1 WHERE t1.a BETWEEN 10 AND 'ABC'" 347 | self.assertEqual(result, expected) 348 | 349 | def test_binary_and(self): 350 | expected = "SELECT * FROM t WHERE c & 4" 351 | result = format({ 352 | "select": "*", 353 | "from": "t", 354 | "where": {"binary_and": ["c", 4]} 355 | }) 356 | self.assertEqual(result, expected) 357 | 358 | def test_binary_or(self): 359 | expected = "SELECT * FROM t WHERE c | 4" 360 | result = format({ 361 | "select": "*", 362 | "from": "t", 363 | "where": {"binary_or": ["c", 4]} 364 | }) 365 | self.assertEqual(result, expected) 366 | 367 | def test_binary_not(self): 368 | expected = "SELECT * FROM t WHERE ~c" 369 | result = format({ 370 | "select": "*", 371 | "from": "t", 372 | "where": {"binary_not": "c"} 373 | }) 374 | self.assertEqual(result, expected) 375 | 376 | def test_issue_104(self): 377 | expected = ( 378 | "SELECT NomPropriete AS Categorie, 
ROUND(AVG(NotePonderee), 2) AS \"Moyenne des notes\", ROUND(AVG(Complexite), 2) AS \"Complexite moyenne\"" 379 | " FROM Propriete, Categorie, Jeu" 380 | " WHERE IdPropriete = IdCategorie" 381 | " AND Categorie.IdJeu = Jeu.IdJeu" 382 | " AND NotePonderee > 0" 383 | " GROUP BY IdPropriete, NomPropriete" 384 | " ORDER BY \"Moyenne des notes\" DESC,\"Complexite moyenne\" DESC" 385 | ) 386 | result = format({ 387 | 'select': [ 388 | {'value': 'NomPropriete', 'name': 'Categorie'}, 389 | {'value': {'round': [{'avg': 'NotePonderee'}, 2]}, 'name': 'Moyenne des notes'}, 390 | {'value': {'round': [{'avg': 'Complexite'}, 2]}, 'name': 'Complexite moyenne'}], 391 | 'from': ['Propriete', 'Categorie', 'Jeu'], 392 | 'where': {'and': [ 393 | {'eq': ['IdPropriete', 'IdCategorie']}, {'eq': ['Categorie.IdJeu', 'Jeu.IdJeu']}, 394 | {'gt': ['NotePonderee', 0]} 395 | ]}, 396 | 'groupby': [ 397 | {'value': 'IdPropriete'}, {'value': 'NomPropriete'} 398 | ], 399 | 'orderby': [ 400 | {'value': 'Moyenne des notes', 'sort': 'desc'}, 401 | {'value': 'Complexite moyenne', 'sort': 'desc'} 402 | ] 403 | }) 404 | self.assertEqual(result, expected) 405 | 406 | def test_with_cte(self): 407 | expected = "WITH t AS (SELECT a FROM table) SELECT * FROM t" 408 | result = format({'select': '*', 'from': 't', 'with': {'name': 409 | 't', 'value': {'select': {'value': 'a'}, 'from': 'table'}} 410 | }) 411 | self.assertEqual(result, expected) 412 | 413 | def test_with_cte_various(self): 414 | expected = "WITH t1 AS (SELECT a FROM table), t2 AS (SELECT 1) SELECT * FROM t1, t2" 415 | result = format({'select': '*', 'from': ['t1', 't2'], 416 | 'with': [{'name': 't1', 'value': {'select': {'value': 'a'}, 'from': 'table'}}, 417 | {'name': 't2', 'value': {'select': {'value': 1}}}]}) 418 | self.assertEqual(result, expected) 419 | -------------------------------------------------------------------------------- /tests/test_meta.py: -------------------------------------------------------------------------------- 1 | 2 | # encoding: utf-8 3 | # 4 | # This Source Code Form is subject to the terms of the Mozilla Public 5 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 6 | # You can obtain one at http://mozilla.org/MPL/2.0/. 7 | # 8 | # Author: Kyle Lahnakoski (kyle@lahnakoski.com) 9 | # 10 | 11 | from __future__ import absolute_import, division, unicode_literals 12 | 13 | import os 14 | import sys 15 | from unittest import TestCase 16 | from moz_sql_parser import sql_parser 17 | from moz_sql_parser.debugs import DEBUG 18 | 19 | _ensure_imported = sql_parser 20 | 21 | 22 | class TestSimple(TestCase): 23 | """ 24 | THESE TESTS ARE FOR VERIFYING THE STATE OF THE REPO, NOT HTE STATE OF THE CODE 25 | """ 26 | 27 | def test_recursion_limit(self): 28 | if os.environ.get('TRAVIS_BRANCH') == 'master': 29 | limit = sys.getrecursionlimit() 30 | self.assertEqual(limit, 1500) 31 | 32 | def test_debug_is_off(self): 33 | self.assertFalse(DEBUG, "Turn off debugging") 34 | 35 | -------------------------------------------------------------------------------- /tests/test_resources.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
6 | # 7 | # Author: Kyle Lahnakoski (kyle@lahnakoski.com) 8 | # 9 | from __future__ import absolute_import, division, unicode_literals 10 | 11 | import os 12 | from unittest import TestCase, skipIf 13 | 14 | from moz_sql_parser import parse 15 | 16 | IS_MASTER = os.environ.get('TRAVIS_BRANCH') == "master" 17 | 18 | 19 | class TestResources(TestCase): 20 | def test_001(self): 21 | sql = "SELECT * FROM test1" 22 | result = parse(sql) 23 | expected = {"from": "test1", "select": "*"} 24 | self.assertEqual(result, expected) 25 | 26 | def test_002(self): 27 | sql = "SELECT * FROM test1, test2" 28 | result = parse(sql) 29 | expected = {"from": ["test1", "test2"], "select": "*"} 30 | self.assertEqual(result, expected) 31 | 32 | def test_003(self): 33 | sql = "SELECT * FROM test2, test1" 34 | result = parse(sql) 35 | expected = {"from": ["test2", "test1"], "select": "*"} 36 | self.assertEqual(result, expected) 37 | 38 | def test_004(self): 39 | sql = "SELECT f1 FROM test1" 40 | result = parse(sql) 41 | expected = {"from": "test1", "select": {"value": "f1"}} 42 | self.assertEqual(result, expected) 43 | 44 | def test_005(self): 45 | sql = "SELECT f2 FROM test1" 46 | result = parse(sql) 47 | expected = {"from": "test1", "select": {"value": "f2"}} 48 | self.assertEqual(result, expected) 49 | 50 | def test_006(self): 51 | sql = "SELECT f2, f1 FROM test1" 52 | result = parse(sql) 53 | expected = {"from": "test1", "select": [{"value": "f2"}, {"value": "f1"}]} 54 | self.assertEqual(result, expected) 55 | 56 | def test_007(self): 57 | sql = "SELECT f1, f2 FROM test1" 58 | result = parse(sql) 59 | expected = {"from": "test1", "select": [{"value": "f1"}, {"value": "f2"}]} 60 | self.assertEqual(result, expected) 61 | 62 | def test_008(self): 63 | sql = "SELECT *, * FROM test1" 64 | result = parse(sql) 65 | expected = {"from": "test1", "select": ["*", "*"]} 66 | self.assertEqual(result, expected) 67 | 68 | def test_009(self): 69 | sql = "SELECT *, min(f1,f2), max(f1,f2) FROM test1" 70 | result = parse(sql) 71 | expected = { 72 | "from": "test1", 73 | "select": [ 74 | "*", 75 | {"value": {"min": ["f1", "f2"]}}, 76 | {"value": {"max": ["f1", "f2"]}} 77 | ] 78 | } 79 | self.assertEqual(result, expected) 80 | 81 | def test_010(self): 82 | sql = "SELECT 'one', *, 'two', * FROM test1" 83 | result = parse(sql) 84 | expected = { 85 | "from": "test1", 86 | "select": [ 87 | {"value": {"literal": "one"}}, 88 | "*", 89 | {"value": {"literal": "two"}}, 90 | "*" 91 | ] 92 | } 93 | self.assertEqual(result, expected) 94 | 95 | def test_014(self): 96 | sql = "SELECT *, 'hi' FROM test1, test2" 97 | result = parse(sql) 98 | expected = { 99 | "from": ["test1", "test2"], 100 | "select": [ 101 | "*", 102 | {"value": {"literal": "hi"}} 103 | ] 104 | } 105 | self.assertEqual(result, expected) 106 | 107 | def test_015(self): 108 | sql = "SELECT 'one', *, 'two', * FROM test1, test2" 109 | result = parse(sql) 110 | expected = { 111 | "from": ["test1", "test2"], 112 | "select": [ 113 | {"value": {"literal": "one"}}, 114 | "*", 115 | {"value": {"literal": "two"}}, 116 | "*" 117 | ] 118 | } 119 | self.assertEqual(result, expected) 120 | 121 | def test_016(self): 122 | sql = "SELECT test1.f1, test2.r1 FROM test1, test2" 123 | result = parse(sql) 124 | expected = { 125 | "from": ["test1", "test2"], 126 | "select": [ 127 | {"value": "test1.f1"}, 128 | {"value": "test2.r1"} 129 | ] 130 | } 131 | self.assertEqual(result, expected) 132 | 133 | def test_017(self): 134 | sql = "SELECT test1.f1, test2.r1 FROM test2, test1" 135 | result = parse(sql) 
136 | expected = { 137 | "from": ["test2", "test1"], 138 | "select": [ 139 | {"value": "test1.f1"}, 140 | {"value": "test2.r1"} 141 | ] 142 | } 143 | self.assertEqual(result, expected) 144 | 145 | def test_019(self): 146 | sql = "SELECT * FROM test1 AS a, test1 AS b" 147 | result = parse(sql) 148 | expected = { 149 | "from": [ 150 | {"value": "test1", "name": "a"}, 151 | {"value": "test1", "name": "b"} 152 | ], 153 | "select": "*" 154 | } 155 | self.assertEqual(result, expected) 156 | 157 | def test_020(self): 158 | sql = "SELECT max(test1.f1,test2.r1), min(test1.f2,test2.r2)\nFROM test2, test1" 159 | result = parse(sql) 160 | expected = { 161 | "from": ["test2", "test1"], 162 | "select": [ 163 | {"value": {"max": ["test1.f1", "test2.r1"]}}, 164 | {"value": {"min": ["test1.f2", "test2.r2"]}} 165 | ] 166 | } 167 | self.assertEqual(result, expected) 168 | 169 | def test_021(self): 170 | sql = "SELECT min(test1.f1,test2.r1), max(test1.f2,test2.r2)\nFROM test1, test2" 171 | result = parse(sql) 172 | expected = { 173 | "from": ["test1", "test2"], 174 | "select": [ 175 | {"value": {"min": ["test1.f1", "test2.r1"]}}, 176 | {"value": {"max": ["test1.f2", "test2.r2"]}} 177 | ] 178 | } 179 | self.assertEqual(result, expected) 180 | 181 | def test_022(self): 182 | sql = "SELECT count(f1,f2) FROM test1" 183 | result = parse(sql) 184 | expected = { 185 | "from": "test1", 186 | "select": {"value": {"count": ["f1", "f2"]}} 187 | } 188 | self.assertEqual(result, expected) 189 | 190 | def test_023(self): 191 | sql = "SELECT count(f1) FROM test1" 192 | result = parse(sql) 193 | expected = { 194 | "from": "test1", 195 | "select": {"value": {"count": "f1"}} 196 | 197 | } 198 | self.assertEqual(result, expected) 199 | 200 | def test_024(self): 201 | sql = "SELECT Count() FROM test1" 202 | result = parse(sql) 203 | expected = { 204 | "from": "test1", 205 | "select": {"value": {"count": {}}} 206 | } 207 | self.assertEqual(result, expected) 208 | 209 | def test_025(self): 210 | sql = "SELECT COUNT(*) FROM test1" 211 | result = parse(sql) 212 | expected = { 213 | "from": "test1", 214 | "select": {"value": {"count": "*"}} 215 | } 216 | self.assertEqual(result, expected) 217 | 218 | def test_026(self): 219 | sql = "SELECT COUNT(*)+1 FROM test1" 220 | result = parse(sql) 221 | expected = { 222 | "from": "test1", 223 | "select": {"value": {"add": [{"count": "*"}, 1]}} 224 | } 225 | self.assertEqual(result, expected) 226 | 227 | def test_027(self): 228 | sql = "SELECT count(*),count(a),count(b) FROM t3" 229 | result = parse(sql) 230 | expected = { 231 | "from": "t3", 232 | "select":[ 233 | {"value": {"count": "*"}}, 234 | {"value": {"count": "a"}}, 235 | {"value": {"count": "b"}} 236 | ] 237 | } 238 | self.assertEqual(result, expected) 239 | 240 | def test_028(self): 241 | sql = "SELECT count(*),count(a),count(b) FROM t4" 242 | result = parse(sql) 243 | expected = { 244 | "from": "t4", 245 | "select":[ 246 | {"value": {"count": "*"}}, 247 | {"value": {"count": "a"}}, 248 | {"value": {"count": "b"}} 249 | ] 250 | } 251 | self.assertEqual(result, expected) 252 | 253 | def test_029(self): 254 | sql = "SELECT count(*),count(a),count(b) FROM t4 WHERE b=5" 255 | result = parse(sql) 256 | expected = { 257 | "from": "t4", 258 | "select":[ 259 | {"value": {"count": "*"}}, 260 | {"value": {"count": "a"}}, 261 | {"value": {"count": "b"}} 262 | ], 263 | "where": {"eq": ["b", 5]} 264 | } 265 | self.assertEqual(result, expected) 266 | 267 | def test_030(self): 268 | sql = "SELECT min(*) FROM test1" 269 | result = parse(sql) 270 | 
expected = { 271 | "from": "test1", 272 | "select": {"value": {"min":"*"}} 273 | } 274 | self.assertEqual(result, expected) 275 | 276 | def test_031(self): 277 | sql = "SELECT Min(f1) FROM test1" 278 | result = parse(sql) 279 | expected = { 280 | "from": "test1", 281 | "select": {"value": {"min": "f1"}} 282 | } 283 | self.assertEqual(result, expected) 284 | 285 | def test_032(self): 286 | sql = "SELECT MIN(f1,f2) FROM test1" 287 | result = parse(sql) 288 | expected = { 289 | "from": "test1", 290 | "select": {"value": {"min": ["f1", "f2"]}} 291 | } 292 | self.assertEqual(result, expected) 293 | 294 | def test_033(self): 295 | sql = "SELECT coalesce(min(a),'xyzzy') FROM t3" 296 | result = parse(sql) 297 | expected = { 298 | "from": "t3", 299 | "select": {"value": {"coalesce": [{"min": "a"}, {"literal": "xyzzy"}]}} 300 | } 301 | self.assertEqual(result, expected) 302 | 303 | def test_034(self): 304 | sql = "SELECT min(coalesce(a,'xyzzy')) FROM t3" 305 | result = parse(sql) 306 | expected = { 307 | "from": "t3", 308 | "select": {"value": {"min": {"coalesce": ["a", {"literal": "xyzzy"}]}}} 309 | } 310 | self.assertEqual(result, expected) 311 | 312 | def test_035(self): 313 | sql = "SELECT min(b), min(b) FROM t4" 314 | result = parse(sql) 315 | expected = { 316 | "from": "t4", 317 | "select": [{"value": {"min": "b"}}, {"value": {"min": "b"}}] 318 | } 319 | self.assertEqual(result, expected) 320 | 321 | def test_036(self): 322 | sql = "SELECT MAX(*) FROM test1" 323 | result = parse(sql) 324 | expected = { 325 | "from": "test1", 326 | "select": {"value": {"max": "*"}} 327 | } 328 | self.assertEqual(result, expected) 329 | 330 | def test_037(self): 331 | sql = "SELECT Max(f1) FROM test1" 332 | result = parse(sql) 333 | expected = { 334 | "from": "test1", 335 | "select": {"value": {"max": "f1"}} 336 | } 337 | self.assertEqual(result, expected) 338 | 339 | def test_038(self): 340 | sql = "SELECT max(f1,f2) FROM test1" 341 | result = parse(sql) 342 | expected = { 343 | "from": "test1", 344 | "select": {"value": {"max": ["f1", "f2"]}} 345 | } 346 | self.assertEqual(result, expected) 347 | 348 | def test_039(self): 349 | sql = "SELECT MAX(f1,f2)+1 FROM test1" 350 | result = parse(sql) 351 | expected = { 352 | "from": "test1", 353 | "select": {"value": {"add": [{"max": ["f1", "f2"]}, 1]}} 354 | } 355 | self.assertEqual(result, expected) 356 | 357 | def test_040(self): 358 | sql = "SELECT MAX(f1)+1 FROM test1" 359 | result = parse(sql) 360 | expected = { 361 | "from": "test1", 362 | "select": {"value": {"add": [{"max": "f1"}, 1]}} 363 | } 364 | self.assertEqual(result, expected) 365 | 366 | def test_041(self): 367 | # 0123456789012345678901234567890123456789 368 | sql = "SELECT coalesce(max(a),'xyzzy') FROM t3" 369 | result = parse(sql) 370 | expected = { 371 | "from": "t3", 372 | "select": {"value": {"coalesce": [{"max": "a"}, {"literal": "xyzzy"}]}} 373 | } 374 | self.assertEqual(result, expected) 375 | 376 | def test_042(self): 377 | sql = "SELECT max(coalesce(a,'xyzzy')) FROM t3" 378 | result = parse(sql) 379 | expected = { 380 | "from": "t3", 381 | "select": {"value": {"max": {"coalesce": ["a", {"literal": "xyzzy"}]}}} 382 | } 383 | self.assertEqual(result, expected) 384 | 385 | def test_043(self): 386 | sql = "SELECT SUM(*) FROM test1" 387 | result = parse(sql) 388 | expected = { 389 | "from": "test1", 390 | "select": {"value": {"sum":"*"}} 391 | } 392 | self.assertEqual(result, expected) 393 | 394 | def test_044(self): 395 | sql = "SELECT Sum(f1) FROM test1" 396 | result = parse(sql) 397 | expected 
= { 398 | "from": "test1", 399 | "select": {"value": {"sum":"f1"}} 400 | } 401 | self.assertEqual(result, expected) 402 | 403 | def test_045(self): 404 | sql = "SELECT sum(f1,f2) FROM test1" 405 | result = parse(sql) 406 | expected = { 407 | "from": "test1", 408 | "select": {"value": {"sum": ["f1", "f2"]}} 409 | } 410 | self.assertEqual(result, expected) 411 | 412 | def test_046(self): 413 | sql = "SELECT SUM(f1)+1 FROM test1" 414 | result = parse(sql) 415 | expected = { 416 | "from": "test1", 417 | "select": {"value": {"add": [{"sum": "f1"}, 1]}} 418 | } 419 | self.assertEqual(result, expected) 420 | 421 | def test_047(self): 422 | sql = "SELECT sum(a) FROM t3" 423 | result = parse(sql) 424 | expected = { 425 | "from": "t3", 426 | "select": {"value": {"sum": "a"}} 427 | } 428 | self.assertEqual(result, expected) 429 | 430 | def test_048(self): 431 | sql = "SELECT XYZZY(f1) FROM test1" 432 | result = parse(sql) 433 | expected = { 434 | "from": "test1", 435 | "select": {"value": {"xyzzy": "f1"}} 436 | } 437 | self.assertEqual(result, expected) 438 | 439 | def test_049(self): 440 | sql = "SELECT SUM(min(f1,f2)) FROM test1" 441 | result = parse(sql) 442 | expected = { 443 | "from": "test1", 444 | "select": {"value": {"sum": {"min": ["f1", "f2"]}}} 445 | } 446 | self.assertEqual(result, expected) 447 | 448 | def test_050(self): 449 | sql = "SELECT SUM(min(f1)) FROM test1" 450 | result = parse(sql) 451 | expected = { 452 | "from": "test1", 453 | "select": {"value": {"sum": {"min": "f1"}}} 454 | } 455 | self.assertEqual(result, expected) 456 | 457 | def test_052(self): 458 | sql = "SELECT f1 FROM test1 WHERE f1<11" 459 | result = parse(sql) 460 | expected = { 461 | "from": "test1", 462 | "select": {"value": "f1"}, 463 | "where": {"lt": ["f1", 11]} 464 | } 465 | self.assertEqual(result, expected) 466 | 467 | def test_053(self): 468 | sql = "SELECT f1 FROM test1 WHERE f1<=11" 469 | result = parse(sql) 470 | expected = { 471 | "from": "test1", 472 | "select": {"value": "f1"}, 473 | "where": {"lte": ["f1", 11]} 474 | } 475 | self.assertEqual(result, expected) 476 | 477 | def test_054(self): 478 | sql = "SELECT f1 FROM test1 WHERE f1=11" 479 | result = parse(sql) 480 | expected = { 481 | "from": "test1", 482 | "select": {"value": "f1"}, 483 | "where": {"eq": ["f1", 11]} 484 | } 485 | self.assertEqual(result, expected) 486 | 487 | def test_055(self): 488 | sql = "SELECT f1 FROM test1 WHERE f1>=11" 489 | result = parse(sql) 490 | expected = { 491 | "from": "test1", 492 | "select": {"value": "f1"}, 493 | "where": {"gte": ["f1", 11]} 494 | } 495 | self.assertEqual(result, expected) 496 | 497 | def test_056(self): 498 | sql = "SELECT f1 FROM test1 WHERE f1>11" 499 | result = parse(sql) 500 | expected = { 501 | "from": "test1", 502 | "select": {"value": "f1"}, 503 | "where": {"gt": ["f1", 11]} 504 | } 505 | self.assertEqual(result, expected) 506 | 507 | def test_057(self): 508 | sql = "SELECT f1 FROM test1 WHERE f1!=11" 509 | result = parse(sql) 510 | expected = { 511 | "from": "test1", 512 | "select": {"value": "f1"}, 513 | "where": {"neq": ["f1", 11]} 514 | } 515 | self.assertEqual(result, expected) 516 | 517 | def test_058(self): 518 | sql = "SELECT f1 FROM test1 WHERE min(f1,f2)!=11" 519 | result = parse(sql) 520 | expected = { 521 | "from": "test1", 522 | "select": {"value": "f1"}, 523 | "where": {"neq": [{"min": ["f1", "f2"]}, 11]} 524 | } 525 | self.assertEqual(result, expected) 526 | 527 | def test_059(self): 528 | sql = "SELECT f1 FROM test1 WHERE max(f1,f2)!=11" 529 | result = parse(sql) 530 | 
expected = { 531 | "from": "test1", 532 | "select": {"value": "f1"}, 533 | "where": {"neq": [{"max": ["f1", "f2"]}, 11]} 534 | } 535 | self.assertEqual(result, expected) 536 | 537 | def test_060(self): 538 | sql = "SELECT f1 FROM test1 WHERE count(f1,f2)!=11" 539 | result = parse(sql) 540 | expected = { 541 | "from": "test1", 542 | "select": {"value": "f1"}, 543 | "where": {"neq": [{"count": ["f1", "f2"]}, 11]} 544 | } 545 | self.assertEqual(result, expected) 546 | 547 | def test_061(self): 548 | sql = "SELECT f1 FROM test1 ORDER BY f1" 549 | result = parse(sql) 550 | expected = { 551 | "from": "test1", 552 | "select": {"value": "f1"}, 553 | "orderby": {"value": "f1"} 554 | } 555 | self.assertEqual(result, expected) 556 | 557 | def test_062(self): 558 | sql = "SELECT f1 FROM test1 ORDER BY -f1" 559 | result = parse(sql) 560 | expected = { 561 | "from": "test1", 562 | "select": {"value": "f1"}, 563 | "orderby": {"value": {"neg": "f1"}} 564 | } 565 | self.assertEqual(result, expected) 566 | 567 | def test_063(self): 568 | sql = "SELECT f1 FROM test1 ORDER BY min(f1,f2)" 569 | result = parse(sql) 570 | expected = { 571 | "from": "test1", 572 | "select": {"value": "f1"}, 573 | "orderby": {"value": {"min": ["f1", "f2"]}} 574 | } 575 | self.assertEqual(result, expected) 576 | 577 | def test_064(self): 578 | sql = "SELECT f1 FROM test1 ORDER BY min(f1)" 579 | result = parse(sql) 580 | expected = { 581 | "from": "test1", 582 | "select": {"value": "f1"}, 583 | "orderby": {"value": {"min": "f1"}} 584 | } 585 | self.assertEqual(result, expected) 586 | 587 | def test_065(self): 588 | sql = "SELECT f1 FROM test1 ORDER BY 8.4" 589 | result = parse(sql) 590 | expected = { 591 | "from": "test1", 592 | "select": {"value": "f1"}, 593 | "orderby": {"value": 8.4} 594 | } 595 | self.assertEqual(result, expected) 596 | 597 | def test_066(self): 598 | sql = "SELECT f1 FROM test1 ORDER BY '8.4'" 599 | result = parse(sql) 600 | expected = { 601 | "from": "test1", 602 | "select": {"value": "f1"}, 603 | "orderby": {"value": {"literal": "8.4"}} 604 | } 605 | self.assertEqual(result, expected) 606 | 607 | def test_067(self): 608 | sql = "SELECT * FROM t5 ORDER BY 1" 609 | result = parse(sql) 610 | expected = { 611 | "from": "t5", 612 | "select": "*", 613 | "orderby": {"value": 1} 614 | } 615 | self.assertEqual(result, expected) 616 | 617 | def test_068(self): 618 | sql = "SELECT * FROM t5 ORDER BY 2" 619 | result = parse(sql) 620 | expected = { 621 | "from": "t5", 622 | "select": "*", 623 | "orderby": {"value": 2} 624 | } 625 | self.assertEqual(result, expected) 626 | 627 | def test_069(self): 628 | sql = "SELECT * FROM t5 ORDER BY +2" 629 | result = parse(sql) 630 | expected = { 631 | "from": "t5", 632 | "select": "*", 633 | "orderby": {"value": 2} 634 | } 635 | self.assertEqual(result, expected) 636 | 637 | def test_070(self): 638 | sql = "SELECT * FROM t5 ORDER BY 2, 1 DESC" 639 | result = parse(sql) 640 | expected = { 641 | "from": "t5", 642 | "select": "*", 643 | "orderby": [{"value": 2}, {"value": 1, "sort": "desc"}] 644 | } 645 | self.assertEqual(result, expected) 646 | 647 | def test_071(self): 648 | sql = "SELECT * FROM t5 ORDER BY 1 DESC, b" 649 | result = parse(sql) 650 | expected = { 651 | "from": "t5", 652 | "select": "*", 653 | "orderby": [{"value": 1, "sort": "desc"}, {"value": "b"}] 654 | } 655 | self.assertEqual(result, expected) 656 | 657 | def test_072(self): 658 | sql = "SELECT * FROM t5 ORDER BY b DESC, 1" 659 | result = parse(sql) 660 | expected = { 661 | "from": "t5", 662 | "select": "*", 663 | 
"orderby": [{"value": "b", "sort": "desc"}, {"value": 1}] 664 | } 665 | self.assertEqual(result, expected) 666 | 667 | def test_073(self): 668 | sql = "SELECT max(f1) FROM test1 ORDER BY f2" 669 | result = parse(sql) 670 | expected = { 671 | "from": "test1", 672 | "select": {"value": {"max": "f1"}}, 673 | "orderby": {"value": "f2"} 674 | } 675 | self.assertEqual(result, expected) 676 | 677 | def test_078(self): 678 | sql = "SELECT A.f1, B.f1 FROM test1 as A, test1 as B\nORDER BY A.f1, B.f1" 679 | result = parse(sql) 680 | expected = { 681 | "from": [{"value": "test1", "name": "A"}, {"value": "test1", "name": "B"}], 682 | "select": [{"value": "A.f1"}, {"value": "B.f1"}], 683 | "orderby": [{"value": "A.f1"}, {"value": "B.f1"}] 684 | } 685 | self.assertEqual(result, expected) 686 | 687 | def test_086(self): 688 | # 1111111111222222222233333333334444444444555555555566666666667777777777 689 | # 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 690 | sql = "SELECT a FROM t6 WHERE b IN\n(SELECT b FROM t6 WHERE a<='b' UNION SELECT '3' AS x\nORDER BY 1 LIMIT 1)" 691 | result = parse(sql) 692 | expected = { 693 | "from": "t6", 694 | "select": {"value": "a"}, 695 | "where": {"in": ["b", { 696 | "from": {"union": [ 697 | { 698 | "from": "t6", 699 | "select": {"value": "b"}, 700 | "where": {"lte": ["a", {"literal": "b"}]}, 701 | }, 702 | { 703 | "select": {"value": {"literal": "3"}, "name": "x"} 704 | } 705 | ]}, 706 | "orderby": {"value": 1}, 707 | "limit": 1 708 | }]} 709 | } 710 | self.assertEqual(result, expected) 711 | 712 | def test_087(self): 713 | sql = "SELECT a FROM t6 WHERE b IN\n(SELECT b FROM t6 WHERE a<='b' UNION SELECT '3' AS x\nORDER BY 1 DESC LIMIT 1)" 714 | result = parse(sql) 715 | expected = { 716 | "from": "t6", 717 | "select": {"value": "a"}, 718 | "where": {"in": ["b", { 719 | "from": {"union": [ 720 | { 721 | "from": "t6", 722 | "select": {"value": "b"}, 723 | "where": {"lte": ["a", {"literal": "b"}]}, 724 | }, 725 | { 726 | "select": {"value": {"literal": "3"}, "name": "x"} 727 | } 728 | ]}, 729 | "orderby": {"value": 1, "sort": "desc"}, 730 | "limit": 1 731 | }]} 732 | } 733 | self.assertEqual(result, expected) 734 | 735 | def test_088(self): 736 | sql = "SELECT a FROM t6 WHERE b IN\n(SELECT b FROM t6 WHERE a<='b' UNION SELECT '3' AS x\nORDER BY b LIMIT 2)\nORDER BY a" 737 | result = parse(sql) 738 | expected = { 739 | "from": "t6", 740 | "select": {"value": "a"}, 741 | "where": {"in": ["b", { 742 | "from": {"union": [ 743 | { 744 | "from": "t6", 745 | "select": {"value": "b"}, 746 | "where": {"lte": ["a", {"literal": "b"}]}, 747 | }, 748 | { 749 | "select": {"value": {"literal": "3"}, "name": "x"} 750 | } 751 | ]}, 752 | "orderby": {"value": "b"}, 753 | "limit": 2 754 | }]}, 755 | "orderby": {"value": "a"} 756 | } 757 | self.assertEqual(result, expected) 758 | 759 | def test_089(self): 760 | sql = "SELECT a FROM t6 WHERE b IN\n(SELECT b FROM t6 WHERE a<='b' UNION SELECT '3' AS x\nORDER BY x DESC LIMIT 2)\nORDER BY a" 761 | result = parse(sql) 762 | expected = { 763 | "from": "t6", 764 | "select": {"value": "a"}, 765 | "where": {"in": ["b", { 766 | "from": {"union": [ 767 | { 768 | "from": "t6", 769 | "select": {"value": "b"}, 770 | "where": {"lte": ["a", {"literal": "b"}]}, 771 | }, 772 | { 773 | "select": {"value": {"literal": "3"}, "name": "x"} 774 | } 775 | ]}, 776 | "orderby": {"value": "x", "sort": "desc"}, 777 | "limit": 2 778 | }]}, 779 | "orderby": {"value": "a"} 780 | } 781 | self.assertEqual(result, 
expected) 782 | 783 | def test_090(self): 784 | sql = "SELECT f1 FROM test1 UNION SELECT WHERE" 785 | self.assertRaises(Exception, parse, sql) 786 | 787 | def test_091(self): 788 | sql = "SELECT f1 FROM test1 as 'hi', test2 as" 789 | self.assertRaises(Exception, parse, sql) 790 | 791 | def test_093(self): 792 | sql = "SELECT count(f1,f2) FROM test1" 793 | result = parse(sql) 794 | expected = { 795 | "from": "test1", 796 | "select": {"value": {"count": ["f1", "f2"]}} 797 | } 798 | self.assertEqual(result, expected) 799 | 800 | def test_094(self): 801 | sql = "SELECT f1 FROM test1 ORDER BY f2, f1" 802 | result = parse(sql) 803 | expected = { 804 | "from": "test1", 805 | "select": {"value": "f1"}, 806 | "orderby": [{"value": "f2"}, {"value": "f1"}] 807 | } 808 | self.assertEqual(result, expected) 809 | 810 | def test_095(self): 811 | sql = "SELECT f1 FROM test1 WHERE 4.3+2.4 OR 1 ORDER BY f1" 812 | result = parse(sql) 813 | expected = { 814 | "from": "test1", 815 | "select": {"value": "f1"}, 816 | "where": {"or": [{"add": [4.3, 2.4]}, 1]}, 817 | "orderby": {"value": "f1"} 818 | } 819 | self.assertEqual(result, expected) 820 | 821 | @skipIf(IS_MASTER, "does not work on master, not enough stack space") 822 | def test_096(self): 823 | # 01234567890123456789012345678901234567890123456789012345678901 234567890123456789 824 | sql = "SELECT f1 FROM test1 WHERE ('x' || f1) BETWEEN 'x10' AND 'x20'\nORDER BY f1" 825 | result = parse(sql) 826 | expected = { 827 | "from": "test1", 828 | "select": {"value": "f1"}, 829 | "where": {"between": [ 830 | {"concat": [{"literal": "x"}, "f1"]}, 831 | {"literal": "x10"}, 832 | {"literal": "x20"} 833 | ]}, 834 | "orderby": {"value": "f1"} 835 | } 836 | self.assertEqual(result, expected) 837 | 838 | def test_097(self): 839 | sql = "SELECT f1 FROM test1 WHERE 5-3==2\nORDER BY f1" 840 | result = parse(sql) 841 | expected = { 842 | "from": "test1", 843 | "select": {"value": "f1"}, 844 | "where": {"eq": [{"sub": [5, 3]}, 2]}, 845 | "orderby": {"value": "f1"} 846 | } 847 | self.assertEqual(result, expected) 848 | 849 | @skipIf(IS_MASTER, "does not work on master, not enough stack space") 850 | def test_098(self): 851 | sql = "SELECT coalesce(f1/(f1-11),'x'),\ncoalesce(min(f1/(f1-11),5),'y'),\ncoalesce(max(f1/(f1-33),6),'z')\nFROM test1 ORDER BY f1" 852 | result = parse(sql) 853 | expected = { 854 | "from": "test1", 855 | "orderby": {"value": "f1"}, 856 | "select": [ 857 | {"value": {"coalesce": [{"div": ["f1", {"sub": ["f1", 11]}]}, {"literal": "x"}]}}, 858 | {"value": {"coalesce": [{"min": [{"div": ["f1", {"sub": ["f1", 11]}]}, 5]}, {"literal": "y"}]}}, 859 | {"value": {"coalesce": [{"max": [{"div": ["f1", {"sub": ["f1", 33]}]}, 6]}, {"literal": "z"}]}}, 860 | ] 861 | } 862 | self.assertEqual(result, expected) 863 | 864 | def test_099(self): 865 | sql = "SELECT min(1,2,3), -max(1,2,3)\nFROM test1 ORDER BY f1" 866 | result = parse(sql) 867 | expected = { 868 | "from": "test1", 869 | "orderby": {"value":"f1"}, 870 | "select": [ 871 | {"value":{"min": [1, 2, 3]}}, 872 | {"value":{"neg": {"max": [1, 2, 3]}}} 873 | ] 874 | } 875 | self.assertEqual(result, expected) 876 | 877 | def test_100(self): 878 | sql = "SELECT * FROM test1 WHERE f1<0" 879 | result = parse(sql) 880 | expected = { 881 | "from": "test1", 882 | "select": "*", 883 | "where": {"lt": ["f1", 0]} 884 | } 885 | self.assertEqual(result, expected) 886 | 887 | def test_103(self): 888 | sql = "SELECT * FROM test1 WHERE f1<(select count(*) from test2)" 889 | result = parse(sql) 890 | expected = { 891 | "from": 
"test1", 892 | "select": "*", 893 | "where": {"lt": ["f1", { 894 | "from": "test2", 895 | "select": {"value": {"count": "*"}} 896 | }]} 897 | } 898 | self.assertEqual(result, expected) 899 | 900 | def test_104(self): 901 | sql = "SELECT * FROM test1 ORDER BY f1" 902 | result = parse(sql) 903 | expected = { 904 | "from": "test1", 905 | "select": "*", 906 | "orderby": {"value": "f1"} 907 | } 908 | self.assertEqual(result, expected) 909 | 910 | def test_105(self): 911 | sql = "SELECT * FROM test1 WHERE f1<0 ORDER BY f1" 912 | result = parse(sql) 913 | expected = { 914 | "from": "test1", 915 | "select": "*", 916 | "where": {"lt": ["f1", 0]}, 917 | "orderby": {"value": "f1"} 918 | } 919 | self.assertEqual(result, expected) 920 | 921 | def test_106(self): 922 | sql = "SELECT f1 AS x FROM test1 ORDER BY x" 923 | result = parse(sql) 924 | expected = { 925 | "from": "test1", 926 | "select": {"value": "f1", "name": "x"}, 927 | "orderby": {"value": "x"} 928 | } 929 | self.assertEqual(result, expected) 930 | 931 | def test_107(self): 932 | # 0123456789012345678901234567890123456789 933 | sql = "SELECT f1 AS x FROM test1 ORDER BY -x" 934 | result = parse(sql) 935 | expected = { 936 | "from": "test1", 937 | "select": {"value": "f1", "name": "x"}, 938 | "orderby": {"value": {"neg": "x"}} 939 | } 940 | self.assertEqual(result, expected) 941 | 942 | def test_108(self): 943 | sql = "SELECT f1-23 AS x FROM test1 ORDER BY abs(x)" 944 | result = parse(sql) 945 | expected = { 946 | "from": "test1", 947 | "select": {"value": {"sub": ["f1", 23]}, "name": "x"}, 948 | "orderby": {"value": {"abs": "x"}} 949 | } 950 | self.assertEqual(result, expected) 951 | 952 | def test_109(self): 953 | sql = "SELECT f1-23 AS x FROM test1 ORDER BY -abs(x)" 954 | result = parse(sql) 955 | expected = { 956 | "from": "test1", 957 | "select": {"value": {"sub": ["f1", 23]}, "name": "x"}, 958 | "orderby": {"value": {"neg": {"abs": "x"}}} 959 | } 960 | self.assertEqual(result, expected) 961 | 962 | def test_110(self): 963 | sql = "SELECT f1-22 AS x, f2-22 as y FROM test1" 964 | result = parse(sql) 965 | expected = { 966 | "from": "test1", 967 | "select": [ 968 | {"value": {"sub": ["f1", 22]}, "name": "x"}, 969 | {"value": {"sub": ["f2", 22]}, "name": "y"}, 970 | ] 971 | } 972 | self.assertEqual(result, expected) 973 | 974 | def test_111(self): 975 | sql = "SELECT f1-22 AS x, f2-22 as y FROM test1 WHERE x>0 AND y<50" 976 | result = parse(sql) 977 | expected = { 978 | "from": "test1", 979 | "select": [ 980 | {"value": {"sub": ["f1", 22]}, "name": "x"}, 981 | {"value": {"sub": ["f2", 22]}, "name": "y"}, 982 | ], 983 | "where": {"and": [ 984 | {"gt": ["x", 0]}, 985 | {"lt": ["y", 50]} 986 | ]} 987 | } 988 | self.assertEqual(result, expected) 989 | 990 | def test_112(self): 991 | sql = "SELECT f1 COLLATE nocase AS x FROM test1 ORDER BY x" 992 | result = parse(sql) 993 | expected = { 994 | "from": "test1", 995 | "select": {"name":"x", "value": {"collate nocase": "f1"}}, 996 | "orderby": {"value": "x"} 997 | } 998 | self.assertEqual(result, expected) 999 | 1000 | def test_113(self): 1001 | sql = "SELECT * FROM t3, t4" 1002 | result = parse(sql) 1003 | expected = { 1004 | "from": ["t3", "t4"], 1005 | "select": "*" 1006 | } 1007 | self.assertEqual(result, expected) 1008 | 1009 | def test_114(self): 1010 | sql = "SELECT t3.*, t4.b FROM t3, t4" 1011 | result = parse(sql) 1012 | expected = { 1013 | "from": ["t3", "t4"], 1014 | "select": [{"value": "t3.*"}, {"value": "t4.b"}] 1015 | } 1016 | self.assertEqual(result, expected) 1017 | 1018 | def 
test_115(self): 1019 | sql = "SELECT \"t3\".*, t4.b FROM t3, t4" 1020 | result = parse(sql) 1021 | expected = { 1022 | "from": ["t3", "t4"], 1023 | "select": [{"value": "t3.*"}, {"value": "t4.b"}] 1024 | } 1025 | self.assertEqual(result, expected) 1026 | 1027 | def test_116(self): 1028 | sql = "SELECT t3.b, t4.* FROM t3, t4" 1029 | result = parse(sql) 1030 | expected = { 1031 | "from": ["t3", "t4"], 1032 | "select": [{"value": "t3.b"}, {"value": "t4.*"}] 1033 | } 1034 | self.assertEqual(result, expected) 1035 | 1036 | def test_118a(self): 1037 | sql = "SELECT * FROM t3 UNION SELECT 3 AS 'a', 4 ORDER BY a" 1038 | self.assertRaises(Exception, parse, sql) 1039 | 1040 | def test_118b(self): 1041 | sql = "SELECT * FROM t3 UNION SELECT 3 AS \"a\", 4 ORDER BY a" 1042 | result = parse(sql) 1043 | expected = { 1044 | "from":{"union": [ 1045 | {"from": "t3", "select": "*"}, 1046 | {"select": [{"value": 3, "name": "a"}, {"value": 4}]} 1047 | ]}, 1048 | "orderby": {"value": "a"} 1049 | } 1050 | self.assertEqual(result, expected) 1051 | 1052 | def test_118c(self): 1053 | sql = "SELECT * FROM t3 UNION SELECT 3 AS a, 4 ORDER BY a" 1054 | result = parse(sql) 1055 | expected = { 1056 | "from":{"union": [ 1057 | {"from": "t3", "select": "*"}, 1058 | {"select": [{"value": 3, "name": "a"}, {"value": 4}]} 1059 | ]}, 1060 | "orderby": {"value": "a"} 1061 | } 1062 | self.assertEqual(result, expected) 1063 | 1064 | def test_119(self): 1065 | sql = "SELECT 3, 4 UNION SELECT * FROM t3" 1066 | result = parse(sql) 1067 | expected = {"union": [ 1068 | {"select": [{"value": 3}, {"value": 4}]}, 1069 | { 1070 | "from": "t3", 1071 | "select": "*" 1072 | } 1073 | ]} 1074 | self.assertEqual(result, expected) 1075 | 1076 | def test_120(self): 1077 | sql = "SELECT * FROM t3 WHERE a=(SELECT 1)" 1078 | result = parse(sql) 1079 | expected = { 1080 | "from": "t3", 1081 | "select": "*", 1082 | "where": {"eq": ["a", {"select": {"value": 1}}]} 1083 | } 1084 | self.assertEqual(result, expected) 1085 | 1086 | def test_121(self): 1087 | sql = "SELECT * FROM t3 WHERE a=(SELECT 2)" 1088 | result = parse(sql) 1089 | expected = { 1090 | "from": "t3", 1091 | "select": "*", 1092 | "where": {"eq": ["a", {"select": {"value": 2}}]} 1093 | } 1094 | self.assertEqual(result, expected) 1095 | 1096 | def test_125(self): 1097 | # 11111111112222222222333333333344444444445555555555666666666677777777778888888888 1098 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 1099 | sql = "SELECT count(\n(SELECT a FROM abc WHERE a = NULL AND b >= upper.c)\n) FROM abc AS upper" 1100 | result = parse(sql) 1101 | expected = { 1102 | "from": {"value": "abc", "name": "upper"}, 1103 | "select": {"value": {"count": { 1104 | "from": "abc", 1105 | "select": {"value": "a"}, 1106 | "where": {"and": [ 1107 | {"missing": "a"}, 1108 | {"gte": ["b", "upper.c"]} 1109 | ]} 1110 | }}} 1111 | 1112 | } 1113 | self.assertEqual(result, expected) 1114 | 1115 | def test_126(self): 1116 | sql = "SELECT name FROM sqlite_master WHERE type = 'table'" 1117 | result = parse(sql) 1118 | expected = { 1119 | "from": "sqlite_master", 1120 | "select": {"value": "name"}, 1121 | "where": {"eq": ["type", {"literal": "table"}]} 1122 | } 1123 | self.assertEqual(result, expected) 1124 | 1125 | def test_128(self): 1126 | sql = "SELECT 10 IN (SELECT rowid FROM sqlite_master)" 1127 | result = parse(sql) 1128 | expected = { 1129 | "select": {"value": {"in": [ 1130 | 10, 1131 | {"from": "sqlite_master", "select": {"value": "rowid"}} 1132 | ]}}, 1133 | } 1134 | 
self.assertEqual(result, expected) 1135 | 1136 | def test_131(self): 1137 | sql = "SELECT 2 IN (SELECT a FROM t1)" 1138 | result = parse(sql) 1139 | expected = { 1140 | "select": {"value": {"in": [ 1141 | 2, 1142 | {"from": "t1", "select": {"value": "a"}} 1143 | ]}}, 1144 | } 1145 | self.assertEqual(result, expected) 1146 | 1147 | def test_139(self): 1148 | sql = "SELECT count(*) FROM tbl2" 1149 | result = parse(sql) 1150 | expected = {"from": "tbl2", "select": {"value": {"count":"*"}}} 1151 | self.assertEqual(result, expected) 1152 | 1153 | def test_140(self): 1154 | sql = "SELECT count(*) FROM tbl2 WHERE f2>1000" 1155 | result = parse(sql) 1156 | expected = { 1157 | "from": "tbl2", 1158 | "select": {"value": {"count": "*"}}, 1159 | "where": {"gt": ["f2", 1000]} 1160 | } 1161 | self.assertEqual(result, expected) 1162 | 1163 | def test_141(self): 1164 | sql = "SELECT f1 FROM tbl2 WHERE 1000=f2" 1165 | result = parse(sql) 1166 | expected = { 1167 | "from": "tbl2", 1168 | "select": {"value": "f1"}, 1169 | "where": {"eq": [1000, "f2"]} 1170 | } 1171 | self.assertEqual(result, expected) 1172 | 1173 | def test_144(self): 1174 | sql = "SELECT f1 FROM tbl2 WHERE f2=1000" 1175 | result = parse(sql) 1176 | expected = { 1177 | "from": "tbl2", 1178 | "select": {"value": "f1"}, 1179 | "where": {"eq": ["f2", 1000]} 1180 | } 1181 | self.assertEqual(result, expected) 1182 | 1183 | def test_145(self): 1184 | sql = "SELECT * FROM tbl2 WHERE 1000=f2" 1185 | result = parse(sql) 1186 | expected = { 1187 | "from": "tbl2", 1188 | "select": "*", 1189 | "where": {"eq": [1000, "f2"]} 1190 | } 1191 | self.assertEqual(result, expected) 1192 | 1193 | def test_146(self): 1194 | sql = "SELECT * FROM tbl2 WHERE f2=1000" 1195 | result = parse(sql) 1196 | expected = { 1197 | "from": "tbl2", 1198 | "select": "*", 1199 | "where": {"eq": ["f2", 1000]} 1200 | } 1201 | self.assertEqual(result, expected) 1202 | 1203 | def test_148(self): 1204 | sql = "SELECT f1 FROM tbl2 WHERE f2==2000" 1205 | result = parse(sql) 1206 | expected = { 1207 | "from": "tbl2", 1208 | "select": {"value": "f1"}, 1209 | "where": {"eq": ["f2", 2000]} 1210 | } 1211 | self.assertEqual(result, expected) 1212 | 1213 | def test_150(self): 1214 | sql = "SELECT * FROM aa CROSS JOIN bb WHERE b" 1215 | result = parse(sql) 1216 | expected = { 1217 | "from": ["aa", {"cross join": "bb"}], 1218 | "select": "*", 1219 | "where": "b" 1220 | } 1221 | self.assertEqual(result, expected) 1222 | 1223 | def test_151(self): 1224 | sql = "SELECT * FROM aa CROSS JOIN bb WHERE NOT b" 1225 | result = parse(sql) 1226 | expected = { 1227 | "from": ["aa", {"cross join": "bb"}], 1228 | "select": "*", 1229 | "where": {"not": "b"} 1230 | } 1231 | self.assertEqual(result, expected) 1232 | 1233 | def test_152(self): 1234 | sql = "SELECT * FROM aa, bb WHERE min(a,b)" 1235 | result = parse(sql) 1236 | expected = { 1237 | "from": ["aa", "bb"], 1238 | "select": "*", 1239 | "where": {"min": ["a", "b"]} 1240 | } 1241 | self.assertEqual(result, expected) 1242 | 1243 | def test_153(self): 1244 | sql = "SELECT * FROM aa, bb WHERE NOT min(a,b)" 1245 | result = parse(sql) 1246 | expected = { 1247 | "from": ["aa", "bb"], 1248 | "select": "*", 1249 | "where": {"not": {"min": ["a", "b"]}} 1250 | } 1251 | self.assertEqual(result, expected) 1252 | 1253 | def test_154(self): 1254 | sql = "SELECT * FROM aa, bb WHERE CASE WHEN a=b-1 THEN 1 END" 1255 | result = parse(sql) 1256 | expected = { 1257 | "from":["aa","bb"], 1258 | "select": "*", 1259 | "where": {"case": 1260 | {"when": {"eq": ["a", {"sub": ["b", 
1]}]}, "then": 1} 1261 | } 1262 | } 1263 | self.assertEqual(result, expected) 1264 | 1265 | def test_155(self): 1266 | sql = "SELECT * FROM aa, bb WHERE CASE WHEN a=b-1 THEN 0 ELSE 1 END" 1267 | result = parse(sql) 1268 | expected = { 1269 | "from":["aa","bb"], 1270 | "select": "*", 1271 | "where": {"case": [ 1272 | {"when": {"eq": ["a", {"sub": ["b", 1]}]}, "then": 0}, 1273 | 1 1274 | ]} 1275 | } 1276 | self.assertEqual(result, expected) 1277 | 1278 | def test_158(self): 1279 | sql = "SELECT DISTINCT log FROM t1 ORDER BY log" 1280 | result = parse(sql) 1281 | expected = { 1282 | "from":"t1", 1283 | "select": {"value": {"distinct": "log"}}, 1284 | "orderby": {"value":"log"} 1285 | } 1286 | self.assertEqual(result, expected) 1287 | 1288 | def test_160(self): 1289 | sql = "SELECT min(n),min(log),max(n),max(log),sum(n),sum(log),avg(n),avg(log)\nFROM t1" 1290 | result = parse(sql) 1291 | expected = { 1292 | "from": "t1", 1293 | "select": [ 1294 | {"value": {"min": "n"}}, 1295 | {"value": {"min": "log"}}, 1296 | {"value": {"max": "n"}}, 1297 | {"value": {"max": "log"}}, 1298 | {"value": {"sum": "n"}}, 1299 | {"value": {"sum": "log"}}, 1300 | {"value": {"avg": "n"}}, 1301 | {"value": {"avg": "log"}} 1302 | ] 1303 | } 1304 | self.assertEqual(result, expected) 1305 | 1306 | def test_161(self): 1307 | sql = "SELECT max(n)/avg(n), max(log)/avg(log) FROM t1" 1308 | result = parse(sql) 1309 | expected = { 1310 | "from": "t1", 1311 | "select": [ 1312 | {"value": {"div": [{"max": "n"}, {"avg": "n"}]}}, 1313 | {"value": {"div": [{"max": "log"}, {"avg": "log"}]}}, 1314 | ] 1315 | } 1316 | self.assertEqual(result, expected) 1317 | 1318 | def test_162(self): 1319 | # 012345678901234567890123456789012345678901234567890123456789 1320 | sql = "SELECT log, count(*) FROM t1 GROUP BY log ORDER BY log" 1321 | result = parse(sql) 1322 | expected = { 1323 | "from":"t1", 1324 | "select":[{"value":"log"}, {"value":{"count":"*"}}], 1325 | "groupby": {"value": "log"}, 1326 | "orderby": {"value": "log"} 1327 | } 1328 | self.assertEqual(result, expected) 1329 | 1330 | def test_163(self): 1331 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY log" 1332 | result = parse(sql) 1333 | expected = { 1334 | "from":"t1", 1335 | "select":[{"value":"log"}, {"value":{"min":"n"}}], 1336 | "groupby": {"value": "log"}, 1337 | "orderby": {"value": "log"} 1338 | } 1339 | self.assertEqual(result, expected) 1340 | 1341 | def test_164(self): 1342 | sql = "SELECT log, avg(n) FROM t1 GROUP BY log ORDER BY log" 1343 | result = parse(sql) 1344 | expected = { 1345 | "from":"t1", 1346 | "select":[{"value":"log"}, {"value":{"avg":"n"}}], 1347 | "groupby": {"value": "log"}, 1348 | "orderby": {"value": "log"} 1349 | } 1350 | self.assertEqual(result, expected) 1351 | 1352 | def test_165(self): 1353 | sql = "SELECT log, avg(n)+1 FROM t1 GROUP BY log ORDER BY log" 1354 | result = parse(sql) 1355 | expected = { 1356 | "from":"t1", 1357 | "select": [{"value": "log"}, {"value": {"add": [{"avg": "n"}, 1]}}], 1358 | "groupby": {"value": "log"}, 1359 | "orderby": {"value": "log"} 1360 | } 1361 | self.assertEqual(result, expected) 1362 | 1363 | def test_166(self): 1364 | sql = "SELECT log, avg(n)-min(n) FROM t1 GROUP BY log ORDER BY log" 1365 | result = parse(sql) 1366 | expected = { 1367 | "from":"t1", 1368 | "select": [{"value": "log"}, {"value": {"sub": [{"avg": "n"}, {"min": "n"}]}}], 1369 | "groupby": {"value": "log"}, 1370 | "orderby": {"value": "log"} 1371 | } 1372 | self.assertEqual(result, expected) 1373 | 1374 | def test_167(self): 1375 | sql 
= "SELECT log*2+1, avg(n)-min(n) FROM t1 GROUP BY log ORDER BY log" 1376 | result = parse(sql) 1377 | expected = { 1378 | "from": "t1", 1379 | "select": [ 1380 | {"value": {"add": [{"mul": ["log", 2]}, 1]}}, 1381 | {"value": {"sub": [{"avg": "n"}, {"min": "n"}]}} 1382 | ], 1383 | "groupby": {"value": "log"}, 1384 | "orderby": {"value": "log"} 1385 | } 1386 | self.assertEqual(result, expected) 1387 | 1388 | def test_168(self): 1389 | sql = "SELECT log*2+1 as x, count(*) FROM t1 GROUP BY x ORDER BY x" 1390 | result = parse(sql) 1391 | expected = { 1392 | "from": "t1", 1393 | "select": [ 1394 | {"value": {"add": [{"mul": ["log", 2]}, 1]}, "name": "x"}, 1395 | {"value": {"count": "*"}} 1396 | ], 1397 | "groupby": {"value": "x"}, 1398 | "orderby": {"value": "x"} 1399 | } 1400 | self.assertEqual(result, expected) 1401 | 1402 | def test_169(self): 1403 | sql = "SELECT log*2+1 AS x, count(*) AS y FROM t1 GROUP BY x ORDER BY y, x" 1404 | result = parse(sql) 1405 | expected = { 1406 | "from": "t1", 1407 | "select": [ 1408 | {"value": {"add": [{"mul": ["log", 2]}, 1]}, "name": "x"}, 1409 | {"value": {"count": "*"}, "name": "y"} 1410 | ], 1411 | "groupby": {"value": "x"}, 1412 | "orderby": [{"value": "y"}, {"value": "x"}] 1413 | } 1414 | self.assertEqual(result, expected) 1415 | 1416 | def test_170(self): 1417 | sql = "SELECT log*2+1 AS x, count(*) AS y FROM t1 GROUP BY x ORDER BY 10-(x+y)" 1418 | result = parse(sql) 1419 | expected = { 1420 | "from": "t1", 1421 | "select": [ 1422 | {"value": {"add": [{"mul": ["log", 2]}, 1]}, "name": "x"}, 1423 | {"value": {"count": "*"}, "name": "y"} 1424 | ], 1425 | "groupby": {"value": "x"}, 1426 | "orderby": {"value": {"sub":[10, {"add":["x", "y"]}]}} 1427 | } 1428 | self.assertEqual(result, expected) 1429 | 1430 | def test_171(self): 1431 | sql = "SELECT log, count(*) FROM t1 GROUP BY something HAVING log>=4" 1432 | result = parse(sql) 1433 | expected = { 1434 | "from": "t1", 1435 | "select": [ 1436 | {"value": "log"}, 1437 | {"value": {"count": "*"}} 1438 | ], 1439 | "groupby": {"value": "something"}, 1440 | "having": {"gte": ["log", 4]} 1441 | } 1442 | self.assertEqual(result, expected) 1443 | 1444 | def test_172(self): 1445 | sql = "SELECT log, count(*) FROM t1 GROUP BY log HAVING log>=4 ORDER BY log" 1446 | result = parse(sql) 1447 | expected = { 1448 | "from": "t1", 1449 | "select": [ 1450 | {"value": "log"}, 1451 | {"value": {"count": "*"}} 1452 | ], 1453 | "groupby": {"value": "log"}, 1454 | "having": {"gte": ["log", 4]}, 1455 | "orderby": {"value": "log"} 1456 | } 1457 | self.assertEqual(result, expected) 1458 | 1459 | def test_173(self): 1460 | sql = "SELECT log, count(*) FROM t1\nGROUP BY log\nHAVING count(*)>=4\nORDER BY log" 1461 | result = parse(sql) 1462 | expected = { 1463 | "from": "t1", 1464 | "select": [ 1465 | {"value": "log"}, 1466 | {"value": {"count": "*"}} 1467 | ], 1468 | "groupby": {"value": "log"}, 1469 | "having": {"gte": [{"count":"*"}, 4]}, 1470 | "orderby": {"value": "log"} 1471 | } 1472 | self.assertEqual(result, expected) 1473 | 1474 | def test_174(self): 1475 | sql = "SELECT log, count(*) FROM t1\nGROUP BY log\nHAVING count(*)>=4\nORDER BY max(n)+0" 1476 | result = parse(sql) 1477 | expected = { 1478 | "from": "t1", 1479 | "select": [ 1480 | {"value": "log"}, 1481 | {"value": {"count": "*"}} 1482 | ], 1483 | "groupby": {"value": "log"}, 1484 | "having": {"gte": [{"count":"*"}, 4]}, 1485 | "orderby": {"value": {"add":[{"max":"n"}, 0]}} 1486 | } 1487 | self.assertEqual(result, expected) 1488 | 1489 | def test_175(self): 1490 | sql 
= "SELECT log AS x, count(*) AS y FROM t1\nGROUP BY x\nHAVING y>=4\nORDER BY max(n)+0" 1491 | result = parse(sql) 1492 | expected = { 1493 | "from": "t1", 1494 | "select": [ 1495 | {"value": "log", "name":"x"}, 1496 | {"value": {"count": "*"}, "name":"y"} 1497 | ], 1498 | "groupby": {"value": "x"}, 1499 | "having": {"gte": ["y", 4]}, 1500 | "orderby": {"value": {"add":[{"max":"n"}, 0]}} 1501 | } 1502 | self.assertEqual(result, expected) 1503 | 1504 | def test_176(self): 1505 | sql = "SELECT log AS x FROM t1\nGROUP BY x\nHAVING count(*)>=4\nORDER BY max(n)+0" 1506 | result = parse(sql) 1507 | expected = { 1508 | "from": "t1", 1509 | "select": {"value": "log", "name": "x"}, 1510 | "groupby": {"value": "x"}, 1511 | "having": {"gte": [{"count": "*"}, 4]}, 1512 | "orderby": {"value": {"add": [{"max": "n"}, 0]}} 1513 | } 1514 | self.assertEqual(result, expected) 1515 | 1516 | def test_177(self): 1517 | sql = "SELECT log, count(*), avg(n), max(n+log*2) FROM t1\nGROUP BY log\nORDER BY max(n+log*2)+0, avg(n)+0" 1518 | result = parse(sql) 1519 | expected = { 1520 | "from": "t1", 1521 | "select": [ 1522 | {"value": "log"}, 1523 | {"value": {"count": "*"}}, 1524 | {"value": {"avg": "n"}}, 1525 | {"value": {"max": {"add": ["n", {"mul": ["log", 2]}]}}} 1526 | ], 1527 | "groupby": {"value": "log"}, 1528 | "orderby": [ 1529 | {"value": {"add": [ 1530 | {"max": {"add": ["n", {"mul": ["log", 2]}]}}, 1531 | 0 1532 | ]}}, 1533 | {"value": {"add": [{"avg": "n"}, 0]}} 1534 | ] 1535 | } 1536 | self.assertEqual(result, expected) 1537 | 1538 | def test_178(self): 1539 | sql = "SELECT log, count(*), avg(n), max(n+log*2) FROM t1\nGROUP BY log\nORDER BY max(n+log*2)+0, min(log,avg(n))+0" 1540 | result = parse(sql) 1541 | expected = { 1542 | "from": "t1", 1543 | "select": [ 1544 | {"value": "log"}, 1545 | {"value": {"count": "*"}}, 1546 | {"value": {"avg": "n"}}, 1547 | {"value": {"max": {"add": ["n", {"mul": ["log", 2]}]}}} 1548 | ], 1549 | "groupby": {"value": "log"}, 1550 | "orderby": [ 1551 | {"value": {"add": [ 1552 | {"max": {"add": ["n", {"mul": ["log", 2]}]}}, 1553 | 0 1554 | ]}}, 1555 | {"value": {"add": [{"min": ["log", {"avg": "n"}]}, 0]}} 1556 | ] 1557 | } 1558 | self.assertEqual(result, expected) 1559 | 1560 | def test_179(self): 1561 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY log" 1562 | result = parse(sql) 1563 | expected = { 1564 | "from": "t1", 1565 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1566 | "groupby": {"value": "log"}, 1567 | "orderby": {"value": "log"} 1568 | } 1569 | self.assertEqual(result, expected) 1570 | 1571 | def test_180(self): 1572 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY log DESC" 1573 | result = parse(sql) 1574 | expected = { 1575 | "from": "t1", 1576 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1577 | "groupby": {"value": "log"}, 1578 | "orderby": {"value": "log", "sort":"desc"} 1579 | } 1580 | self.assertEqual(result, expected) 1581 | 1582 | def test_181(self): 1583 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY 1" 1584 | result = parse(sql) 1585 | expected = { 1586 | "from": "t1", 1587 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1588 | "groupby": {"value": "log"}, 1589 | "orderby": {"value": 1} 1590 | } 1591 | self.assertEqual(result, expected) 1592 | 1593 | def test_183(self): 1594 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY log" 1595 | result = parse(sql) 1596 | expected = { 1597 | "from": "t1", 1598 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1599 | "groupby": 
{"value": "log"}, 1600 | "orderby": {"value": "log"} 1601 | } 1602 | self.assertEqual(result, expected) 1603 | 1604 | def test_184(self): 1605 | # 012345678901234567890123456789012345678901234567890123456789 1606 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY log DESC" 1607 | result = parse(sql) 1608 | expected = { 1609 | "from": "t1", 1610 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1611 | "groupby": {"value": "log"}, 1612 | "orderby": {"value": "log", "sort": "desc"} 1613 | } 1614 | self.assertEqual(result, expected) 1615 | 1616 | def test_185(self): 1617 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY 1" 1618 | result = parse(sql) 1619 | expected = { 1620 | "from": "t1", 1621 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1622 | "groupby": {"value": "log"}, 1623 | "orderby": {"value": 1} 1624 | } 1625 | self.assertEqual(result, expected) 1626 | 1627 | def test_186(self): 1628 | sql = "SELECT log, min(n) FROM t1 GROUP BY log ORDER BY 1 DESC" 1629 | result = parse(sql) 1630 | expected = { 1631 | "from": "t1", 1632 | "select": [{"value": "log"}, {"value": {"min": "n"}}], 1633 | "groupby": {"value": "log"}, 1634 | "orderby": {"value": 1, "sort": "desc"} 1635 | } 1636 | self.assertEqual(result, expected) 1637 | 1638 | def test_187(self): 1639 | # 01234567890123456789012345678901234567890123456789 1640 | sql = "SELECT a, sum(b) FROM t2 WHERE b=5 GROUP BY a" 1641 | result = parse(sql) 1642 | expected = { 1643 | "from": "t2", 1644 | "select": [{"value": "a"}, {"value": {"sum": "b"}}], 1645 | "groupby": {"value": "a"}, 1646 | "where": {"eq": ["b", 5]} 1647 | } 1648 | self.assertEqual(result, expected) 1649 | 1650 | def test_188(self): 1651 | sql = "SELECT a, sum(b) FROM t2 WHERE b=5" 1652 | result = parse(sql) 1653 | expected = { 1654 | "from": "t2", 1655 | "select": [{"value": "a"}, {"value": {"sum": "b"}}], 1656 | "where": {"eq": ["b", 5]} 1657 | } 1658 | self.assertEqual(result, expected) 1659 | 1660 | def test_189(self): 1661 | sql = "SELECT typeof(sum(a3)) FROM a" 1662 | result = parse(sql) 1663 | expected = { 1664 | "from": "a", 1665 | "select": {"value": {"typeof": {"sum": "a3"}}} 1666 | } 1667 | self.assertEqual(result, expected) 1668 | 1669 | def test_190(self): 1670 | sql = "SELECT typeof(sum(a3)) FROM a GROUP BY a1" 1671 | result = parse(sql) 1672 | expected = { 1673 | "from": "a", 1674 | "select": {"value": {"typeof": {"sum": "a3"}}}, 1675 | "groupby": {"value": "a1"} 1676 | } 1677 | self.assertEqual(result, expected) 1678 | -------------------------------------------------------------------------------- /tests/test_simple.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
6 | # 7 | # Author: Kyle Lahnakoski (kyle@lahnakoski.com) 8 | # 9 | 10 | from __future__ import absolute_import, division, unicode_literals 11 | 12 | import json 13 | from unittest import TestCase, skipIf 14 | 15 | from moz_sql_parser import parse 16 | from test_resources import IS_MASTER 17 | 18 | try: 19 | from tests.util import assertRaises 20 | except ImportError: 21 | from .util import assertRaises # RELATIVE IMPORT SO WE CAN RUN IN pyLibrary 22 | 23 | 24 | class TestSimple(TestCase): 25 | 26 | def test_two_tables(self): 27 | result = parse("SELECT * from XYZZY, ABC") 28 | expected = { 29 | "select": "*", 30 | "from": ["XYZZY", "ABC"] 31 | } 32 | self.assertEqual(result, expected) 33 | 34 | def test_dot_table_name(self): 35 | result = parse("select * from SYS.XYZZY") 36 | expected = { 37 | "select": "*", 38 | "from": "SYS.XYZZY" 39 | } 40 | self.assertEqual(result, expected) 41 | 42 | def test_select_one_column(self): 43 | result = parse("Select A from dual") 44 | expected = { 45 | "select": {"value": "A"}, 46 | "from": "dual" 47 | } 48 | self.assertEqual(result, expected) 49 | 50 | def test_select_quote(self): 51 | result = parse("Select '''' from dual") 52 | expected = { 53 | "select": {"value": {"literal": "'"}}, 54 | "from": "dual" 55 | } 56 | self.assertEqual(result, expected) 57 | 58 | def test_select_quoted_name(self): 59 | result = parse('Select a "@*#&", b as test."g.g".c from dual') 60 | expected = { 61 | "select": [ 62 | {"name": "@*#&", "value": "a"}, 63 | {"name": "test.g.g.c", "value": "b"} 64 | ], 65 | "from": "dual" 66 | } 67 | self.assertEqual(result, expected) 68 | 69 | def test_select_expression(self): 70 | # 1 2 3 4 5 6 71 | # 0123456789012345678901234567890123456789012345678901234567890123456789 72 | result = parse("SELECT a + b/2 + 45*c + (2/d) from dual") 73 | expected = { 74 | "select": {"value": {"add": [ 75 | "a", 76 | {"div": ["b", 2]}, 77 | {"mul": [45, "c"]}, 78 | {"div": [2, "d"]} 79 | ]}}, 80 | "from": "dual" 81 | } 82 | self.assertEqual(result, expected) 83 | 84 | def test_select_underscore_name(self): 85 | # 1 2 3 4 5 6 86 | # 0123456789012345678901234567890123456789012345678901234567890123456789 87 | result = parse("select _id from dual") 88 | expected = { 89 | "select": {"value": "_id"}, 90 | "from": "dual" 91 | } 92 | self.assertEqual(result, expected) 93 | 94 | def test_select_dots_names(self): 95 | # 1 2 3 4 5 6 96 | # 0123456789012345678901234567890123456789012345678901234567890123456789 97 | result = parse("select a.b.c._d from dual") 98 | expected = { 99 | "select": {"value": "a.b.c._d"}, 100 | "from": "dual" 101 | } 102 | self.assertEqual(result, expected) 103 | 104 | def test_select_many_column(self): 105 | result = parse("Select a, b, c from dual") 106 | expected = { 107 | "select": [ 108 | {"value": "a"}, 109 | {"value": "b"}, 110 | {"value": "c"} 111 | ], 112 | "from": "dual" 113 | } 114 | self.assertEqual(result, expected) 115 | 116 | def test_bad_select1(self): 117 | assertRaises('Expected select', lambda: parse("se1ect A, B, C from dual")) 118 | 119 | def test_bad_select2(self): 120 | assertRaises('Expected {{expression1 [{[as] column_name1}]}', lambda: parse("Select &&& FROM dual")) 121 | 122 | def test_bad_from(self): 123 | assertRaises('(at char 20)', lambda: parse("select A, B, C frum dual")) 124 | 125 | def test_incomplete1(self): 126 | assertRaises('Expected {{expression1 [{[as] column_name1}]}', lambda: parse("SELECT")) 127 | 128 | def test_incomplete2(self): 129 | assertRaises("", lambda: parse("SELECT * FROM")) 130 | 131 | def 
test_where_neq(self): 132 | # 1 2 3 4 5 6 133 | # 0123456789012345678901234567890123456789012345678901234567890123456789 134 | result = parse("SELECT * FROM dual WHERE a<>'test'") 135 | expected = { 136 | "select": "*", 137 | "from": "dual", 138 | "where": {"neq": ["a", {"literal": "test"}]} 139 | } 140 | self.assertEqual(result, expected) 141 | 142 | def test_where_in(self): 143 | result = parse("SELECT a FROM dual WHERE a in ('r', 'g', 'b')") 144 | expected = { 145 | "select": {"value": "a"}, 146 | "from": "dual", 147 | "where": {"in": [ 148 | "a", 149 | {"literal": ["r", "g", "b"]} 150 | ]} 151 | } 152 | self.assertEqual(result, expected) 153 | 154 | def test_where_in_and_in(self): 155 | # 1 2 3 4 5 6 156 | # 0123456789012345678901234567890123456789012345678901234567890123456789 157 | result = parse("SELECT a FROM dual WHERE a in ('r', 'g', 'b') AND b in (10, 11, 12)") 158 | expected = { 159 | "select": {"value": "a"}, 160 | "from": "dual", 161 | "where": {"and": [ 162 | {"in": [ 163 | "a", 164 | {"literal": ["r", "g", "b"]} 165 | ]}, 166 | {"in": [ 167 | "b", 168 | [10, 11, 12] 169 | ]} 170 | ]} 171 | } 172 | self.assertEqual(result, expected) 173 | 174 | def test_eq(self): 175 | result = parse("SELECT a, b FROM t1, t2 WHERE t1.a=t2.b") 176 | expected = { 177 | "select": [ 178 | {"value": "a"}, 179 | {"value": "b"} 180 | ], 181 | "from": ["t1", "t2"], 182 | "where": {"eq": ["t1.a", "t2.b"]} 183 | } 184 | self.assertEqual(result, expected) 185 | 186 | def test_is_null(self): 187 | result = parse("SELECT a, b FROM t1 WHERE t1.a IS NULL") 188 | expected = { 189 | "select": [ 190 | {"value": "a"}, 191 | {"value": "b"} 192 | ], 193 | "from": "t1", 194 | "where": {"missing": "t1.a"} 195 | } 196 | self.assertEqual(result, expected) 197 | 198 | def test_is_not_null(self): 199 | result = parse("SELECT a, b FROM t1 WHERE t1.a IS NOT NULL") 200 | expected = { 201 | "select": [ 202 | {"value": "a"}, 203 | {"value": "b"} 204 | ], 205 | "from": "t1", 206 | "where": {"exists": "t1.a"} 207 | } 208 | self.assertEqual(result, expected) 209 | 210 | def test_groupby(self): 211 | result = parse("select a, count(1) as b from mytable group by a") 212 | expected = { 213 | "select": [ 214 | {"value": "a"}, 215 | {"name": "b", "value": {"count": 1}} 216 | ], 217 | "from": "mytable", 218 | "groupby": {"value": "a"} 219 | } 220 | self.assertEqual(result, expected) 221 | 222 | def test_function(self): 223 | # 0 1 2 224 | # 0123456789012345678901234567890 225 | result = parse("select count(1) from mytable") 226 | expected = { 227 | "select": {"value": {"count": 1}}, 228 | "from": "mytable" 229 | } 230 | self.assertEqual(result, expected) 231 | 232 | def test_function_underscore(self): 233 | # 0 1 2 234 | # 0123456789012345678901234567890 235 | result = parse("select DATE_TRUNC('2019-04-12', WEEK) from mytable") 236 | expected = { 237 | 'select': {'value': {'date_trunc': [{'literal': '2019-04-12'}, 'WEEK']}}, 238 | "from": "mytable" 239 | } 240 | self.assertEqual(result, expected) 241 | 242 | def test_order_by(self): 243 | result = parse("select count(1) from dual order by a") 244 | expected = { 245 | "select": {"value": {"count": 1}}, 246 | "from": "dual", 247 | "orderby": {"value": "a"} 248 | } 249 | self.assertEqual(result, expected) 250 | 251 | def test_order_by_asc(self): 252 | result = parse("select count(1) from dual order by a asc") 253 | expected = { 254 | "select": {"value": {"count": 1}}, 255 | "from": "dual", 256 | "orderby": {"value": "a", "sort": "asc"} 257 | } 258 | self.assertEqual(result, expected) 
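    # Hypothetical helper (illustration only, not part of this suite): the two ORDER BY
    # tests above show that parse() returns a single clause as a bare dict, a list when
    # there are several clauses, and an optional "sort" key for the direction. A caller
    # can normalise both shapes into (expression, direction) pairs like this:
    @staticmethod
    def iter_orderby(parsed):
        clauses = parsed.get("orderby", [])
        if isinstance(clauses, dict):
            clauses = [clauses]
        for clause in clauses:
            # direction defaults to ascending when no "sort" key is present
            yield clause["value"], clause.get("sort", "asc")
    # e.g. list(iter_orderby(parse("select count(1) from dual order by a asc"))) -> [("a", "asc")]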
259 | 260 | def test_neg_or_precedence(self): 261 | result = parse("select B,C from table1 where A=-900 or B=100") 262 | expected = { 263 | 'from': 'table1', 264 | 'where': {'or': [{'eq': ['A', -900]}, {'eq': ['B', 100]}]}, 265 | 'select': [{'value': 'B'}, {'value': 'C'}] 266 | } 267 | self.assertEqual(result, expected) 268 | 269 | def test_negative_number(self): 270 | result = parse("select a from table1 where A=-900") 271 | expected = { 272 | 'from': 'table1', 273 | 'where': {'eq': ['A', -900]}, 274 | 'select': {'value': 'a'} 275 | } 276 | self.assertEqual(result, expected) 277 | 278 | def test_like_in_where(self): 279 | result = parse("select a from table1 where A like '%20%'") 280 | expected = { 281 | 'from': 'table1', 282 | 'where': {'like': ['A', {"literal": "%20%"}]}, 283 | 'select': {'value': 'a'} 284 | } 285 | self.assertEqual(result, expected) 286 | 287 | def test_not_like_in_where(self): 288 | result = parse("select a from table1 where A not like '%20%'") 289 | expected = { 290 | 'from': 'table1', 291 | 'where': {'nlike': ['A', {"literal": "%20%"}]}, 292 | 'select': {'value': 'a'} 293 | } 294 | self.assertEqual(result, expected) 295 | 296 | def test_like_in_select(self): 297 | result = parse("select case when A like 'bb%' then 1 else 0 end as bb from table1") 298 | expected = { 299 | 'from': 'table1', 300 | 'select': {'name': 'bb', 'value': {"case": [{"when": {"like": ["A", {"literal": "bb%"}]}, "then": 1}, 0]}} 301 | } 302 | self.assertEqual(result, expected) 303 | 304 | def test_not_like_in_select(self): 305 | result = parse("select case when A not like 'bb%' then 1 else 0 end as bb from table1") 306 | expected = { 307 | 'from': 'table1', 308 | 'select': {'name': 'bb', 'value': {"case": [{"when": {"nlike": ["A", {"literal": "bb%"}]}, "then": 1}, 0]}} 309 | } 310 | self.assertEqual(result, expected) 311 | 312 | def test_like_from_pr16(self): 313 | result = parse("select * from trade where school LIKE '%shool' and name='abc' and id IN ('1','2')") 314 | expected = { 315 | 'from': 'trade', 316 | 'where': {"and": [ 317 | {"like": ["school", {"literal": "%shool"}]}, 318 | {"eq": ["name", {"literal": "abc"}]}, 319 | {"in": ["id", {"literal": ["1", "2"]}]} 320 | ]}, 321 | 'select': "*" 322 | } 323 | self.assertEqual(result, expected) 324 | 325 | def test_in_expression(self): 326 | result = parse("select * from task where repo.branch.name in ('try', 'mozilla-central')") 327 | expected = { 328 | 'from': 'task', 329 | 'select': "*", 330 | "where": {"in": [ 331 | "repo.branch.name", 332 | {"literal": ["try", "mozilla-central"]} 333 | ]} 334 | } 335 | self.assertEqual(result, expected) 336 | 337 | def test_not_in_expression(self): 338 | result = parse("select * from task where repo.branch.name not in ('try', 'mozilla-central')") 339 | expected = { 340 | 'from': 'task', 341 | 'select': "*", 342 | "where": {"nin": [ 343 | "repo.branch.name", 344 | {"literal": ["try", "mozilla-central"]} 345 | ]} 346 | } 347 | self.assertEqual(result, expected) 348 | 349 | def test_joined_table_name(self): 350 | result = parse("SELECT * FROM table1 t1 JOIN table3 t3 ON t1.id = t3.id") 351 | 352 | expected = { 353 | 'from': [ 354 | {'name': 't1', 'value': 'table1'}, 355 | {'on': {'eq': ['t1.id', 't3.id']}, 'join': {'name': 't3', 'value': 'table3'}} 356 | ], 357 | 'select': '*' 358 | } 359 | self.assertEqual(result, expected) 360 | 361 | def test_not_equal(self): 362 | # 0 1 2 3 4 5 6 7 8 363 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 364 | result = 
parse("select * from task where build.product is not null and build.product!='firefox'") 365 | 366 | expected = { 367 | 'select': '*', 368 | 'from': "task", 369 | "where": {"and": [ 370 | {"exists": "build.product"}, 371 | {"neq": ["build.product", {"literal": "firefox"}]} 372 | ]} 373 | } 374 | self.assertEqual(result, expected) 375 | 376 | 377 | def test_pr19(self): 378 | result = parse("select empid from emp where ename like 's%' ") 379 | expected = { 380 | 'from': 'emp', 381 | 'where': {"like": ["ename", {"literal": "s%"}]}, 382 | 'select': {"value": "empid"} 383 | } 384 | self.assertEqual(result, expected) 385 | 386 | def test_backtick(self): 387 | result = parse("SELECT `user ID` FROM a") 388 | expected = {'select': {'value': 'user ID'}, 'from': 'a'} 389 | self.assertEqual(result, expected) 390 | 391 | def test_backtick_escape(self): 392 | result = parse("SELECT `user`` ID` FROM a") 393 | expected = {'select': {'value': 'user` ID'}, 'from': 'a'} 394 | self.assertEqual(result, expected) 395 | 396 | def test_left_join(self): 397 | result = parse("SELECT t1.field1 FROM t1 LEFT JOIN t2 ON t1.id = t2.id") 398 | expected = {'select': {'value': 't1.field1'}, 399 | 'from': ['t1', 400 | {'left join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}]} 401 | self.assertEqual(result, expected) 402 | 403 | def test_multiple_left_join(self): 404 | result = parse("SELECT t1.field1 " 405 | "FROM t1 " 406 | "LEFT JOIN t2 ON t1.id = t2.id " 407 | "LEFT JOIN t3 ON t1.id = t3.id" 408 | ) 409 | expected = {'select': {'value': 't1.field1'}, 410 | 'from': ['t1', 411 | {'left join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}, 412 | {'left join': 't3', 'on': {'eq': ['t1.id', 't3.id']}} 413 | ]} 414 | self.assertEqual(result, expected) 415 | 416 | def test_union(self): 417 | result = parse("SELECT b FROM t6 UNION SELECT '3' AS x ORDER BY x") 418 | expected = { 419 | "from": {'union': [ 420 | {'from': 't6', 'select': {'value': 'b'}}, 421 | {'select': {'value': {'literal': '3'}, 'name': 'x'}} 422 | ]}, 423 | 'orderby': {"value": 'x'} 424 | } 425 | self.assertEqual(result, expected) 426 | 427 | def test_left_outer_join(self): 428 | result = parse("SELECT t1.field1 FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.id") 429 | expected = {'select': {'value': 't1.field1'}, 430 | 'from': ['t1', 431 | {'left outer join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}]} 432 | self.assertEqual(result, expected) 433 | 434 | def test_right_join(self): 435 | result = parse("SELECT t1.field1 FROM t1 RIGHT JOIN t2 ON t1.id = t2.id") 436 | expected = {'select': {'value': 't1.field1'}, 437 | 'from': ['t1', 438 | {'right join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}]} 439 | self.assertEqual(result, expected) 440 | 441 | def test_right_outer_join(self): 442 | result = parse("SELECT t1.field1 FROM t1 RIGHT OUTER JOIN t2 ON t1.id = t2.id") 443 | expected = {'select': {'value': 't1.field1'}, 444 | 'from': ['t1', 445 | {'right outer join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}]} 446 | self.assertEqual(result, expected) 447 | 448 | def test_full_join(self): 449 | result = parse("SELECT t1.field1 FROM t1 FULL JOIN t2 ON t1.id = t2.id") 450 | expected = {'select': {'value': 't1.field1'}, 451 | 'from': ['t1', 452 | {'full join': 't2', 'on': {'eq': ['t1.id', 't2.id']}}]} 453 | self.assertEqual(result, expected) 454 | 455 | def test_full_outer_join(self): 456 | result = parse("SELECT t1.field1 FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id") 457 | expected = {'select': {'value': 't1.field1'}, 458 | 'from': ['t1', 459 | {'full outer join': 't2', 'on': {'eq': 
['t1.id', 't2.id']}}]} 460 | self.assertEqual(result, expected) 461 | 462 | def test_join_via_using(self): 463 | result = parse("SELECT t1.field1 FROM t1 JOIN t2 USING (id)") 464 | expected = {'select': {'value': 't1.field1'}, 465 | 'from': ['t1', 466 | {'join': 't2', 'using': 'id'}]} 467 | self.assertEqual(result, expected) 468 | 469 | def test_where_between(self): 470 | result = parse("SELECT a FROM dual WHERE a BETWEEN 1 and 2") 471 | expected = { 472 | "select": {"value": "a"}, 473 | "from": "dual", 474 | "where": {"between": ["a", 1, 2]} 475 | } 476 | self.assertEqual(result, expected) 477 | 478 | def test_where_not_between(self): 479 | result = parse("SELECT a FROM dual WHERE a NOT BETWEEN 1 and 2") 480 | expected = { 481 | "select": {"value": "a"}, 482 | "from": "dual", 483 | "where": {"not_between": ["a", 1, 2]} 484 | } 485 | self.assertEqual(result, expected) 486 | 487 | def test_select_from_select(self): 488 | # 0 1 2 3 489 | # 0123456789012345678901234567890123456789 490 | result = parse("SELECT b.a FROM ( SELECT 2 AS a ) b") 491 | expected = { 492 | 'select': {'value': 'b.a'}, 493 | 'from': { 494 | "name": "b", 495 | "value": { 496 | "select": {"value": 2, "name": "a"} 497 | } 498 | } 499 | } 500 | self.assertEqual(result, expected) 501 | 502 | def test_unicode_strings(self): 503 | result = parse("select '0:普通,1:旗舰' from mobile") 504 | expected = { 505 | 'select': {'value': {"literal": '0:普通,1:旗舰'}}, 506 | 'from': "mobile" 507 | } 508 | self.assertEqual(result, expected) 509 | 510 | def test_issue68(self): 511 | result = parse("select deflate(sum(int(mobile_price.price))) from mobile") 512 | expected = { 513 | 'select': {'value': {"deflate": {"sum": {"int": "mobile_price.price"}}}}, 514 | 'from': "mobile" 515 | } 516 | self.assertEqual(result, expected) 517 | 518 | def test_issue_90(self): 519 | result = parse("""SELECT MIN(cn.name) AS from_company 520 | FROM company_name AS cn, company_type AS ct, keyword AS k, movie_link AS ml, title AS t 521 | WHERE cn.country_code !='[pl]' AND ct.kind IS NOT NULL AND t.production_year > 1950 AND ml.movie_id = t.id 522 | """) 523 | 524 | expected = { 525 | 'select': {'value': {"min": "cn.name"}, "name": "from_company"}, 526 | 'from': [ 527 | {"value": "company_name", "name": "cn"}, 528 | {"value": "company_type", "name": "ct"}, 529 | {"value": "keyword", "name": "k"}, 530 | {"value": "movie_link", "name": "ml"}, 531 | {"value": "title", "name": "t"} 532 | ], 533 | "where": {"and": [ 534 | {"neq": ["cn.country_code", {"literal": "[pl]"}]}, 535 | {"exists": "ct.kind"}, 536 | {"gt": ["t.production_year", 1950]}, 537 | {"eq": ["ml.movie_id", "t.id"]} 538 | ]} 539 | } 540 | self.assertEqual(result, expected) 541 | 542 | def test_issue_68a(self): 543 | sql = """ 544 | SELECT * 545 | FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t 546 | WHERE 547 | an.name is not NULL 548 | and (an.name LIKE '%a%' or an.name LIKE 'A%') 549 | AND it.info ='mini biography' 550 | AND lt.link in ('references', 'referenced in', 'features', 'featured in') 551 | AND n.name_pcode_cf BETWEEN 'A' AND 'F' 552 | AND (n.gender = 'm' OR (n.gender = 'f' AND n.name LIKE 'A%')) 553 | AND pi.note is not NULL 554 | AND t.production_year BETWEEN 1980 AND 2010 555 | AND n.id = an.person_id 556 | AND n.id = pi.person_id 557 | AND ci.person_id = n.id 558 | AND t.id = ci.movie_id 559 | AND ml.linked_movie_id = t.id 560 | AND lt.id = ml.link_type_id 561 | AND it.id = pi.info_type_id 562 | AND pi.person_id = 
an.person_id 563 | AND pi.person_id = ci.person_id 564 | AND an.person_id = ci.person_id 565 | AND ci.movie_id = ml.linked_movie_id 566 | """ 567 | result = parse(sql) 568 | expected = { 569 | 'from': [ 570 | {'name': 'an', 'value': 'aka_name'}, 571 | {'name': 'ci', 'value': 'cast_info'}, 572 | {'name': 'it', 'value': 'info_type'}, 573 | {'name': 'lt', 'value': 'link_type'}, 574 | {'name': 'ml', 'value': 'movie_link'}, 575 | {'name': 'n', 'value': 'name'}, 576 | {'name': 'pi', 'value': 'person_info'}, 577 | {'name': 't', 'value': 'title'} 578 | ], 579 | 'select': '*', 580 | 'where': {'and': [ 581 | {'exists': 'an.name'}, 582 | {'or': [{'like': ['an.name', {'literal': '%a%'}]}, 583 | {'like': ['an.name', {'literal': 'A%'}]}]}, 584 | {'eq': ['it.info', {'literal': 'mini biography'}]}, 585 | {'in': ['lt.link', 586 | {'literal': ['references', 587 | 'referenced in', 588 | 'features', 589 | 'featured in']}]}, 590 | {'between': ['n.name_pcode_cf', 591 | {'literal': 'A'}, 592 | {'literal': 'F'}]}, 593 | {'or': [{'eq': ['n.gender', {'literal': 'm'}]}, 594 | {'and': [{'eq': ['n.gender', {'literal': 'f'}]}, 595 | {'like': ['n.name', {'literal': 'A%'}]}]}]}, 596 | {'exists': 'pi.note'}, 597 | {'between': ['t.production_year', 1980, 2010]}, 598 | {'eq': ['n.id', 'an.person_id']}, 599 | {'eq': ['n.id', 'pi.person_id']}, 600 | {'eq': ['ci.person_id', 'n.id']}, 601 | {'eq': ['t.id', 'ci.movie_id']}, 602 | {'eq': ['ml.linked_movie_id', 't.id']}, 603 | {'eq': ['lt.id', 'ml.link_type_id']}, 604 | {'eq': ['it.id', 'pi.info_type_id']}, 605 | {'eq': ['pi.person_id', 'an.person_id']}, 606 | {'eq': ['pi.person_id', 'ci.person_id']}, 607 | {'eq': ['an.person_id', 'ci.person_id']}, 608 | {'eq': ['ci.movie_id', 'ml.linked_movie_id']} 609 | ]} 610 | } 611 | self.assertEqual(result, expected) 612 | 613 | def test_issue_68b(self): 614 | # 0 1 2 3 4 5 6 7 8 9 615 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 616 | sql = "SELECT COUNT(*) AS CNT FROM test.tb WHERE (id IN (unhex('1'),unhex('2'))) AND status=1;" 617 | result = parse(sql) 618 | expected = { 619 | "select": {"value": {"count": "*"}, "name": "CNT"}, 620 | "from": "test.tb", 621 | "where": {"and": [ 622 | {"in": ["id", [ 623 | {"unhex": {"literal": "1"}}, 624 | {"unhex": {"literal": "2"}} 625 | ]]}, 626 | {"eq": ["status", 1]} 627 | 628 | ]} 629 | } 630 | self.assertEqual(result, expected) 631 | 632 | def test_binary_and(self): 633 | sql = "SELECT * FROM t WHERE c & 4;" 634 | result = parse(sql) 635 | expected = { 636 | "select": "*", 637 | "from": "t", 638 | "where": {"binary_and": ["c", 4]} 639 | } 640 | self.assertEqual(result, expected) 641 | 642 | def test_binary_or(self): 643 | sql = "SELECT * FROM t WHERE c | 4;" 644 | result = parse(sql) 645 | expected = { 646 | "select": "*", 647 | "from": "t", 648 | "where": {"binary_or": ["c", 4]} 649 | } 650 | self.assertEqual(result, expected) 651 | 652 | def test_binary_not(self): 653 | sql = "SELECT * FROM t WHERE ~c;" 654 | result = parse(sql) 655 | expected = { 656 | "select": "*", 657 | "from": "t", 658 | "where": {"binary_not": "c"} 659 | } 660 | self.assertEqual(result, expected) 661 | 662 | def test_or_and(self): 663 | sql = "SELECT * FROM dual WHERE a OR b AND c" 664 | result = parse(sql) 665 | expected = { 666 | "select": "*", 667 | "from": "dual", 668 | "where": {"or": ["a", {"and": ["b", "c"]}]} 669 | } 670 | self.assertEqual(result, expected) 671 | 672 | def test_and_or(self): 673 | sql = "SELECT * FROM dual WHERE a AND b or c" 674 | result = parse(sql) 
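        # AND binds tighter than OR, so "a AND b or c" groups as (a AND b) OR c,
        # which is exactly the tree asserted below.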
675 | expected = { 676 | "select": "*", 677 | "from": "dual", 678 | "where": {"or": [{"and": ["a", "b"]}, "c"]} 679 | } 680 | self.assertEqual(result, expected) 681 | 682 | def test_underscore_function1(self): 683 | sql = "SELECT _()" 684 | result = parse(sql) 685 | expected = { 686 | "select": {"value": {"_": {}}}, 687 | } 688 | self.assertEqual(result, expected) 689 | 690 | def test_underscore_function2(self): 691 | sql = "SELECT _a(a$b)" 692 | result = parse(sql) 693 | expected = { 694 | "select": {"value": {"_a": "a$b"}}, 695 | } 696 | self.assertEqual(result, expected) 697 | 698 | def test_underscore_function3(self): 699 | sql = "SELECT _$$_(a, b$)" 700 | result = parse(sql) 701 | expected = { 702 | "select": {"value": {"_$$_": ["a", "b$"]}}, 703 | } 704 | self.assertEqual(result, expected) 705 | 706 | def test_union_all1(self): 707 | # 0 1 2 3 4 5 6 7 8 9 708 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 709 | result = parse("SELECT b FROM t6 UNION ALL SELECT '3' AS x ORDER BY x") 710 | expected = { 711 | "from": {'union_all': [ 712 | {'from': 't6', 'select': {'value': 'b'}}, 713 | {'select': {'value': {'literal': '3'}, 'name': 'x'}} 714 | ]}, 715 | 'orderby': {"value": 'x'} 716 | } 717 | self.assertEqual(result, expected) 718 | 719 | def test_union_all2(self): 720 | result = parse("SELECT b UNION ALL SELECT c") 721 | expected = {'union_all': [ 722 | {'select': {'value': 'b'}}, 723 | {'select': {'value': 'c'}}, 724 | ]} 725 | self.assertEqual(result, expected) 726 | 727 | def test_issue106(self): 728 | result = parse(""" 729 | SELECT * 730 | FROM MyTable 731 | GROUP BY Col 732 | HAVING AVG(X) >= 2 733 | AND AVG(X) <= 4 734 | OR AVG(X) = 5; 735 | """) 736 | expected = { 737 | 'select': '*', 738 | 'from': 'MyTable', 739 | 'groupby': {'value': 'Col'}, 740 | 'having': {'or': [ 741 | {'and': [ 742 | {'gte': [{'avg': 'X'}, 2]}, 743 | {'lte': [{'avg': 'X'}, 4]} 744 | ]}, 745 | {'eq': [{'avg': 'X'}, 5]} 746 | ]} 747 | } 748 | self.assertEqual(result, expected) 749 | 750 | def test_issue97_function_names(self): 751 | sql = "SELECT ST_AsText(ST_MakePoint(174, -36));" 752 | result = parse(sql) 753 | expected = { 754 | 'select': { 755 | 'value': { 756 | 'st_astext': { 757 | 'st_makepoint': [174, -36] 758 | } 759 | } 760 | } 761 | } 762 | self.assertEqual(result, expected) 763 | 764 | def test_issue91_order_of_operations1(self): 765 | sql = "select 5-4+2" 766 | result = parse(sql) 767 | expected = {"select": {"value": {"add": [{"sub": [5, 4]}, 2]}}} 768 | self.assertEqual(result, expected) 769 | 770 | def test_issue91_order_of_operations2(self): 771 | sql = "select 5/4*2" 772 | result = parse(sql) 773 | expected = {"select": {"value": {"mul": [{"div": [5, 4]}, 2]}}} 774 | self.assertEqual(result, expected) 775 | 776 | def test_issue_92(self): 777 | sql = "SELECT * FROM `movies`" 778 | result = parse(sql) 779 | expected = {"select": "*", "from": "movies"} 780 | self.assertEqual(result, expected) 781 | 782 | def test_with_clause(self): 783 | sql = ( 784 | " WITH dept_count AS (" 785 | " SELECT deptno, COUNT(*) AS dept_count" 786 | " FROM emp" 787 | " GROUP BY deptno" 788 | ")" 789 | " SELECT " 790 | " e.ename AS employee_name," 791 | " dc1.dept_count AS emp_dept_count," 792 | " m.ename AS manager_name," 793 | " dc2.dept_count AS mgr_dept_count" 794 | " FROM " 795 | " emp e," 796 | " dept_count dc1," 797 | " emp m," 798 | " dept_count dc2" 799 | " WHERE " 800 | " e.deptno = dc1.deptno" 801 | " AND e.mgr = m.empno" 802 | " AND m.deptno = dc2.deptno;" 
803 | ) 804 | result = parse(sql) 805 | expected = { 806 | 'with': { 807 | 'name': 'dept_count', 808 | 'value': { 809 | 'from': 'emp', 810 | 'groupby': {'value': 'deptno'}, 811 | 'select': [ 812 | {'value': 'deptno'}, 813 | {'name': 'dept_count', 'value': {'count': '*'}} 814 | ] 815 | } 816 | }, 817 | 'from': [ 818 | {'name': 'e', 'value': 'emp'}, 819 | {'name': 'dc1', 'value': 'dept_count'}, 820 | {'name': 'm', 'value': 'emp'}, 821 | {'name': 'dc2', 'value': 'dept_count'} 822 | ] 823 | , 824 | 'select': [ 825 | {'name': 'employee_name', 'value': 'e.ename'}, 826 | {'name': 'emp_dept_count', 'value': 'dc1.dept_count'}, 827 | {'name': 'manager_name', 'value': 'm.ename'}, 828 | {'name': 'mgr_dept_count', 'value': 'dc2.dept_count'} 829 | ], 830 | 'where': {'and': [ 831 | {'eq': ['e.deptno', 'dc1.deptno']}, 832 | {'eq': ['e.mgr', 'm.empno']}, 833 | {'eq': ['m.deptno', 'dc2.deptno']} 834 | ]} 835 | } 836 | 837 | self.assertEqual(result, expected) 838 | 839 | def test_2with_clause(self): 840 | # 0 1 2 3 4 5 6 7 8 9 841 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 842 | sql = ( 843 | " WITH a AS (SELECT 1), b AS (SELECT 2)" 844 | " SELECT * FROM a UNION ALL SELECT * FROM b" 845 | ) 846 | result = parse(sql) 847 | expected = { 848 | "with": [ 849 | {"name": "a", "value": {"select": {"value": 1}}}, 850 | {"name": "b", "value": {"select": {"value": 2}}} 851 | ], 852 | "union_all": [ 853 | {"select": "*", "from": "a"}, 854 | {"select": "*", "from": "b"}, 855 | ] 856 | } 857 | self.assertEqual(result, expected) 858 | 859 | #@skipIf(not IS_MASTER, "Takes too long, and does not test net new features") 860 | def test_issue_103(self): 861 | # 0 1 2 3 4 5 6 7 8 9 862 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 863 | sql = """SELECT G.ITEM_ID AS "ITEM_ID", fn_get_dimension_by_grcom (H.BU_COD, G.ITEM_ID) AS "DESCRIPTION", trim(G.EAN11) "EANCODE", trim(DECODE (G.MATNR_ORIG_B2F, NULL, DECODE (G.MATNR_ORIG, NULL, G.MATNR, G.MATNR_ORIG), G.MATNR_ORIG_B2F)) AS "CODICE_PRODOTTO", DECODE (H.BRAND, 'BF GOODRICH', 'BFGOODRICH', H.BRAND) AS "BRAND_ID", H.BRAND AS "XBRAND", H.MARKET5 AS "MKT_ID", m.MC_COUNTRY_CODE AS "COUNTRY_CODE", H.BU_COD AS "GRCOM", H.DATE_FROM AS "PRICELIST_DATE", H.CURRENCY AS "CURRENCY_ID", K.CR_DESCRIPTION AS "CURRENCY_DESC", K.CR_DESCRIPTION AS "CURRENCY_SHORT_DESC", G.PATTERN AS "BTS_ID", P.PATTERN AS "PATTERN_SHORT_DESC", trim(G.SERIES) AS "SERIE", trim(G.WIDTH) AS "CORDA", trim(G.RIM) AS "CALETTAMENTO", G.STRUTTURA AS "STRUTTURA", DECODE (IS_NUMBER (G.WIDTH), 0, 0, TO_NUMBER (G.WIDTH)) AS "CORDA_NOM", DECODE (IS_NUMBER (G.SERIES), 0, 0, TO_NUMBER (G.SERIES)) AS "SERIE_NOM", 0 AS "STRUTTURA_NOM", DECODE (IS_NUMBER (G.RIM), 0, 0, TO_NUMBER (G.RIM)) AS "CALETTAMENTO_NOM", trim(G.LOADIN1) AS "LOAD_INDEX", trim(DECODE (TRIM (G.LOADIN2), '', NULL, TRIM (G.LOADIN2))) AS "LOAD_INDEX_1", trim(G.EXTRA_LOAD_FLAG) AS "EXTRA_LOAD_INDEX", G.RUNFLAT_FLAG AS "RUNFLAT_ID", DECODE (TRIM (G.OEMARK), '', NULL, TRIM (G.OEMARK)) AS "OE_MARK", trim(G.SPEEDIN1) AS "SPEED_INDEX", trim(DECODE (TRIM (G.SPEEDIN2), '', NULL, TRIM (G.SPEEDIN2))) AS "SPEED_INDEX_1", trim(G.CODE_MKS) AS "CODE_MKS", G.DESCR_MKS AS "MKS", D.PRICE AS "GROSS_PRICE", trim(fn_get_dimension_loadindex (g.item_id)) AS "DESCR_LOADINDEX", trim(fn_get_dimension_speedindex (g.item_id)) AS "DESCR_SPEEDINDEX", DECODE (TRIM (G.LOADIN1DB), '', NULL, TRIM (G.LOADIN1DB)) AS "LOADINDEX1DOUBLEMOUNT", DECODE (TRIM (G.LOADIN2DB), '', NULL, TRIM (G.LOADIN2DB)) AS 
"LOADINDEX2DOUBLEMOUNT", DECODE (TRIM (G.NOISECLASS), '', NULL, TRIM (G.NOISECLASS)) AS "NOISECLASS", DECODE (G.ARTICLEGROUPCODE, '01', 'Tyre', '02', 'Rim', NULL) AS "ARTICLEGROUP", G.ARTICLEGROUPCODE AS "ARTICLEGROUPCODE", DECODE (IS_NUMBER (G.DEPTH), 1, G.DEPTH, NULL) AS "ORIGINALTREADDEPTH", DECODE (IS_NUMBER (G.WEIGHT), 1, TO_NUMBER (G.WEIGHT) * 1000, NULL) AS "WEIGHT", DECODE (g.pncs, 'Yes', 1, 'No', 0, NULL) AS "PNCS", DECODE (g.sealind, 'Yes', 1, 'No', 0, NULL) AS "SELFSEALING", DECODE (g.sealind, 'Yes', g.RUNFLAT_FLAG_SEALIND, NULL) AS "SELFSEALINGINDICATOR", DECODE (g.extra_load, 'Yes', 1, 'No', 0, NULL) AS "EXTRA_LOAD", g.application_code AS "APPLICATION_CODE", NULL AS "PRODUCTSEGMENT", DECODE (g.application_code, 'F1', 'FittedUnitCar', 'F2', 'FittedUnitVan', 'F9', 'FittedUnitSuv', '01', 'Car', '02', 'Van', '03', 'Truck', '04', 'EM', '05', 'AS', '06', 'Industry', '08', 'Moto', '09', 'SUV', NULL) AS "APPLICATION", DECODE (g.SNOWFLAG, 'Yes', 1, 'No', 0, NULL) AS "SNOWFLAG", DECODE (g.RUNFLAT, 'Yes', 1, 'No', 0, NULL) AS "RUNFLAT", DECODE (TRIM (g.NOISE_PERFORMANCE), '', NULL, TRIM (G.NOISE_PERFORMANCE)) AS "NOISE_PERFORMANCE", DECODE (TRIM (g.rollres), '', NULL, TRIM (G.rollres)) AS "ROLLRES", DECODE (TRIM (g.wetgrip), '', NULL, TRIM (G.wetgrip)) AS "WETGRIP", g.MANUFACTURER AS "MANUFACTURER", DECODE (DECODE (IS_NUMBER (g.season), 1, TO_NUMBER (g.season), 0), 1, 'summer', 2, 'winter', 10, 'allseasons', NULL) AS "SEASONALITY" FROM DIM_CURRENCY k, P2_PATTERN_ALL p, P2_MATERIAL_ALL g, DW.DIM_MARKET_CHANNEL m, PRLST_DETAIL d, (SELECT H1.PRICELIST_ID, H1.BRAND, H1.BU_COD, H1.MARKET5, H1.DATE_FROM, H1.CURRENCY FROM PRCLST_HEADER h1, LOOKUP_BRAND b1 WHERE H1.ENABLE_VIEWING_B2F = 1 AND (H1.BRAND, H1.BU_COD, H1.MARKET5, H1.DATE_FROM) IN ( SELECT H2.BRAND, H2.BU_COD, H2.MARKET5, MAX (H2.DATE_FROM) FROM PRCLST_HEADER h2 WHERE H2.BU_COD = 'CAR' AND H2.ENABLE_VIEWING_B2F = 1 GROUP BY H2.BRAND, H2.BU_COD, H2.MARKET5) AND H1.BRAND = B1.BRAND) h WHERE h.currency = K.CR_COD_CURRENCY_SAP AND h.pricelist_id = D.PRICELIST_ID AND H.BRAND = G.BRCONA AND D.IPCODE = G.MATNR AND P.BRAND = G.BRCONA AND upper(P.PATTERN) = upper(G.PATTERN) AND h.market5 = m.MARKET_CHANNEL_CODE AND G.IS_USER = 1 AND (G.BRCONA, G.MATNR) NOT IN (SELECT C.BRCONA, C.MATNR FROM P2_MAT_USER_CONFLICTS c WHERE C.LAST_ACTION IN (21)) ORDER BY G.ITEM_ID""" 864 | result = parse(sql) 865 | expected = json.loads("""{"select": [{"value": "G.ITEM_ID", "name": "ITEM_ID"}, {"value": {"fn_get_dimension_by_grcom": ["H.BU_COD", "G.ITEM_ID"]}, "name": "DESCRIPTION"}, {"value": {"trim": "G.EAN11"}, "name": "EANCODE"}, {"value": {"trim": {"decode": ["G.MATNR_ORIG_B2F", "null", {"decode": ["G.MATNR_ORIG", "null", "G.MATNR", "G.MATNR_ORIG"]}, "G.MATNR_ORIG_B2F"]}}, "name": "CODICE_PRODOTTO"}, {"value": {"decode": ["H.BRAND", {"literal": "BF GOODRICH"}, {"literal": "BFGOODRICH"}, "H.BRAND"]}, "name": "BRAND_ID"}, {"value": "H.BRAND", "name": "XBRAND"}, {"value": "H.MARKET5", "name": "MKT_ID"}, {"value": "m.MC_COUNTRY_CODE", "name": "COUNTRY_CODE"}, {"value": "H.BU_COD", "name": "GRCOM"}, {"value": "H.DATE_FROM", "name": "PRICELIST_DATE"}, {"value": "H.CURRENCY", "name": "CURRENCY_ID"}, {"value": "K.CR_DESCRIPTION", "name": "CURRENCY_DESC"}, {"value": "K.CR_DESCRIPTION", "name": "CURRENCY_SHORT_DESC"}, {"value": "G.PATTERN", "name": "BTS_ID"}, {"value": "P.PATTERN", "name": "PATTERN_SHORT_DESC"}, {"value": {"trim": "G.SERIES"}, "name": "SERIE"}, {"value": {"trim": "G.WIDTH"}, "name": "CORDA"}, {"value": {"trim": "G.RIM"}, "name": "CALETTAMENTO"}, 
{"value": "G.STRUTTURA", "name": "STRUTTURA"}, {"value": {"decode": [{"is_number": "G.WIDTH"}, 0, 0, {"to_number": "G.WIDTH"}]}, "name": "CORDA_NOM"}, {"value": {"decode": [{"is_number": "G.SERIES"}, 0, 0, {"to_number": "G.SERIES"}]}, "name": "SERIE_NOM"}, {"value": 0, "name": "STRUTTURA_NOM"}, {"value": {"decode": [{"is_number": "G.RIM"}, 0, 0, {"to_number": "G.RIM"}]}, "name": "CALETTAMENTO_NOM"}, {"value": {"trim": "G.LOADIN1"}, "name": "LOAD_INDEX"}, {"value": {"trim": {"decode": [{"trim": "G.LOADIN2"}, {"literal": ""}, "null", {"trim": "G.LOADIN2"}]}}, "name": "LOAD_INDEX_1"}, {"value": {"trim": "G.EXTRA_LOAD_FLAG"}, "name": "EXTRA_LOAD_INDEX"}, {"value": "G.RUNFLAT_FLAG", "name": "RUNFLAT_ID"}, {"value": {"decode": [{"trim": "G.OEMARK"}, {"literal": ""}, "null", {"trim": "G.OEMARK"}]}, "name": "OE_MARK"}, {"value": {"trim": "G.SPEEDIN1"}, "name": "SPEED_INDEX"}, {"value": {"trim": {"decode": [{"trim": "G.SPEEDIN2"}, {"literal": ""}, "null", {"trim": "G.SPEEDIN2"}]}}, "name": "SPEED_INDEX_1"}, {"value": {"trim": "G.CODE_MKS"}, "name": "CODE_MKS"}, {"value": "G.DESCR_MKS", "name": "MKS"}, {"value": "D.PRICE", "name": "GROSS_PRICE"}, {"value": {"trim": {"fn_get_dimension_loadindex": "g.item_id"}}, "name": "DESCR_LOADINDEX"}, {"value": {"trim": {"fn_get_dimension_speedindex": "g.item_id"}}, "name": "DESCR_SPEEDINDEX"}, {"value": {"decode": [{"trim": "G.LOADIN1DB"}, {"literal": ""}, "null", {"trim": "G.LOADIN1DB"}]}, "name": "LOADINDEX1DOUBLEMOUNT"}, {"value": {"decode": [{"trim": "G.LOADIN2DB"}, {"literal": ""}, "null", {"trim": "G.LOADIN2DB"}]}, "name": "LOADINDEX2DOUBLEMOUNT"}, {"value": {"decode": [{"trim": "G.NOISECLASS"}, {"literal": ""}, "null", {"trim": "G.NOISECLASS"}]}, "name": "NOISECLASS"}, {"value": {"decode": ["G.ARTICLEGROUPCODE", {"literal": "01"}, {"literal": "Tyre"}, {"literal": "02"}, {"literal": "Rim"}, "null"]}, "name": "ARTICLEGROUP"}, {"value": "G.ARTICLEGROUPCODE", "name": "ARTICLEGROUPCODE"}, {"value": {"decode": [{"is_number": "G.DEPTH"}, 1, "G.DEPTH", "null"]}, "name": "ORIGINALTREADDEPTH"}, {"value": {"decode": [{"is_number": "G.WEIGHT"}, 1, {"mul": [{"to_number": "G.WEIGHT"}, 1000]}, "null"]}, "name": "WEIGHT"}, {"value": {"decode": ["g.pncs", {"literal": "Yes"}, 1, {"literal": "No"}, 0, "null"]}, "name": "PNCS"}, {"value": {"decode": ["g.sealind", {"literal": "Yes"}, 1, {"literal": "No"}, 0, "null"]}, "name": "SELFSEALING"}, {"value": {"decode": ["g.sealind", {"literal": "Yes"}, "g.RUNFLAT_FLAG_SEALIND", "null"]}, "name": "SELFSEALINGINDICATOR"}, {"value": {"decode": ["g.extra_load", {"literal": "Yes"}, 1, {"literal": "No"}, 0, "null"]}, "name": "EXTRA_LOAD"}, {"value": "g.application_code", "name": "APPLICATION_CODE"}, {"value": "null", "name": "PRODUCTSEGMENT"}, {"value": {"decode": ["g.application_code", {"literal": "F1"}, {"literal": "FittedUnitCar"}, {"literal": "F2"}, {"literal": "FittedUnitVan"}, {"literal": "F9"}, {"literal": "FittedUnitSuv"}, {"literal": "01"}, {"literal": "Car"}, {"literal": "02"}, {"literal": "Van"}, {"literal": "03"}, {"literal": "Truck"}, {"literal": "04"}, {"literal": "EM"}, {"literal": "05"}, {"literal": "AS"}, {"literal": "06"}, {"literal": "Industry"}, {"literal": "08"}, {"literal": "Moto"}, {"literal": "09"}, {"literal": "SUV"}, "null"]}, "name": "APPLICATION"}, {"value": {"decode": ["g.SNOWFLAG", {"literal": "Yes"}, 1, {"literal": "No"}, 0, "null"]}, "name": "SNOWFLAG"}, {"value": {"decode": ["g.RUNFLAT", {"literal": "Yes"}, 1, {"literal": "No"}, 0, "null"]}, "name": "RUNFLAT"}, {"value": {"decode": [{"trim": 
"g.NOISE_PERFORMANCE"}, {"literal": ""}, "null", {"trim": "G.NOISE_PERFORMANCE"}]}, "name": "NOISE_PERFORMANCE"}, {"value": {"decode": [{"trim": "g.rollres"}, {"literal": ""}, "null", {"trim": "G.rollres"}]}, "name": "ROLLRES"}, {"value": {"decode": [{"trim": "g.wetgrip"}, {"literal": ""}, "null", {"trim": "G.wetgrip"}]}, "name": "WETGRIP"}, {"value": "g.MANUFACTURER", "name": "MANUFACTURER"}, {"value": {"decode": [{"decode": [{"is_number": "g.season"}, 1, {"to_number": "g.season"}, 0]}, 1, {"literal": "summer"}, 2, {"literal": "winter"}, 10, {"literal": "allseasons"}, "null"]}, "name": "SEASONALITY"}], "from": [{"value": "DIM_CURRENCY", "name": "k"}, {"value": "P2_PATTERN_ALL", "name": "p"}, {"value": "P2_MATERIAL_ALL", "name": "g"}, {"value": "DW.DIM_MARKET_CHANNEL", "name": "m"}, {"value": "PRLST_DETAIL", "name": "d"}, {"value": {"select": [{"value": "H1.PRICELIST_ID"}, {"value": "H1.BRAND"}, {"value": "H1.BU_COD"}, {"value": "H1.MARKET5"}, {"value": "H1.DATE_FROM"}, {"value": "H1.CURRENCY"}], "from": [{"value": "PRCLST_HEADER", "name": "h1"}, {"value": "LOOKUP_BRAND", "name": "b1"}], "where": {"and": [{"eq": ["H1.ENABLE_VIEWING_B2F", 1]}, {"in": [["H1.BRAND", "H1.BU_COD", "H1.MARKET5", "H1.DATE_FROM"], {"select": [{"value": "H2.BRAND"}, {"value": "H2.BU_COD"}, {"value": "H2.MARKET5"}, {"value": {"max": "H2.DATE_FROM"}}], "from": {"value": "PRCLST_HEADER", "name": "h2"}, "where": {"and": [{"eq": ["H2.BU_COD", {"literal": "CAR"}]}, {"eq": ["H2.ENABLE_VIEWING_B2F", 1]}]}, "groupby": [{"value": "H2.BRAND"}, {"value": "H2.BU_COD"}, {"value": "H2.MARKET5"}]}]}, {"eq": ["H1.BRAND", "B1.BRAND"]}]}}, "name": "h"}], "where": {"and": [{"eq": ["h.currency", "K.CR_COD_CURRENCY_SAP"]}, {"eq": ["h.pricelist_id", "D.PRICELIST_ID"]}, {"eq": ["H.BRAND", "G.BRCONA"]}, {"eq": ["D.IPCODE", "G.MATNR"]}, {"eq": ["P.BRAND", "G.BRCONA"]}, {"eq": [{"upper": "P.PATTERN"}, {"upper": "G.PATTERN"}]}, {"eq": ["h.market5", "m.MARKET_CHANNEL_CODE"]}, {"eq": ["G.IS_USER", 1]}, {"nin": [["G.BRCONA", "G.MATNR"], {"select": [{"value": "C.BRCONA"}, {"value": "C.MATNR"}], "from": {"value": "P2_MAT_USER_CONFLICTS", "name": "c"}, "where": {"in": ["C.LAST_ACTION", 21]}}]}]}, "orderby": {"value": "G.ITEM_ID"}}""") 866 | self.assertEqual(result, expected) 867 | 868 | def test_issue_38a(self): 869 | sql = "SELECT a IN ('abc',3,'def')" 870 | result = parse(sql) 871 | expected = {"select": {"value": {"in": ["a", {"literal": ['abc', 3, 'def']}]}}} 872 | self.assertEqual(result, expected) 873 | 874 | def test_issue_38b(self): 875 | sql = "SELECT a IN (abc,3,'def')" 876 | result = parse(sql) 877 | expected = {"select": {"value": {"in": ["a", ["abc", 3, {"literal": 'def'}]]}}} 878 | self.assertEqual(result, expected) 879 | 880 | @skipIf(IS_MASTER, "stack too deep") 881 | def test_issue_107_recursion(self): 882 | sql = ( 883 | " SELECT city_name" 884 | " FROM city" 885 | " WHERE population = (" 886 | " SELECT MAX(population)" 887 | " FROM city" 888 | " WHERE state_name IN (" 889 | " SELECT state_name" 890 | " FROM state" 891 | " WHERE area = (SELECT MIN(area) FROM state)" 892 | " )" 893 | " )" 894 | ) 895 | result = parse(sql) 896 | expected = { 897 | 'from': 'city', 898 | 'select': {'value': 'city_name'}, 899 | 'where': {'eq': [ 900 | 'population', 901 | { 902 | 'from': 'city', 903 | 'select': {'value': {'max': 'population'}}, 904 | 'where': {'in': [ 905 | 'state_name', 906 | { 907 | 'from': 'state', 908 | 'select': {'value': 'state_name'}, 909 | 'where': {'eq': [ 910 | 'area', 911 | { 912 | 'from': 'state', 913 | 'select': {'value': 
{'min': 'area'}} 914 | } 915 | ]} 916 | } 917 | ]} 918 | } 919 | ]} 920 | } 921 | self.assertEqual(result, expected) 922 | 923 | def test_issue_95(self): 924 | # 0 1 2 3 4 5 6 7 8 9 925 | # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 926 | sql = "select * from some_table.some_function('parameter', 1, some_col)" 927 | result = parse(sql) 928 | expected = {"select": "*", "from": {"value": {"some_table.some_function": [{"literal": 'parameter'}, 1, "some_col"]}}} 929 | self.assertEqual(result, expected) 930 | 931 | def test_at_ident(self): 932 | sql = "select @@version_comment" 933 | result = parse(sql) 934 | expected = {"select": {"value": "@@version_comment"}} 935 | self.assertEqual(result, expected) 936 | 937 | def test_date(self): 938 | sql = "select DATE '2020 01 25'" 939 | result = parse(sql) 940 | expected = {"select": {"value": {"date": {"literal": "2020 01 25"}}}} 941 | self.assertEqual(result, expected) 942 | 943 | def test_interval(self): 944 | sql = "select INTErval 30.5 monthS" 945 | result = parse(sql) 946 | expected = {"select": {"value": {"interval": [30.5, "month"]}}} 947 | self.assertEqual(result, expected) 948 | 949 | def test_date_less_interval(self): 950 | sql = "select DATE '2020 01 25' - interval 4 seconds" 951 | result = parse(sql) 952 | expected = {"select": {"value": {"sub": [{"date": {"literal":"2020 01 25"}}, {"interval": [4, "second"]}]}}} 953 | self.assertEqual(result, expected) 954 | 955 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 6 | # 7 | 8 | from __future__ import absolute_import, division, unicode_literals 9 | 10 | from mo_future import text 11 | 12 | 13 | def assertRaises(expected_text_in_error, method): 14 | try: 15 | method() 16 | except Exception as cause: 17 | text_error = text(cause) 18 | if not isinstance(expected_text_in_error, (list, tuple, set)): 19 | expected_text_in_error = [expected_text_in_error] 20 | 21 | # every expected fragment must appear in the raised error's text 22 | if any(expected not in text_error for expected in expected_text_in_error): 23 | raise Exception("wrong error raised") 24 | else: 25 | # method() returned without raising, which is itself a test failure 26 | raise Exception("expecting an exception") 27 | --------------------------------------------------------------------------------
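The tests above all follow the same round-trip pattern: feed a SQL string to moz_sql_parser.parse and compare the returned JSON-compatible dict against a hand-written expected value, with error cases routed through the assertRaises helper in tests/util.py. Below is a minimal sketch of that pattern outside the unittest harness, assuming the repository root is on PYTHONPATH; the failing SQL string and the "Expecting" error fragment are illustrative assumptions, not messages guaranteed by the parser.

    from moz_sql_parser import parse
    from tests.util import assertRaises

    # parse() returns a plain, JSON-compatible dict, as asserted throughout the tests
    assert parse("select a from b") == {"select": {"value": "a"}, "from": "b"}

    # assertRaises passes only when every listed fragment appears in the raised error's
    # text; the fragment "Expecting" is assumed here purely for illustration
    assertRaises("Expecting", lambda: parse("select a from b where"))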