├── Context.sublime-menu
├── Default (Linux).sublime-keymap
├── Default (OSX).sublime-keymap
├── Default (Windows).sublime-keymap
├── Default.sublime-commands
├── LICENSE
├── Main.sublime-menu
├── Preferences.sublime-settings
├── Readme.ja.md
├── Readme.md
├── enum
│   ├── LICENSE
│   ├── README
│   ├── __init__.py
│   ├── doc
│   │   ├── enum.pdf
│   │   └── enum.rst
│   └── test.py
├── sqlparse
│   ├── __init__.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── filter.py
│   │   └── grouping.py
│   ├── exceptions.py
│   ├── filters.py
│   ├── formatter.py
│   ├── functions.py
│   ├── keywords.py
│   ├── lexer.py
│   ├── pipeline.py
│   ├── sql.py
│   ├── tokens.py
│   └── utils.py
├── uroborosqlfmt
│   ├── __init__.py
│   ├── api.py
│   ├── commentsyntax.py
│   ├── config.py
│   ├── exceptions.py
│   ├── filters.py
│   ├── grouping.py
│   ├── sql.py
│   └── tokenutils.py
└── urobosql_formatter.py
/Context.sublime-menu:
--------------------------------------------------------------------------------
[
    {
        "id": "uroborosql-formatter",
        "caption": "SQL Format",
        "command": "uroborosql_format"
    }
]
--------------------------------------------------------------------------------
/Default (Linux).sublime-keymap:
--------------------------------------------------------------------------------
[
    { "keys": ["ctrl+alt+f"], "command": "uroborosql_format" }
]
--------------------------------------------------------------------------------
/Default (OSX).sublime-keymap:
--------------------------------------------------------------------------------
[
    { "keys": ["ctrl+super+f"], "command": "uroborosql_format" }
]
--------------------------------------------------------------------------------
/Default (Windows).sublime-keymap:
--------------------------------------------------------------------------------
[
    { "keys": ["ctrl+alt+f"], "command": "uroborosql_format" }
]
--------------------------------------------------------------------------------
/Default.sublime-commands:
--------------------------------------------------------------------------------
[
    {
        "caption": "uroboroSQL Formatter: Format SQL",
        "command": "uroborosql_format"
    }
]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 2-Clause License

Copyright (c) 2016, Future Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/Main.sublime-menu:
--------------------------------------------------------------------------------
[{
    "caption": "Edit",
    "mnemonic": "E",
    "id": "edit",
    "children": [{
        "caption": "-"
    }, {
        "command": "uroborosql_format",
        "id": "uroborosql_format",
        "caption": "SQL Format"
    }]
}, {
    "caption": "Preferences",
    "mnemonic": "n",
    "id": "preferences",
    "children": [{
        "caption": "Package Settings",
        "mnemonic": "P",
        "id": "package-settings",
        "children": [{
            "caption": "uroboroSQL Formatter",
            "children": [{
                "command": "open_file",
                "args": {
                    "file": "${packages}/sublime-uroborosql-formatter/Preferences.sublime-settings"
                },
                "caption": "Settings – Default"
            }, {
                "command": "open_file",
                "args": {
                    "file": "${packages}/User/sublime-uroborosql-formatter.sublime-settings"
                },
                "caption": "Settings – User"
            }]
        }]
    }]
}]
--------------------------------------------------------------------------------
/Preferences.sublime-settings:
--------------------------------------------------------------------------------
1 | {
2 |     "uf_tab_size": 4,
3 |     "uf_translate_tabs_to_spaces": true,
4 |     "uf_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
5 |     // You have to set the [uf_case] option
6 |     // when you set the [uf_reserved_case] option below.
7 |     // The application does not accept the reserved_case option individually.
8 | "uf_reserved_case": "upper", // "upper" or "lower" or "capitalize" or "nochange" 9 | "uf_reserved_words":"ABS,ALL,ALLOCATE,ALTER,AND,ANY,ARE,ARRAY,ARRAY_AGG,ARRAY_MAX_CARDINALITY,AS,ASENSITIVE,ASYMMETRIC,AT,ATOMIC,AUTHORIZATION,AVG,BEGIN,BEGIN_FRAME,BEGIN_PARTITION,BETWEEN,BIGINT,BINARY,BLOB,BOOLEAN,BOTH,BY,CALL,CALLED,CARDINALITY,CASCADED,CASE,CAST,CEIL,CEILING,CHAR,CHARACTER,CHARACTER_LENGTH,CHAR_LENGTH,CHECK,CLOB,CLOSE,COALESCE,COLLATE,COLLECT,COLUMN,COMMIT,CONDITION,CONNECT,CONSTRAINT,CONTAINS,CONVERT,CORR,CORRESPONDING,COUNT,COVAR_POP,COVAR_SAMP,CREATE,CROSS,CUBE,CUME_DIST,CURRENT,CURRENT_CATALOG,CURRENT_DATE,CURRENT_DEFAULT_TRANSFORM_GROUP,CURRENT_PATH,CURRENT_ROLE,CURRENT_ROW,CURRENT_SCHEMA,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_TRANSFORM_GROUP_FOR_TYPE,CURRENT_USER,CURSOR,CYCLE,DATALINK,DATE,DAY,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DELETE,DENSE_RANK,DEREF,DESCRIBE,DETERMINISTIC,DISCONNECT,DISTINCT,DLNEWCOPY,DLPREVIOUSCOPY,DLURLCOMPLETE,DLURLCOMPLETEONLY,DLURLCOMPLETEWRITE,DLURLPATH,DLURLPATHONLY,DLURLPATHWRITE,DLURLSCHEME,DLURLSERVER,DLVALUE,DOUBLE,DROP,DYNAMIC,EACH,ELEMENT,ELSE,END,END-EXEC,END_FRAME,END_PARTITION,EQUALS,ESCAPE,EVERY,EXCEPT,EXEC,EXECUTE,EXISTS,EXP,EXTERNAL,EXTRACT,FALSE,FETCH,FILTER,FIRST_VALUE,FLOAT,FLOOR,FOR,FOREIGN,FRAME_ROW,FREE,FROM,FULL,FUNCTION,FUSION,GET,GLOBAL,GRANT,GROUP,GROUPING,GROUPS,HAVING,HOLD,HOUR,IDENTITY,IMPORT,IN,INDICATOR,INNER,INOUT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERSECTION,INTERVAL,INTO,IS,JOIN,LAG,LANGUAGE,LARGE,LAST_VALUE,LATERAL,LEAD,LEADING,LEFT,LIKE,LIKE_REGEX,LN,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOWER,MATCH,MAX,MEMBER,MERGE,METHOD,MIN,MINUTE,MOD,MODIFIES,MODULE,MONTH,MULTISET,NATIONAL,NATURAL,NCHAR,NCLOB,NEW,NO,NONE,NORMALIZE,NOT,NTH_VALUE,NTILE,NULL,NULLIF,NUMERIC,OCCURRENCES_REGEX,OCTET_LENGTH,OF,OFFSET,OLD,ON,ONLY,OPEN,OR,ORDER,OUT,OUTER,OVER,OVERLAPS,OVERLAY,PARAMETER,PARTITION,PERCENT,PERCENTILE_CONT,PERCENTILE_DISC,PERCENT_RANK,PERIOD,PORTION,POSITION,POSITION_REGEX,POWER,PRECEDES,PRECISION,PREPARE,PRIMARY,PROCEDURE,RANGE,RANK,READS,REAL,RECURSIVE,REF,REFERENCES,REFERENCING,REGR_AVGX,REGR_AVGY,REGR_COUNT,REGR_INTERCEPT,REGR_R2,REGR_SLOPE,REGR_SXX,REGR_SXY,REGR_SYY,RELEASE,RESULT,RETURN,RETURNS,REVOKE,RIGHT,ROLLBACK,ROLLUP,ROW,ROWS,ROW_NUMBER,SAVEPOINT,SCOPE,SCROLL,SEARCH,SECOND,SELECT,SENSITIVE,SESSION_USER,SET,SIMILAR,SMALLINT,SOME,SPECIFIC,SPECIFICTYPE,SQL,SQLEXCEPTION,SQLSTATE,SQLWARNING,SQRT,START,STATIC,STDDEV_POP,STDDEV_SAMP,SUBMULTISET,SUBSTRING,SUBSTRING_REGEX,SUCCEEDS,SUM,SYMMETRIC,SYSTEM,SYSTEM_TIME,SYSTEM_USER,TABLE,TABLESAMPLE,THEN,TIME,TIMESTAMP,TIMEZONE_HOUR,TIMEZONE_MINUTE,TO,TRAILING,TRANSLATE,TRANSLATE_REGEX,TRANSLATION,TREAT,TRIGGER,TRIM,TRIM_ARRAY,TRUE,TRUNCATE,UESCAPE,UNION,UNIQUE,UNKNOWN,UNNEST,UPDATE,UPPER,USER,USING,VALUE,VALUES,VALUE_OF,VARBINARY,VARCHAR,VARYING,VAR_POP,VAR_SAMP,VERSIONING,WHEN,WHENEVER,WHERE,WIDTH_BUCKET,WINDOW,WITH,WITHIN,WITHOUT,XML,XMLAGG,XMLATTRIBUTES,XMLBINARY,XMLCAST,XMLCOMMENT,XMLCONCAT,XMLDOCUMENT,XMLELEMENT,XMLEXISTS,XMLFOREST,XMLITERATE,XMLNAMESPACES,XMLPARSE,XMLPI,XMLQUERY,XMLSERIALIZE,XMLTABLE,XMLTEXT,XMLVALIDATE,YEAR", 10 | "uf_comment_syntax": "uroboroSQL", // "uroboroSQL" or "doma2" 11 | "uf_escapesequence_u005c": false, 12 | "uf_save_on_format": false, 13 | "uf_save_on_format_extensions": ["sql"] 14 | } -------------------------------------------------------------------------------- /Readme.ja.md: -------------------------------------------------------------------------------- 1 | Sublime uroboroSQL Formatter 2 | ==================== 3 | 4 | [![Package 
Control](https://img.shields.io/packagecontrol/dt/uroboroSQL%20Formatter.svg)](https://packagecontrol.io/packages/uroboroSQL%20Formatter)
5 |
6 | Sublime uroboroSQL Formatter is a Sublime Text 3 plugin that formats even the
7 | very long SQL (1K steps or more) common in enterprise systems into a readable,
8 | highly maintainable style.
9 |
10 | In countries where English is not the native language, such as Japan, comments
11 | are often written in SELECT clauses and the like. In that case, the vertical
12 | positions of AS clauses and comments are aligned in pursuit of an almost
13 | artistic readability; this plugin was developed to achieve that automatically.
14 |
15 | #### With a general formatter
16 |
17 | ```sql
18 | SELECT MI.MAKER_CD AS ITEM_MAKER_CD -- maker code
19 | ,
20 | MI.BRAND_CD AS ITEM_BRAND_CD -- brand code
21 | ,
22 | MI.ITEM_CD AS ITEM_CD -- item code
23 | ,
24 | MI.CATEGORY AS ITEM_CATEGORY -- item category
25 | FROM M_ITEM MI -- item master
26 |
27 | WHERE 1 = 1
28 | AND MI.ARRIVAL_DATE = '2016-12-01' -- arrival date
29 | ```
30 |
31 | #### With Sublime uroboroSQL Formatter
32 |
33 | ```sql
34 | SELECT
35 |     MI.MAKER_CD AS ITEM_MAKER_CD -- maker code
36 |     , MI.BRAND_CD AS ITEM_BRAND_CD -- brand code
37 |     , MI.ITEM_CD AS ITEM_CD -- item code
38 |     , MI.CATEGORY AS ITEM_CATEGORY -- item category
39 | FROM
40 |     M_ITEM MI -- item master
41 | WHERE
42 |     1 = 1
43 |     AND MI.ARRIVAL_DATE = '2016-12-01' -- arrival date
44 |
45 | ```
46 |
47 | Features
48 | --------
49 |
50 | - Formats SELECT and WHERE clauses with the vertical positions of AS keywords and operators aligned
51 | - Formats correctly even when each line ends with a comment
52 |
53 | How to Use
54 | ----------
55 |
56 | - Select "SQL Format" from the Edit menu
57 | - Select "SQL Format" from the context menu in the editor
58 | - Select "uroboroSQL Formatter: Format SQL" from the Command Palette
59 | - ctrl + cmd + f on OS X, or ctrl + alt + f on Windows
60 |
61 | Settings
62 | --------
63 |
64 | ```json
65 | {
66 |     "uf_tab_size": 4,
67 |     "uf_translate_tabs_to_spaces": true,
68 |     "uf_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
69 |     "uf_reserved_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
70 |     "uf_reserved_words": "SELECT, FROM, WHERE, CREATE", // Put commas to separate reserved words
71 |     "uf_comment_syntax": "uroboroSQL", // "uroboroSQL" or "doma2"
72 |     "uf_escapesequence_u005c": false,
73 |     "uf_save_on_format": true,
74 |     "uf_save_on_format_extensions": [".sql"]
75 | }
76 | ```
77 |
78 | - uf_tab_size
79 |     - Specifies the tab size of the indentation after formatting. 4 is recommended.
80 | - uf_translate_tabs_to_spaces
81 |     - Specifies whether the indentation after formatting uses tabs or spaces. Setting it to true produces spaces.
82 | - uf_case
83 |     - Converts all words (reserved words, identifiers and everything else) to the specified case: upper, lower, capitalize or nochange (no conversion).
84 | - uf_reserved_case
85 |     - Converts reserved words to the specified case (upper, lower, capitalize). When "nochange" is specified, no reserved-word-specific conversion is performed and all words are converted according to "uf_case".
86 | - uf_reserved_words
87 |     - To convert the case of reserved words only, set the desired case (upper, lower, capitalize or nochange) in "uf_reserved_case" and list the reserved words here, separated by commas.
88 | - uf_comment_syntax
89 |     - Specifies the comment syntax format.
90 |     - Either "uroboroSQL" or "doma2" can be specified.
91 |     - For plain SQL, either value is fine.
92 | - uf_escapesequence_u005c
93 |     - Set this to true when escape sequences in the SQL are written with a backslash.
94 | - uf_save_on_format
95 |     - Set this to true to format the file automatically when it is saved.
96 | - uf_save_on_format_extensions
97 |     - Specifies, as a list, the file extensions to be formatted.
98 |
99 |
100 | License
101 | -------
102 |
103 | [python-sqlparse library](https://github.com/andialbrecht/sqlparse) and this code are both under the [2-clause BSD](http://www.opensource.org/licenses/bsd-license.php) license
104 |
105 | ---
106 |
107 | Copyright 2018 by Future Architect.
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
Sublime uroboroSQL Formatter
====================

[![Package Control](https://img.shields.io/packagecontrol/dt/uroboroSQL%20Formatter.svg)](https://packagecontrol.io/packages/uroboroSQL%20Formatter)

Sublime uroboroSQL Formatter is a Sublime Text 3 plugin for formatting even the very long SQL (1K steps or more) common in enterprise systems into a readable, highly maintainable style.

In countries where English is not the native language, such as Japan, comments are often written in SELECT clauses and the like. In that case, the vertical positions of AS clauses and comments are aligned in pursuit of an almost artistic readability; this plugin was developed to achieve that automatically.

For Japanese, see [Readme.ja.md](Readme.ja.md).

#### With a general formatter

```sql
SELECT MI.MAKER_CD AS ITEM_MAKER_CD -- maker code
,
MI.BRAND_CD AS ITEM_BRAND_CD -- brand code
,
MI.ITEM_CD AS ITEM_CD -- item code
,
MI.CATEGORY AS ITEM_CATEGORY -- item category
FROM M_ITEM MI -- item master

WHERE 1 = 1
AND MI.ARRIVAL_DATE = '2016-12-01' -- arrival date
```

#### With Sublime uroboroSQL Formatter

```sql
SELECT
    MI.MAKER_CD AS ITEM_MAKER_CD -- maker code
    , MI.BRAND_CD AS ITEM_BRAND_CD -- brand code
    , MI.ITEM_CD AS ITEM_CD -- item code
    , MI.CATEGORY AS ITEM_CATEGORY -- item category
FROM
    M_ITEM MI -- item master
WHERE
    1 = 1
    AND MI.ARRIVAL_DATE = '2016-12-01' -- arrival date

```

Features
--------

- Formats SELECT and WHERE clauses with the vertical positions of AS keywords and operators aligned
- Formats correctly even when each line ends with a comment

How to Use
----------

- Select "SQL Format" from the Edit menu
- Select "SQL Format" from the context menu in the editor
- Select "uroboroSQL Formatter: Format SQL" from the Command Palette
- ctrl + cmd + f on OS X, or ctrl + alt + f on Windows

Settings
--------

```json
{
    "uf_tab_size": 4,
    "uf_translate_tabs_to_spaces": true,
    "uf_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
    "uf_reserved_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
    "uf_reserved_words": "SELECT, FROM, WHERE, CREATE", // Put commas to separate reserved words
    "uf_comment_syntax": "uroboroSQL", // "uroboroSQL" or "doma2"
    "uf_escapesequence_u005c": false,
    "uf_save_on_format": true,
    "uf_save_on_format_extensions": [".sql"]
}
```

- uf_tab_size
    - Specifies the tab size of the indentation after formatting. 4 is recommended.
- uf_translate_tabs_to_spaces
    - Specifies whether the indentation after formatting uses tabs or spaces. Setting it to true produces spaces.
- uf_case
    - To convert all words to a specific case, set "upper", "lower" or "capitalize". With "nochange", word cases are left as they are in the original.
- uf_reserved_case
    - To convert reserved words to a specific case, set "upper", "lower" or "capitalize". With "nochange", no reserved-word-specific conversion is performed and all words follow the "uf_case" setting.
- uf_reserved_words
    - To convert the case of reserved words only, list the reserved words here, separated by commas. (The input is not case sensitive, so the reserved words may be entered in any case.)
- uf_comment_syntax
    - Specifies the comment syntax format; see the sketch after this list.
    - Either "uroboroSQL" or "doma2" can be specified.
    - For plain SQL, either value is fine.
- uf_escapesequence_u005c
    - Set this to true when escape sequences in the SQL are written with a backslash.
- uf_save_on_format
    - Set this to true to format the file automatically when it is saved.
- uf_save_on_format_extensions
    - Specifies, as a list, the file extensions to be formatted.
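For reference, a rough sketch of why the comment syntax setting matters (the parameter name `itemCd` below is made up for illustration; see the uroboroSQL and Doma2 documentation for the exact rules). In this style of two-way SQL, bind parameters live inside comments followed by a dummy literal, and the formatter has to keep each comment attached to its literal instead of treating it as free text:

```sql
SELECT
    MI.ITEM_CD AS ITEM_CD -- item code
FROM
    M_ITEM MI -- item master
WHERE
    MI.ITEM_CD = /*itemCd*/'0000' -- bind parameter comment + dummy literal
```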
License
-------

[python-sqlparse library](https://github.com/andialbrecht/sqlparse) and this code are both under the [2-clause BSD](http://www.opensource.org/licenses/bsd-license.php) license

---

Copyright 2018 by Future Architect.
--------------------------------------------------------------------------------
/enum/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2013, Ethan Furman.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.

Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.

Neither the name Ethan Furman nor the names of any
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/enum/README:
--------------------------------------------------------------------------------
1 | enum34 is the new Python stdlib enum module available in Python 3.4
2 | backported for previous versions of Python from 2.4 to 3.3.
3 | tested on 2.6, 2.7, and 3.3+ 4 | -------------------------------------------------------------------------------- /enum/doc/enum.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/future-architect/Sublime-uroboroSQL-formatter/7b1c1d81d377a8d341847247020173d1861b6a8f/enum/doc/enum.pdf -------------------------------------------------------------------------------- /enum/doc/enum.rst: -------------------------------------------------------------------------------- 1 | ``enum`` --- support for enumerations 2 | ======================================== 3 | 4 | .. :synopsis: enumerations are sets of symbolic names bound to unique, constant 5 | values. 6 | .. :moduleauthor:: Ethan Furman 7 | .. :sectionauthor:: Barry Warsaw , 8 | .. :sectionauthor:: Eli Bendersky , 9 | .. :sectionauthor:: Ethan Furman 10 | 11 | ---------------- 12 | 13 | An enumeration is a set of symbolic names (members) bound to unique, constant 14 | values. Within an enumeration, the members can be compared by identity, and 15 | the enumeration itself can be iterated over. 16 | 17 | 18 | Module Contents 19 | --------------- 20 | 21 | This module defines two enumeration classes that can be used to define unique 22 | sets of names and values: ``Enum`` and ``IntEnum``. It also defines 23 | one decorator, ``unique``. 24 | 25 | ``Enum`` 26 | 27 | Base class for creating enumerated constants. See section `Functional API`_ 28 | for an alternate construction syntax. 29 | 30 | ``IntEnum`` 31 | 32 | Base class for creating enumerated constants that are also subclasses of ``int``. 33 | 34 | ``unique`` 35 | 36 | Enum class decorator that ensures only one name is bound to any one value. 37 | 38 | 39 | Creating an Enum 40 | ---------------- 41 | 42 | Enumerations are created using the ``class`` syntax, which makes them 43 | easy to read and write. An alternative creation method is described in 44 | `Functional API`_. To define an enumeration, subclass ``Enum`` as 45 | follows:: 46 | 47 | >>> from enum import Enum 48 | >>> class Color(Enum): 49 | ... red = 1 50 | ... green = 2 51 | ... blue = 3 52 | 53 | Note: Nomenclature 54 | 55 | - The class ``Color`` is an *enumeration* (or *enum*) 56 | - The attributes ``Color.red``, ``Color.green``, etc., are 57 | *enumeration members* (or *enum members*). 58 | - The enum members have *names* and *values* (the name of 59 | ``Color.red`` is ``red``, the value of ``Color.blue`` is 60 | ``3``, etc.) 61 | 62 | Note: 63 | 64 | Even though we use the ``class`` syntax to create Enums, Enums 65 | are not normal Python classes. See `How are Enums different?`_ for 66 | more details. 67 | 68 | Enumeration members have human readable string representations:: 69 | 70 | >>> print(Color.red) 71 | Color.red 72 | 73 | ...while their ``repr`` has more information:: 74 | 75 | >>> print(repr(Color.red)) 76 | 77 | 78 | The *type* of an enumeration member is the enumeration it belongs to:: 79 | 80 | >>> type(Color.red) 81 | 82 | >>> isinstance(Color.green, Color) 83 | True 84 | >>> 85 | 86 | Enum members also have a property that contains just their item name:: 87 | 88 | >>> print(Color.red.name) 89 | red 90 | 91 | Enumerations support iteration. In Python 3.x definition order is used; in 92 | Python 2.x the definition order is not available, but class attribute 93 | ``__order__`` is supported; otherwise, value order is used:: 94 | 95 | >>> class Shake(Enum): 96 | ... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x 97 | ... 
vanilla = 7 98 | ... chocolate = 4 99 | ... cookies = 9 100 | ... mint = 3 101 | ... 102 | >>> for shake in Shake: 103 | ... print(shake) 104 | ... 105 | Shake.vanilla 106 | Shake.chocolate 107 | Shake.cookies 108 | Shake.mint 109 | 110 | The ``__order__`` attribute is always removed, and in 3.x it is also ignored 111 | (order is definition order); however, in the stdlib version it will be ignored 112 | but not removed. 113 | 114 | Enumeration members are hashable, so they can be used in dictionaries and sets:: 115 | 116 | >>> apples = {} 117 | >>> apples[Color.red] = 'red delicious' 118 | >>> apples[Color.green] = 'granny smith' 119 | >>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'} 120 | True 121 | 122 | 123 | Programmatic access to enumeration members and their attributes 124 | --------------------------------------------------------------- 125 | 126 | Sometimes it's useful to access members in enumerations programmatically (i.e. 127 | situations where ``Color.red`` won't do because the exact color is not known 128 | at program-writing time). ``Enum`` allows such access:: 129 | 130 | >>> Color(1) 131 | 132 | >>> Color(3) 133 | 134 | 135 | If you want to access enum members by *name*, use item access:: 136 | 137 | >>> Color['red'] 138 | 139 | >>> Color['green'] 140 | 141 | 142 | If have an enum member and need its ``name`` or ``value``:: 143 | 144 | >>> member = Color.red 145 | >>> member.name 146 | 'red' 147 | >>> member.value 148 | 1 149 | 150 | 151 | Duplicating enum members and values 152 | ----------------------------------- 153 | 154 | Having two enum members (or any other attribute) with the same name is invalid; 155 | in Python 3.x this would raise an error, but in Python 2.x the second member 156 | simply overwrites the first:: 157 | 158 | >>> # python 2.x 159 | >>> class Shape(Enum): 160 | ... square = 2 161 | ... square = 3 162 | ... 163 | >>> Shape.square 164 | 165 | 166 | >>> # python 3.x 167 | >>> class Shape(Enum): 168 | ... square = 2 169 | ... square = 3 170 | Traceback (most recent call last): 171 | ... 172 | TypeError: Attempted to reuse key: 'square' 173 | 174 | However, two enum members are allowed to have the same value. Given two members 175 | A and B with the same value (and A defined first), B is an alias to A. By-value 176 | lookup of the value of A and B will return A. By-name lookup of B will also 177 | return A:: 178 | 179 | >>> class Shape(Enum): 180 | ... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x 181 | ... square = 2 182 | ... diamond = 1 183 | ... circle = 3 184 | ... alias_for_square = 2 185 | ... 186 | >>> Shape.square 187 | 188 | >>> Shape.alias_for_square 189 | 190 | >>> Shape(2) 191 | 192 | 193 | 194 | Allowing aliases is not always desirable. ``unique`` can be used to ensure 195 | that none exist in a particular enumeration:: 196 | 197 | >>> from enum import unique 198 | >>> @unique 199 | ... class Mistake(Enum): 200 | ... __order__ = 'one two three four' # only needed in 2.x 201 | ... one = 1 202 | ... two = 2 203 | ... three = 3 204 | ... four = 3 205 | Traceback (most recent call last): 206 | ... 207 | ValueError: duplicate names found in : four -> three 208 | 209 | Iterating over the members of an enum does not provide the aliases:: 210 | 211 | >>> list(Shape) 212 | [, , ] 213 | 214 | The special attribute ``__members__`` is a dictionary mapping names to members. 
215 | It includes all names defined in the enumeration, including the aliases:: 216 | 217 | >>> for name, member in sorted(Shape.__members__.items()): 218 | ... name, member 219 | ... 220 | ('alias_for_square', ) 221 | ('circle', ) 222 | ('diamond', ) 223 | ('square', ) 224 | 225 | The ``__members__`` attribute can be used for detailed programmatic access to 226 | the enumeration members. For example, finding all the aliases:: 227 | 228 | >>> [name for name, member in Shape.__members__.items() if member.name != name] 229 | ['alias_for_square'] 230 | 231 | Comparisons 232 | ----------- 233 | 234 | Enumeration members are compared by identity:: 235 | 236 | >>> Color.red is Color.red 237 | True 238 | >>> Color.red is Color.blue 239 | False 240 | >>> Color.red is not Color.blue 241 | True 242 | 243 | Ordered comparisons between enumeration values are *not* supported. Enum 244 | members are not integers (but see `IntEnum`_ below):: 245 | 246 | >>> Color.red < Color.blue 247 | Traceback (most recent call last): 248 | File "", line 1, in 249 | TypeError: unorderable types: Color() < Color() 250 | 251 | .. warning:: 252 | 253 | In Python 2 *everything* is ordered, even though the ordering may not 254 | make sense. If you want your enumerations to have a sensible ordering 255 | check out the `OrderedEnum`_ recipe below. 256 | 257 | 258 | Equality comparisons are defined though:: 259 | 260 | >>> Color.blue == Color.red 261 | False 262 | >>> Color.blue != Color.red 263 | True 264 | >>> Color.blue == Color.blue 265 | True 266 | 267 | Comparisons against non-enumeration values will always compare not equal 268 | (again, ``IntEnum`` was explicitly designed to behave differently, see 269 | below):: 270 | 271 | >>> Color.blue == 2 272 | False 273 | 274 | 275 | Allowed members and attributes of enumerations 276 | ---------------------------------------------- 277 | 278 | The examples above use integers for enumeration values. Using integers is 279 | short and handy (and provided by default by the `Functional API`_), but not 280 | strictly enforced. In the vast majority of use-cases, one doesn't care what 281 | the actual value of an enumeration is. But if the value *is* important, 282 | enumerations can have arbitrary values. 283 | 284 | Enumerations are Python classes, and can have methods and special methods as 285 | usual. If we have this enumeration:: 286 | 287 | >>> class Mood(Enum): 288 | ... funky = 1 289 | ... happy = 3 290 | ... 291 | ... def describe(self): 292 | ... # self is the member here 293 | ... return self.name, self.value 294 | ... 295 | ... def __str__(self): 296 | ... return 'my custom str! {0}'.format(self.value) 297 | ... 298 | ... @classmethod 299 | ... def favorite_mood(cls): 300 | ... # cls here is the enumeration 301 | ... return cls.happy 302 | 303 | Then:: 304 | 305 | >>> Mood.favorite_mood() 306 | 307 | >>> Mood.happy.describe() 308 | ('happy', 3) 309 | >>> str(Mood.funky) 310 | 'my custom str! 1' 311 | 312 | The rules for what is allowed are as follows: _sunder_ names (starting and 313 | ending with a single underscore) are reserved by enum and cannot be used; 314 | all other attributes defined within an enumeration will become members of this 315 | enumeration, with the exception of *__dunder__* names and descriptors (methods 316 | are also descriptors). 317 | 318 | Note: 319 | 320 | If your enumeration defines ``__new__`` and/or ``__init__`` then 321 | whatever value(s) were given to the enum member will be passed into 322 | those methods. See `Planet`_ for an example. 
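Ahead of that fuller example, a minimal sketch (the enum below is made up
for illustration): a member's single value is handed straight to
``__init__``::

    >>> class Distance(Enum):
    ...     meter = 1.0
    ...     def __init__(self, factor):
    ...         self.factor = factor
    ...
    >>> Distance.meter.factor
    1.0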
323 | 324 | 325 | Restricted subclassing of enumerations 326 | -------------------------------------- 327 | 328 | Subclassing an enumeration is allowed only if the enumeration does not define 329 | any members. So this is forbidden:: 330 | 331 | >>> class MoreColor(Color): 332 | ... pink = 17 333 | Traceback (most recent call last): 334 | ... 335 | TypeError: Cannot extend enumerations 336 | 337 | But this is allowed:: 338 | 339 | >>> class Foo(Enum): 340 | ... def some_behavior(self): 341 | ... pass 342 | ... 343 | >>> class Bar(Foo): 344 | ... happy = 1 345 | ... sad = 2 346 | ... 347 | 348 | Allowing subclassing of enums that define members would lead to a violation of 349 | some important invariants of types and instances. On the other hand, it makes 350 | sense to allow sharing some common behavior between a group of enumerations. 351 | (See `OrderedEnum`_ for an example.) 352 | 353 | 354 | Pickling 355 | -------- 356 | 357 | Enumerations can be pickled and unpickled:: 358 | 359 | >>> from enum.test_enum import Fruit 360 | >>> from pickle import dumps, loads 361 | >>> Fruit.tomato is loads(dumps(Fruit.tomato, 2)) 362 | True 363 | 364 | The usual restrictions for pickling apply: picklable enums must be defined in 365 | the top level of a module, since unpickling requires them to be importable 366 | from that module. 367 | 368 | Note: 369 | 370 | With pickle protocol version 4 (introduced in Python 3.4) it is possible 371 | to easily pickle enums nested in other classes. 372 | 373 | 374 | 375 | Functional API 376 | -------------- 377 | 378 | The ``Enum`` class is callable, providing the following functional API:: 379 | 380 | >>> Animal = Enum('Animal', 'ant bee cat dog') 381 | >>> Animal 382 | 383 | >>> Animal.ant 384 | 385 | >>> Animal.ant.value 386 | 1 387 | >>> list(Animal) 388 | [, , , ] 389 | 390 | The semantics of this API resemble ``namedtuple``. The first argument 391 | of the call to ``Enum`` is the name of the enumeration. 392 | 393 | The second argument is the *source* of enumeration member names. It can be a 394 | whitespace-separated string of names, a sequence of names, a sequence of 395 | 2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to 396 | values. The last two options enable assigning arbitrary values to 397 | enumerations; the others auto-assign increasing integers starting with 1. A 398 | new class derived from ``Enum`` is returned. In other words, the above 399 | assignment to ``Animal`` is equivalent to:: 400 | 401 | >>> class Animals(Enum): 402 | ... ant = 1 403 | ... bee = 2 404 | ... cat = 3 405 | ... dog = 4 406 | 407 | Pickling enums created with the functional API can be tricky as frame stack 408 | implementation details are used to try and figure out which module the 409 | enumeration is being created in (e.g. it will fail if you use a utility 410 | function in separate module, and also may not work on IronPython or Jython). 411 | The solution is to specify the module name explicitly as follows:: 412 | 413 | >>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__) 414 | 415 | Derived Enumerations 416 | -------------------- 417 | 418 | IntEnum 419 | ^^^^^^^ 420 | 421 | A variation of ``Enum`` is provided which is also a subclass of 422 | ``int``. Members of an ``IntEnum`` can be compared to integers; 423 | by extension, integer enumerations of different types can also be compared 424 | to each other:: 425 | 426 | >>> from enum import IntEnum 427 | >>> class Shape(IntEnum): 428 | ... circle = 1 429 | ... square = 2 430 | ... 
431 | >>> class Request(IntEnum): 432 | ... post = 1 433 | ... get = 2 434 | ... 435 | >>> Shape == 1 436 | False 437 | >>> Shape.circle == 1 438 | True 439 | >>> Shape.circle == Request.post 440 | True 441 | 442 | However, they still can't be compared to standard ``Enum`` enumerations:: 443 | 444 | >>> class Shape(IntEnum): 445 | ... circle = 1 446 | ... square = 2 447 | ... 448 | >>> class Color(Enum): 449 | ... red = 1 450 | ... green = 2 451 | ... 452 | >>> Shape.circle == Color.red 453 | False 454 | 455 | ``IntEnum`` values behave like integers in other ways you'd expect:: 456 | 457 | >>> int(Shape.circle) 458 | 1 459 | >>> ['a', 'b', 'c'][Shape.circle] 460 | 'b' 461 | >>> [i for i in range(Shape.square)] 462 | [0, 1] 463 | 464 | For the vast majority of code, ``Enum`` is strongly recommended, 465 | since ``IntEnum`` breaks some semantic promises of an enumeration (by 466 | being comparable to integers, and thus by transitivity to other 467 | unrelated enumerations). It should be used only in special cases where 468 | there's no other choice; for example, when integer constants are 469 | replaced with enumerations and backwards compatibility is required with code 470 | that still expects integers. 471 | 472 | 473 | Others 474 | ^^^^^^ 475 | 476 | While ``IntEnum`` is part of the ``enum`` module, it would be very 477 | simple to implement independently:: 478 | 479 | class IntEnum(int, Enum): 480 | pass 481 | 482 | This demonstrates how similar derived enumerations can be defined; for example 483 | a ``StrEnum`` that mixes in ``str`` instead of ``int``. 484 | 485 | Some rules: 486 | 487 | 1. When subclassing ``Enum``, mix-in types must appear before 488 | ``Enum`` itself in the sequence of bases, as in the ``IntEnum`` 489 | example above. 490 | 2. While ``Enum`` can have members of any type, once you mix in an 491 | additional type, all the members must have values of that type, e.g. 492 | ``int`` above. This restriction does not apply to mix-ins which only 493 | add methods and don't specify another data type such as ``int`` or 494 | ``str``. 495 | 3. When another data type is mixed in, the ``value`` attribute is *not the 496 | same* as the enum member itself, although it is equivalant and will compare 497 | equal. 498 | 4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and 499 | ``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for 500 | IntEnum) treat the enum member as its mixed-in type. 501 | 502 | Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int`` 503 | subclasses are printed as strings and not numbers when the ``%d``, ``%i``, 504 | or ``%u`` codes are used. 505 | 5. ``str.__format__`` (or ``format``) will use the mixed-in 506 | type's ``__format__``. If the ``Enum``'s ``str`` or 507 | ``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes. 508 | 509 | 510 | Decorators 511 | ---------- 512 | 513 | unique 514 | ^^^^^^ 515 | 516 | A ``class`` decorator specifically for enumerations. It searches an 517 | enumeration's ``__members__`` gathering any aliases it finds; if any are 518 | found ``ValueError`` is raised with the details:: 519 | 520 | >>> @unique 521 | ... class NoDupes(Enum): 522 | ... first = 'one' 523 | ... second = 'two' 524 | ... third = 'two' 525 | Traceback (most recent call last): 526 | ... 
527 | ValueError: duplicate names found in : third -> second 528 | 529 | 530 | Interesting examples 531 | -------------------- 532 | 533 | While ``Enum`` and ``IntEnum`` are expected to cover the majority of 534 | use-cases, they cannot cover them all. Here are recipes for some different 535 | types of enumerations that can be used directly, or as examples for creating 536 | one's own. 537 | 538 | 539 | AutoNumber 540 | ^^^^^^^^^^ 541 | 542 | Avoids having to specify the value for each enumeration member:: 543 | 544 | >>> class AutoNumber(Enum): 545 | ... def __new__(cls): 546 | ... value = len(cls.__members__) + 1 547 | ... obj = object.__new__(cls) 548 | ... obj._value_ = value 549 | ... return obj 550 | ... 551 | >>> class Color(AutoNumber): 552 | ... __order__ = "red green blue" # only needed in 2.x 553 | ... red = () 554 | ... green = () 555 | ... blue = () 556 | ... 557 | >>> Color.green.value == 2 558 | True 559 | 560 | Note: 561 | 562 | The `__new__` method, if defined, is used during creation of the Enum 563 | members; it is then replaced by Enum's `__new__` which is used after 564 | class creation for lookup of existing members. Due to the way Enums are 565 | supposed to behave, there is no way to customize Enum's `__new__`. 566 | 567 | 568 | UniqueEnum 569 | ^^^^^^^^^^ 570 | 571 | Raises an error if a duplicate member name is found instead of creating an 572 | alias:: 573 | 574 | >>> class UniqueEnum(Enum): 575 | ... def __init__(self, *args): 576 | ... cls = self.__class__ 577 | ... if any(self.value == e.value for e in cls): 578 | ... a = self.name 579 | ... e = cls(self.value).name 580 | ... raise ValueError( 581 | ... "aliases not allowed in UniqueEnum: %r --> %r" 582 | ... % (a, e)) 583 | ... 584 | >>> class Color(UniqueEnum): 585 | ... red = 1 586 | ... green = 2 587 | ... blue = 3 588 | ... grene = 2 589 | Traceback (most recent call last): 590 | ... 591 | ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green' 592 | 593 | 594 | OrderedEnum 595 | ^^^^^^^^^^^ 596 | 597 | An ordered enumeration that is not based on ``IntEnum`` and so maintains 598 | the normal ``Enum`` invariants (such as not being comparable to other 599 | enumerations):: 600 | 601 | >>> class OrderedEnum(Enum): 602 | ... def __ge__(self, other): 603 | ... if self.__class__ is other.__class__: 604 | ... return self._value_ >= other._value_ 605 | ... return NotImplemented 606 | ... def __gt__(self, other): 607 | ... if self.__class__ is other.__class__: 608 | ... return self._value_ > other._value_ 609 | ... return NotImplemented 610 | ... def __le__(self, other): 611 | ... if self.__class__ is other.__class__: 612 | ... return self._value_ <= other._value_ 613 | ... return NotImplemented 614 | ... def __lt__(self, other): 615 | ... if self.__class__ is other.__class__: 616 | ... return self._value_ < other._value_ 617 | ... return NotImplemented 618 | ... 619 | >>> class Grade(OrderedEnum): 620 | ... __ordered__ = 'A B C D F' 621 | ... A = 5 622 | ... B = 4 623 | ... C = 3 624 | ... D = 2 625 | ... F = 1 626 | ... 627 | >>> Grade.C < Grade.A 628 | True 629 | 630 | 631 | Planet 632 | ^^^^^^ 633 | 634 | If ``__new__`` or ``__init__`` is defined the value of the enum member 635 | will be passed to those methods:: 636 | 637 | >>> class Planet(Enum): 638 | ... MERCURY = (3.303e+23, 2.4397e6) 639 | ... VENUS = (4.869e+24, 6.0518e6) 640 | ... EARTH = (5.976e+24, 6.37814e6) 641 | ... MARS = (6.421e+23, 3.3972e6) 642 | ... JUPITER = (1.9e+27, 7.1492e7) 643 | ... SATURN = (5.688e+26, 6.0268e7) 644 | ... 
URANUS = (8.686e+25, 2.5559e7) 645 | ... NEPTUNE = (1.024e+26, 2.4746e7) 646 | ... def __init__(self, mass, radius): 647 | ... self.mass = mass # in kilograms 648 | ... self.radius = radius # in meters 649 | ... @property 650 | ... def surface_gravity(self): 651 | ... # universal gravitational constant (m3 kg-1 s-2) 652 | ... G = 6.67300E-11 653 | ... return G * self.mass / (self.radius * self.radius) 654 | ... 655 | >>> Planet.EARTH.value 656 | (5.976e+24, 6378140.0) 657 | >>> Planet.EARTH.surface_gravity 658 | 9.802652743337129 659 | 660 | 661 | How are Enums different? 662 | ------------------------ 663 | 664 | Enums have a custom metaclass that affects many aspects of both derived Enum 665 | classes and their instances (members). 666 | 667 | 668 | Enum Classes 669 | ^^^^^^^^^^^^ 670 | 671 | The ``EnumMeta`` metaclass is responsible for providing the 672 | ``__contains__``, ``__dir__``, ``__iter__`` and other methods that 673 | allow one to do things with an ``Enum`` class that fail on a typical 674 | class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is 675 | responsible for ensuring that various other methods on the final ``Enum`` 676 | class are correct (such as ``__new__``, ``__getnewargs__``, 677 | ``__str__`` and ``__repr__``). 678 | 679 | .. note:: 680 | 681 | ``__dir__`` is not changed in the Python 2 line as it messes up some 682 | of the decorators included in the stdlib. 683 | 684 | 685 | Enum Members (aka instances) 686 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 687 | 688 | The most interesting thing about Enum members is that they are singletons. 689 | ``EnumMeta`` creates them all while it is creating the ``Enum`` 690 | class itself, and then puts a custom ``__new__`` in place to ensure 691 | that no new ones are ever instantiated by returning only the existing 692 | member instances. 693 | 694 | 695 | Finer Points 696 | ^^^^^^^^^^^^ 697 | 698 | ``Enum`` members are instances of an ``Enum`` class, and even though they 699 | are accessible as `EnumClass.member1.member2`, they should not be 700 | accessed directly from the member as that lookup may fail or, worse, 701 | return something besides the ``Enum`` member you were looking for 702 | (changed in version 1.1.1):: 703 | 704 | >>> class FieldTypes(Enum): 705 | ... name = 1 706 | ... value = 2 707 | ... size = 3 708 | ... 709 | >>> FieldTypes.value.size 710 | 711 | >>> FieldTypes.size.value 712 | 3 713 | 714 | The ``__members__`` attribute is only available on the class. 715 | 716 | In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being 717 | the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if 718 | ``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python 719 | 2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified 720 | as the ``OrderedDict`` type didn't exist yet. 721 | 722 | If you give your ``Enum`` subclass extra methods, like the `Planet`_ 723 | class above, those methods will show up in a `dir` of the member, 724 | but not of the class:: 725 | 726 | >>> dir(Planet) 727 | ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', 728 | 'VENUS', '__class__', '__doc__', '__members__', '__module__'] 729 | >>> dir(Planet.EARTH) 730 | ['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value'] 731 | 732 | A ``__new__`` method will only be used for the creation of the 733 | ``Enum`` members -- after that it is replaced. 
This means if you wish to
734 | change how ``Enum`` members are looked up you either have to write a
735 | helper function or a ``classmethod``.
736 |
--------------------------------------------------------------------------------
/sqlparse/__init__.py:
--------------------------------------------------------------------------------
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.

"""Parse SQL statements."""


__version__ = '0.1.19'


# Setup namespace
from sqlparse import engine
from sqlparse import filters
from sqlparse import formatter

# Deprecated in 0.1.5. Will be removed in 0.2.0
from sqlparse.exceptions import SQLParseError


def parse(sql, encoding=None):
    """Parse sql and return a list of statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :returns: A tuple of :class:`~sqlparse.sql.Statement` instances.
    """
    return tuple(parsestream(sql, encoding))


def parsestream(stream, encoding=None):
    """Parses SQL statements from a file-like object.

    :param stream: A file-like object.
    :param encoding: The encoding of the stream contents (optional).
    :returns: A generator of :class:`~sqlparse.sql.Statement` instances.
    """
    stack = engine.FilterStack()
    stack.full_analyze()
    return stack.run(stream, encoding)


def format(sql, **options):
    """Format *sql* according to *options*.

    Available options are documented in :ref:`formatting`.

    In addition to the formatting options this function accepts the
    keyword "encoding" which determines the encoding of the statement.

    :returns: The formatted SQL statement as string.
    """
    encoding = options.pop('encoding', None)
    stack = engine.FilterStack()
    options = formatter.validate_options(options)
    stack = formatter.build_filter_stack(stack, options)
    stack.postprocess.append(filters.SerializerUnicode())
    return ''.join(stack.run(sql, encoding))


def split(sql, encoding=None):
    """Split *sql* into single statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :returns: A list of strings.
    """
    stack = engine.FilterStack()
    stack.split_statements = True
    return [str(stmt).strip() for stmt in stack.run(sql, encoding)]


from sqlparse.engine.filter import StatementFilter


def split2(stream):
    splitter = StatementFilter()
    return list(splitter.process(None, stream))
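# ---------------------------------------------------------------------------
# Minimal usage sketch of the public API above (illustrative; not part of the
# upstream module). `reindent` and `keyword_case` are standard formatting
# options accepted by format():
#
#   import sqlparse
#   sqlparse.format('select a, b from t where x = 1',
#                   reindent=True, keyword_case='upper')
#   sqlparse.split('select 1; select 2;')  # -> ['select 1;', 'select 2;']
#   sqlparse.parse('select 1')[0]          # -> a sqlparse.sql.Statement
# ---------------------------------------------------------------------------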
--------------------------------------------------------------------------------
/sqlparse/engine/__init__.py:
--------------------------------------------------------------------------------
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.

"""filter"""

from sqlparse import lexer
from sqlparse.engine import grouping
from sqlparse.engine.filter import StatementFilter

# XXX remove this when cleanup is complete
Filter = object


class FilterStack(object):

    def __init__(self):
        self.preprocess = []
        self.stmtprocess = []
        self.postprocess = []
        self.split_statements = False
        self._grouping = False

    def _flatten(self, stream):
        for token in stream:
            if token.is_group():
                for t in self._flatten(token.tokens):
                    yield t
            else:
                yield token

    def enable_grouping(self):
        self._grouping = True

    def full_analyze(self):
        self.enable_grouping()

    def run(self, sql, encoding=None):
        stream = lexer.tokenize(sql, encoding)
        # Process token stream
        if self.preprocess:
            for filter_ in self.preprocess:
                stream = filter_.process(self, stream)

        if (self.stmtprocess or self.postprocess or self.split_statements
                or self._grouping):
            splitter = StatementFilter()
            stream = splitter.process(self, stream)

        if self._grouping:

            def _group(stream):
                for stmt in stream:
                    grouping.group(stmt)
                    yield stmt
            stream = _group(stream)

        if self.stmtprocess:

            def _run1(stream):
                ret = []
                for stmt in stream:
                    for filter_ in self.stmtprocess:
                        filter_.process(self, stmt)
                    ret.append(stmt)
                return ret
            stream = _run1(stream)

        if self.postprocess:

            def _run2(stream):
                for stmt in stream:
                    stmt.tokens = list(self._flatten(stmt.tokens))
                    for filter_ in self.postprocess:
                        stmt = filter_.process(self, stmt)
                    yield stmt
            stream = _run2(stream)

        return stream
--------------------------------------------------------------------------------
/sqlparse/engine/filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from sqlparse.sql import Statement, Token
4 | from sqlparse import tokens as T
5 |
6 |
7 | class StatementFilter:
8 |     "Filter that splits a stream into individual statements"
9 |
10 |     def __init__(self):
11 |         self._in_declare = False
12 |         self._in_dbldollar = False
13 |         self._is_create = False
14 |         self._begin_depth = 0
15 |
16 |     def _reset(self):
17 |         "Set the filter attributes to their default values"
18 |         self._in_declare = False
19 |         self._in_dbldollar = False
20 |         self._is_create = False
21 |         self._begin_depth = 0
22 |
23 |     def _change_splitlevel(self, ttype, value):
24 |         "Get the new split level (increase, decrease or remain equal)"
25 |         # PostgreSQL
26 |         if (ttype == T.Name.Builtin
27 |                 and value.startswith('$') and value.endswith('$')):
28 |             if self._in_dbldollar:
29 |                 self._in_dbldollar = False
30 |                 return -1
31 |             else:
32 |                 self._in_dbldollar = True
33 |                 return 1
34 |         elif self._in_dbldollar:
35 |             return 0
36 |
37 |         # ANSI
38 |         if ttype not in T.Keyword:
39 |             return 0
40 |
41 |         unified = value.upper()
42 |
43 |         if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
44 |             self._in_declare = True
45 |             return 1
46 |
47 |         if unified == 'BEGIN':
48 |             self._begin_depth += 1
49 |             if self._in_declare or self._is_create:
50 |                 # FIXME(andi): This makes no sense.
51 |                 return 1
52 |             return 0
53 |
54 |         if unified in ('END IF', 'END FOR'):
55 |             return -1
56 |
57 |         if unified == 'END':
58 |             # Should this respect a preceding BEGIN?
59 |             # In CASE ... WHEN ...
END this results in a split level -1. 60 | self._begin_depth = max(0, self._begin_depth - 1) 61 | return -1 62 | 63 | if ttype is T.Keyword.DDL and unified.startswith('CREATE'): 64 | self._is_create = True 65 | return 0 66 | 67 | if (unified in ('IF', 'FOR') 68 | and self._is_create and self._begin_depth > 0): 69 | return 1 70 | 71 | # Default 72 | return 0 73 | 74 | def process(self, stack, stream): 75 | "Process the stream" 76 | consume_ws = False 77 | splitlevel = 0 78 | stmt = None 79 | stmt_tokens = [] 80 | 81 | # Run over all stream tokens 82 | for ttype, value in stream: 83 | # Yield token if we finished a statement and there's no whitespaces 84 | if consume_ws and ttype not in (T.Whitespace, T.Comment.Single): 85 | stmt.tokens = stmt_tokens 86 | yield stmt 87 | 88 | # Reset filter and prepare to process next statement 89 | self._reset() 90 | consume_ws = False 91 | splitlevel = 0 92 | stmt = None 93 | 94 | # Create a new statement if we are not currently in one of them 95 | if stmt is None: 96 | stmt = Statement() 97 | stmt_tokens = [] 98 | 99 | # Change current split level (increase, decrease or remain equal) 100 | splitlevel += self._change_splitlevel(ttype, value) 101 | 102 | # Append the token to the current statement 103 | stmt_tokens.append(Token(ttype, value)) 104 | 105 | # Check if we get the end of a statement 106 | if splitlevel <= 0 and ttype is T.Punctuation and value == ';': 107 | consume_ws = True 108 | 109 | # Yield pending statement (if any) 110 | if stmt is not None: 111 | stmt.tokens = stmt_tokens 112 | yield stmt 113 | -------------------------------------------------------------------------------- /sqlparse/engine/grouping.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import itertools 4 | 5 | from sqlparse import sql 6 | from sqlparse import tokens as T 7 | import sys 8 | 9 | try: 10 | next 11 | except NameError: # Python < 2.6 12 | next = lambda i: i.__next__() 13 | 14 | 15 | def _group_left_right(tlist, ttype, value, cls, 16 | check_right=lambda t: True, 17 | check_left=lambda t: True, 18 | include_semicolon=False): 19 | [_group_left_right(sgroup, ttype, value, cls, check_right, check_left, 20 | include_semicolon) for sgroup in tlist.get_sublists() 21 | if not isinstance(sgroup, cls)] 22 | idx = 0 23 | token = tlist.token_next_match(idx, ttype, value) 24 | while token: 25 | right = tlist.token_next(tlist.token_index(token)) 26 | left = tlist.token_prev(tlist.token_index(token)) 27 | if right is None or not check_right(right): 28 | token = tlist.token_next_match(tlist.token_index(token) + 1, 29 | ttype, value) 30 | elif left is None or not check_left(left): 31 | token = tlist.token_next_match(tlist.token_index(token) + 1, 32 | ttype, value) 33 | else: 34 | if include_semicolon: 35 | sright = tlist.token_next_match(tlist.token_index(right), 36 | T.Punctuation, ';') 37 | if sright is not None: 38 | # only overwrite "right" if a semicolon is actually 39 | # present. 
40 | right = sright 41 | tokens = tlist.tokens_between(left, right)[1:] 42 | if not isinstance(left, cls): 43 | new = cls([left]) 44 | new_idx = tlist.token_index(left) 45 | tlist.tokens.remove(left) 46 | tlist.tokens.insert(new_idx, new) 47 | left = new 48 | left.tokens.extend(tokens) 49 | for t in tokens: 50 | tlist.tokens.remove(t) 51 | token = tlist.token_next_match(tlist.token_index(left) + 1, 52 | ttype, value) 53 | 54 | 55 | def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value): 56 | depth = 1 57 | for tok in tlist.tokens[idx:]: 58 | if tok.match(start_ttype, start_value): 59 | depth += 1 60 | elif tok.match(end_ttype, end_value): 61 | depth -= 1 62 | if depth == 1: 63 | return tok 64 | return None 65 | 66 | 67 | def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, 68 | cls, include_semicolon=False, recurse=False): 69 | 70 | #bugfix recurse 71 | # [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, 72 | # cls, include_semicolon) for sgroup in tlist.get_sublists() 73 | [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, 74 | cls, include_semicolon, recurse) for sgroup in tlist.get_sublists() 75 | if recurse] 76 | if isinstance(tlist, cls): 77 | idx = 1 78 | else: 79 | idx = 0 80 | token = tlist.token_next_match(idx, start_ttype, start_value) 81 | while token: 82 | tidx = tlist.token_index(token) 83 | end = _find_matching(tidx, tlist, start_ttype, start_value, 84 | end_ttype, end_value) 85 | if end is None: 86 | idx = tidx + 1 87 | else: 88 | if include_semicolon: 89 | next_ = tlist.token_next(tlist.token_index(end)) 90 | if next_ and next_.match(T.Punctuation, ';'): 91 | end = next_ 92 | group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) 93 | _group_matching(group, start_ttype, start_value, 94 | end_ttype, end_value, cls, include_semicolon) 95 | idx = tlist.token_index(group) + 1 96 | token = tlist.token_next_match(idx, start_ttype, start_value) 97 | 98 | 99 | def group_if(tlist): 100 | _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True) 101 | 102 | 103 | def group_for(tlist): 104 | _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', 105 | sql.For, True) 106 | 107 | 108 | def group_foreach(tlist): 109 | _group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP', 110 | sql.For, True) 111 | 112 | 113 | def group_begin(tlist): 114 | _group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END', 115 | sql.Begin, True) 116 | 117 | 118 | def group_as(tlist): 119 | 120 | def _right_valid(token): 121 | # Currently limited to DML/DDL. Maybe additional more non SQL reserved 122 | # keywords should appear here (see issue8). 
123 | return not token.ttype in (T.DML, T.DDL) 124 | 125 | def _left_valid(token): 126 | if token.ttype is T.Keyword and token.value in ('NULL',): 127 | return True 128 | return token.ttype is not T.Keyword 129 | 130 | _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier, 131 | check_right=_right_valid, 132 | check_left=_left_valid) 133 | 134 | 135 | def group_assignment(tlist): 136 | _group_left_right(tlist, T.Assignment, ':=', sql.Assignment, 137 | include_semicolon=True) 138 | 139 | 140 | def group_comparison(tlist): 141 | 142 | def _parts_valid(token): 143 | return (token.ttype in (T.String.Symbol, T.String.Single, 144 | T.Name, T.Number, T.Number.Float, 145 | T.Number.Integer, T.Literal, 146 | T.Literal.Number.Integer, T.Name.Placeholder) 147 | or isinstance(token, (sql.Identifier, sql.Parenthesis)) 148 | or (token.ttype is T.Keyword 149 | and token.value.upper() in ['NULL', ])) 150 | _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison, 151 | check_left=_parts_valid, check_right=_parts_valid) 152 | 153 | 154 | def group_case(tlist): 155 | _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case, 156 | include_semicolon=True, recurse=True) 157 | 158 | 159 | def group_identifier(tlist): 160 | def _consume_cycle(tl, i): 161 | # TODO: Usage of Wildcard token is ambivalent here. 162 | x = itertools.cycle(( 163 | lambda y: (y.match(T.Punctuation, '.') 164 | or y.ttype in (T.Operator, 165 | T.Wildcard, 166 | T.Name) 167 | or isinstance(y, sql.SquareBrackets)), 168 | lambda y: (y.ttype in (T.String.Symbol, 169 | T.Name, 170 | T.Wildcard, 171 | T.Literal.String.Single, 172 | T.Literal.Number.Integer, 173 | T.Literal.Number.Float) 174 | or isinstance(y, (sql.Parenthesis, 175 | sql.SquareBrackets, 176 | sql.Function))))) 177 | for t in tl.tokens[i:]: 178 | # Don't take whitespaces into account. 179 | if t.ttype is T.Whitespace: 180 | yield t 181 | continue 182 | if next(x)(t): 183 | yield t 184 | else: 185 | if isinstance(t, sql.Comment) and t.is_multiline(): 186 | yield t 187 | return 188 | 189 | def _next_token(tl, i): 190 | # chooses the next token. if two tokens are found then the 191 | # first is returned. 
192 | t1 = tl.token_next_by_type( 193 | i, (T.String.Symbol, T.Name, T.Literal.Number.Integer, 194 | T.Literal.Number.Float)) 195 | 196 | i1 = tl.token_index(t1, start=i) if t1 else None 197 | t2_end = None if i1 is None else i1 + 1 198 | t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis), end=t2_end) 199 | 200 | if t1 and t2: 201 | i2 = tl.token_index(t2, start=i) 202 | if i1 > i2: 203 | return t2 204 | else: 205 | return t1 206 | elif t1: 207 | return t1 208 | else: 209 | return t2 210 | 211 | # bottom up approach: group subgroups first 212 | [group_identifier(sgroup) for sgroup in tlist.get_sublists() 213 | if not isinstance(sgroup, sql.Identifier)] 214 | 215 | # real processing 216 | idx = 0 217 | token = _next_token(tlist, idx) 218 | while token: 219 | identifier_tokens = [token] + list( 220 | _consume_cycle(tlist, 221 | tlist.token_index(token, start=idx) + 1)) 222 | # remove trailing whitespace 223 | if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace: 224 | identifier_tokens = identifier_tokens[:-1] 225 | if not (len(identifier_tokens) == 1 226 | and (isinstance(identifier_tokens[0], (sql.Function, sql.Parenthesis)) 227 | or identifier_tokens[0].ttype in (T.Literal.Number.Integer, 228 | T.Literal.Number.Float))): 229 | group = tlist.group_tokens(sql.Identifier, identifier_tokens) 230 | idx = tlist.token_index(group, start=idx) + 1 231 | else: 232 | idx += 1 233 | token = _next_token(tlist, idx) 234 | 235 | 236 | def group_identifier_list(tlist): 237 | [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() 238 | if not isinstance(sgroup, sql.IdentifierList)] 239 | # Allowed list items 240 | fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function, 241 | sql.Case)), 242 | lambda t: t.is_whitespace(), 243 | lambda t: t.ttype == T.Name, 244 | lambda t: t.ttype == T.Wildcard, 245 | lambda t: t.match(T.Keyword, 'null'), 246 | lambda t: t.match(T.Keyword, 'role'), 247 | lambda t: t.ttype == T.Number.Integer, 248 | lambda t: t.ttype == T.String.Single, 249 | lambda t: t.ttype == T.Name.Placeholder, 250 | lambda t: t.ttype == T.Keyword, 251 | lambda t: isinstance(t, sql.Comparison), 252 | lambda t: isinstance(t, sql.Comment), 253 | lambda t: t.ttype == T.Comment.Multiline, 254 | ] 255 | tcomma = tlist.token_next_match(0, T.Punctuation, ',') 256 | start = None 257 | while tcomma is not None: 258 | # Go back one idx to make sure to find the correct tcomma 259 | idx = tlist.token_index(tcomma) 260 | before = tlist.token_prev(idx) 261 | after = tlist.token_next(idx) 262 | # Check if the tokens around tcomma belong to a list 263 | bpassed = apassed = False 264 | for func in fend1_funcs: 265 | if before is not None and func(before): 266 | bpassed = True 267 | if after is not None and func(after): 268 | apassed = True 269 | if not bpassed or not apassed: 270 | # Something's wrong here, skip ahead to next "," 271 | start = None 272 | tcomma = tlist.token_next_match(idx + 1, 273 | T.Punctuation, ',') 274 | else: 275 | if start is None: 276 | start = before 277 | after_idx = tlist.token_index(after, start=idx) 278 | next_ = tlist.token_next(after_idx) 279 | if next_ is None or not next_.match(T.Punctuation, ','): 280 | # Reached the end of the list 281 | tokens = tlist.tokens_between(start, after) 282 | group = tlist.group_tokens(sql.IdentifierList, tokens) 283 | start = None 284 | tcomma = tlist.token_next_match(tlist.token_index(group) + 1, 285 | T.Punctuation, ',') 286 | else: 287 | tcomma = next_ 288 | 289 | 290 | def group_brackets(tlist): 
291 | """Group parentheses () or square brackets [] 292 | 293 | This is just like _group_matching, but complicated by the fact that 294 | round brackets can contain square bracket groups and vice versa 295 | """ 296 | 297 | if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)): 298 | idx = 1 299 | else: 300 | idx = 0 301 | 302 | # Find the first opening bracket 303 | token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) 304 | 305 | while token: 306 | start_val = token.value # either '(' or '[' 307 | if start_val == '(': 308 | end_val = ')' 309 | group_class = sql.Parenthesis 310 | else: 311 | end_val = ']' 312 | group_class = sql.SquareBrackets 313 | 314 | tidx = tlist.token_index(token) 315 | 316 | # Find the corresponding closing bracket 317 | end = _find_matching(tidx, tlist, T.Punctuation, start_val, 318 | T.Punctuation, end_val) 319 | 320 | if end is None: 321 | idx = tidx + 1 322 | else: 323 | group = tlist.group_tokens(group_class, 324 | tlist.tokens_between(token, end)) 325 | 326 | # Check for nested bracket groups within this group 327 | group_brackets(group) 328 | idx = tlist.token_index(group) + 1 329 | 330 | # Find the next opening bracket 331 | token = tlist.token_next_match(idx, T.Punctuation, ['(', '[']) 332 | 333 | 334 | def group_comments(tlist): 335 | [group_comments(sgroup) for sgroup in tlist.get_sublists() 336 | if not isinstance(sgroup, sql.Comment)] 337 | idx = 0 338 | token = tlist.token_next_by_type(idx, T.Comment) 339 | while token: 340 | tidx = tlist.token_index(token) 341 | end = tlist.token_not_matching(tidx + 1, 342 | [lambda t: t.ttype in T.Comment, 343 | lambda t: t.is_whitespace()]) 344 | if end is None: 345 | idx = tidx + 1 346 | else: 347 | eidx = tlist.token_index(end) 348 | grp_tokens = tlist.tokens_between(token, 349 | tlist.token_prev(eidx, False)) 350 | group = tlist.group_tokens(sql.Comment, grp_tokens) 351 | idx = tlist.token_index(group) 352 | token = tlist.token_next_by_type(idx, T.Comment) 353 | 354 | 355 | # bugfix Oracle10g merge and Oracle connect by 356 | def group_where(tlist): 357 | def end_match(token): 358 | stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING', 359 | 'WHEN', # for Oracle10g merge 360 | 'CONNECT', # for Oracle connect by 361 | ) 362 | if token.match(T.Keyword, stopwords): 363 | return True 364 | if token.match(T.DML, ('DELETE')): # for Oracle10g merge 365 | return True 366 | if token.match(T.DML, ('START')): # for Oracle connect by 367 | return True 368 | 369 | return False 370 | 371 | [group_where(sgroup) for sgroup in tlist.get_sublists() 372 | if not isinstance(sgroup, sql.Where)] 373 | idx = 0 374 | token = tlist.token_next_match(idx, T.Keyword, 'WHERE') 375 | while token: 376 | tidx = tlist.token_index(token) 377 | end = tlist.token_matching(tidx + 1, (end_match, )) 378 | if end is None: 379 | end = tlist._groupable_tokens[-1] 380 | else: 381 | end = tlist.tokens[tlist.token_index(end) - 1] 382 | group = tlist.group_tokens(sql.Where, 383 | tlist.tokens_between(token, end), 384 | ignore_ws=True) 385 | idx = tlist.token_index(group) 386 | token = tlist.token_next_match(idx, T.Keyword, 'WHERE') 387 | 388 | 389 | def group_aliased(tlist): 390 | clss = (sql.Identifier, sql.Function, sql.Case) 391 | [group_aliased(sgroup) for sgroup in tlist.get_sublists() 392 | if not isinstance(sgroup, clss)] 393 | idx = 0 394 | token = tlist.token_next_by_instance(idx, clss) 395 | while token: 396 | next_ = tlist.token_next(tlist.token_index(token)) 397 | if next_ is not None and isinstance(next_, clss): 
398 | # for jython str.upper() 399 | # if not next_.value.upper().startswith('VARCHAR'): 400 | text = next_.value 401 | if sys.version_info[0] < 3 and isinstance(text, str): 402 | text = text.decode('utf-8').upper().encode('utf-8') 403 | if not text.startswith('VARCHAR'): 404 | grp = tlist.tokens_between(token, next_)[1:] 405 | token.tokens.extend(grp) 406 | for t in grp: 407 | tlist.tokens.remove(t) 408 | idx = tlist.token_index(token) + 1 409 | token = tlist.token_next_by_instance(idx, clss) 410 | 411 | 412 | def group_typecasts(tlist): 413 | _group_left_right(tlist, T.Punctuation, '::', sql.Identifier) 414 | 415 | 416 | def group_functions(tlist): 417 | [group_functions(sgroup) for sgroup in tlist.get_sublists() 418 | if not isinstance(sgroup, sql.Function)] 419 | idx = 0 420 | token = tlist.token_next_by_type(idx, T.Name) 421 | while token: 422 | next_ = tlist.token_next(token) 423 | if not isinstance(next_, sql.Parenthesis): 424 | idx = tlist.token_index(token) + 1 425 | else: 426 | func = tlist.group_tokens(sql.Function, 427 | tlist.tokens_between(token, next_)) 428 | idx = tlist.token_index(func) + 1 429 | token = tlist.token_next_by_type(idx, T.Name) 430 | 431 | 432 | def group_order(tlist): 433 | idx = 0 434 | token = tlist.token_next_by_type(idx, T.Keyword.Order) 435 | while token: 436 | prev = tlist.token_prev(token) 437 | if isinstance(prev, sql.Identifier): 438 | ido = tlist.group_tokens(sql.Identifier, 439 | tlist.tokens_between(prev, token)) 440 | idx = tlist.token_index(ido) + 1 441 | else: 442 | idx = tlist.token_index(token) + 1 443 | token = tlist.token_next_by_type(idx, T.Keyword.Order) 444 | 445 | 446 | def align_comments(tlist): 447 | [align_comments(sgroup) for sgroup in tlist.get_sublists()] 448 | idx = 0 449 | token = tlist.token_next_by_instance(idx, sql.Comment) 450 | while token: 451 | before = tlist.token_prev(tlist.token_index(token)) 452 | if isinstance(before, sql.TokenList): 453 | grp = tlist.tokens_between(before, token)[1:] 454 | before.tokens.extend(grp) 455 | for t in grp: 456 | tlist.tokens.remove(t) 457 | idx = tlist.token_index(before) + 1 458 | else: 459 | idx = tlist.token_index(token) + 1 460 | token = tlist.token_next_by_instance(idx, sql.Comment) 461 | 462 | 463 | def group(tlist): 464 | for func in [ 465 | group_comments, 466 | group_brackets, 467 | group_functions, 468 | # bugfix Oracle10g merge 469 | # group_where, 470 | # group_case, 471 | group_case, 472 | group_where, 473 | 474 | group_identifier, 475 | group_order, 476 | group_typecasts, 477 | group_as, 478 | group_aliased, 479 | group_assignment, 480 | group_comparison, 481 | align_comments, 482 | group_identifier_list, 483 | group_if, 484 | group_for, 485 | group_foreach, 486 | group_begin, 487 | ]: 488 | func(tlist) 489 | -------------------------------------------------------------------------------- /sqlparse/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
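# ----------------------------------------------------------------------
# Illustrative usage of the grouping pipeline that group() above wires
# together (a sketch, assuming the bundled sqlparse behaves like stock
# python-sqlparse): after parsing, a WHERE clause arrives as one
# sql.Where group instead of loose tokens, courtesy of group_where.
import sqlparse
from sqlparse import sql

stmt = sqlparse.parse('SELECT a FROM t WHERE a = 1')[0]
assert any(isinstance(tok, sql.Where) for tok in stmt.tokens)
# ----------------------------------------------------------------------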
5 | 6 | """Exceptions used in this package.""" 7 | 8 | 9 | class SQLParseError(Exception): 10 | """Base class for exceptions in this module.""" 11 | -------------------------------------------------------------------------------- /sqlparse/filters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import re 4 | 5 | from os.path import abspath, join 6 | 7 | from sqlparse import sql, tokens as T 8 | from sqlparse.engine import FilterStack 9 | from sqlparse.lexer import tokenize 10 | from sqlparse.pipeline import Pipeline 11 | from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation, 12 | String, Whitespace) 13 | from sqlparse.utils import memoize_generator 14 | from sqlparse.utils import split_unquoted_newlines 15 | 16 | 17 | # -------------------------- 18 | # token process 19 | 20 | class _CaseFilter: 21 | 22 | ttype = None 23 | 24 | def __init__(self, case=None): 25 | if case is None: 26 | case = 'upper' 27 | assert case in ['lower', 'upper', 'capitalize'] 28 | # for jython str.upper() 29 | # self.convert = getattr(str, case) 30 | def get_convert(): 31 | import sys 32 | if sys.version_info[0] < 3: 33 | unicodecase = getattr(unicode, case) 34 | def convert(s): 35 | if isinstance(s, str): 36 | return unicodecase(s.decode('utf-8')).encode('utf-8') 37 | else: 38 | return unicodecase(s) 39 | return convert 40 | else: 41 | return getattr(str, case) 42 | self.convert = get_convert() 43 | 44 | def process(self, stack, stream): 45 | for ttype, value in stream: 46 | if ttype in self.ttype: 47 | value = self.convert(value) 48 | yield ttype, value 49 | 50 | 51 | class KeywordCaseFilter(_CaseFilter): 52 | ttype = T.Keyword 53 | 54 | 55 | class IdentifierCaseFilter(_CaseFilter): 56 | ttype = (T.Name, T.String.Symbol) 57 | 58 | def process(self, stack, stream): 59 | for ttype, value in stream: 60 | if ttype in self.ttype and not value.strip()[0] == '"': 61 | value = self.convert(value) 62 | yield ttype, value 63 | 64 | 65 | class TruncateStringFilter: 66 | 67 | def __init__(self, width, char): 68 | self.width = max(width, 1) 69 | self.char = str(char) 70 | 71 | def process(self, stack, stream): 72 | for ttype, value in stream: 73 | if ttype is T.Literal.String.Single: 74 | if value[:2] == '\'\'': 75 | inner = value[2:-2] 76 | quote = '\'\'' 77 | else: 78 | inner = value[1:-1] 79 | quote = '\'' 80 | if len(inner) > self.width: 81 | value = ''.join((quote, inner[:self.width], self.char, 82 | quote)) 83 | yield ttype, value 84 | 85 | 86 | class GetComments: 87 | """Get the comments from a stack""" 88 | def process(self, stack, stream): 89 | for token_type, value in stream: 90 | if token_type in Comment: 91 | yield token_type, value 92 | 93 | 94 | class StripComments: 95 | """Strip the comments from a stack""" 96 | def process(self, stack, stream): 97 | for token_type, value in stream: 98 | if token_type not in Comment: 99 | yield token_type, value 100 | 101 | 102 | def StripWhitespace(stream): 103 | "Strip the useless whitespaces from a stream leaving only the minimal ones" 104 | last_type = None 105 | has_space = False 106 | ignore_group = frozenset((Comparison, Punctuation)) 107 | 108 | for token_type, value in stream: 109 | # We got a previous token (not empty first ones) 110 | if last_type: 111 | if token_type in Whitespace: 112 | has_space = True 113 | continue 114 | 115 | # Ignore first empty spaces and dot-commas 116 | elif token_type in (Whitespace, Whitespace.Newline, ignore_group): 117 | continue 118 | 119 | 
# Yield a whitespace if it can't be ignored 120 | if has_space: 121 | if not ignore_group.intersection((last_type, token_type)): 122 | yield Whitespace, ' ' 123 | has_space = False 124 | 125 | # Yield the token and set its type for checking with the next one 126 | yield token_type, value 127 | last_type = token_type 128 | 129 | 130 | class IncludeStatement: 131 | """Filter that enable a INCLUDE statement""" 132 | 133 | def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False): 134 | if maxrecursive <= 0: 135 | raise ValueError('Max recursion limit reached') 136 | 137 | self.dirpath = abspath(dirpath) 138 | self.maxRecursive = maxrecursive 139 | self.raiseexceptions = raiseexceptions 140 | 141 | self.detected = False 142 | 143 | @memoize_generator 144 | def process(self, stack, stream): 145 | # Run over all tokens in the stream 146 | for token_type, value in stream: 147 | # INCLUDE statement found, set detected mode 148 | if token_type in Name and value.upper() == 'INCLUDE': 149 | self.detected = True 150 | continue 151 | 152 | # INCLUDE statement was found, parse it 153 | elif self.detected: 154 | # Omit whitespaces 155 | if token_type in Whitespace: 156 | continue 157 | 158 | # Found file path to include 159 | if token_type in String.Symbol: 160 | # if token_type in tokens.String.Symbol: 161 | 162 | # Get path of file to include 163 | path = join(self.dirpath, value[1:-1]) 164 | 165 | try: 166 | f = open(path) 167 | raw_sql = f.read() 168 | f.close() 169 | 170 | # There was a problem loading the include file 171 | except IOError as err: 172 | # Raise the exception to the interpreter 173 | if self.raiseexceptions: 174 | raise 175 | 176 | # Put the exception as a comment on the SQL code 177 | yield Comment, '-- IOError: %s\n' % err 178 | 179 | else: 180 | # Create new FilterStack to parse readed file 181 | # and add all its tokens to the main stack recursively 182 | try: 183 | filtr = IncludeStatement(self.dirpath, 184 | self.maxRecursive - 1, 185 | self.raiseexceptions) 186 | 187 | # Max recursion limit reached 188 | except ValueError as err: 189 | # Raise the exception to the interpreter 190 | if self.raiseexceptions: 191 | raise 192 | 193 | # Put the exception as a comment on the SQL code 194 | yield Comment, '-- ValueError: %s\n' % err 195 | 196 | stack = FilterStack() 197 | stack.preprocess.append(filtr) 198 | 199 | for tv in stack.run(raw_sql): 200 | yield tv 201 | 202 | # Set normal mode 203 | self.detected = False 204 | 205 | # Don't include any token while in detected mode 206 | continue 207 | 208 | # Normal token 209 | yield token_type, value 210 | 211 | 212 | # ---------------------- 213 | # statement process 214 | 215 | class StripCommentsFilter: 216 | 217 | def _get_next_comment(self, tlist): 218 | # TODO(andi) Comment types should be unified, see related issue38 219 | token = tlist.token_next_by_instance(0, sql.Comment) 220 | if token is None: 221 | token = tlist.token_next_by_type(0, T.Comment) 222 | return token 223 | 224 | def _process(self, tlist): 225 | token = self._get_next_comment(tlist) 226 | while token: 227 | tidx = tlist.token_index(token) 228 | prev = tlist.token_prev(tidx, False) 229 | next_ = tlist.token_next(tidx, False) 230 | # Replace by whitespace if prev and next exist and if they're not 231 | # whitespaces. This doesn't apply if prev or next is a paranthesis. 
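# ----------------------------------------------------------------------
# Illustrative usage (a sketch, assuming the stock python-sqlparse API):
# token filters such as KeywordCaseFilter consume and yield
# (ttype, value) pairs, so they can be exercised on a hand-made stream;
# StripCommentsFilter above is what format() installs for
# strip_comments=True.
import sqlparse
from sqlparse import tokens as T
from sqlparse.filters import KeywordCaseFilter

upper = KeywordCaseFilter(case='upper')
out = list(upper.process(None, [(T.Keyword, 'select'), (T.Name, 'foo')]))
assert out == [(T.Keyword, 'SELECT'), (T.Name, 'foo')]

assert '/*' not in sqlparse.format('SELECT a /* hint */ FROM t',
                                   strip_comments=True)
# ----------------------------------------------------------------------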
232 | if (prev is not None and next_ is not None 233 | and not prev.is_whitespace() and not next_.is_whitespace() 234 | and not (prev.match(T.Punctuation, '(') 235 | or next_.match(T.Punctuation, ')'))): 236 | tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') 237 | else: 238 | tlist.tokens.pop(tidx) 239 | token = self._get_next_comment(tlist) 240 | 241 | def process(self, stack, stmt): 242 | [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] 243 | self._process(stmt) 244 | 245 | 246 | # for jython (object) 247 | class StripWhitespaceFilter(object): 248 | 249 | def _stripws(self, tlist): 250 | func_name = '_stripws_%s' % tlist.__class__.__name__.lower() 251 | func = getattr(self, func_name, self._stripws_default) 252 | func(tlist) 253 | 254 | def _stripws_default(self, tlist): 255 | last_was_ws = False 256 | for token in tlist.tokens: 257 | if token.is_whitespace(): 258 | if last_was_ws: 259 | token.value = '' 260 | else: 261 | token.value = ' ' 262 | last_was_ws = token.is_whitespace() 263 | 264 | def _stripws_identifierlist(self, tlist): 265 | # Removes newlines before commas, see issue140 266 | last_nl = None 267 | for token in tlist.tokens[:]: 268 | if (token.ttype is T.Punctuation 269 | and token.value == ',' 270 | and last_nl is not None): 271 | tlist.tokens.remove(last_nl) 272 | if token.is_whitespace(): 273 | last_nl = token 274 | else: 275 | last_nl = None 276 | return self._stripws_default(tlist) 277 | 278 | def _stripws_parenthesis(self, tlist): 279 | if tlist.tokens[1].is_whitespace(): 280 | tlist.tokens.pop(1) 281 | if tlist.tokens[-2].is_whitespace(): 282 | tlist.tokens.pop(-2) 283 | self._stripws_default(tlist) 284 | 285 | def process(self, stack, stmt, depth=0): 286 | [self.process(stack, sgroup, depth + 1) 287 | for sgroup in stmt.get_sublists()] 288 | self._stripws(stmt) 289 | if ( 290 | depth == 0 291 | and stmt.tokens 292 | and stmt.tokens[-1].is_whitespace() 293 | ): 294 | stmt.tokens.pop(-1) 295 | 296 | 297 | # for jython (object) 298 | class ReindentFilter(object): 299 | 300 | def __init__(self, width=2, char=' ', line_width=None): 301 | self.width = width 302 | self.char = char 303 | self.indent = 0 304 | self.offset = 0 305 | self.line_width = line_width 306 | self._curr_stmt = None 307 | self._last_stmt = None 308 | 309 | def _flatten_up_to_token(self, token): 310 | """Yields all tokens up to token plus the next one.""" 311 | # helper for _get_offset 312 | iterator = self._curr_stmt.flatten() 313 | for t in iterator: 314 | yield t 315 | if t == token: 316 | raise StopIteration 317 | 318 | def _get_offset(self, token): 319 | raw = ''.join(map(str, self._flatten_up_to_token(token))) 320 | line = raw.splitlines()[-1] 321 | # Now take current offset into account and return relative offset. 
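# ----------------------------------------------------------------------
# Illustrative usage (a sketch, assuming the stock python-sqlparse API):
# ReindentFilter is installed by format() when reindent=True, and the
# indent_char/indent_width options map to the char/width arguments above.
import sqlparse

print(sqlparse.format('select a, b from t where a = 1',
                      reindent=True, indent_width=4))
# Expected shape (exact whitespace can vary between versions):
#   select a,
#          b
#   from t
#   where a = 1
#
# Aside: _flatten_up_to_token above ends its generator with
# `raise StopIteration`; under PEP 479 (Python >= 3.7) that would surface
# as RuntimeError, but the Sublime Text 3 plugin host this package
# targets runs an older Python where it is still fine.
# ----------------------------------------------------------------------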
322 | full_offset = len(line) - len(self.char * (self.width * self.indent)) 323 | return full_offset - self.offset 324 | 325 | def nl(self): 326 | # TODO: newline character should be configurable 327 | space = (self.char * ((self.indent * self.width) + self.offset)) 328 | # Detect runaway indenting due to parsing errors 329 | if len(space) > 200: 330 | # something seems to be wrong, flip back 331 | self.indent = self.offset = 0 332 | space = (self.char * ((self.indent * self.width) + self.offset)) 333 | ws = '\n' + space 334 | return sql.Token(T.Whitespace, ws) 335 | 336 | def _split_kwds(self, tlist): 337 | split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR', 338 | 'GROUP', 'ORDER', 'UNION', 'VALUES', 339 | 'SET', 'BETWEEN', 'EXCEPT', 'HAVING') 340 | 341 | def _next_token(i): 342 | t = tlist.token_next_match(i, T.Keyword, split_words, 343 | regex=True) 344 | if t and t.value.upper() == 'BETWEEN': 345 | t = _next_token(tlist.token_index(t) + 1) 346 | if t and t.value.upper() == 'AND': 347 | t = _next_token(tlist.token_index(t) + 1) 348 | return t 349 | 350 | idx = 0 351 | token = _next_token(idx) 352 | added = set() 353 | while token: 354 | prev = tlist.token_prev(tlist.token_index(token), False) 355 | offset = 1 356 | if prev and prev.is_whitespace() and prev not in added: 357 | tlist.tokens.pop(tlist.token_index(prev)) 358 | offset += 1 359 | uprev = str(prev) 360 | if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))): 361 | nl = tlist.token_next(token) 362 | else: 363 | nl = self.nl() 364 | added.add(nl) 365 | tlist.insert_before(token, nl) 366 | offset += 1 367 | token = _next_token(tlist.token_index(nl) + offset) 368 | 369 | def _split_statements(self, tlist): 370 | idx = 0 371 | token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) 372 | while token: 373 | prev = tlist.token_prev(tlist.token_index(token), False) 374 | if prev and prev.is_whitespace(): 375 | tlist.tokens.pop(tlist.token_index(prev)) 376 | # only break if it's not the first token 377 | if prev: 378 | nl = self.nl() 379 | tlist.insert_before(token, nl) 380 | token = tlist.token_next_by_type(tlist.token_index(token) + 1, 381 | (T.Keyword.DDL, T.Keyword.DML)) 382 | 383 | def _process(self, tlist): 384 | func_name = '_process_%s' % tlist.__class__.__name__.lower() 385 | func = getattr(self, func_name, self._process_default) 386 | func(tlist) 387 | 388 | def _process_where(self, tlist): 389 | token = tlist.token_next_match(0, T.Keyword, 'WHERE') 390 | try: 391 | tlist.insert_before(token, self.nl()) 392 | except ValueError: # issue121, errors in statement 393 | pass 394 | self.indent += 1 395 | self._process_default(tlist) 396 | self.indent -= 1 397 | 398 | def _process_having(self, tlist): 399 | token = tlist.token_next_match(0, T.Keyword, 'HAVING') 400 | try: 401 | tlist.insert_before(token, self.nl()) 402 | except ValueError: # issue121, errors in statement 403 | pass 404 | self.indent += 1 405 | self._process_default(tlist) 406 | self.indent -= 1 407 | 408 | def _process_parenthesis(self, tlist): 409 | first = tlist.token_next(0) 410 | indented = False 411 | if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): 412 | self.indent += 1 413 | tlist.tokens.insert(0, self.nl()) 414 | indented = True 415 | num_offset = self._get_offset( 416 | tlist.token_next_match(0, T.Punctuation, '(')) 417 | self.offset += num_offset 418 | self._process_default(tlist, stmts=not indented) 419 | if indented: 420 | self.indent -= 1 421 | self.offset -= num_offset 422 | 423 | def 
_process_identifierlist(self, tlist): 424 | identifiers = list(tlist.get_identifiers()) 425 | if len(identifiers) > 1 and not tlist.within(sql.Function): 426 | first = list(identifiers[0].flatten())[0] 427 | if self.char == '\t': 428 | # when using tabs we don't count the actual word length 429 | # in spaces. 430 | num_offset = 1 431 | else: 432 | num_offset = self._get_offset(first) - len(first.value) 433 | self.offset += num_offset 434 | for token in identifiers[1:]: 435 | tlist.insert_before(token, self.nl()) 436 | self.offset -= num_offset 437 | self._process_default(tlist) 438 | 439 | def _process_case(self, tlist): 440 | is_first = True 441 | num_offset = None 442 | case = tlist.tokens[0] 443 | outer_offset = self._get_offset(case) - len(case.value) 444 | self.offset += outer_offset 445 | for cond, value in tlist.get_cases(): 446 | if is_first: 447 | tcond = list(cond[0].flatten())[0] 448 | is_first = False 449 | num_offset = self._get_offset(tcond) - len(tcond.value) 450 | self.offset += num_offset 451 | continue 452 | if cond is None: 453 | token = value[0] 454 | else: 455 | token = cond[0] 456 | tlist.insert_before(token, self.nl()) 457 | # Line breaks on group level are done. Now let's add an offset of 458 | # 5 (=length of "when", "then", "else") and process subgroups. 459 | self.offset += 5 460 | self._process_default(tlist) 461 | self.offset -= 5 462 | if num_offset is not None: 463 | self.offset -= num_offset 464 | end = tlist.token_next_match(0, T.Keyword, 'END') 465 | tlist.insert_before(end, self.nl()) 466 | self.offset -= outer_offset 467 | 468 | def _process_default(self, tlist, stmts=True, kwds=True): 469 | if stmts: 470 | self._split_statements(tlist) 471 | if kwds: 472 | self._split_kwds(tlist) 473 | [self._process(sgroup) for sgroup in tlist.get_sublists()] 474 | 475 | def process(self, stack, stmt): 476 | if isinstance(stmt, sql.Statement): 477 | self._curr_stmt = stmt 478 | self._process(stmt) 479 | if isinstance(stmt, sql.Statement): 480 | if self._last_stmt is not None: 481 | if str(self._last_stmt).endswith('\n'): 482 | nl = '\n' 483 | else: 484 | nl = '\n\n' 485 | stmt.tokens.insert( 486 | 0, sql.Token(T.Whitespace, nl)) 487 | if self._last_stmt != stmt: 488 | self._last_stmt = stmt 489 | 490 | 491 | # FIXME: Doesn't work ;) 492 | class RightMarginFilter: 493 | 494 | keep_together = ( 495 | # sql.TypeCast, sql.Identifier, sql.Alias, 496 | ) 497 | 498 | def __init__(self, width=79): 499 | self.width = width 500 | self.line = '' 501 | 502 | def _process(self, stack, group, stream): 503 | for token in stream: 504 | if token.is_whitespace() and '\n' in token.value: 505 | if token.value.endswith('\n'): 506 | self.line = '' 507 | else: 508 | self.line = token.value.splitlines()[-1] 509 | elif (token.is_group() 510 | and not token.__class__ in self.keep_together): 511 | token.tokens = self._process(stack, token, token.tokens) 512 | else: 513 | val = str(token) 514 | if len(self.line) + len(val) > self.width: 515 | match = re.search('^ +', self.line) 516 | if match is not None: 517 | indent = match.group() 518 | else: 519 | indent = '' 520 | yield sql.Token(T.Whitespace, '\n%s' % indent) 521 | self.line = indent 522 | self.line += val 523 | yield token 524 | 525 | def process(self, stack, group): 526 | return 527 | group.tokens = self._process(stack, group, group.tokens) 528 | 529 | 530 | class ColumnsSelect: 531 | """Get the columns names of a SELECT query""" 532 | def process(self, stack, stream): 533 | mode = 0 534 | oldValue = "" 535 | parenthesis = 0 536 | 537 | 
for token_type, value in stream: 538 | # Ignore comments 539 | if token_type in Comment: 540 | continue 541 | 542 | # We have not detected a SELECT statement 543 | if mode == 0: 544 | if token_type in Keyword and value == 'SELECT': 545 | mode = 1 546 | 547 | # We have detected a SELECT statement 548 | elif mode == 1: 549 | if value == 'FROM': 550 | if oldValue: 551 | yield oldValue 552 | 553 | mode = 3 # Columns have been checked 554 | 555 | elif value == 'AS': 556 | oldValue = "" 557 | mode = 2 558 | 559 | elif (token_type == Punctuation 560 | and value == ',' and not parenthesis): 561 | if oldValue: 562 | yield oldValue 563 | oldValue = "" 564 | 565 | elif token_type not in Whitespace: 566 | if value == '(': 567 | parenthesis += 1 568 | elif value == ')': 569 | parenthesis -= 1 570 | 571 | oldValue += value 572 | 573 | # We are processing an AS keyword 574 | elif mode == 2: 575 | # We check also for Keywords because a bug in SQLParse 576 | if token_type == Name or token_type == Keyword: 577 | yield value 578 | mode = 1 579 | 580 | 581 | # --------------------------- 582 | # postprocess 583 | 584 | class SerializerUnicode: 585 | 586 | def process(self, stack, stmt): 587 | raw = str(stmt) 588 | lines = split_unquoted_newlines(raw) 589 | res = '\n'.join(line.rstrip() for line in lines) 590 | return res 591 | 592 | 593 | def Tokens2Unicode(stream): 594 | result = "" 595 | 596 | for _, value in stream: 597 | result += str(value) 598 | 599 | return result 600 | 601 | 602 | class OutputFilter: 603 | varname_prefix = '' 604 | 605 | def __init__(self, varname='sql'): 606 | self.varname = self.varname_prefix + varname 607 | self.count = 0 608 | 609 | def _process(self, stream, varname, has_nl): 610 | raise NotImplementedError 611 | 612 | def process(self, stack, stmt): 613 | self.count += 1 614 | if self.count > 1: 615 | varname = '%s%d' % (self.varname, self.count) 616 | else: 617 | varname = self.varname 618 | 619 | has_nl = len(str(stmt).strip().splitlines()) > 1 620 | stmt.tokens = self._process(stmt.tokens, varname, has_nl) 621 | return stmt 622 | 623 | 624 | class OutputPythonFilter(OutputFilter): 625 | def _process(self, stream, varname, has_nl): 626 | # SQL query asignation to varname 627 | if self.count > 1: 628 | yield sql.Token(T.Whitespace, '\n') 629 | yield sql.Token(T.Name, varname) 630 | yield sql.Token(T.Whitespace, ' ') 631 | yield sql.Token(T.Operator, '=') 632 | yield sql.Token(T.Whitespace, ' ') 633 | if has_nl: 634 | yield sql.Token(T.Operator, '(') 635 | yield sql.Token(T.Text, "'") 636 | 637 | # Print the tokens on the quote 638 | for token in stream: 639 | # Token is a new line separator 640 | if token.is_whitespace() and '\n' in token.value: 641 | # Close quote and add a new line 642 | yield sql.Token(T.Text, " '") 643 | yield sql.Token(T.Whitespace, '\n') 644 | 645 | # Quote header on secondary lines 646 | yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4)) 647 | yield sql.Token(T.Text, "'") 648 | 649 | # Indentation 650 | after_lb = token.value.split('\n', 1)[1] 651 | if after_lb: 652 | yield sql.Token(T.Whitespace, after_lb) 653 | continue 654 | 655 | # Token has escape chars 656 | elif "'" in token.value: 657 | token.value = token.value.replace("'", "\\'") 658 | 659 | # Put the token 660 | yield sql.Token(T.Text, token.value) 661 | 662 | # Close quote 663 | yield sql.Token(T.Text, "'") 664 | if has_nl: 665 | yield sql.Token(T.Operator, ')') 666 | 667 | 668 | class OutputPHPFilter(OutputFilter): 669 | varname_prefix = '$' 670 | 671 | def _process(self, stream, 
varname, has_nl): 672 | # SQL query asignation to varname (quote header) 673 | if self.count > 1: 674 | yield sql.Token(T.Whitespace, '\n') 675 | yield sql.Token(T.Name, varname) 676 | yield sql.Token(T.Whitespace, ' ') 677 | if has_nl: 678 | yield sql.Token(T.Whitespace, ' ') 679 | yield sql.Token(T.Operator, '=') 680 | yield sql.Token(T.Whitespace, ' ') 681 | yield sql.Token(T.Text, '"') 682 | 683 | # Print the tokens on the quote 684 | for token in stream: 685 | # Token is a new line separator 686 | if token.is_whitespace() and '\n' in token.value: 687 | # Close quote and add a new line 688 | yield sql.Token(T.Text, ' ";') 689 | yield sql.Token(T.Whitespace, '\n') 690 | 691 | # Quote header on secondary lines 692 | yield sql.Token(T.Name, varname) 693 | yield sql.Token(T.Whitespace, ' ') 694 | yield sql.Token(T.Operator, '.=') 695 | yield sql.Token(T.Whitespace, ' ') 696 | yield sql.Token(T.Text, '"') 697 | 698 | # Indentation 699 | after_lb = token.value.split('\n', 1)[1] 700 | if after_lb: 701 | yield sql.Token(T.Whitespace, after_lb) 702 | continue 703 | 704 | # Token has escape chars 705 | elif '"' in token.value: 706 | token.value = token.value.replace('"', '\\"') 707 | 708 | # Put the token 709 | yield sql.Token(T.Text, token.value) 710 | 711 | # Close quote 712 | yield sql.Token(T.Text, '"') 713 | yield sql.Token(T.Punctuation, ';') 714 | 715 | 716 | class Limit: 717 | """Get the LIMIT of a query. 718 | 719 | If not defined, return -1 (SQL specification for no LIMIT query) 720 | """ 721 | def process(self, stack, stream): 722 | index = 7 723 | stream = list(stream) 724 | stream.reverse() 725 | 726 | # Run over all tokens in the stream from the end 727 | for token_type, value in stream: 728 | index -= 1 729 | 730 | # if index and token_type in Keyword: 731 | if index and token_type in Keyword and value == 'LIMIT': 732 | return stream[4 - index][1] 733 | 734 | return -1 735 | 736 | 737 | def compact(stream): 738 | """Function that return a compacted version of the stream""" 739 | pipe = Pipeline() 740 | 741 | pipe.append(StripComments()) 742 | pipe.append(StripWhitespace) 743 | 744 | return pipe(stream) 745 | -------------------------------------------------------------------------------- /sqlparse/formatter.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
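# ----------------------------------------------------------------------
# Illustrative usage (a sketch, assuming the stock python-sqlparse API):
# OutputPythonFilter/OutputPHPFilter above back the output_format option
# that validate_options() below accepts ('sql', 'python' or 'php').
import sqlparse
from sqlparse.exceptions import SQLParseError

assert (sqlparse.format('select * from foo;', output_format='python')
        == "sql = 'select * from foo;'")

try:
    sqlparse.format('select 1', output_format='ruby')  # not a valid format
except SQLParseError:
    pass
# ----------------------------------------------------------------------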
5 | 
6 | """SQL formatter"""
7 | 
8 | from sqlparse import filters
9 | from sqlparse.exceptions import SQLParseError
10 | 
11 | 
12 | def validate_options(options):
13 |     """Validates options."""
14 |     kwcase = options.get('keyword_case', None)
15 |     if kwcase not in [None, 'upper', 'lower', 'capitalize']:
16 |         raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
17 | 
18 |     idcase = options.get('identifier_case', None)
19 |     if idcase not in [None, 'upper', 'lower', 'capitalize']:
20 |         raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
21 | 
22 |     ofrmt = options.get('output_format', None)
23 |     if ofrmt not in [None, 'sql', 'python', 'php']:
24 |         raise SQLParseError('Unknown output format: %r' % ofrmt)
25 | 
26 |     strip_comments = options.get('strip_comments', False)
27 |     if strip_comments not in [True, False]:
28 |         raise SQLParseError('Invalid value for strip_comments: %r'
29 |                             % strip_comments)
30 | 
31 |     strip_ws = options.get('strip_whitespace', False)
32 |     if strip_ws not in [True, False]:
33 |         raise SQLParseError('Invalid value for strip_whitespace: %r'
34 |                             % strip_ws)
35 | 
36 |     truncate_strings = options.get('truncate_strings', None)
37 |     if truncate_strings is not None:
38 |         try:
39 |             truncate_strings = int(truncate_strings)
40 |         except (ValueError, TypeError):
41 |             raise SQLParseError('Invalid value for truncate_strings: %r'
42 |                                 % truncate_strings)
43 |         if truncate_strings <= 1:
44 |             raise SQLParseError('Invalid value for truncate_strings: %r'
45 |                                 % truncate_strings)
46 |         options['truncate_strings'] = truncate_strings
47 |         options['truncate_char'] = options.get('truncate_char', '[...]')
48 | 
49 |     reindent = options.get('reindent', False)
50 |     if reindent not in [True, False]:
51 |         raise SQLParseError('Invalid value for reindent: %r'
52 |                             % reindent)
53 |     elif reindent:
54 |         options['strip_whitespace'] = True
55 |     indent_tabs = options.get('indent_tabs', False)
56 |     if indent_tabs not in [True, False]:
57 |         raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
58 |     elif indent_tabs:
59 |         options['indent_char'] = '\t'
60 |     else:
61 |         options['indent_char'] = ' '
62 |     indent_width = options.get('indent_width', 2)
63 |     try:
64 |         indent_width = int(indent_width)
65 |     except (TypeError, ValueError):
66 |         raise SQLParseError('indent_width requires an integer')
67 |     if indent_width < 1:
68 |         raise SQLParseError('indent_width requires a positive integer')
69 |     options['indent_width'] = indent_width
70 | 
71 |     right_margin = options.get('right_margin', None)
72 |     if right_margin is not None:
73 |         try:
74 |             right_margin = int(right_margin)
75 |         except (TypeError, ValueError):
76 |             raise SQLParseError('right_margin requires an integer')
77 |         if right_margin < 10:
78 |             raise SQLParseError('right_margin requires an integer >= 10')
79 |         options['right_margin'] = right_margin
80 | 
81 |     return options
82 | 
83 | 
84 | def build_filter_stack(stack, options):
85 |     """Set up and return a filter stack.
86 | 
87 |     Args:
88 |         stack: :class:`~sqlparse.filters.FilterStack` instance
89 |         options: Dictionary with options validated by validate_options.
90 | """ 91 | # Token filter 92 | if options.get('keyword_case', None): 93 | stack.preprocess.append( 94 | filters.KeywordCaseFilter(options['keyword_case'])) 95 | 96 | if options.get('identifier_case', None): 97 | stack.preprocess.append( 98 | filters.IdentifierCaseFilter(options['identifier_case'])) 99 | 100 | if options.get('truncate_strings', None) is not None: 101 | stack.preprocess.append(filters.TruncateStringFilter( 102 | width=options['truncate_strings'], char=options['truncate_char'])) 103 | 104 | # After grouping 105 | if options.get('strip_comments', False): 106 | stack.enable_grouping() 107 | stack.stmtprocess.append(filters.StripCommentsFilter()) 108 | 109 | if (options.get('strip_whitespace', False) 110 | or options.get('reindent', False)): 111 | stack.enable_grouping() 112 | stack.stmtprocess.append(filters.StripWhitespaceFilter()) 113 | 114 | if options.get('reindent', False): 115 | stack.enable_grouping() 116 | stack.stmtprocess.append( 117 | filters.ReindentFilter(char=options['indent_char'], 118 | width=options['indent_width'])) 119 | 120 | if options.get('right_margin', False): 121 | stack.enable_grouping() 122 | stack.stmtprocess.append( 123 | filters.RightMarginFilter(width=options['right_margin'])) 124 | 125 | # Serializer 126 | if options.get('output_format'): 127 | frmt = options['output_format'] 128 | if frmt.lower() == 'php': 129 | fltr = filters.OutputPHPFilter() 130 | elif frmt.lower() == 'python': 131 | fltr = filters.OutputPythonFilter() 132 | else: 133 | fltr = None 134 | if fltr is not None: 135 | stack.postprocess.append(fltr) 136 | 137 | return stack 138 | -------------------------------------------------------------------------------- /sqlparse/functions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 17/05/2012 3 | 4 | @author: piranna 5 | 6 | Several utility functions to extract info from the SQL sentences 7 | ''' 8 | 9 | from sqlparse.filters import ColumnsSelect, Limit 10 | from sqlparse.pipeline import Pipeline 11 | from sqlparse.tokens import Keyword, Whitespace 12 | 13 | 14 | def getlimit(stream): 15 | """Function that return the LIMIT of a input SQL """ 16 | pipe = Pipeline() 17 | 18 | pipe.append(Limit()) 19 | 20 | result = pipe(stream) 21 | try: 22 | return int(result) 23 | except ValueError: 24 | return result 25 | 26 | 27 | def getcolumns(stream): 28 | """Function that return the colums of a SELECT query""" 29 | pipe = Pipeline() 30 | 31 | pipe.append(ColumnsSelect()) 32 | 33 | return pipe(stream) 34 | 35 | 36 | class IsType(object): 37 | """Functor that return is the statement is of a specific type""" 38 | def __init__(self, type): 39 | self.type = type 40 | 41 | def __call__(self, stream): 42 | for token_type, value in stream: 43 | if token_type not in Whitespace: 44 | return token_type in Keyword and value == self.type 45 | -------------------------------------------------------------------------------- /sqlparse/keywords.py: -------------------------------------------------------------------------------- 1 | from sqlparse import tokens 2 | 3 | KEYWORDS = { 4 | 'ABORT': tokens.Keyword, 5 | 'ABS': tokens.Keyword, 6 | 'ABSOLUTE': tokens.Keyword, 7 | 'ACCESS': tokens.Keyword, 8 | 'ADA': tokens.Keyword, 9 | 'ADD': tokens.Keyword, 10 | 'ADMIN': tokens.Keyword, 11 | 'AFTER': tokens.Keyword, 12 | 'AGGREGATE': tokens.Keyword, 13 | 'ALIAS': tokens.Keyword, 14 | 'ALL': tokens.Keyword, 15 | 'ALLOCATE': tokens.Keyword, 16 | 'ANALYSE': tokens.Keyword, 17 | 'ANALYZE': tokens.Keyword, 18 | 
'ANY': tokens.Keyword, 19 | 'ARE': tokens.Keyword, 20 | 'ASC': tokens.Keyword.Order, 21 | 'ASENSITIVE': tokens.Keyword, 22 | 'ASSERTION': tokens.Keyword, 23 | 'ASSIGNMENT': tokens.Keyword, 24 | 'ASYMMETRIC': tokens.Keyword, 25 | 'AT': tokens.Keyword, 26 | 'ATOMIC': tokens.Keyword, 27 | 'AUTHORIZATION': tokens.Keyword, 28 | 'AVG': tokens.Keyword, 29 | 30 | 'BACKWARD': tokens.Keyword, 31 | 'BEFORE': tokens.Keyword, 32 | 'BEGIN': tokens.Keyword, 33 | 'BETWEEN': tokens.Keyword, 34 | 'BITVAR': tokens.Keyword, 35 | 'BIT_LENGTH': tokens.Keyword, 36 | 'BOTH': tokens.Keyword, 37 | 'BREADTH': tokens.Keyword, 38 | 39 | # 'C': tokens.Keyword, # most likely this is an alias 40 | 'CACHE': tokens.Keyword, 41 | 'CALL': tokens.Keyword, 42 | 'CALLED': tokens.Keyword, 43 | 'CARDINALITY': tokens.Keyword, 44 | 'CASCADE': tokens.Keyword, 45 | 'CASCADED': tokens.Keyword, 46 | 'CAST': tokens.Keyword, 47 | 'CATALOG': tokens.Keyword, 48 | 'CATALOG_NAME': tokens.Keyword, 49 | 'CHAIN': tokens.Keyword, 50 | 'CHARACTERISTICS': tokens.Keyword, 51 | 'CHARACTER_LENGTH': tokens.Keyword, 52 | 'CHARACTER_SET_CATALOG': tokens.Keyword, 53 | 'CHARACTER_SET_NAME': tokens.Keyword, 54 | 'CHARACTER_SET_SCHEMA': tokens.Keyword, 55 | 'CHAR_LENGTH': tokens.Keyword, 56 | 'CHECK': tokens.Keyword, 57 | 'CHECKED': tokens.Keyword, 58 | 'CHECKPOINT': tokens.Keyword, 59 | 'CLASS': tokens.Keyword, 60 | 'CLASS_ORIGIN': tokens.Keyword, 61 | 'CLOB': tokens.Keyword, 62 | 'CLOSE': tokens.Keyword, 63 | 'CLUSTER': tokens.Keyword, 64 | 'COALESCE': tokens.Keyword, 65 | 'COBOL': tokens.Keyword, 66 | 'COLLATE': tokens.Keyword, 67 | 'COLLATION': tokens.Keyword, 68 | 'COLLATION_CATALOG': tokens.Keyword, 69 | 'COLLATION_NAME': tokens.Keyword, 70 | 'COLLATION_SCHEMA': tokens.Keyword, 71 | 'COLLECT': tokens.Keyword, 72 | 'COLUMN': tokens.Keyword, 73 | 'COLUMN_NAME': tokens.Keyword, 74 | 'COMMAND_FUNCTION': tokens.Keyword, 75 | 'COMMAND_FUNCTION_CODE': tokens.Keyword, 76 | 'COMMENT': tokens.Keyword, 77 | 'COMMIT': tokens.Keyword.DML, 78 | 'COMMITTED': tokens.Keyword, 79 | 'COMPLETION': tokens.Keyword, 80 | 'CONDITION_NUMBER': tokens.Keyword, 81 | 'CONNECT': tokens.Keyword, 82 | 'CONNECTION': tokens.Keyword, 83 | 'CONNECTION_NAME': tokens.Keyword, 84 | 'CONSTRAINT': tokens.Keyword, 85 | 'CONSTRAINTS': tokens.Keyword, 86 | 'CONSTRAINT_CATALOG': tokens.Keyword, 87 | 'CONSTRAINT_NAME': tokens.Keyword, 88 | 'CONSTRAINT_SCHEMA': tokens.Keyword, 89 | 'CONSTRUCTOR': tokens.Keyword, 90 | 'CONTAINS': tokens.Keyword, 91 | 'CONTINUE': tokens.Keyword, 92 | 'CONVERSION': tokens.Keyword, 93 | 'CONVERT': tokens.Keyword, 94 | 'COPY': tokens.Keyword, 95 | 'CORRESPONTING': tokens.Keyword, 96 | 'COUNT': tokens.Keyword, 97 | 'CREATEDB': tokens.Keyword, 98 | 'CREATEUSER': tokens.Keyword, 99 | 'CROSS': tokens.Keyword, 100 | 'CUBE': tokens.Keyword, 101 | 'CURRENT': tokens.Keyword, 102 | 'CURRENT_DATE': tokens.Keyword, 103 | 'CURRENT_PATH': tokens.Keyword, 104 | 'CURRENT_ROLE': tokens.Keyword, 105 | 'CURRENT_TIME': tokens.Keyword, 106 | 'CURRENT_TIMESTAMP': tokens.Keyword, 107 | 'CURRENT_USER': tokens.Keyword, 108 | 'CURSOR': tokens.Keyword, 109 | 'CURSOR_NAME': tokens.Keyword, 110 | 'CYCLE': tokens.Keyword, 111 | 112 | 'DATA': tokens.Keyword, 113 | 'DATABASE': tokens.Keyword, 114 | 'DATETIME_INTERVAL_CODE': tokens.Keyword, 115 | 'DATETIME_INTERVAL_PRECISION': tokens.Keyword, 116 | 'DAY': tokens.Keyword, 117 | 'DEALLOCATE': tokens.Keyword, 118 | 'DECLARE': tokens.Keyword, 119 | 'DEFAULT': tokens.Keyword, 120 | 'DEFAULTS': tokens.Keyword, 121 | 'DEFERRABLE': tokens.Keyword, 122 | 
'DEFERRED': tokens.Keyword, 123 | 'DEFINED': tokens.Keyword, 124 | 'DEFINER': tokens.Keyword, 125 | 'DELIMITER': tokens.Keyword, 126 | 'DELIMITERS': tokens.Keyword, 127 | 'DEREF': tokens.Keyword, 128 | 'DESC': tokens.Keyword.Order, 129 | 'DESCRIBE': tokens.Keyword, 130 | 'DESCRIPTOR': tokens.Keyword, 131 | 'DESTROY': tokens.Keyword, 132 | 'DESTRUCTOR': tokens.Keyword, 133 | 'DETERMINISTIC': tokens.Keyword, 134 | 'DIAGNOSTICS': tokens.Keyword, 135 | 'DICTIONARY': tokens.Keyword, 136 | 'DISCONNECT': tokens.Keyword, 137 | 'DISPATCH': tokens.Keyword, 138 | 'DO': tokens.Keyword, 139 | 'DOMAIN': tokens.Keyword, 140 | 'DYNAMIC': tokens.Keyword, 141 | 'DYNAMIC_FUNCTION': tokens.Keyword, 142 | 'DYNAMIC_FUNCTION_CODE': tokens.Keyword, 143 | 144 | 'EACH': tokens.Keyword, 145 | 'ENCODING': tokens.Keyword, 146 | 'ENCRYPTED': tokens.Keyword, 147 | 'END-EXEC': tokens.Keyword, 148 | 'EQUALS': tokens.Keyword, 149 | 'ESCAPE': tokens.Keyword, 150 | 'EVERY': tokens.Keyword, 151 | 'EXCEPT': tokens.Keyword, 152 | 'ESCEPTION': tokens.Keyword, 153 | 'EXCLUDING': tokens.Keyword, 154 | 'EXCLUSIVE': tokens.Keyword, 155 | 'EXEC': tokens.Keyword, 156 | 'EXECUTE': tokens.Keyword, 157 | 'EXISTING': tokens.Keyword, 158 | 'EXISTS': tokens.Keyword, 159 | 'EXTERNAL': tokens.Keyword, 160 | 'EXTRACT': tokens.Keyword, 161 | 162 | 'FALSE': tokens.Keyword, 163 | 'FETCH': tokens.Keyword, 164 | 'FINAL': tokens.Keyword, 165 | 'FIRST': tokens.Keyword, 166 | 'FORCE': tokens.Keyword, 167 | 'FOREACH': tokens.Keyword, 168 | 'FOREIGN': tokens.Keyword, 169 | 'FORTRAN': tokens.Keyword, 170 | 'FORWARD': tokens.Keyword, 171 | 'FOUND': tokens.Keyword, 172 | 'FREE': tokens.Keyword, 173 | 'FREEZE': tokens.Keyword, 174 | 'FULL': tokens.Keyword, 175 | 'FUNCTION': tokens.Keyword, 176 | 177 | # 'G': tokens.Keyword, 178 | 'GENERAL': tokens.Keyword, 179 | 'GENERATED': tokens.Keyword, 180 | 'GET': tokens.Keyword, 181 | 'GLOBAL': tokens.Keyword, 182 | 'GO': tokens.Keyword, 183 | 'GOTO': tokens.Keyword, 184 | 'GRANT': tokens.Keyword, 185 | 'GRANTED': tokens.Keyword, 186 | 'GROUPING': tokens.Keyword, 187 | 188 | 'HANDLER': tokens.Keyword, 189 | 'HAVING': tokens.Keyword, 190 | 'HIERARCHY': tokens.Keyword, 191 | 'HOLD': tokens.Keyword, 192 | 'HOST': tokens.Keyword, 193 | 194 | 'IDENTITY': tokens.Keyword, 195 | 'IGNORE': tokens.Keyword, 196 | 'ILIKE': tokens.Keyword, 197 | 'IMMEDIATE': tokens.Keyword, 198 | 'IMMUTABLE': tokens.Keyword, 199 | 200 | 'IMPLEMENTATION': tokens.Keyword, 201 | 'IMPLICIT': tokens.Keyword, 202 | 'INCLUDING': tokens.Keyword, 203 | 'INCREMENT': tokens.Keyword, 204 | 'INDEX': tokens.Keyword, 205 | 206 | 'INDITCATOR': tokens.Keyword, 207 | 'INFIX': tokens.Keyword, 208 | 'INHERITS': tokens.Keyword, 209 | 'INITIALIZE': tokens.Keyword, 210 | 'INITIALLY': tokens.Keyword, 211 | 'INOUT': tokens.Keyword, 212 | 'INPUT': tokens.Keyword, 213 | 'INSENSITIVE': tokens.Keyword, 214 | 'INSTANTIABLE': tokens.Keyword, 215 | 'INSTEAD': tokens.Keyword, 216 | 'INTERSECT': tokens.Keyword, 217 | 'INTO': tokens.Keyword, 218 | 'INVOKER': tokens.Keyword, 219 | 'IS': tokens.Keyword, 220 | 'ISNULL': tokens.Keyword, 221 | 'ISOLATION': tokens.Keyword, 222 | 'ITERATE': tokens.Keyword, 223 | 224 | # 'K': tokens.Keyword, 225 | 'KEY': tokens.Keyword, 226 | 'KEY_MEMBER': tokens.Keyword, 227 | 'KEY_TYPE': tokens.Keyword, 228 | 229 | 'LANCOMPILER': tokens.Keyword, 230 | 'LANGUAGE': tokens.Keyword, 231 | 'LARGE': tokens.Keyword, 232 | 'LAST': tokens.Keyword, 233 | 'LATERAL': tokens.Keyword, 234 | 'LEADING': tokens.Keyword, 235 | 'LENGTH': tokens.Keyword, 236 | 'LESS': 
tokens.Keyword, 237 | 'LEVEL': tokens.Keyword, 238 | 'LIMIT': tokens.Keyword, 239 | 'LISTEN': tokens.Keyword, 240 | 'LOAD': tokens.Keyword, 241 | 'LOCAL': tokens.Keyword, 242 | 'LOCALTIME': tokens.Keyword, 243 | 'LOCALTIMESTAMP': tokens.Keyword, 244 | 'LOCATION': tokens.Keyword, 245 | 'LOCATOR': tokens.Keyword, 246 | 'LOCK': tokens.Keyword, 247 | 'LOWER': tokens.Keyword, 248 | 249 | # 'M': tokens.Keyword, 250 | 'MAP': tokens.Keyword, 251 | 'MATCH': tokens.Keyword, 252 | 'MAXVALUE': tokens.Keyword, 253 | 'MESSAGE_LENGTH': tokens.Keyword, 254 | 'MESSAGE_OCTET_LENGTH': tokens.Keyword, 255 | 'MESSAGE_TEXT': tokens.Keyword, 256 | 'METHOD': tokens.Keyword, 257 | 'MINUTE': tokens.Keyword, 258 | 'MINVALUE': tokens.Keyword, 259 | 'MOD': tokens.Keyword, 260 | 'MODE': tokens.Keyword, 261 | 'MODIFIES': tokens.Keyword, 262 | 'MODIFY': tokens.Keyword, 263 | 'MONTH': tokens.Keyword, 264 | 'MORE': tokens.Keyword, 265 | 'MOVE': tokens.Keyword, 266 | 'MUMPS': tokens.Keyword, 267 | 268 | 'NAMES': tokens.Keyword, 269 | 'NATIONAL': tokens.Keyword, 270 | 'NATURAL': tokens.Keyword, 271 | 'NCHAR': tokens.Keyword, 272 | 'NCLOB': tokens.Keyword, 273 | 'NEW': tokens.Keyword, 274 | 'NEXT': tokens.Keyword, 275 | 'NO': tokens.Keyword, 276 | 'NOCREATEDB': tokens.Keyword, 277 | 'NOCREATEUSER': tokens.Keyword, 278 | 'NONE': tokens.Keyword, 279 | 'NOT': tokens.Keyword, 280 | 'NOTHING': tokens.Keyword, 281 | 'NOTIFY': tokens.Keyword, 282 | 'NOTNULL': tokens.Keyword, 283 | 'NULL': tokens.Keyword, 284 | 'NULLABLE': tokens.Keyword, 285 | 'NULLIF': tokens.Keyword, 286 | 287 | 'OBJECT': tokens.Keyword, 288 | 'OCTET_LENGTH': tokens.Keyword, 289 | 'OF': tokens.Keyword, 290 | 'OFF': tokens.Keyword, 291 | 'OFFSET': tokens.Keyword, 292 | 'OIDS': tokens.Keyword, 293 | 'OLD': tokens.Keyword, 294 | 'ONLY': tokens.Keyword, 295 | 'OPEN': tokens.Keyword, 296 | 'OPERATION': tokens.Keyword, 297 | 'OPERATOR': tokens.Keyword, 298 | 'OPTION': tokens.Keyword, 299 | 'OPTIONS': tokens.Keyword, 300 | 'ORDINALITY': tokens.Keyword, 301 | 'OUT': tokens.Keyword, 302 | 'OUTPUT': tokens.Keyword, 303 | 'OVERLAPS': tokens.Keyword, 304 | 'OVERLAY': tokens.Keyword, 305 | 'OVERRIDING': tokens.Keyword, 306 | 'OWNER': tokens.Keyword, 307 | 308 | 'PAD': tokens.Keyword, 309 | 'PARAMETER': tokens.Keyword, 310 | 'PARAMETERS': tokens.Keyword, 311 | 'PARAMETER_MODE': tokens.Keyword, 312 | 'PARAMATER_NAME': tokens.Keyword, 313 | 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, 314 | 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, 315 | 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, 316 | 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, 317 | 'PARTIAL': tokens.Keyword, 318 | 'PASCAL': tokens.Keyword, 319 | 'PENDANT': tokens.Keyword, 320 | 'PLACING': tokens.Keyword, 321 | 'PLI': tokens.Keyword, 322 | 'POSITION': tokens.Keyword, 323 | 'POSTFIX': tokens.Keyword, 324 | 'PRECISION': tokens.Keyword, 325 | 'PREFIX': tokens.Keyword, 326 | 'PREORDER': tokens.Keyword, 327 | 'PREPARE': tokens.Keyword, 328 | 'PRESERVE': tokens.Keyword, 329 | 'PRIMARY': tokens.Keyword, 330 | 'PRIOR': tokens.Keyword, 331 | 'PRIVILEGES': tokens.Keyword, 332 | 'PROCEDURAL': tokens.Keyword, 333 | 'PROCEDURE': tokens.Keyword, 334 | 'PUBLIC': tokens.Keyword, 335 | 336 | 'RAISE': tokens.Keyword, 337 | 'READ': tokens.Keyword, 338 | 'READS': tokens.Keyword, 339 | 'RECHECK': tokens.Keyword, 340 | 'RECURSIVE': tokens.Keyword, 341 | 'REF': tokens.Keyword, 342 | 'REFERENCES': tokens.Keyword, 343 | 'REFERENCING': tokens.Keyword, 344 | 'REINDEX': tokens.Keyword, 345 | 'RELATIVE': tokens.Keyword, 346 | 'RENAME': 
tokens.Keyword, 347 | 'REPEATABLE': tokens.Keyword, 348 | 'RESET': tokens.Keyword, 349 | 'RESTART': tokens.Keyword, 350 | 'RESTRICT': tokens.Keyword, 351 | 'RESULT': tokens.Keyword, 352 | 'RETURN': tokens.Keyword, 353 | 'RETURNED_LENGTH': tokens.Keyword, 354 | 'RETURNED_OCTET_LENGTH': tokens.Keyword, 355 | 'RETURNED_SQLSTATE': tokens.Keyword, 356 | 'RETURNS': tokens.Keyword, 357 | 'REVOKE': tokens.Keyword, 358 | 'RIGHT': tokens.Keyword, 359 | 'ROLE': tokens.Keyword, 360 | 'ROLLBACK': tokens.Keyword.DML, 361 | 'ROLLUP': tokens.Keyword, 362 | 'ROUTINE': tokens.Keyword, 363 | 'ROUTINE_CATALOG': tokens.Keyword, 364 | 'ROUTINE_NAME': tokens.Keyword, 365 | 'ROUTINE_SCHEMA': tokens.Keyword, 366 | 'ROW': tokens.Keyword, 367 | 'ROWS': tokens.Keyword, 368 | 'ROW_COUNT': tokens.Keyword, 369 | 'RULE': tokens.Keyword, 370 | 371 | 'SAVE_POINT': tokens.Keyword, 372 | 'SCALE': tokens.Keyword, 373 | 'SCHEMA': tokens.Keyword, 374 | 'SCHEMA_NAME': tokens.Keyword, 375 | 'SCOPE': tokens.Keyword, 376 | 'SCROLL': tokens.Keyword, 377 | 'SEARCH': tokens.Keyword, 378 | 'SECOND': tokens.Keyword, 379 | 'SECURITY': tokens.Keyword, 380 | 'SELF': tokens.Keyword, 381 | 'SENSITIVE': tokens.Keyword, 382 | 'SERIALIZABLE': tokens.Keyword, 383 | 'SERVER_NAME': tokens.Keyword, 384 | 'SESSION': tokens.Keyword, 385 | 'SESSION_USER': tokens.Keyword, 386 | 'SETOF': tokens.Keyword, 387 | 'SETS': tokens.Keyword, 388 | 'SHARE': tokens.Keyword, 389 | 'SHOW': tokens.Keyword, 390 | 'SIMILAR': tokens.Keyword, 391 | 'SIMPLE': tokens.Keyword, 392 | 'SIZE': tokens.Keyword, 393 | 'SOME': tokens.Keyword, 394 | 'SOURCE': tokens.Keyword, 395 | 'SPACE': tokens.Keyword, 396 | 'SPECIFIC': tokens.Keyword, 397 | 'SPECIFICTYPE': tokens.Keyword, 398 | 'SPECIFIC_NAME': tokens.Keyword, 399 | 'SQL': tokens.Keyword, 400 | 'SQLCODE': tokens.Keyword, 401 | 'SQLERROR': tokens.Keyword, 402 | 'SQLEXCEPTION': tokens.Keyword, 403 | 'SQLSTATE': tokens.Keyword, 404 | 'SQLWARNING': tokens.Keyword, 405 | 'STABLE': tokens.Keyword, 406 | 'START': tokens.Keyword.DML, 407 | 'STATE': tokens.Keyword, 408 | 'STATEMENT': tokens.Keyword, 409 | 'STATIC': tokens.Keyword, 410 | 'STATISTICS': tokens.Keyword, 411 | 'STDIN': tokens.Keyword, 412 | 'STDOUT': tokens.Keyword, 413 | 'STORAGE': tokens.Keyword, 414 | 'STRICT': tokens.Keyword, 415 | 'STRUCTURE': tokens.Keyword, 416 | 'STYPE': tokens.Keyword, 417 | 'SUBCLASS_ORIGIN': tokens.Keyword, 418 | 'SUBLIST': tokens.Keyword, 419 | 'SUBSTRING': tokens.Keyword, 420 | 'SUM': tokens.Keyword, 421 | 'SYMMETRIC': tokens.Keyword, 422 | 'SYSID': tokens.Keyword, 423 | 'SYSTEM': tokens.Keyword, 424 | 'SYSTEM_USER': tokens.Keyword, 425 | 426 | 'TABLE': tokens.Keyword, 427 | 'TABLE_NAME': tokens.Keyword, 428 | 'TEMP': tokens.Keyword, 429 | 'TEMPLATE': tokens.Keyword, 430 | 'TEMPORARY': tokens.Keyword, 431 | 'TERMINATE': tokens.Keyword, 432 | 'THAN': tokens.Keyword, 433 | 'TIMESTAMP': tokens.Keyword, 434 | 'TIMEZONE_HOUR': tokens.Keyword, 435 | 'TIMEZONE_MINUTE': tokens.Keyword, 436 | 'TO': tokens.Keyword, 437 | 'TOAST': tokens.Keyword, 438 | 'TRAILING': tokens.Keyword, 439 | 'TRANSATION': tokens.Keyword, 440 | 'TRANSACTIONS_COMMITTED': tokens.Keyword, 441 | 'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, 442 | 'TRANSATION_ACTIVE': tokens.Keyword, 443 | 'TRANSFORM': tokens.Keyword, 444 | 'TRANSFORMS': tokens.Keyword, 445 | 'TRANSLATE': tokens.Keyword, 446 | 'TRANSLATION': tokens.Keyword, 447 | 'TREAT': tokens.Keyword, 448 | 'TRIGGER': tokens.Keyword, 449 | 'TRIGGER_CATALOG': tokens.Keyword, 450 | 'TRIGGER_NAME': tokens.Keyword, 451 | 
'TRIGGER_SCHEMA': tokens.Keyword, 452 | 'TRIM': tokens.Keyword, 453 | 'TRUE': tokens.Keyword, 454 | 'TRUNCATE': tokens.Keyword, 455 | 'TRUSTED': tokens.Keyword, 456 | 'TYPE': tokens.Keyword, 457 | 458 | 'UNCOMMITTED': tokens.Keyword, 459 | 'UNDER': tokens.Keyword, 460 | 'UNENCRYPTED': tokens.Keyword, 461 | 'UNION': tokens.Keyword, 462 | 'UNIQUE': tokens.Keyword, 463 | 'UNKNOWN': tokens.Keyword, 464 | 'UNLISTEN': tokens.Keyword, 465 | 'UNNAMED': tokens.Keyword, 466 | 'UNNEST': tokens.Keyword, 467 | 'UNTIL': tokens.Keyword, 468 | 'UPPER': tokens.Keyword, 469 | 'USAGE': tokens.Keyword, 470 | 'USE': tokens.Keyword, 471 | 'USER': tokens.Keyword, 472 | 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword, 473 | 'USER_DEFINED_TYPE_NAME': tokens.Keyword, 474 | 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword, 475 | 'USING': tokens.Keyword, 476 | 477 | 'VACUUM': tokens.Keyword, 478 | 'VALID': tokens.Keyword, 479 | 'VALIDATOR': tokens.Keyword, 480 | 'VALUES': tokens.Keyword, 481 | 'VARIABLE': tokens.Keyword, 482 | 'VERBOSE': tokens.Keyword, 483 | 'VERSION': tokens.Keyword, 484 | 'VIEW': tokens.Keyword, 485 | 'VOLATILE': tokens.Keyword, 486 | 487 | 'WHENEVER': tokens.Keyword, 488 | 'WITH': tokens.Keyword, 489 | 'WITHOUT': tokens.Keyword, 490 | 'WORK': tokens.Keyword, 491 | 'WRITE': tokens.Keyword, 492 | 493 | 'YEAR': tokens.Keyword, 494 | 495 | 'ZONE': tokens.Keyword, 496 | 497 | # Name.Builtin 498 | 'ARRAY': tokens.Name.Builtin, 499 | 'BIGINT': tokens.Name.Builtin, 500 | 'BINARY': tokens.Name.Builtin, 501 | 'BIT': tokens.Name.Builtin, 502 | 'BLOB': tokens.Name.Builtin, 503 | 'BOOLEAN': tokens.Name.Builtin, 504 | 'CHAR': tokens.Name.Builtin, 505 | 'CHARACTER': tokens.Name.Builtin, 506 | 'DATE': tokens.Name.Builtin, 507 | 'DEC': tokens.Name.Builtin, 508 | 'DECIMAL': tokens.Name.Builtin, 509 | 'FLOAT': tokens.Name.Builtin, 510 | 'INT': tokens.Name.Builtin, 511 | 'INT8': tokens.Name.Builtin, 512 | 'INTEGER': tokens.Name.Builtin, 513 | 'INTERVAL': tokens.Name.Builtin, 514 | 'LONG': tokens.Name.Builtin, 515 | 'NUMBER': tokens.Name.Builtin, 516 | 'NUMERIC': tokens.Name.Builtin, 517 | 'REAL': tokens.Name.Builtin, 518 | 'SERIAL': tokens.Name.Builtin, 519 | 'SERIAL8': tokens.Name.Builtin, 520 | 'SIGNED': tokens.Name.Builtin, 521 | 'SMALLINT': tokens.Name.Builtin, 522 | 'TEXT': tokens.Name.Builtin, 523 | 'TINYINT': tokens.Name.Builtin, 524 | 'UNSIGNED': tokens.Name.Builtin, 525 | 'VARCHAR': tokens.Name.Builtin, 526 | 'VARCHAR2': tokens.Name.Builtin, 527 | 'VARYING': tokens.Name.Builtin, 528 | } 529 | 530 | 531 | KEYWORDS_COMMON = { 532 | 'SELECT': tokens.Keyword.DML, 533 | 'INSERT': tokens.Keyword.DML, 534 | 'DELETE': tokens.Keyword.DML, 535 | 'UPDATE': tokens.Keyword.DML, 536 | 'REPLACE': tokens.Keyword.DML, 537 | 'MERGE': tokens.Keyword.DML, 538 | 'DROP': tokens.Keyword.DDL, 539 | 'CREATE': tokens.Keyword.DDL, 540 | 'ALTER': tokens.Keyword.DDL, 541 | 542 | 'WHERE': tokens.Keyword, 543 | 'FROM': tokens.Keyword, 544 | 'INNER': tokens.Keyword, 545 | 'JOIN': tokens.Keyword, 546 | 'STRAIGHT_JOIN': tokens.Keyword, 547 | 'AND': tokens.Keyword, 548 | 'OR': tokens.Keyword, 549 | 'LIKE': tokens.Keyword, 550 | 'ON': tokens.Keyword, 551 | 'IN': tokens.Keyword, 552 | 'SET': tokens.Keyword, 553 | 554 | 'BY': tokens.Keyword, 555 | 'GROUP': tokens.Keyword, 556 | 'ORDER': tokens.Keyword, 557 | 'LEFT': tokens.Keyword, 558 | 'OUTER': tokens.Keyword, 559 | 'FULL': tokens.Keyword, 560 | 561 | 'IF': tokens.Keyword, 562 | 'END': tokens.Keyword, 563 | 'THEN': tokens.Keyword, 564 | 'LOOP': tokens.Keyword, 565 | 'AS': tokens.Keyword, 566 | 'ELSE': 
tokens.Keyword, 567 | 'FOR': tokens.Keyword, 568 | 569 | 'CASE': tokens.Keyword, 570 | 'WHEN': tokens.Keyword, 571 | 'MIN': tokens.Keyword, 572 | 'MAX': tokens.Keyword, 573 | 'DISTINCT': tokens.Keyword, 574 | } 575 | -------------------------------------------------------------------------------- /sqlparse/lexer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 4 | # 5 | # This module is part of python-sqlparse and is released under 6 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 7 | 8 | """SQL Lexer""" 9 | 10 | # This code is based on the SqlLexer in pygments. 11 | # http://pygments.org/ 12 | # It's separated from the rest of pygments to increase performance 13 | # and to allow some customizations. 14 | 15 | import re 16 | import sys 17 | 18 | from sqlparse import tokens 19 | from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON 20 | import collections 21 | 22 | 23 | class include(str): 24 | pass 25 | 26 | 27 | class combined(tuple): 28 | """Indicates a state combined from multiple states.""" 29 | 30 | def __new__(cls, *args): 31 | return tuple.__new__(cls, args) 32 | 33 | def __init__(self, *args): 34 | # tuple.__init__ doesn't do anything 35 | pass 36 | 37 | 38 | def is_keyword(value): 39 | test = value.upper() 40 | return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value 41 | 42 | 43 | def apply_filters(stream, filters, lexer=None): 44 | """ 45 | Use this method to apply an iterable of filters to 46 | a stream. If lexer is given it's forwarded to the 47 | filter, otherwise the filter receives `None`. 48 | """ 49 | 50 | def _apply(filter_, stream): 51 | for token in filter_.filter(lexer, stream): 52 | yield token 53 | 54 | for filter_ in filters: 55 | stream = _apply(filter_, stream) 56 | return stream 57 | 58 | 59 | class LexerMeta(type): 60 | """ 61 | Metaclass for Lexer, creates the self._tokens attribute from 62 | self.tokens on the first instantiation. 
63 | """ 64 | 65 | def _process_state(cls, unprocessed, processed, state): 66 | assert type(state) is str, "wrong state name %r" % state 67 | assert state[0] != '#', "invalid state name %r" % state 68 | if state in processed: 69 | return processed[state] 70 | tokenlist = processed[state] = [] 71 | rflags = cls.flags 72 | for tdef in unprocessed[state]: 73 | if isinstance(tdef, include): 74 | # it's a state reference 75 | assert tdef != state, "circular state reference %r" % state 76 | tokenlist.extend(cls._process_state( 77 | unprocessed, processed, str(tdef))) 78 | continue 79 | 80 | assert type(tdef) is tuple, "wrong rule def %r" % tdef 81 | 82 | try: 83 | rex = re.compile(tdef[0], rflags).match 84 | except Exception as err: 85 | raise ValueError(("uncompilable regex %r in state" 86 | " %r of %r: %s" 87 | % (tdef[0], state, cls, err))) 88 | 89 | assert type(tdef[1]) is tokens._TokenType or isinstance(tdef[1], collections.Callable), \ 90 | ('token type must be simple type or callable, not %r' 91 | % (tdef[1],)) 92 | 93 | if len(tdef) == 2: 94 | new_state = None 95 | else: 96 | tdef2 = tdef[2] 97 | if isinstance(tdef2, str): 98 | # an existing state 99 | if tdef2 == '#pop': 100 | new_state = -1 101 | elif tdef2 in unprocessed: 102 | new_state = (tdef2,) 103 | elif tdef2 == '#push': 104 | new_state = tdef2 105 | elif tdef2[:5] == '#pop:': 106 | new_state = -int(tdef2[5:]) 107 | else: 108 | assert False, 'unknown new state %r' % tdef2 109 | elif isinstance(tdef2, combined): 110 | # combine a new state from existing ones 111 | new_state = '_tmp_%d' % cls._tmpname 112 | cls._tmpname += 1 113 | itokens = [] 114 | for istate in tdef2: 115 | assert istate != state, \ 116 | 'circular state ref %r' % istate 117 | itokens.extend(cls._process_state(unprocessed, 118 | processed, istate)) 119 | processed[new_state] = itokens 120 | new_state = (new_state,) 121 | elif isinstance(tdef2, tuple): 122 | # push more than one state 123 | for state in tdef2: 124 | assert (state in unprocessed or 125 | state in ('#pop', '#push')), \ 126 | 'unknown new state ' + state 127 | new_state = tdef2 128 | else: 129 | assert False, 'unknown new state def %r' % tdef2 130 | tokenlist.append((rex, tdef[1], new_state)) 131 | return tokenlist 132 | 133 | def process_tokendef(cls): 134 | cls._all_tokens = {} 135 | cls._tmpname = 0 136 | processed = cls._all_tokens[cls.__name__] = {} 137 | #tokendefs = tokendefs or cls.tokens[name] 138 | for state in list(cls.tokens.keys()): 139 | cls._process_state(cls.tokens, processed, state) 140 | return processed 141 | 142 | def __call__(cls, *args, **kwds): 143 | if not hasattr(cls, '_tokens'): 144 | cls._all_tokens = {} 145 | cls._tmpname = 0 146 | if hasattr(cls, 'token_variants') and cls.token_variants: 147 | # don't process yet 148 | pass 149 | else: 150 | cls._tokens = cls.process_tokendef() 151 | 152 | return type.__call__(cls, *args, **kwds) 153 | 154 | # for jython metaclass 155 | LexerMetaHasVersion = LexerMeta("Lexer", (object, ), {"__doc__": LexerMeta.__doc__}) 156 | 157 | class Lexer(LexerMetaHasVersion): 158 | 159 | encoding = 'utf-8' 160 | stripall = False 161 | stripnl = False 162 | tabsize = 0 163 | flags = re.IGNORECASE | re.UNICODE 164 | 165 | tokens = { 166 | 'root': [ 167 | (r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single), 168 | # $ matches *before* newline, therefore we have two patterns 169 | # to match Comment.Single 170 | (r'(--|# ).*?$', tokens.Comment.Single), 171 | (r'(\r\n|\r|\n)', tokens.Newline), 172 | (r'\s+', tokens.Whitespace), 173 | (r'/\*', 
tokens.Comment.Multiline, 'multiline-comments'),
174 |             (r':=', tokens.Assignment),
175 |             (r'::', tokens.Punctuation),
176 |             (r'[*]', tokens.Wildcard),
177 |             (r'CASE\b', tokens.Keyword),  # extended CASE(foo)
178 |             (r"`(``|[^`])*`", tokens.Name),
179 |             (r"´(´´|[^´])*´", tokens.Name),
180 |             (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
181 |             (r'\?{1}', tokens.Name.Placeholder),
182 |             (r'%\(\w+\)s', tokens.Name.Placeholder),
183 |             (r'%s', tokens.Name.Placeholder),
184 |             (r'[$:?]\w+', tokens.Name.Placeholder),
185 |             # FIXME(andi): VALUES shouldn't be listed here
186 |             # see https://github.com/andialbrecht/sqlparse/pull/64
187 |             (r'VALUES', tokens.Keyword),
188 |             (r'(@|##|#)[^\W\d_]\w+', tokens.Name),
189 |             # IN is special, it may be followed by a parenthesis, but
190 |             # is never a function, see issue183
191 |             (r'in\b(?=[ (])?', tokens.Keyword),
192 |             (r'[^\W\d_]\w*(?=[.(])', tokens.Name),  # see issue39
193 |             (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
194 |             (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
195 |             (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
196 |             (r'[-]?[0-9]+', tokens.Number.Integer),
197 |             (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
198 |             # not a real string literal in ANSI SQL:
199 |             (r'(""|".*?[^\\]")', tokens.String.Symbol),
200 |             # sqlite names can be escaped with [square brackets]. left bracket
201 |             # cannot be preceded by word character or a right bracket --
202 |             # otherwise it's probably an array index
203 |             (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
204 |             (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
205 |             (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
206 |             (r'NOT NULL\b', tokens.Keyword),
207 |             (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
208 |             (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
209 |             (r'(?<=\.)[^\W\d_]\w*', tokens.Name),
210 |             (r'[^\W\d]\w*', is_keyword),
211 |             (r'[;:()\[\],\.]', tokens.Punctuation),
212 |             (r'[<>=~!]+', tokens.Operator.Comparison),
213 |             (r'[+/@#%^&|`?^-]+', tokens.Operator),
214 |         ],
215 |         'multiline-comments': [
216 |             (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
217 |             (r'\*/', tokens.Comment.Multiline, '#pop'),
218 |             (r'[^/\*]+', tokens.Comment.Multiline),
219 |             (r'[/*]', tokens.Comment.Multiline),
220 |         ]}
221 | 
222 |     def __init__(self):
223 |         self.filters = []
224 | 
225 |     def add_filter(self, filter_, **options):
226 |         from sqlparse.filters import Filter
227 |         if not isinstance(filter_, Filter):
228 |             filter_ = filter_(**options)
229 |         self.filters.append(filter_)
230 | 
231 |     def _decode(self, text):
232 |         if sys.version_info[0] == 3:
233 |             if isinstance(text, str):
234 |                 return text
235 |         # for jython
236 |         if isinstance(text, unicode):
237 |             return text
238 |         if self.encoding == 'guess':
239 |             try:
240 |                 text = text.decode('utf-8')
241 |                 if text.startswith('\ufeff'):
242 |                     text = text[len('\ufeff'):]
243 |             except UnicodeDecodeError:
244 |                 text = text.decode('latin1')
245 |         else:
246 |             try:
247 |                 text = text.decode(self.encoding)
248 |             except UnicodeDecodeError:
249 |                 text = text.decode('unicode-escape')
250 | 
251 |         if self.tabsize > 0:
252 |             text = text.expandtabs(self.tabsize)
253 |         return text
254 | 
255 |     def get_tokens(self, text, unfiltered=False):
256 |         """
257 |         Return an iterable of (tokentype, value) pairs generated from
258 |         `text`. If `unfiltered` is set to `True`, the filtering mechanism
259 |         is bypassed even if filters are defined.
260 | 
261 |         Also preprocess the text, i.e. expand tabs and strip it if
262 |         wanted, and apply registered filters.
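        A minimal usage sketch (illustrative; the exact token types seen
        depend on the keyword tables above)::

            lexer = Lexer()
            for ttype, value in lexer.get_tokens("SELECT id FROM users"):
                print(ttype, repr(value))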
263 |         """
264 |         if isinstance(text, str):
265 |             if self.stripall:
266 |                 text = text.strip()
267 |             elif self.stripnl:
268 |                 text = text.strip('\n')
269 | 
270 |             if sys.version_info[0] < 3 and isinstance(text, str):
271 |                 # for jython
272 |                 import cStringIO
273 |                 text = cStringIO.StringIO(text)
274 |                 self.encoding = 'utf-8'
275 |             else:
276 |                 # for jython: the import was moved into this branch
277 |                 from _io import StringIO
278 |                 text = StringIO(text)
279 | 
280 |         def streamer():
281 |             for i, t, v in self.get_tokens_unprocessed(text):
282 |                 # for jython encode
283 |                 if sys.version_info[0] < 3 and isinstance(v, unicode):
284 |                     v = v.encode('utf-8')
285 |                 yield t, v
286 |         stream = streamer()
287 |         if not unfiltered:
288 |             stream = apply_filters(stream, self.filters, self)
289 |         return stream
290 | 
291 |     def get_tokens_unprocessed(self, stream, stack=('root',)):
292 |         """
293 |         Split ``text`` into (tokentype, text) pairs.
294 | 
295 |         ``stack`` is the initial stack (default: ``['root']``)
296 |         """
297 |         pos = 0
298 |         tokendefs = self._tokens  # see __call__, pylint:disable=E1101
299 |         statestack = list(stack)
300 |         statetokens = tokendefs[statestack[-1]]
301 |         known_names = {}
302 | 
303 |         text = stream.read()
304 |         text = self._decode(text)
305 | 
306 |         while 1:
307 |             for rexmatch, action, new_state in statetokens:
308 |                 m = rexmatch(text, pos)
309 |                 if m:
310 |                     value = m.group()
311 |                     # bugfix: the order of these branches matters
312 |                     if type(action) is tokens._TokenType:
313 |                         yield pos, action, value
314 |                     elif value in known_names:
315 |                         yield pos, known_names[value], value
316 |                     elif hasattr(action, '__call__'):
317 |                         ttype, value = action(value)
318 |                         known_names[value] = ttype
319 |                         yield pos, ttype, value
320 |                     else:
321 |                         for item in action(self, m):
322 |                             yield item
323 |                     pos = m.end()
324 |                     if new_state is not None:
325 |                         # state transition
326 |                         if isinstance(new_state, tuple):
327 |                             for state in new_state:
328 |                                 if state == '#pop':
329 |                                     statestack.pop()
330 |                                 elif state == '#push':
331 |                                     statestack.append(statestack[-1])
332 |                                 elif (
333 |                                     # Ugly hack - multiline-comments
334 |                                     # are not stackable
335 |                                     state != 'multiline-comments'
336 |                                     or not statestack
337 |                                     or statestack[-1] != 'multiline-comments'
338 |                                 ):
339 |                                     statestack.append(state)
340 |                         elif isinstance(new_state, int):
341 |                             # pop
342 |                             del statestack[new_state:]
343 |                         elif new_state == '#push':
344 |                             statestack.append(statestack[-1])
345 |                         else:
346 |                             assert False, "wrong state def: %r" % new_state
347 |                         statetokens = tokendefs[statestack[-1]]
348 |                     break
349 |             else:
350 |                 try:
351 |                     if text[pos] == '\n':
352 |                         # at EOL, reset state to "root"
353 |                         pos += 1
354 |                         statestack = ['root']
355 |                         statetokens = tokendefs['root']
356 |                         yield pos, tokens.Text, '\n'
357 |                         continue
358 |                     yield pos, tokens.Error, text[pos]
359 |                     pos += 1
360 |                 except IndexError:
361 |                     break
362 | 
363 | 
364 | def tokenize(sql, encoding=None):
365 |     """Tokenize sql.
366 | 
367 |     Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
368 |     of ``(token type, value)`` items.
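    For example (illustrative sketch)::

        for ttype, value in tokenize("SELECT * FROM t WHERE id = 1"):
            print(ttype, repr(value))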
369 |     """
370 |     lexer = Lexer()
371 |     if encoding is not None:
372 |         lexer.encoding = encoding
373 |     return lexer.get_tokens(sql)
374 | 
--------------------------------------------------------------------------------
/sqlparse/pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 | 
6 | from types import GeneratorType
7 | import collections
8 | 
9 | 
10 | class Pipeline(list):
11 |     """Pipeline to process filters sequentially"""
12 | 
13 |     def __call__(self, stream):
14 |         """Run the pipeline
15 | 
16 |         Return a static (non-generator) version of the result
17 |         """
18 | 
19 |         # Run the stream over all the filters on the pipeline
20 |         for filter in self:
21 |             # Functions and callable objects (objects with '__call__' method)
22 |             if isinstance(filter, collections.Callable):
23 |                 stream = list(filter(stream))
24 | 
25 |             # Normal filters (objects with 'process' method)
26 |             else:
27 |                 stream = filter.process(None, stream)
28 | 
29 |         # If the last filter returned a generator, materialize it as a list
30 |         if isinstance(stream, GeneratorType):
31 |             return list(stream)
32 |         return stream
33 | 
--------------------------------------------------------------------------------
/sqlparse/sql.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """This module contains classes representing syntactical elements of SQL."""
4 | 
5 | import re
6 | import sys
7 | 
8 | from sqlparse import tokens as T
9 | 
10 | 
11 | class Token(object):
12 |     """Base class for all other classes in this module.
13 | 
14 |     It represents a single token and has two instance attributes:
15 |     ``value`` is the unchanged value of the token and ``ttype`` is
16 |     the type of the token.
17 |     """
18 | 
19 |     __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword')
20 | 
21 |     def __init__(self, ttype, value):
22 |         self.value = value
23 |         if ttype in T.Keyword:
24 |             self.normalized = value.upper()
25 |         else:
26 |             self.normalized = value
27 |         self.ttype = ttype
28 |         self.is_keyword = ttype in T.Keyword
29 |         self.parent = None
30 | 
31 |     def __str__(self):
32 |         if sys.version_info[0] == 3:
33 |             return self.value
34 |         else:
35 |             # for jython bug
36 |             # return str(self).encode('utf-8'))
37 |             return self.value
38 | 
39 |     def __repr__(self):
40 |         short = self._get_repr_value()
41 |         if sys.version_info[0] < 3:
42 |             short = short.encode('utf-8')
43 |         return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
44 |                                           short, id(self))
45 | 
46 |     def __unicode__(self):
47 |         """Returns a unicode representation of this object."""
48 |         return self.value or ''
49 | 
50 |     def to_unicode(self):
51 |         """Returns a unicode representation of this object.
52 | 
53 |         .. deprecated:: 0.1.5
54 |            Use ``unicode(token)`` (for Python 3: ``str(token)``) instead.
55 |         """
56 |         return str(self)
57 | 
58 |     def _get_repr_name(self):
59 |         return str(self.ttype).split('.')[-1]
60 | 
61 |     def _get_repr_value(self):
62 |         raw = str(self)
63 |         if len(raw) > 7:
64 |             raw = raw[:6] + '...'
65 |         return re.sub('\s+', ' ', raw)
66 | 
67 |     def flatten(self):
68 |         """Resolve subgroups."""
69 |         yield self
70 | 
71 |     def match(self, ttype, values, regex=False):
72 |         """Checks whether the token matches the given arguments.
73 | 
74 |         *ttype* is a token type. If this token doesn't match the given token
75 |         type, ``False`` is returned.
76 |         *values* is a list of possible values for this token. The values
77 |         are OR'ed together so if only one of the values matches ``True``
78 |         is returned. Except for keyword tokens the comparison is
79 |         case-sensitive. For convenience it's ok to pass in a single string.
80 |         If *regex* is ``True`` (default is ``False``) the given values are
81 |         treated as regular expressions.
82 |         """
83 |         type_matched = self.ttype is ttype
84 |         if not type_matched or values is None:
85 |             return type_matched
86 | 
87 |         if regex:
88 |             if isinstance(values, str):
89 |                 values = set([values])
90 | 
91 |             if self.ttype is T.Keyword:
92 |                 values = set(re.compile(v, re.IGNORECASE) for v in values)
93 |             else:
94 |                 values = set(re.compile(v) for v in values)
95 | 
96 |             for pattern in values:
97 |                 if pattern.search(self.value):
98 |                     return True
99 |             return False
100 | 
101 |         if isinstance(values, str):
102 |             if self.is_keyword:
103 |                 return values.upper() == self.normalized
104 |             return values == self.value
105 | 
106 |         if self.is_keyword:
107 |             for v in values:
108 |                 if v.upper() == self.normalized:
109 |                     return True
110 |             return False
111 | 
112 |         return self.value in values
113 | 
114 |     def is_group(self):
115 |         """Returns ``True`` if this object has children."""
116 |         return False
117 | 
118 |     def is_whitespace(self):
119 |         """Return ``True`` if this token is a whitespace token."""
120 |         return self.ttype and self.ttype in T.Whitespace
121 | 
122 |     def within(self, group_cls):
123 |         """Returns ``True`` if this token is within *group_cls*.
124 | 
125 |         Use this method for example to check if an identifier is within
126 |         a function: ``t.within(sql.Function)``.
127 |         """
128 |         parent = self.parent
129 |         while parent:
130 |             if isinstance(parent, group_cls):
131 |                 return True
132 |             parent = parent.parent
133 |         return False
134 | 
135 |     def is_child_of(self, other):
136 |         """Returns ``True`` if this token is a direct child of *other*."""
137 |         return self.parent == other
138 | 
139 |     def has_ancestor(self, other):
140 |         """Returns ``True`` if *other* is in this token's ancestry."""
141 |         parent = self.parent
142 |         while parent:
143 |             if parent == other:
144 |                 return True
145 |             parent = parent.parent
146 |         return False
147 | 
148 | 
149 | class TokenList(Token):
150 |     """A group of tokens.
151 | 
152 |     It has an additional instance attribute ``tokens`` which holds a
153 |     list of child-tokens.
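    A sketch of walking the children of a parsed statement (assuming the
    top-level ``sqlparse.parse`` API)::

        import sqlparse

        stmt = sqlparse.parse("SELECT a FROM b")[0]
        for tok in stmt.tokens:
            print(tok.ttype, repr(tok.value))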
154 |     """
155 | 
156 |     __slots__ = ('value', 'ttype', 'tokens')
157 | 
158 |     def __init__(self, tokens=None):
159 |         if tokens is None:
160 |             tokens = []
161 |         self.tokens = tokens
162 |         Token.__init__(self, None, self._to_string())
163 | 
164 |     def __unicode__(self):
165 |         return self._to_string()
166 | 
167 |     def __str__(self):
168 |         str_ = self._to_string()
169 |         if sys.version_info[0] < 3:
170 |             str_ = str_.encode('utf-8')
171 |         return str_
172 | 
173 |     def _to_string(self):
174 |         if sys.version_info[0] == 3:
175 |             return ''.join(x.value for x in self.flatten())
176 |         else:
177 |             return ''.join(str(x) for x in self.flatten())
178 | 
179 |     def _get_repr_name(self):
180 |         return self.__class__.__name__
181 | 
182 |     def _pprint_tree(self, max_depth=None, depth=0):
183 |         """Pretty-print the object tree."""
184 |         indent = ' ' * (depth * 2)
185 |         for idx, token in enumerate(self.tokens):
186 |             if token.is_group():
187 |                 pre = ' +-'
188 |             else:
189 |                 pre = ' | '
190 |             print('%s%s%d %s \'%s\'' % (indent, pre, idx,
191 |                                         token._get_repr_name(),
192 |                                         token._get_repr_value()))
193 |             if (token.is_group() and (max_depth is None or depth < max_depth)):
194 |                 token._pprint_tree(max_depth, depth + 1)
195 | 
196 |     def _remove_quotes(self, val):
197 |         """Helper that removes surrounding quotes from strings."""
198 |         if not val:
199 |             return val
200 |         if val[0] in ('"', '\'') and val[-1] == val[0]:
201 |             val = val[1:-1]
202 |         return val
203 | 
204 |     def get_token_at_offset(self, offset):
205 |         """Returns the token that is on position offset."""
206 |         idx = 0
207 |         for token in self.flatten():
208 |             end = idx + len(token.value)
209 |             if idx <= offset <= end:
210 |                 return token
211 |             idx = end
212 | 
213 |     def flatten(self):
214 |         """Generator yielding ungrouped tokens.
215 | 
216 |         This method is recursively called for all child tokens.
217 |         """
218 |         for token in self.tokens:
219 |             if isinstance(token, TokenList):
220 |                 for item in token.flatten():
221 |                     yield item
222 |             else:
223 |                 yield token
224 | 
225 |     # def __iter__(self):
226 |     #     return self
227 |     #
228 |     # def next(self):
229 |     #     for token in self.tokens:
230 |     #         yield token
231 | 
232 |     def is_group(self):
233 |         return True
234 | 
235 |     def get_sublists(self):
236 |         # return [x for x in self.tokens if isinstance(x, TokenList)]
237 |         for x in self.tokens:
238 |             if isinstance(x, TokenList):
239 |                 yield x
240 | 
241 |     @property
242 |     def _groupable_tokens(self):
243 |         return self.tokens
244 | 
245 |     def token_first(self, ignore_whitespace=True, ignore_comments=False):
246 |         """Returns the first child token.
247 | 
248 |         If *ignore_whitespace* is ``True`` (the default), whitespace
249 |         tokens are ignored.
250 | 
251 |         If *ignore_comments* is ``True`` (default: ``False``), comments are
252 |         ignored too.
253 |         """
254 |         for token in self.tokens:
255 |             if ignore_whitespace and token.is_whitespace():
256 |                 continue
257 |             if ignore_comments and isinstance(token, Comment):
258 |                 continue
259 |             return token
260 | 
261 |     def token_next_by_instance(self, idx, clss, end=None):
262 |         """Returns the next token matching a class.
263 | 
264 |         *idx* is where to start searching in the list of child tokens.
265 |         *clss* is a list of classes the token should be an instance of.
266 | 
267 |         If no matching token can be found ``None`` is returned.
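        For instance (sketch; ``stmt`` is a hypothetical parsed statement)::

            where_clause = stmt.token_next_by_instance(0, Where)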
268 |         """
269 |         if not isinstance(clss, (list, tuple)):
270 |             clss = (clss,)
271 | 
272 |         for token in self.tokens[idx:end]:
273 |             if isinstance(token, clss):
274 |                 return token
275 | 
276 |     def token_next_by_type(self, idx, ttypes):
277 |         """Returns the next matching token by its token type."""
278 |         if not isinstance(ttypes, (list, tuple)):
279 |             ttypes = [ttypes]
280 | 
281 |         for token in self.tokens[idx:]:
282 |             if token.ttype in ttypes:
283 |                 return token
284 | 
285 |     def token_next_match(self, idx, ttype, value, regex=False):
286 |         """Returns the next token whose ``match`` method returns ``True``."""
287 |         if not isinstance(idx, int):
288 |             idx = self.token_index(idx)
289 | 
290 |         for n in range(idx, len(self.tokens)):
291 |             token = self.tokens[n]
292 |             if token.match(ttype, value, regex):
293 |                 return token
294 | 
295 |     def token_not_matching(self, idx, funcs):
296 |         for token in self.tokens[idx:]:
297 |             passed = False
298 |             for func in funcs:
299 |                 if func(token):
300 |                     passed = True
301 |                     break
302 | 
303 |             if not passed:
304 |                 return token
305 | 
306 |     def token_matching(self, idx, funcs):
307 |         for token in self.tokens[idx:]:
308 |             for func in funcs:
309 |                 if func(token):
310 |                     return token
311 | 
312 |     def token_prev(self, idx, skip_ws=True):
313 |         """Returns the previous token relative to *idx*.
314 | 
315 |         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
316 |         ``None`` is returned if there's no previous token.
317 |         """
318 |         if idx is None:
319 |             return None
320 | 
321 |         if not isinstance(idx, int):
322 |             idx = self.token_index(idx)
323 | 
324 |         while idx:
325 |             idx -= 1
326 |             if self.tokens[idx].is_whitespace() and skip_ws:
327 |                 continue
328 |             return self.tokens[idx]
329 | 
330 |     def token_next(self, idx, skip_ws=True):
331 |         """Returns the next token relative to *idx*.
332 | 
333 |         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
334 |         ``None`` is returned if there's no next token.
335 |         """
336 |         if idx is None:
337 |             return None
338 | 
339 |         if not isinstance(idx, int):
340 |             idx = self.token_index(idx)
341 | 
342 |         while idx < len(self.tokens) - 1:
343 |             idx += 1
344 |             if self.tokens[idx].is_whitespace() and skip_ws:
345 |                 continue
346 |             return self.tokens[idx]
347 | 
348 |     def token_index(self, token, start=0):
349 |         """Return the list index of *token*."""
350 |         if start > 0:
351 |             # Performing `index` manually is much faster when starting in the middle
352 |             # of the list of tokens and expecting to find the token near to the starting
353 |             # index.
354 |             for i in range(start, len(self.tokens)):
355 |                 if self.tokens[i] == token:
356 |                     return i
357 |             return -1
358 |         return self.tokens.index(token)
359 | 
360 |     def tokens_between(self, start, end, exclude_end=False):
361 |         """Return all tokens between (and including) start and end.
362 | 
363 |         If *exclude_end* is ``True`` (default is ``False``) the end token
364 |         is excluded.
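        Sketch (``from_tok`` and ``last_tok`` are hypothetical child tokens)::

            span = stmt.tokens_between(from_tok, last_tok)        # last_tok included
            span = stmt.tokens_between(from_tok, last_tok, True)  # last_tok excluded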
365 |         """
366 |         # FIXME(andi): rename exclude_end to include_end
367 |         if exclude_end:
368 |             offset = 0
369 |         else:
370 |             offset = 1
371 |         end_idx = self.token_index(end) + offset
372 |         start_idx = self.token_index(start)
373 |         return self.tokens[start_idx:end_idx]
374 | 
375 |     def group_tokens(self, grp_cls, tokens, ignore_ws=False):
376 |         """Replace tokens by an instance of *grp_cls*."""
377 |         idx = self.token_index(tokens[0])
378 |         if ignore_ws:
379 |             while tokens and tokens[-1].is_whitespace():
380 |                 tokens = tokens[:-1]
381 |         for t in tokens:
382 |             self.tokens.remove(t)
383 |         grp = grp_cls(tokens)
384 |         for token in tokens:
385 |             token.parent = grp
386 |         grp.parent = self
387 |         self.tokens.insert(idx, grp)
388 |         return grp
389 | 
390 |     def insert_before(self, where, token):
391 |         """Inserts *token* before *where*."""
392 |         self.tokens.insert(self.token_index(where), token)
393 | 
394 |     def insert_after(self, where, token, skip_ws=True):
395 |         """Inserts *token* after *where*."""
396 |         next_token = self.token_next(where, skip_ws=skip_ws)
397 |         if next_token is None:
398 |             self.tokens.append(token)
399 |         else:
400 |             self.tokens.insert(self.token_index(next_token), token)
401 | 
402 |     def has_alias(self):
403 |         """Returns ``True`` if an alias is present."""
404 |         return self.get_alias() is not None
405 | 
406 |     def get_alias(self):
407 |         """Returns the alias for this identifier or ``None``."""
408 | 
409 |         # "name AS alias"
410 |         kw = self.token_next_match(0, T.Keyword, 'AS')
411 |         if kw is not None:
412 |             return self._get_first_name(kw, keywords=True)
413 | 
414 |         # "name alias" or "complicated column expression alias"
415 |         if len(self.tokens) > 2 \
416 |            and self.token_next_by_type(0, T.Whitespace) is not None:
417 |             return self._get_first_name(reverse=True)
418 | 
419 |         return None
420 | 
421 |     def get_name(self):
422 |         """Returns the name of this identifier.
423 | 
424 |         This is either its alias or its real name. The returned value can
425 |         be considered as the name under which the object corresponding to
426 |         this identifier is known within the current statement.
427 |         """
428 |         alias = self.get_alias()
429 |         if alias is not None:
430 |             return alias
431 |         return self.get_real_name()
432 | 
433 |     def get_real_name(self):
434 |         """Returns the real name (object name) of this identifier."""
435 |         # a.b
436 |         dot = self.token_next_match(0, T.Punctuation, '.')
437 |         if dot is not None:
438 |             return self._get_first_name(self.token_index(dot))
439 | 
440 |         return self._get_first_name()
441 | 
442 |     def get_parent_name(self):
443 |         """Return the name of the parent object if any.
444 | 
445 |         A parent object is identified by the first occurring dot.
446 |         """
447 |         dot = self.token_next_match(0, T.Punctuation, '.')
448 |         if dot is None:
449 |             return None
450 |         prev_ = self.token_prev(self.token_index(dot))
451 |         if prev_ is None:  # something must be very wrong here..
452 |             return None
453 |         return self._remove_quotes(prev_.value)
454 | 
455 |     def _get_first_name(self, idx=None, reverse=False, keywords=False):
456 |         """Returns the name of the first token with a name"""
457 | 
458 |         if idx and not isinstance(idx, int):
459 |             idx = self.token_index(idx) + 1
460 | 
461 |         tokens = self.tokens[idx:] if idx else self.tokens
462 |         tokens = reversed(tokens) if reverse else tokens
463 |         types = [T.Name, T.Wildcard, T.String.Symbol]
464 | 
465 |         if keywords:
466 |             types.append(T.Keyword)
467 | 
468 |         for tok in tokens:
469 |             if tok.ttype in types:
470 |                 return self._remove_quotes(tok.value)
471 |             elif isinstance(tok, Identifier) or isinstance(tok, Function):
472 |                 return tok.get_name()
473 |         return None
474 | 
475 | class Statement(TokenList):
476 |     """Represents a SQL statement."""
477 | 
478 |     __slots__ = ('value', 'ttype', 'tokens')
479 | 
480 |     def get_type(self):
481 |         """Returns the type of a statement.
482 | 
483 |         The returned value is a string holding an upper-cased reprint of
484 |         the first DML or DDL keyword. If the first token in this group
485 |         isn't a DML or DDL keyword "UNKNOWN" is returned.
486 | 
487 |         Whitespaces and comments at the beginning of the statement
488 |         are ignored.
489 |         """
490 |         first_token = self.token_first(ignore_comments=True)
491 |         if first_token is None:
492 |             # An "empty" statement that either has no tokens at all
493 |             # or only whitespace tokens.
494 |             return 'UNKNOWN'
495 | 
496 |         elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
497 |             return first_token.normalized
498 | 
499 |         return 'UNKNOWN'
500 | 
501 | 
502 | class Identifier(TokenList):
503 |     """Represents an identifier.
504 | 
505 |     Identifiers may have aliases or typecasts.
506 |     """
507 | 
508 |     __slots__ = ('value', 'ttype', 'tokens')
509 | 
510 |     def is_wildcard(self):
511 |         """Return ``True`` if this identifier contains a wildcard."""
512 |         token = self.token_next_by_type(0, T.Wildcard)
513 |         return token is not None
514 | 
515 |     def get_typecast(self):
516 |         """Returns the typecast of this object as a string, or ``None``."""
517 |         marker = self.token_next_match(0, T.Punctuation, '::')
518 |         if marker is None:
519 |             return None
520 |         next_ = self.token_next(self.token_index(marker), False)
521 |         if next_ is None:
522 |             return None
523 |         return str(next_)
524 | 
525 |     def get_ordering(self):
526 |         """Returns the ordering or ``None`` as an uppercase string."""
527 |         ordering = self.token_next_by_type(0, T.Keyword.Order)
528 |         if ordering is None:
529 |             return None
530 |         return ordering.value.upper()
531 | 
532 |     def get_array_indices(self):
533 |         """Returns an iterator of index token lists"""
534 | 
535 |         for tok in self.tokens:
536 |             if isinstance(tok, SquareBrackets):
537 |                 # Use [1:-1] index to discard the square brackets
538 |                 yield tok.tokens[1:-1]
539 | 
540 | 
541 | class IdentifierList(TokenList):
542 |     """A list of :class:`~sqlparse.sql.Identifier`\'s."""
543 | 
544 |     __slots__ = ('value', 'ttype', 'tokens')
545 | 
546 |     def get_identifiers(self):
547 |         """Returns the identifiers.
548 | 
549 |         Whitespaces and punctuations are not included in this generator.
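        Sketch (``idlist`` is a hypothetical IdentifierList parsed from
        ``"a, b, c"``)::

            names = [str(tok) for tok in idlist.get_identifiers()]  # ['a', 'b', 'c']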
550 |         """
551 |         for x in self.tokens:
552 |             if not x.is_whitespace() and not x.match(T.Punctuation, ','):
553 |                 yield x
554 | 
555 | 
556 | class Parenthesis(TokenList):
557 |     """Tokens between parentheses."""
558 |     __slots__ = ('value', 'ttype', 'tokens')
559 | 
560 |     @property
561 |     def _groupable_tokens(self):
562 |         return self.tokens[1:-1]
563 | 
564 | 
565 | class SquareBrackets(TokenList):
566 |     """Tokens between square brackets"""
567 | 
568 |     __slots__ = ('value', 'ttype', 'tokens')
569 | 
570 |     @property
571 |     def _groupable_tokens(self):
572 |         return self.tokens[1:-1]
573 | 
574 | class Assignment(TokenList):
575 |     """An assignment like 'var := val;'"""
576 |     __slots__ = ('value', 'ttype', 'tokens')
577 | 
578 | 
579 | class If(TokenList):
580 |     """An 'if' clause with possible 'else if' or 'else' parts."""
581 |     __slots__ = ('value', 'ttype', 'tokens')
582 | 
583 | 
584 | class For(TokenList):
585 |     """A 'FOR' loop."""
586 |     __slots__ = ('value', 'ttype', 'tokens')
587 | 
588 | 
589 | class Comparison(TokenList):
590 |     """A comparison, used for example in WHERE clauses."""
591 |     __slots__ = ('value', 'ttype', 'tokens')
592 | 
593 |     @property
594 |     def left(self):
595 |         return self.tokens[0]
596 | 
597 |     @property
598 |     def right(self):
599 |         return self.tokens[-1]
600 | 
601 | 
602 | class Comment(TokenList):
603 |     """A comment."""
604 |     __slots__ = ('value', 'ttype', 'tokens')
605 | 
606 |     def is_multiline(self):
607 |         return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
608 | 
609 | 
610 | class Where(TokenList):
611 |     """A WHERE clause."""
612 |     __slots__ = ('value', 'ttype', 'tokens')
613 | 
614 | 
615 | class Case(TokenList):
616 |     """A CASE statement with one or more WHEN and possibly an ELSE part."""
617 | 
618 |     __slots__ = ('value', 'ttype', 'tokens')
619 | 
620 |     def get_cases(self):
621 |         """Returns a list of 2-tuples (condition, value).
622 | 
623 |         If an ELSE exists, condition is None.
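        Sketch of the returned shape for ``CASE WHEN x = 1 THEN 'a' ELSE 'b' END``::

            [([tokens of "WHEN x = 1"], [tokens of "THEN 'a'"]),
             (None,                     [tokens of "ELSE 'b'"])]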
624 |         """
625 |         CONDITION = 1
626 |         VALUE = 2
627 | 
628 |         ret = []
629 |         mode = CONDITION
630 | 
631 |         for token in self.tokens:
632 |             # Set mode from the current statement
633 |             if token.match(T.Keyword, 'CASE'):
634 |                 continue
635 | 
636 |             elif token.match(T.Keyword, 'WHEN'):
637 |                 ret.append(([], []))
638 |                 mode = CONDITION
639 | 
640 |             elif token.match(T.Keyword, 'THEN'):
641 |                 mode = VALUE
642 | 
643 |             elif token.match(T.Keyword, 'ELSE'):
644 |                 ret.append((None, []))
645 |                 mode = VALUE
646 | 
647 |             elif token.match(T.Keyword, 'END'):
648 |                 mode = None
649 | 
650 |             # First condition without preceding WHEN
651 |             if mode and not ret:
652 |                 ret.append(([], []))
653 | 
654 |             # Append the token depending on the current mode
655 |             if mode == CONDITION:
656 |                 ret[-1][0].append(token)
657 | 
658 |             elif mode == VALUE:
659 |                 ret[-1][1].append(token)
660 | 
661 |         # Return cases list
662 |         return ret
663 | 
664 | 
665 | class Function(TokenList):
666 |     """A function or procedure call."""
667 | 
668 |     __slots__ = ('value', 'ttype', 'tokens')
669 | 
670 |     def get_parameters(self):
671 |         """Return a list of parameters."""
672 |         parenthesis = self.tokens[-1]
673 |         for t in parenthesis.tokens:
674 |             if isinstance(t, IdentifierList):
675 |                 return t.get_identifiers()
676 |             elif isinstance(t, Identifier) or \
677 |                  isinstance(t, Function) or \
678 |                  t.ttype in T.Literal:
679 |                 return [t,]
680 |         return []
681 | 
682 | 
683 | class Begin(TokenList):
684 |     """A BEGIN/END block."""
685 | 
686 |     __slots__ = ('value', 'ttype', 'tokens')
687 | 
--------------------------------------------------------------------------------
/sqlparse/tokens.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 | 
6 | # The Token implementation is based on pygment's token system written
7 | # by Georg Brandl.
8 | # http://pygments.org/
9 | 
10 | """Tokens"""
11 | 
12 | 
13 | class _TokenType(tuple):
14 |     parent = None
15 | 
16 |     def split(self):
17 |         buf = []
18 |         node = self
19 |         while node is not None:
20 |             buf.append(node)
21 |             node = node.parent
22 |         buf.reverse()
23 |         return buf
24 | 
25 |     def __contains__(self, val):
26 |         return val is not None and (self is val or val[:len(self)] == self)
27 | 
28 |     def __getattr__(self, val):
29 |         if not val or not val[0].isupper():
30 |             return tuple.__getattribute__(self, val)
31 |         new = _TokenType(self + (val,))
32 |         setattr(self, val, new)
33 |         new.parent = self
34 |         return new
35 | 
36 |     def __hash__(self):
37 |         return hash(tuple(self))
38 | 
39 |     def __repr__(self):
40 |         return 'Token' + (self and '.' or '') + '.'.join(self)
41 | 
42 | 
43 | Token = _TokenType()
44 | 
45 | # Special token types
46 | Text = Token.Text
47 | Whitespace = Text.Whitespace
48 | Newline = Whitespace.Newline
49 | Error = Token.Error
50 | # Text that doesn't belong to this lexer (e.g. HTML in PHP)
51 | Other = Token.Other
52 | 
53 | # Common token types for source code
54 | Keyword = Token.Keyword
55 | Name = Token.Name
56 | Literal = Token.Literal
57 | String = Literal.String
58 | Number = Literal.Number
59 | Punctuation = Token.Punctuation
60 | Operator = Token.Operator
61 | Comparison = Operator.Comparison
62 | Wildcard = Token.Wildcard
63 | Comment = Token.Comment
64 | Assignment = Token.Assignement
65 | 
66 | # Generic types for non-source code
67 | Generic = Token.Generic
68 | 
69 | # String and some others are not direct children of Token.
70 | # alias them:
71 | Token.Token = Token
72 | Token.String = String
73 | Token.Number = Number
74 | 
75 | # SQL specific tokens
76 | DML = Keyword.DML
77 | DDL = Keyword.DDL
78 | Command = Keyword.Command
79 | 
80 | Group = Token.Group
81 | Group.Parenthesis = Token.Group.Parenthesis
82 | Group.Comment = Token.Group.Comment
83 | Group.Where = Token.Group.Where
84 | 
--------------------------------------------------------------------------------
/sqlparse/utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 17/05/2012
3 | 
4 | @author: piranna
5 | '''
6 | 
7 | import re
8 | 
9 | try:
10 |     from collections import OrderedDict
11 | except ImportError:
12 |     OrderedDict = None
13 | 
14 | 
15 | if OrderedDict:
16 |     class Cache(OrderedDict):
17 |         """Cache with LRU algorithm using an OrderedDict as basis
18 |         """
19 |         def __init__(self, maxsize=100):
20 |             OrderedDict.__init__(self)
21 | 
22 |             self._maxsize = maxsize
23 | 
24 |         def __getitem__(self, key, *args, **kwargs):
25 |             # Get the key and remove it from the cache, or raise KeyError
26 |             value = OrderedDict.__getitem__(self, key)
27 |             del self[key]
28 | 
29 |             # Insert the (key, value) pair on the front of the cache
30 |             OrderedDict.__setitem__(self, key, value)
31 | 
32 |             # Return the value from the cache
33 |             return value
34 | 
35 |         def __setitem__(self, key, value, *args, **kwargs):
36 |             # Key was inserted before, remove it so we put it at front later
37 |             if key in self:
38 |                 del self[key]
39 | 
40 |             # Too many items in the cache, remove the least recently used
41 |             elif len(self) >= self._maxsize:
42 |                 self.popitem(False)
43 | 
44 |             # Insert the (key, value) pair on the front of the cache
45 |             OrderedDict.__setitem__(self, key, value, *args, **kwargs)
46 | 
47 | else:
48 |     class Cache(dict):
49 |         """Cache that resets when it gets full
50 |         """
51 |         def __init__(self, maxsize=100):
52 |             dict.__init__(self)
53 | 
54 |             self._maxsize = maxsize
55 | 
56 |         def __setitem__(self, key, value, *args, **kwargs):
57 |             # Reset the cache if we have too many cached entries and start over
58 |             if len(self) >= self._maxsize:
59 |                 self.clear()
60 | 
61 |             # Insert the (key, value) pair on the front of the cache
62 |             dict.__setitem__(self, key, value, *args, **kwargs)
63 | 
64 | 
65 | def memoize_generator(func):
66 |     """Memoize decorator for generators
67 | 
68 |     Store `func` results in a cache according to their arguments, as
69 |     'memoize' does, but this works on generators instead of regular
70 |     functions. Obviously, this is only useful if the generator will
71 |     always return the same values for each specific set of parameters...
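    Usage sketch (illustrative)::

        @memoize_generator
        def squares(n):
            for i in range(n):
                yield i * i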
72 | """ 73 | cache = Cache() 74 | 75 | def wrapped_func(*args, **kwargs): 76 | # params = (args, kwargs) 77 | params = (args, tuple(sorted(kwargs.items()))) 78 | 79 | # Look if cached 80 | try: 81 | cached = cache[params] 82 | 83 | # Not cached, exec and store it 84 | except KeyError: 85 | cached = [] 86 | 87 | for item in func(*args, **kwargs): 88 | cached.append(item) 89 | yield item 90 | 91 | cache[params] = cached 92 | 93 | # Cached, yield its items 94 | else: 95 | for item in cached: 96 | yield item 97 | 98 | return wrapped_func 99 | 100 | 101 | # This regular expression replaces the home-cooked parser that was here before. 102 | # It is much faster, but requires an extra post-processing step to get the 103 | # desired results (that are compatible with what you would expect from the 104 | # str.splitlines() method). 105 | # 106 | # It matches groups of characters: newlines, quoted strings, or unquoted text, 107 | # and splits on that basis. The post-processing step puts those back together 108 | # into the actual lines of SQL. 109 | SPLIT_REGEX = re.compile(r""" 110 | ( 111 | (?: # Start of non-capturing group 112 | (?:\r\n|\r|\n) | # Match any single newline, or 113 | [^\r\n'"]+ | # Match any character series without quotes or 114 | # newlines, or 115 | "(?:[^"\\]|\\.)*" | # Match double-quoted strings, or 116 | '(?:[^'\\]|\\.)*' # Match single quoted strings 117 | ) 118 | ) 119 | """, re.VERBOSE) 120 | 121 | LINE_MATCH = re.compile(r'(\r\n|\r|\n)') 122 | 123 | def split_unquoted_newlines(text): 124 | """Split a string on all unquoted newlines. 125 | 126 | Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite 127 | character is inside of a string.""" 128 | lines = SPLIT_REGEX.split(text) 129 | outputlines = [''] 130 | for line in lines: 131 | if not line: 132 | continue 133 | elif LINE_MATCH.match(line): 134 | outputlines.append('') 135 | else: 136 | outputlines[-1] += line 137 | return outputlines -------------------------------------------------------------------------------- /uroborosqlfmt/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | ''' 3 | uroboroSQL formatter. 
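Usage sketch (illustrative; ``set_case`` is the fluent setter defined in
uroborosqlfmt.config.LocalConfig):

    from uroborosqlfmt import format_sql
    from uroborosqlfmt.config import LocalConfig

    print(format_sql("select id,name from users where id=1",
                     LocalConfig().set_case("upper")))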
4 | @author: ota
5 | '''
6 | 
7 | 
8 | __version__ = '0.0.1'
9 | 
10 | import sys
11 | import re
12 | from threading import Thread, Lock
13 | import sqlparse
14 | from sqlparse.lexer import Lexer
15 | from sqlparse import tokens as T, utils
16 | from uroborosqlfmt import filters, config
17 | 
18 | LOCK = Lock()
19 | 
20 | def format_sql(sql, local_config = config.LocalConfig()):
21 | 
22 |     LOCK.acquire()
23 |     try:
24 |         if not config.glb.escape_sequence_u005c:
25 |             # Databases such as Oracle tokenize literals differently, so swap the rule
26 |             for i, data in enumerate(Lexer.tokens["root"]):
27 |                 single = getattr(T.String, "Single")
28 |                 if data[0] == r"'(''|\\\\|\\'|[^'])*'" :
29 |                     if data[1] == single:
30 |                         Lexer.tokens["root"][i] = (r"'(''|[^'])*'", single)
31 | 
32 |                         # re-initialize
33 |                         if hasattr(Lexer, "_tokens"):
34 |                             delattr(Lexer, "_tokens")
35 |                         if hasattr(Lexer, "token_variants"):
36 |                             delattr(Lexer, "token_variants")
37 |                         break
38 |             utils.SPLIT_REGEX = re.compile(r"""
39 |             (
40 |              (?:                     # Start of non-capturing group
41 |               (?:\r\n|\r|\n)      |  # Match any single newline, or
42 |               [^\r\n'"]+          |  # Match any character series without quotes or
43 |                                      # newlines, or
44 |               "(?:[^"]|\\.)*"     |  # Match double-quoted strings, or
45 |               '(?:[^']|\\.)*'        # Match single quoted strings
46 |              )
47 |             )
48 |             """, re.VERBOSE)
49 |         else:
50 |             # restore the original rule
51 |             for i, data in enumerate(Lexer.tokens["root"]):
52 |                 single = getattr(T.String, "Single")
53 |                 if data[0] == r"'(''|[^'])*'" :
54 |                     if data[1] == single:
55 |                         Lexer.tokens["root"][i] = (r"'(''|\\\\|\\'|[^'])*'" , single)
56 | 
57 |                         # re-initialize
58 |                         if hasattr(Lexer, "_tokens"):
59 |                             delattr(Lexer, "_tokens")
60 |                         if hasattr(Lexer, "token_variants"):
61 |                             delattr(Lexer, "token_variants")
62 |                         break
63 |             utils.SPLIT_REGEX = re.compile(r"""
64 |             (
65 |              (?:                     # Start of non-capturing group
66 |               (?:\r\n|\r|\n)      |  # Match any single newline, or
67 |               [^\r\n'"]+          |  # Match any character series without quotes or
68 |                                      # newlines, or
69 |               "(?:[^"\\]|\\.)*"   |  # Match double-quoted strings, or
70 |               '(?:[^'\\]|\\.)*'      # Match single quoted strings
71 |              )
72 |             )
73 |             """, re.VERBOSE)
74 |     finally:
75 |         LOCK.release()
76 | 
77 |     stack = sqlparse.engine.FilterStack()
78 |     stack.enable_grouping()
79 | 
80 |     if local_config.case is not None:
81 |         stack.preprocess.append(sqlparse.filters.KeywordCaseFilter(local_config.case))
82 |         stack.preprocess.append(sqlparse.filters.IdentifierCaseFilter(local_config.case))
83 | 
84 |     if local_config.reserved_case is not None:
85 |         stack.preprocess.append(filters.ReservedWordCaseFilter(local_config))
86 | 
87 |     stack.stmtprocess.append(filters.GroupFilter())
88 |     stack.stmtprocess.append(filters.LineDescriptionLineCommentFilter(local_config))
89 |     stack.stmtprocess.append(filters.MoveCommaFilter(local_config))
90 |     stack.stmtprocess.append(filters.StripWhitespaceAndToTabFilter())
91 |     stack.stmtprocess.append(filters.AdjustGroupFilter(local_config))
92 | 
93 |     stack.stmtprocess.append(filters.OperatorFilter())
94 | 
95 |     stack.stmtprocess.append(filters.CustomReindentFilter(local_config))
96 |     stack.postprocess.append(sqlparse.filters.SerializerUnicode())
97 | 
98 |     if sys.version_info[0] < 3 and isinstance(sql, unicode):
99 |         sql = sql.encode("utf-8")
100 |         formatted = "\n".join(stack.run(sql))
101 |         return formatted.decode("utf-8")
102 |     else:
103 |         return "\n".join(stack.run(sql))
104 | 
--------------------------------------------------------------------------------
/uroborosqlfmt/api.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | '''
3 | uroboroSQL formatter API.
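Programmatic usage sketch (illustrative; paths are placeholders):

    from uroborosqlfmt import api
    from uroborosqlfmt.config import LocalConfig

    api.format_file("in.sql", "out_dir", LocalConfig())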
4 | @author: ota
5 | '''
6 | 
7 | import os
8 | import traceback
9 | import codecs
10 | import sys
11 | import argparse
12 | import uroborosqlfmt
13 | from uroborosqlfmt import config
14 | from uroborosqlfmt import commentsyntax
15 | from uroborosqlfmt.exceptions import SqlFormatterException
16 | from uroborosqlfmt.config import LocalConfig
17 | 
18 | # pylint: disable=bare-except
19 | 
20 | 
21 | def format_dir(indir, outdir, local_config):
22 |     """
23 |     Format the SQL files in the folder [indir] and write them to the given folder [outdir].
24 |     """
25 |     if indir.endswith("/") or indir.endswith("\\"):
26 |         indir = indir[:-1]
27 |     if outdir.endswith("/") or outdir.endswith("\\"):
28 |         outdir = outdir[:-1]
29 | 
30 |     for file_name, full_path in find_all_sql_files(indir):
31 |         try:
32 |             sql = __read_file(full_path)
33 |         except:
34 |             print(full_path)
35 |             print(traceback.format_exc())
36 |             continue
37 |         error = False
38 |         try:
39 |             out_sql = uroborosqlfmt.format_sql(sql, local_config)
40 |         except SqlFormatterException as ex:
41 |             exs = __decode(str(ex))
42 |             trace = __decode(traceback.format_exc())
43 |             out_sql = sql + "\n/*" + exs + "\n" + trace + "\n*/"
44 |             error = True
45 |         except:
46 |             trace = __decode(traceback.format_exc())
47 |             out_sql = sql + "\n/*" + trace + "\n*/"
48 |             error = True
49 | 
50 |         if not error:
51 |             out_path = os.path.join(outdir, file_name)
52 |         else:
53 |             out_path = os.path.join(outdir, "formaterror_" + file_name)
54 |         out_dir = os.path.dirname(out_path)
55 |         if not os.path.exists(out_dir):
56 |             os.makedirs(out_dir)
57 |         __write_file(out_path, out_sql)
58 | 
59 | 
60 | def format_file(infile, outdir, local_config):
61 |     """
62 |     Format the SQL file [infile] and write the result to the given folder [outdir].
63 |     """
64 |     try:
65 |         sql = __read_file(infile)
66 |     except:
67 |         print(infile)
68 |         print(traceback.format_exc())
69 |         return
70 |     error = False
71 | 
72 |     try:
73 |         out_sql = uroborosqlfmt.format_sql(sql, local_config)
74 |     except SqlFormatterException as ex:
75 |         exs = __decode(str(ex))
76 |         trace = __decode(traceback.format_exc())
77 |         out_sql = sql + "\n/*" + exs + "\n" + trace + "\n*/"
78 |         error = True
79 |     except:
80 |         trace = __decode(traceback.format_exc())
81 |         out_sql = sql + "\n/*" + trace + "\n*/"
82 |         error = True
83 | 
84 |     file_name = os.path.basename(infile)
85 |     if not error:
86 |         out_path = os.path.join(outdir, file_name)
87 |     else:
88 |         out_path = os.path.join(outdir, "formaterror_" + file_name)
89 |     out_dir = os.path.dirname(out_path)
90 |     if not os.path.exists(out_dir):
91 |         os.makedirs(out_dir)
92 |     __write_file(out_path, out_sql)
93 | 
94 | 
95 | def find_all_sql_files(directory):
96 |     for root, _, files in os.walk(directory):
97 |         for file_name in files:
98 |             if os.path.splitext(file_name)[1].lower() == ".sql":
99 |                 path = os.path.join(root, file_name)
100 |                 yield path[len(directory) + 1:], path
101 | 
102 | 
103 | def __read_file(path):
104 |     target_file = codecs.open(path, "r", "utf-8")
105 |     ret = target_file.read()
106 |     target_file.close()
107 |     return ret
108 | 
109 | 
110 | def __write_file(path, value):
111 |     target_file = codecs.open(path, "w", "utf-8")
112 |     target_file.write(value)
113 |     target_file.close()
114 | 
115 | 
116 | def __decode(text):
117 |     text = str(text)
118 |     if sys.version_info[0] < 3:
119 |         return text.decode("utf-8")
120 |     else:
121 |         return text
122 | 
123 | def _parse_args(test_args=None):
124 |     parser = argparse.ArgumentParser(description='uroboroSQL formatter API', prog='usqlfmt')
125 | 
126 |     parser.add_argument('-v', '--version', action='version', version='%(prog)s 0.1.0')
127 |     parser.add_argument('input_path', \
128 |                         action='store', \
129 |                         type=str, \
130 |                         help='input directory path or input file path', \
131 |                         )
132 |     parser.add_argument('output_path', \
133 |                         action='store', \
134 |                         type=str, \
135 |                         help='output path for formatted file(s).', \
136 |                         )
137 |     parser.add_argument('-m', '--mode', \
138 |                         action='store', \
139 |                         default='file', \
140 |                         type=str, \
141 |                         choices=['file', 'directory'], \
142 |                         help='format target. default "file"', \
143 |                         )
144 |     parser.add_argument('-a', '--case', \
145 |                         action='store', \
146 |                         default=None, \
147 |                         type=str, \
148 |                         choices=['upper', 'lower', 'capitalize'], \
149 |                         )
150 |     parser.add_argument('-e', '--reserved_case', \
151 |                         action='store', \
152 |                         default=None, \
153 |                         type=str, \
154 |                         choices=['upper', 'lower', 'capitalize'], \
155 |                         )
156 |     parser.add_argument('-B', '--escapesequence_u005c', \
157 |                         action='store_true', \
158 |                         help='use backslash escapesequence.', \
159 |                         )
160 |     parser.add_argument('-c', '--comment_syntax', \
161 |                         action='store', \
162 |                         default='uroborosql', \
163 |                         type=str, \
164 |                         choices=['uroborosql', 'doma2', 'uroboro', 'doma'], \
165 |                         help='SQL comment out syntax type.', \
166 |                         )
167 |     parser.add_argument('-r', '--reserved_words_file_path', \
168 |                         action='store', \
169 |                         default=None, \
170 |                         type=str, \
171 |                         help='input reserved words file path.', \
172 |                         )
173 | 
174 |     return parser.parse_args(test_args)
175 | 
176 | 
177 | def __execute():
178 | 
179 |     """
180 |     Check the arguments.
181 |     If the number of arguments does not match the specified number,
182 |     quit the application with usage messages.
183 |     """
184 |     args = _parse_args()
185 | 
186 |     mode = args.mode
187 |     case = args.case
188 |     reserved_case = args.reserved_case
189 |     escapesequence_u005c = args.escapesequence_u005c
190 |     comment_syntax = args.comment_syntax
191 |     input_path = args.input_path
192 |     output_path = args.output_path
193 |     reserved_words_file_path = args.reserved_words_file_path
194 |     local_config = LocalConfig()
195 | 
196 |     set_case(local_config, case)
197 |     set_reserved_case(local_config, reserved_case)
198 |     set_escapesequence_u005c(escapesequence_u005c)
199 |     set_comment_syntax(local_config, comment_syntax)
200 |     set_reserved_words(local_config, reserved_words_file_path)
201 | 
202 |     """
203 |     The application requires either "file" or "directory"
204 |     as a command line argument. If the argument does not match them,
205 |     quit the application with usage messages.
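    An illustrative invocation (paths are placeholders):

        usqlfmt path/to/query.sql out_dir -m file -a upper -e upper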
206 |     """
207 |     if mode == "file":
208 |         format_file(input_path, output_path, local_config)
209 |     elif mode == "directory":
210 |         format_dir(input_path, output_path, local_config)
211 |     else:
212 |         sys.exit()
213 | 
214 |     print("\n===== Finish the application =====")
215 |     print("Output directory path : %s" % output_path)
216 |     print("===== End =====")
217 | 
218 | 
219 | def set_case(local_config, case):
220 |     local_config.set_case(case)
221 | 
222 | 
223 | def set_reserved_case(local_config, reserved_case):
224 |     if local_config.case is None and reserved_case is not None:
225 |         print("You have to set the [case(-a)] option "\
226 |               "when you set the [reserved_case(-e)] option.\n" \
227 |               "The application does not accept the reserved_case option individually.")
228 |         sys.exit("Application quitting...")
229 |     else:
230 |         local_config.set_reserved_case(reserved_case)
231 | 
232 | 
233 | def set_escapesequence_u005c(escapesequence_u005c):
234 |     config.glb.escape_sequence_u005c = escapesequence_u005c
235 | 
236 | 
237 | def set_comment_syntax(local_config, comment_syntax):
238 |     if comment_syntax == "uroborosql" or comment_syntax == "uroboro":
239 |         local_config.set_commentsyntax(commentsyntax.UroboroSqlCommentSyntax())
240 |     elif comment_syntax == "doma2" or comment_syntax == "doma":
241 |         local_config.set_commentsyntax(commentsyntax.Doma2CommentSyntax())
242 |     else:
243 |         sys.exit()
244 | 
245 | 
246 | def set_reserved_words(local_config, reserved_words_file):
247 |     if reserved_words_file is not None:
248 |         try:
249 |             f = open(reserved_words_file, 'r')
250 |             lines = f.readlines()
251 |             f.close()
252 |         except IOError:
253 |             print("File I/O error: %s" % reserved_words_file)
254 |             print("Please check the file path.")
255 |             sys.exit("Application quitting...")
256 |         except:
257 |             print("Unexpected error:", sys.exc_info()[0])
258 |             sys.exit("Application quitting...")
259 |         else:
260 |             reserved_words = []
261 | 
262 |             for line in lines:
263 |                 reserved_words.append(line.rstrip('\n').lower())  # strip the newline character
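            # Illustrative input: the reserved words file is expected to hold
            # one word per line, in any case (lower-cased by the line above), e.g.
            #     SELECT
            #     FROM
            #     WHERE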
264 | 
265 |         local_config.set_input_reserved_words(reserved_words)
266 | 
267 | 
268 | if __name__ == "__main__":
269 |     __execute()
270 | 
--------------------------------------------------------------------------------
/uroborosqlfmt/commentsyntax.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | '''
3 | @author: ota
4 | '''
5 | import re
6 | from sqlparse import tokens as T
7 | from uroborosqlfmt.tokenutils import EngineComment
8 | from uroborosqlfmt import tokenutils as tu
9 | 
10 | 
11 | # pylint: disable=unused-argument
12 | class CommentSyntax(object):
13 |     def __init__(self):
14 |         pass
15 |     def get_block_comment_type(self, token):
16 |         return EngineComment.none
17 | 
18 |     def get_line_comment_type(self, token):
19 |         return EngineComment.none
20 | # pylint: enable=unused-argument
21 | 
22 | class UroboroSqlCommentSyntax(CommentSyntax):
23 | 
24 |     def get_block_comment_type(self, token):
25 |         tokens = token.tokens
26 |         if len(tokens) >= 3 :
27 |             comment = tokens[1].value
28 |             if comment.strip() == "_SQL_ID_" or comment.strip() == "_SQL_IDENTIFIER_":
29 |                 return EngineComment.sql_identifier  # _SQL_IDENTIFIER_
30 |             if tu.startswith_ignore_case(comment, ("IF", "ELIF", "ELSE", "END", "BEGIN")):
31 |                 return EngineComment.syntax
32 |             if comment.strip() == comment and (not comment.startswith("*")) and (not comment.startswith("+")):
33 |                 return EngineComment.param  # param
34 |         return EngineComment.none
35 | 
36 |     def get_line_comment_type(self, token):
37 |         cmm = token.token_next_by_type(0, T.Comment)
38 |         comment = cmm.value[2:]
39 |         if tu.startswith_ignore_case(comment.strip(), "ELSE"):
40 |             return EngineComment.syntax
41 |         return EngineComment.none
42 | 
43 | 
44 | class Doma2CommentSyntax(CommentSyntax):
45 | 
46 |     def get_block_comment_type(self, token):
47 |         tokens = token.tokens
48 |         if len(tokens) >= 3 :
49 |             comment = tokens[1].value
50 |             first_char = comment[0]
51 |             if first_char == " " or \
52 |                re.compile("[a-z]", re.IGNORECASE).match(first_char) or \
53 |                first_char == "^" or \
54 |                first_char == "$" or \
55 |                first_char == "#" or \
56 |                first_char == "@" or \
57 |                first_char == '"' or \
58 |                first_char == "'":
59 |                 return EngineComment.param
60 |             if first_char == "%":
61 |                 if tu.startswith_ignore_case(comment[1:], "expand"):
62 |                     return EngineComment.param
63 |                 return EngineComment.syntax
64 |         return EngineComment.none
65 | 
66 | # /* ~*/ ...-- the 3rd character is a space, so this is an expression comment.
67 | # /*a~*/ ...-- the 3rd character can start a Java identifier, so this is an expression comment.
68 | # /*$~*/ ...-- the 3rd character can start a Java identifier, so this is an expression comment.
69 | # /*%~*/ ...-- the 3rd character is "%", which starts a condition comment or a loop comment, so this is an expression comment.
70 | # /*#~*/ ...-- the 3rd character is "#", which marks an embedded-variable comment, so this is an expression comment.
71 | # /*@~*/ ...-- the 3rd character is "@", which marks a built-in function or a class name, so this is an expression comment.
72 | # /*"~*/ ...-- the 3rd character is '"', the quote of a string literal, so this is an expression comment.
73 | # /*'~*/ ...-- the 3rd character is "'", the quote of a character literal, so this is an expression comment.
--------------------------------------------------------------------------------
/uroborosqlfmt/config.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | '''
3 | @author: ota
4 | '''
5 | from uroborosqlfmt.commentsyntax import UroboroSqlCommentSyntax
6 | 
7 | 
8 | class _GlobalConfig(object):
9 |     def __init__(self):
10 |         self.escape_sequence_u005c = False  # escape sequences using a backslash
11 | 
12 |     def set_escape_sequence_u005c(self, escape_sequence):
13 |         self.escape_sequence_u005c = escape_sequence
14 |         return self
15 | 
16 | 
17 | class LocalConfig(object):
18 | 
19 |     __slots__ = ('comment_syntax', 'case', 'reserved_case', 'input_reserved_words')
20 | 
21 |     def __init__(self):
22 |         self.comment_syntax = UroboroSqlCommentSyntax()
23 |         self.case = None
24 |         self.reserved_case = None
25 |         self.input_reserved_words = None
26 | 
27 |     def set_commentsyntax(self, comment_syntax):
28 |         self.comment_syntax = comment_syntax
29 |         return self
30 | 
31 |     def set_case(self, case):
32 |         self.case = case
33 |         return self
34 | 
35 |     def set_reserved_case(self, reserved_case):
36 |         self.reserved_case = reserved_case
37 |         return self
38 | 
39 |     def set_input_reserved_words(self, input_reserved_words):
40 |         self.input_reserved_words = input_reserved_words
41 |         return self
42 | 
43 | 
44 | # Global configuration
45 | glb = _GlobalConfig()  # pylint: disable=invalid-name
46 | 
--------------------------------------------------------------------------------
/uroborosqlfmt/exceptions.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | '''
3 | Created on 2016/07/09
4 | 
5 | @author: ota
6 | '''
7 | import sys
8 | import traceback
9 | 
10 | class SqlFormatterException(Exception):
11 |     '''
12 |     Exception class for SqlFormatter
13 |     '''
14 | 
15 |     def __init__(self, tlist, ex, trace):
16 |         super(SqlFormatterException, self).__init__(ex.message if hasattr(ex, "message") else "")
17 |         self.tlist = self.__decode(tlist)
18 |         self.e = ex
19 |         self.trace = self.__decode(trace)
20 |         self.message = ex.message if hasattr(ex, "message") else ""
21 | 
22 |     def __decode(self, text):
23 |         text = str(text)
24 |         if sys.version_info[0] < 3:
25 |             return text.decode("utf-8")
26 |         else:
27 |             return text
28 | 
29 |     def __encode(self, text):
30 |         if sys.version_info[0] < 3 and isinstance(text, unicode):
31 |             return text.encode("utf-8")
32 |         else:
33 |             return text
34 | 
35 |     def __str__(self, *args):
36 |         return self.message \
37 |             + "\ntoken:" + self.__encode(self.tlist) \
38 |             + "\ntrace:" + self.__encode(self.trace) \
39 |             + "\noriginal:" + str(self.e)
40 | 
41 |     @staticmethod
42 |     def wrap_try_except(fnc, token, *args):
43 |         try:
44 |             if args:
45 |                 return fnc(*args)
46 |             else:
47 |                 return fnc(token)
48 |         except Exception as ex:
49 |             if not isinstance(ex, SqlFormatterException):
50 |                 raise SqlFormatterException(token, ex, traceback.format_exc())
51 |             raise
52 | 
53 |     @staticmethod
54 |     def to_wrap_try_except(fnc, token_arg_index):
55 |         def call(*args):
56 |             try:
57 |                 return fnc(*args)
58 |             except Exception as ex:
59 |                 if not isinstance(ex, SqlFormatterException):
60 |                     raise SqlFormatterException(args[token_arg_index], ex, traceback.format_exc())
61 |                 raise
62 |         return call
63 | 
--------------------------------------------------------------------------------
/uroborosqlfmt/sql.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | '''
3 | @author: ota
4 | '''
5 | from sqlparse import sql, tokens as T
6 | from uroborosqlfmt import tokenutils as tu
7 | 
8 | class Having(sql.TokenList):
9 | 
10 |     __slots__ = ('value', 'ttype', 'tokens')
11 | 
12 | class When(sql.TokenList):
13 | 
14 |     __slots__ = ('value', 'ttype', 'tokens')
15 | 
16 | class On(sql.TokenList):
17 | 
18 |     __slots__ = ('value', 'ttype', 'tokens')
19 | 
20 | class _BaseWords(sql.TokenList):
21 | 
22 |     def __init__(self, tokens=None):
23 |         super(_BaseWords, self).__init__(tokens=tokens)
24 |         self.__target_tokens = None
25 | 
26 |     def _setupinit(self, target_tokens):
27 |         self.__target_tokens = target_tokens
28 | 
29 |     def _token_word(self, hit_index):
30 |         return self.__target_tokens[hit_index]
31 | 
32 |     def tokens_words(self):
33 |         start = self._token_word(0)
--------------------------------------------------------------------------------
/uroborosqlfmt/sql.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | '''
3 | @author: ota
4 | '''
5 | from sqlparse import sql, tokens as T
6 | from uroborosqlfmt import tokenutils as tu
7 | 
8 | class Having(sql.TokenList):
9 | 
10 |     __slots__ = ('value', 'ttype', 'tokens')
11 | 
12 | class When(sql.TokenList):
13 | 
14 |     __slots__ = ('value', 'ttype', 'tokens')
15 | 
16 | class On(sql.TokenList):
17 | 
18 |     __slots__ = ('value', 'ttype', 'tokens')
19 | 
20 | class _BaseWords(sql.TokenList):
21 | 
22 |     def __init__(self, tokens=None):
23 |         super(_BaseWords, self).__init__(tokens=tokens)
24 |         self.__target_tokens = None
25 | 
26 |     def _setupinit(self, target_tokens):
27 |         self.__target_tokens = target_tokens
28 | 
29 |     def _token_word(self, hit_index):
30 |         return self.__target_tokens[hit_index]
31 | 
32 |     def tokens_words(self):
33 |         start = self._token_word(0)
34 |         end = tu.token_prev_enable(self)
35 |         return self.tokens_between(start, end)
36 | 
37 |     def get_target_tokens(self):
38 |         return self.__target_tokens[:]
39 | 
40 | class WithinGroupFunctions(_BaseWords):
41 |     """
42 |     LISTAGG ( expr [, delimiter] ) WITHIN GROUP ( order_by )
43 |     and other functions followed by WITHIN GROUP
44 |     """
45 | 
46 |     __slots__ = ('value', 'ttype', 'tokens', '_main_function', '_within', '_group')
47 | 
48 |     def __init__(self, tokens=None):
49 |         super(WithinGroupFunctions, self).__init__(tokens=tokens)
50 |         self._main_function = None
51 |         self._within = None
52 |         self._group = None
53 | 
54 |     def get_main_function(self):
55 |         return self._main_function
56 | 
57 |     def get_within(self):
58 |         return self._within
59 | 
60 |     def get_group(self):
61 |         return self._group
62 | 
63 | class Phrase(_BaseWords):
64 |     """
65 |     Phrases such as ORDER BY and GROUP BY
66 |     """
67 | 
68 |     __slots__ = ('value', 'ttype', 'tokens')
69 | 
70 | 
71 |     def match_phrase(self, values):
72 |         tokens = self.get_target_tokens()
73 |         if len(tokens) != len(values):
74 |             return False
75 | 
76 |         for i, token in enumerate(tokens):
77 |             if not tu.equals_ignore_case(token.value, values[i]):
78 |                 return False
79 | 
80 |         return True
81 | 
82 | class AscDesc(_BaseWords):
83 |     """
84 |     Keywords such as ASC / DESC
85 |     """
86 | 
87 |     __slots__ = ('value', 'ttype', 'tokens')
88 | 
89 | class OffsetFetch(_BaseWords):
90 |     """
91 |     OFFSET and FETCH clauses
92 |     """
93 | 
94 |     __slots__ = ('value', 'ttype', 'tokens')
95 | 
96 | class LimitOffset(_BaseWords):
97 |     """
98 |     LIMIT / OFFSET clauses
99 |     """
100 | 
101 |     __slots__ = ('value', 'ttype', 'tokens')
102 | 
103 | class OverFunctions(_BaseWords):
104 |     """
105 |     Functions with an OVER clause, such as ROW_NUMBER
106 |     """
107 | 
108 |     __slots__ = ('value', 'ttype', 'tokens')
109 | 
110 |     def get_main_function(self):
111 |         return self._token_word(0)
112 | 
113 |     def get_over(self):
114 |         return self._token_word(1)
115 | 
116 | class KeepFunctions(_BaseWords):
117 |     """
118 |     Functions with a KEEP clause
119 |     """
120 | 
121 |     __slots__ = ('value', 'ttype', 'tokens')
122 | 
123 | 
124 |     def get_main_function(self):
125 |         return self._token_word(0)
126 | 
127 |     def get_keep(self):
128 |         return self._token_word(1)
129 | 
130 | class ForUpdate(_BaseWords):
131 |     """
132 |     FOR UPDATE
133 |     """
134 | 
135 |     __slots__ = ('value', 'ttype', 'tokens')
136 | 
137 |     def get_for(self):
138 |         return self._token_word(0)
139 | 
140 |     def get_update(self):
141 |         return self._token_word(1)
142 | 
143 |     def get_of(self):
144 |         for tkn in self.tokens:
145 |             if tu.equals_ignore_case(tkn.value, "OF"):
146 |                 return tkn
147 |         return None
148 | 
149 |     def get_wait_or_nowait(self):
150 |         tokens = self.get_target_tokens()
151 |         if len(tokens) < 3:
152 |             return None
153 | 
154 |         for tkn in tokens[::-1]:
155 |             if tu.is_waitornowait(tkn):
156 |                 return tkn
157 |         return None
158 | 
159 |     def is_in_identifier(self):
160 |         return self.get_of()  # truthy when an OF list is present
161 | 
162 | class WaitOrNowait(_BaseWords):
163 |     """
164 |     WAIT / NOWAIT
165 |     """
166 | 
167 |     __slots__ = ('value', 'ttype', 'tokens')
168 | 
169 |     def get_wait_or_nowait(self):
170 |         return self._token_word(0)
171 | 
172 | class Union(_BaseWords):
173 |     """
174 |     Set operators such as UNION
175 |     """
176 | 
177 |     __slots__ = ('value', 'ttype', 'tokens')
178 | 
179 | 
180 | class Join(_BaseWords):
181 |     """
182 |     JOIN clauses
183 |     """
184 | 
185 |     __slots__ = ('value', 'ttype', 'tokens', 'jointoken', 'identifiertoken', 'usingtoken', 'usingparenthesistoken')
186 | 
187 | class MergeWhen(_BaseWords):
188 |     """
189 |     WHEN clause of a MERGE statement
190 |     """
191 | 
192 |     __slots__ = ('value', 'ttype', 'tokens')
193 | 
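Phrase.match_phrase above compares the clause's target tokens word-for-word, case-insensitively. A minimal stand-in on plain strings (hypothetical: real callers pass sqlparse tokens, and tu.equals_ignore_case is approximated here with str.upper()):

def match_phrase_values(token_values, expected_words):
    # Mirrors Phrase.match_phrase: lengths must agree and each word
    # must match its counterpart ignoring case.
    if len(token_values) != len(expected_words):
        return False
    return all(t.upper() == e.upper()
               for t, e in zip(token_values, expected_words))

print(match_phrase_values(["order", "by"], ["ORDER", "BY"]))  # True
print(match_phrase_values(["group", "by"], ["ORDER", "BY"]))  # False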
194 | class MergeUpdateInsertClause(_BaseWords):
195 |     """
196 |     UPDATE / INSERT clauses inside a MERGE statement
197 |     """
198 | 
199 |     __slots__ = ('value', 'ttype', 'tokens')
200 | 
201 | class ConnectBy(_BaseWords):
202 |     """
203 |     Oracle CONNECT BY clause
204 |     """
205 | 
206 |     __slots__ = ('value', 'ttype', 'tokens')
207 | 
208 | class StartWith(_BaseWords):
209 |     """
210 |     Oracle START WITH clause
211 |     """
212 | 
213 |     __slots__ = ('value', 'ttype', 'tokens')
214 | 
215 | class With(_BaseWords):
216 |     """
217 |     Oracle WITH clause
218 |     """
219 | 
220 |     __slots__ = ('value', 'ttype', 'tokens')
221 | 
222 | 
223 |     def token_with(self):
224 |         return self.token_next_match(0, T.Keyword, 'WITH')
225 | 
226 | class SpecialFunctionParameter(_BaseWords):
227 |     """
228 |     Parameters of special functions such as
229 |     ANSI TRIM, Oracle TRIM,
230 |     PostgreSQL SUBSTRING,
231 |     and the like
232 |     """
233 | 
234 |     __slots__ = ('value', 'ttype', 'tokens')
235 | 
236 | class Calculation(_BaseWords):
237 |     """
238 |     Calculated expressions
239 |     """
240 | 
241 |     __slots__ = ('value', 'ttype', 'tokens')
242 | 
--------------------------------------------------------------------------------
/urobosql_formatter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 | import sublime
5 | import sublime_plugin
6 | import threading
7 | sys.path.append(os.path.dirname(__file__))
8 | import uroborosqlfmt
9 | from uroborosqlfmt.config import LocalConfig
10 | from uroborosqlfmt.commentsyntax import Doma2CommentSyntax
11 | 
12 | 
13 | class UroborosqlFormatCommand(sublime_plugin.TextCommand):
14 | 
15 |     """SQL format command class"""
16 | 
17 |     def run(self, edit, sync=False):
18 |         view = self.view
19 |         view.window().status_message('formatting...')
20 |         self.settings = view.settings()
21 |         self.user_settings = sublime.load_settings(
22 |             'sublime-uroborosql-formatter.sublime-settings')
23 |         # set syntax
24 |         if self.settings.get('syntax') == \
25 |                 "Packages/Text/Plain text.tmLanguage":
26 |             view.set_syntax_file("Packages/SQL/SQL.tmLanguage")
27 |         # settings
28 |         self.settings.set("tab_size", self.getval("uf_tab_size"))
29 |         self.settings.set("translate_tabs_to_spaces",
30 |                           self.getval("uf_translate_tabs_to_spaces"))
31 |         config = LocalConfig()
32 | 
33 |         if self.getval("uf_case") == 'nochange':
34 |             config.set_case(None)
35 |         else:
36 |             config.set_case(self.getval("uf_case"))
37 | 
38 |         if self.getval("uf_reserved_case") == 'nochange':
39 |             config.set_reserved_case(None)
40 |         else:
41 |             config.set_reserved_case(self.getval("uf_reserved_case"))
42 | 
43 |         # ↓for backward compatibility↓
44 |         if self.user_settings.get("uf_uppercase") is False \
45 |                 and self.user_settings.get("uf_case") is None:
46 |             config.set_case(None)
47 | 
48 |         if self.user_settings.get("uf_uppercase") is False \
49 |                 and self.user_settings.get("uf_reserved_case") is None:
50 |             config.set_reserved_case(None)
51 |         # ↑for backward compatibility↑
52 | 
53 |         # set reserved words
54 |         if self.getval("uf_reserved_words") is not None:
55 |             input_reserved_words_list = self.getval("uf_reserved_words")
56 |             reserved_words = input_reserved_words_list.split(",")
57 |             config.set_input_reserved_words(reserved_words)
58 | 
59 |         uroborosqlfmt.config.glb.escape_sequence_u005c = self.getval(
60 |             "uf_escapesequence_u005c")
61 |         if str(self.getval("uf_comment_syntax")).upper() == "DOMA2":
62 |             config.set_commentsyntax(Doma2CommentSyntax())
63 | 
64 |         raw_text = ""
65 |         regions = view.sel()
66 |         # format selection
67 |         if len(regions) > 1 or not regions[0].empty():
68 |             # when several selections exist, the last non-empty one wins
69 |             region = regions[0]
70 |             for sel in regions:
71 |                 if not sel.empty():
72 |                     region = sel
73 |                     raw_text = view.substr(sel)
74 |         else:  # format all
75 |             region = sublime.Region(0, view.size())
76 |             raw_text = view.substr(region)
77 | 
78 |         if sync:
79 |             self.run_format(edit, raw_text, config, region.a, region.b)
80 |         else:
81 |             threading.Thread(target=self.run_format, args=(
82 |                 edit, raw_text, config, region.a, region.b)).start()
83 | 
84 |     def run_format(self, edit, raw_text, config, region_a, region_b):
85 |         formatted_text = uroborosqlfmt.format_sql(raw_text, config)
86 |         # the replacement goes through a separate TextCommand so that it
87 |         # runs on the main thread with a valid edit object
88 |         self.view.run_command("uroborosql_format_replace", {
89 |             "region_a": region_a,
90 |             "region_b": region_b,
91 |             "formatted_text": formatted_text
92 |         })
93 | 
94 |     def getval(self, key):
95 |         val = self.user_settings.get(key)
96 |         return val if val is not None else self.settings.get(key)
97 | 
98 | 
99 | class UroborosqlFormatReplaceCommand(sublime_plugin.TextCommand):
100 | 
101 |     """SQL format replace command class"""
102 | 
103 |     def run(self, edit, **args):
104 |         region = sublime.Region(args["region_a"], args["region_b"])
105 |         self.view.replace(edit, region, args["formatted_text"])
106 |         self.view.window().status_message('formatting... complete!!')
107 | 
108 | 
109 | class UroborosqlFormatListener(sublime_plugin.EventListener):
110 | 
111 |     """Event listener"""
112 | 
113 |     def on_pre_save(self, view):
114 |         self.settings = view.settings()
115 |         self.user_settings = sublime.load_settings(
116 |             'sublime-uroborosql-formatter.sublime-settings')
117 |         if self.getval("uf_save_on_format") is not True:
118 |             return
119 | 
120 |         filepath = view.file_name()
121 |         if filepath.endswith(tuple(self.getval("uf_save_on_format_extensions"))):
122 |             view.run_command("uroborosql_format", {"sync": True})
123 | 
124 |     def getval(self, key):
125 |         val = self.user_settings.get(key)
126 |         return val if val is not None else self.settings.get(key)
127 | 
--------------------------------------------------------------------------------
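Outside Sublime Text, the formatter core can be driven directly with the pieces shown above. A minimal sketch, assuming the uroborosqlfmt package is on sys.path; the exact output layout depends on the formatter's rules:

import uroborosqlfmt
from uroborosqlfmt.config import LocalConfig
from uroborosqlfmt.commentsyntax import Doma2CommentSyntax

# the setters return self, so a LocalConfig can be built fluently
config = LocalConfig() \
    .set_case("upper") \
    .set_reserved_case("upper") \
    .set_commentsyntax(Doma2CommentSyntax())

sql = "select id, name from emp where id = /*empId*/1"
print(uroborosqlfmt.format_sql(sql, config))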