├── Context.sublime-menu
├── Default (Linux).sublime-keymap
├── Default (OSX).sublime-keymap
├── Default (Windows).sublime-keymap
├── Default.sublime-commands
├── LICENSE
├── Main.sublime-menu
├── Preferences.sublime-settings
├── Readme.ja.md
├── Readme.md
├── enum
    ├── LICENSE
    ├── README
    ├── __init__.py
    ├── doc
    │   ├── enum.pdf
    │   └── enum.rst
    └── test.py
├── sqlparse
    ├── __init__.py
    ├── engine
    │   ├── __init__.py
    │   ├── filter.py
    │   └── grouping.py
    ├── exceptions.py
    ├── filters.py
    ├── formatter.py
    ├── functions.py
    ├── keywords.py
    ├── lexer.py
    ├── pipeline.py
    ├── sql.py
    ├── tokens.py
    └── utils.py
├── uroborosqlfmt
    ├── __init__.py
    ├── api.py
    ├── commentsyntax.py
    ├── config.py
    ├── exceptions.py
    ├── filters.py
    ├── grouping.py
    ├── sql.py
    └── tokenutils.py
└── urobosql_formatter.py


/Context.sublime-menu:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "id": "uroborosql-formatter",
4 |         "caption": "SQL Format",
5 |         "command": "uroborosql_format"
6 |     }
7 | ]


--------------------------------------------------------------------------------
/Default (Linux).sublime-keymap:
--------------------------------------------------------------------------------
1 | [
2 |   { "keys": ["ctrl+alt+f"], "command": "uroborosql_format" }
3 | ]


--------------------------------------------------------------------------------
/Default (OSX).sublime-keymap:
--------------------------------------------------------------------------------
1 | [
2 |   { "keys": ["ctrl+super+f"], "command": "uroborosql_format" }
3 | ]
4 | 


--------------------------------------------------------------------------------
/Default (Windows).sublime-keymap:
--------------------------------------------------------------------------------
1 | [
2 |   { "keys": ["ctrl+alt+f"], "command": "uroborosql_format" }
3 | ]


--------------------------------------------------------------------------------
/Default.sublime-commands:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "caption": "uroboroSQL Formatter: Format SQL",
4 |         "command": "uroborosql_format"
5 |     }
6 | ]
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 2-Clause License
 2 | 
 3 | Copyright (c) 2016, Future Corporation
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/Main.sublime-menu:
--------------------------------------------------------------------------------
 1 | [{
 2 | 	"caption": "Edit",
 3 | 	"mnemonic": "E",
 4 | 	"id": "edit",
 5 | 	"children": [{
 6 | 		"caption": "-"
 7 | 	}, {
 8 | 		"command": "uroborosql_format",
 9 | 		"id": "uroborosql_format",
10 | 		"caption": "SQL Format"
11 | 	}]
12 | }, {
13 | 	"caption": "Preferences",
14 | 	"mnemonic": "n",
15 | 	"id": "preferences",
16 | 	"children": [{
17 | 		"caption": "Package Settings",
18 | 		"mnemonic": "P",
19 | 		"id": "package-settings",
20 | 		"children": [{
21 | 			"caption": "uroboroSQL Formatter",
22 | 			"children": [{
23 | 				"command": "open_file",
24 | 				"args": {
25 | 					"file": "${packages}/sublime-uroborosql-formatter/Preferences.sublime-settings"
26 | 				},
27 | 				"caption": "Settings – Default"
28 | 			}, {
29 | 				"command": "open_file",
30 | 				"args": {
31 | 					"file": "${packages}/User/sublime-uroborosql-formatter.sublime-settings"
32 | 				},
33 | 				"caption": "Settings – User"
34 | 			}]
35 | 		}]
36 | 	}]
37 | }]


--------------------------------------------------------------------------------
/Preferences.sublime-settings:
--------------------------------------------------------------------------------
 1 | {
 2 |     "uf_tab_size": 4,
 3 |     "uf_translate_tabs_to_spaces": true,
 4 |     "uf_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
 5 |     // You have to set [uf_case]option, 
 6 |     // when you set [uf_reserved_case]option below.
 7 |     // The applocation does not accept the reserved_case option individually.
 8 |     "uf_reserved_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
 9 |     "uf_reserved_words":"ABS,ALL,ALLOCATE,ALTER,AND,ANY,ARE,ARRAY,ARRAY_AGG,ARRAY_MAX_CARDINALITY,AS,ASENSITIVE,ASYMMETRIC,AT,ATOMIC,AUTHORIZATION,AVG,BEGIN,BEGIN_FRAME,BEGIN_PARTITION,BETWEEN,BIGINT,BINARY,BLOB,BOOLEAN,BOTH,BY,CALL,CALLED,CARDINALITY,CASCADED,CASE,CAST,CEIL,CEILING,CHAR,CHARACTER,CHARACTER_LENGTH,CHAR_LENGTH,CHECK,CLOB,CLOSE,COALESCE,COLLATE,COLLECT,COLUMN,COMMIT,CONDITION,CONNECT,CONSTRAINT,CONTAINS,CONVERT,CORR,CORRESPONDING,COUNT,COVAR_POP,COVAR_SAMP,CREATE,CROSS,CUBE,CUME_DIST,CURRENT,CURRENT_CATALOG,CURRENT_DATE,CURRENT_DEFAULT_TRANSFORM_GROUP,CURRENT_PATH,CURRENT_ROLE,CURRENT_ROW,CURRENT_SCHEMA,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_TRANSFORM_GROUP_FOR_TYPE,CURRENT_USER,CURSOR,CYCLE,DATALINK,DATE,DAY,DEALLOCATE,DEC,DECIMAL,DECLARE,DEFAULT,DELETE,DENSE_RANK,DEREF,DESCRIBE,DETERMINISTIC,DISCONNECT,DISTINCT,DLNEWCOPY,DLPREVIOUSCOPY,DLURLCOMPLETE,DLURLCOMPLETEONLY,DLURLCOMPLETEWRITE,DLURLPATH,DLURLPATHONLY,DLURLPATHWRITE,DLURLSCHEME,DLURLSERVER,DLVALUE,DOUBLE,DROP,DYNAMIC,EACH,ELEMENT,ELSE,END,END-EXEC,END_FRAME,END_PARTITION,EQUALS,ESCAPE,EVERY,EXCEPT,EXEC,EXECUTE,EXISTS,EXP,EXTERNAL,EXTRACT,FALSE,FETCH,FILTER,FIRST_VALUE,FLOAT,FLOOR,FOR,FOREIGN,FRAME_ROW,FREE,FROM,FULL,FUNCTION,FUSION,GET,GLOBAL,GRANT,GROUP,GROUPING,GROUPS,HAVING,HOLD,HOUR,IDENTITY,IMPORT,IN,INDICATOR,INNER,INOUT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERSECTION,INTERVAL,INTO,IS,JOIN,LAG,LANGUAGE,LARGE,LAST_VALUE,LATERAL,LEAD,LEADING,LEFT,LIKE,LIKE_REGEX,LN,LOCAL,LOCALTIME,LOCALTIMESTAMP,LOWER,MATCH,MAX,MEMBER,MERGE,METHOD,MIN,MINUTE,MOD,MODIFIES,MODULE,MONTH,MULTISET,NATIONAL,NATURAL,NCHAR,NCLOB,NEW,NO,NONE,NORMALIZE,NOT,NTH_VALUE,NTILE,NULL,NULLIF,NUMERIC,OCCURRENCES_REGEX,OCTET_LENGTH,OF,OFFSET,OLD,ON,ONLY,OPEN,OR,ORDER,OUT,OUTER,OVER,OVERLAPS,OVERLAY,PARAMETER,PARTITION,PERCENT,PERCENTILE_CONT,PERCENTILE_DISC,PERCENT_RANK,PERIOD,PORTION,POSITION,POSITION_REGEX,POWER,PRECEDES,PRECISION,PREPARE,PRIMARY,PROCEDURE,RANGE,RANK,READS,REAL,RECURSIVE,REF,REFERENCES,REFERENCING,REGR_AVGX,REGR_AVGY,REGR_COUNT,REGR_INTERCEPT,REGR_R2,REGR_SLOPE,REGR_SXX,REGR_SXY,REGR_SYY,RELEASE,RESULT,RETURN,RETURNS,REVOKE,RIGHT,ROLLBACK,ROLLUP,ROW,ROWS,ROW_NUMBER,SAVEPOINT,SCOPE,SCROLL,SEARCH,SECOND,SELECT,SENSITIVE,SESSION_USER,SET,SIMILAR,SMALLINT,SOME,SPECIFIC,SPECIFICTYPE,SQL,SQLEXCEPTION,SQLSTATE,SQLWARNING,SQRT,START,STATIC,STDDEV_POP,STDDEV_SAMP,SUBMULTISET,SUBSTRING,SUBSTRING_REGEX,SUCCEEDS,SUM,SYMMETRIC,SYSTEM,SYSTEM_TIME,SYSTEM_USER,TABLE,TABLESAMPLE,THEN,TIME,TIMESTAMP,TIMEZONE_HOUR,TIMEZONE_MINUTE,TO,TRAILING,TRANSLATE,TRANSLATE_REGEX,TRANSLATION,TREAT,TRIGGER,TRIM,TRIM_ARRAY,TRUE,TRUNCATE,UESCAPE,UNION,UNIQUE,UNKNOWN,UNNEST,UPDATE,UPPER,USER,USING,VALUE,VALUES,VALUE_OF,VARBINARY,VARCHAR,VARYING,VAR_POP,VAR_SAMP,VERSIONING,WHEN,WHENEVER,WHERE,WIDTH_BUCKET,WINDOW,WITH,WITHIN,WITHOUT,XML,XMLAGG,XMLATTRIBUTES,XMLBINARY,XMLCAST,XMLCOMMENT,XMLCONCAT,XMLDOCUMENT,XMLELEMENT,XMLEXISTS,XMLFOREST,XMLITERATE,XMLNAMESPACES,XMLPARSE,XMLPI,XMLQUERY,XMLSERIALIZE,XMLTABLE,XMLTEXT,XMLVALIDATE,YEAR",
10 |     "uf_comment_syntax": "uroboroSQL", // "uroboroSQL" or "doma2"
11 |     "uf_escapesequence_u005c": false,
12 |     "uf_save_on_format": false,
13 |     "uf_save_on_format_extensions": ["sql"]
14 | }


--------------------------------------------------------------------------------
/Readme.ja.md:
--------------------------------------------------------------------------------
  1 | Sublime uroboroSQL Formatter
  2 | ====================
  3 | 
  4 | [![Package Control](https://img.shields.io/packagecontrol/dt/uroboroSQL%20Formatter.svg)](https://packagecontrol.io/packages/uroboroSQL%20Formatter)
  5 | 
  6 | Sublime uroboroSQL Formatterは、エンタープライズシステムで用いられることの多い、  
  7 | 非常に長いSQL(1K step以上)でも見やすく保守性の高いスタイルへフォーマットするための  
  8 | Sublime Text 3のプラグインです。
  9 | 
 10 | 特に日本など、英語を母国語としない国では、SELECT句などにコメントを入れることがあります。  
 11 | その場合、AS句とコメントの縦位置を揃えて、もはや芸術的とも言える見やすさを追求しますが、  
 12 | これ自動的に実現するために開発したものです。
 13 | 
 14 | #### 一般的なフォーマッタの場合
 15 | 
 16 | ```sql
 17 | SELECT MI.MAKER_CD AS ITEM_MAKER_CD -- メーカーコード
 18 | ,
 19 |        MI.BRAND_CD AS ITEM_BRAND_CD -- ブランドコード
 20 | ,
 21 |        MI.ITEM_CD AS ITEM_CD -- 商品コード
 22 | ,
 23 |        MI.CATEGORY AS ITEM_CATEGORY -- 商品カテゴリ
 24 | FROM M_ITEM MI -- 商品マスタ
 25 | 
 26 | WHERE 1 = 1
 27 |   AND MI.ARRIVAL_DATE = '2016-12-01' -- 入荷日
 28 | ```
 29 | 
 30 | #### Sublime uroboroSQL Formatterの場合
 31 | 
 32 | ```sql
 33 | SELECT
 34 |     MI.MAKER_CD AS  ITEM_MAKER_CD   -- メーカーコード
 35 | ,   MI.BRAND_CD AS  ITEM_BRAND_CD   -- ブランドコード
 36 | ,   MI.ITEM_CD  AS  ITEM_CD         -- 商品コード
 37 | ,   MI.CATEGORY AS  ITEM_CATEGORY   -- 商品カテゴリ
 38 | FROM
 39 |     M_ITEM  MI  -- 商品マスタ
 40 | WHERE
 41 |     1               =   1
 42 | AND MI.ARRIVAL_DATE =   '2016-12-01'    -- 入荷日
 43 | 
 44 | ```
 45 | 
 46 | Features
 47 | --------
 48 | 
 49 | -	SELECT句、WHERE句内でASや演算子の縦位置を揃えてフォーマット
 50 | -	各行の末尾にコメントがあっても正しくフォーマット
 51 | 
 52 | How to Use
 53 | ----------
 54 | 
 55 | -	Editメニューから「SQL Format」を選択
 56 | -	エディタ上のコンテキストメニューから「SQL Format」を選択
 57 | -	コマンドPaletteから「uroboroSQL Formatter: Format SQL」を選択
 58 | -	ctrl + cmd + f on OS X, or ctrl + alt + f on Windows
 59 | 
 60 | Settings
 61 | --------
 62 | 
 63 | ```json
 64 | {
 65 |     "uf_tab_size": 4,
 66 |     "uf_translate_tabs_to_spaces": true,
 67 |     "uf_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
 68 |     "uf_reserved_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
 69 |     "uf_reserved_words":"SELECT, FROM, WHERE, CREATE" // Put commas to separate reserved words
 70 |     "uf_comment_syntax": "uroboroSQL", // "uroboroSQL" or "doma2"
 71 |     "uf_escapesequence_u005c": false,
 72 |     "uf_save_on_format": true,
 73 |     "uf_save_on_format_extensions": [".sql"]
 74 | }
 75 | ```
 76 | 
 77 | -	uf_tab_size
 78 | 	-	フォーマット後のインデントのタブサイズを指定します。4つを推奨します。
 79 | -	uf_translate_tabs_to_spaces
 80 | 	-	フォーマット後のインデントをタブにするかスペースにするかを指定します。trueにすることでスペースになります。
 81 | -	uf_case
 82 | 	-	全ての文字（予約語と識別子等全て）を指定のケース（大文字、小文字、文頭大文字、変換しない）に変換する時に使用する。
 83 | -	uf_reserved_case
 84 | 	-	予約語を指定のケース（大文字、小文字、文頭大文字）に変換する時に使用する。なお、"nochange"オプションを指定した場合は予約語に限ったケース変換はせず、全てのケースを「uf_case」に従って変換する。
 85 | -	uf_reserved_words
 86 | 	-	予約語のみをケース変換したい場合は、「uf_reserved_case」に指定のケース（upper, lower, capitalize or nochange）を設定するとともに、当項目に予約語のリストをカンマ区切りで指定します。
 87 | -	uf_comment_syntax
 88 | 	-	コメントのシンタックス形式を指定します。
 89 | 	-	「uroboroSQL」または「doma2」の指定が可能です。
 90 | 		-	通常のSQLの場合は、どちらを指定してもかまいません。
 91 | -	uf_escapesequence_u005c
 92 | 	-	SQL内でエスケープシーケンスをバックスラッシュで指定している場合にtrueを指定します。
 93 | -	uf_save_on_format
 94 | 	-	ファイル保存時に自動的にフォーマットする場合にtrueを指定します。
 95 | -	uf_save_on_format_extensions
 96 | 	-	フォーマットするファイルの拡張子をリストで指定します。
 97 | 
 98 | 
 99 | License
100 | -------
101 | 
102 | [python-sqlparse library](https://github.com/andialbrecht/sqlparse) and this code are both on [2-clauses BSD](http://www.opensource.org/licenses/bsd-license.php)
103 | 
104 | ---
105 | 
106 | Copyright 2018 by Future Architect.
107 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
  1 | Sublime uroboroSQL Formatter
  2 | ====================
  3 | 
  4 | [![Package Control](https://img.shields.io/packagecontrol/dt/uroboroSQL%20Formatter.svg)](https://packagecontrol.io/packages/uroboroSQL%20Formatter)
  5 | 
  6 | Sublime uroboroSQL Formatter is often used in enterprise systems, For formatting to a highly maintainable style even for very long SQL (1 K step or more) It is a plug-in of Sublime Text 3.
  7 | 
  8 | In particular, in countries where English is not their mother tongue, such as Japan, comments may be included in SELECT clauses. In that case, we will align the vertical position of the AS clause and the comment, pursuing the viewability which can be said as artistic anymore, This was developed to realize this automatically.
  9 | 
 10 | for Japanese, [Readme.ja.md](Readme.ja.md)
 11 | 
 12 | #### In case of general formatter
 13 | 
 14 | ```sql
 15 | SELECT MI.MAKER_CD AS ITEM_MAKER_CD -- maker code
 16 | ,
 17 |        MI.BRAND_CD AS ITEM_BRAND_CD -- brand code
 18 | ,
 19 |        MI.ITEM_CD AS ITEM_CD -- item code
 20 | ,
 21 |        MI.CATEGORY AS ITEM_CATEGORY -- item category
 22 | FROM M_ITEM MI -- item master
 23 | 
 24 | WHERE 1 = 1
 25 |   AND MI.ARRIVAL_DATE = '2016-12-01' -- arrival date
 26 | ```
 27 | 
 28 | #### In case of Sublime uroboroSQL Formatter
 29 | 
 30 | ```sql
 31 | SELECT
 32 |     MI.MAKER_CD AS  ITEM_MAKER_CD   -- maker code
 33 | ,   MI.BRAND_CD AS  ITEM_BRAND_CD   -- brand code
 34 | ,   MI.ITEM_CD  AS  ITEM_CD         -- item code
 35 | ,   MI.CATEGORY AS  ITEM_CATEGORY   -- item category
 36 | FROM
 37 |     M_ITEM  MI  -- item master
 38 | WHERE
 39 |     1               =   1
 40 | AND MI.ARRIVAL_DATE =   '2016-12-01'    -- arrival date
 41 | 
 42 | ```
 43 | 
 44 | Features
 45 | --------
 46 | 
 47 | -	Formatting by aligning the vertical position of AS and operator in SELECT clause, WHERE clause
 48 | -	Even if there is a comment at the end of each line, the format
 49 | 
 50 | How to Use
 51 | ----------
 52 | 
 53 | -	Select "SQL Format" from the Edit menu
 54 | -	Select "SQL Format" from the context menu on the editor
 55 | -	Select "uroboroSQL Formatter: Format SQL" from the command Palette
 56 | -	ctrl + cmd + f on OS X, or ctrl + alt + f on Windows
 57 | 
 58 | Settings
 59 | --------
 60 | 
 61 | ```json
 62 | {
 63 |     "uf_tab_size": 4,
 64 |     "uf_translate_tabs_to_spaces": true,
 65 |     "uf_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
 66 |     "uf_reserved_case": "upper", // "upper" or "lower" or "capitalize" or "nochange"
 67 |     "uf_reserved_words":"SELECT, FROM, WHERE, CREATE", // Put commas to separate reserved words
 68 |     "uf_comment_syntax": "uroboroSQL", // "uroboroSQL" or "doma2"
 69 |     "uf_escapesequence_u005c": false,
 70 |     "uf_save_on_format": true,
 71 |     "uf_save_on_format_extensions": [".sql"]
 72 | }
 73 | ```
 74 | 
 75 | -	uf_tab_size
 76 | 	-	Specify the tab size of the indent after formatting. We recommend 4.
 77 | -	uf_translate_tabs_to_spaces
 78 | 	-	Specify whether the indent after formatting is tab or space. It becomes a space by setting it to true.
 79 | -	uf_case
 80 | 	-	If you want to convert all words to a specific case, set "upper", "lower", "capitalize". If the "nochange" option is selected, the word cases will not be changed from the original.
 81 | -	uf_reserved_case
 82 | 	-	If you want to convert reserved words to a specific case, set "upper", "lower" or "capitalize". If the "nochange" option is selected, it will not change the cases of resered words and they will be changed based on the "uf_case" setting.
 83 | -	uf_reserved_words
 84 | 	-	If you want to convert only reserved words to a specific case, please input reserved words sepalated by a comma. (The input is not case sensitive. So you can input reserved words in any case.)
 85 | -	uf_comment_syntax
 86 | 	-	It specifies the comment syntax format.
 87 | 	-	You can specify the "uroboroSQL" or "doma2".
 88 | 		-	In the case of normal SQL, you can specify either.
 89 | -	uf_escapesequence_u005c
 90 | 	-	If you have specified the escape sequence with a backslash in the SQL to specify the true.
 91 | -	uf_save_on_format
 92 | 	-	Specify true when formatting automatically when saving files.
 93 | -	uf_save_on_format_extensions
 94 | 	-	Specify the extension of the file to be formatted in a list.
 95 | 
 96 | License
 97 | -------
 98 | 
 99 | [python-sqlparse library](https://github.com/andialbrecht/sqlparse) and this code are both on [2-clauses BSD](http://www.opensource.org/licenses/bsd-license.php)
100 | 
101 | ---
102 | 
103 | Copyright 2018 by Future Architect.
104 | 


--------------------------------------------------------------------------------
/enum/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013, Ethan Furman.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions
 6 | are met:
 7 | 
 8 |     Redistributions of source code must retain the above
 9 |     copyright notice, this list of conditions and the
10 |     following disclaimer.
11 | 
12 |     Redistributions in binary form must reproduce the above
13 |     copyright notice, this list of conditions and the following
14 |     disclaimer in the documentation and/or other materials
15 |     provided with the distribution.
16 | 
17 |     Neither the name Ethan Furman nor the names of any
18 |     contributors may be used to endorse or promote products
19 |     derived from this software without specific prior written
20 |     permission.
21 | 
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
26 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 | POSSIBILITY OF SUCH DAMAGE.
33 | 


--------------------------------------------------------------------------------
/enum/README:
--------------------------------------------------------------------------------
1 | enum34 is the new Python stdlib enum module available in Python 3.4
2 | backported for previous versions of Python from 2.4 to 3.3.
3 | tested on 2.6, 2.7, and 3.3+
4 | 


--------------------------------------------------------------------------------
/enum/doc/enum.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/future-architect/Sublime-uroboroSQL-formatter/7b1c1d81d377a8d341847247020173d1861b6a8f/enum/doc/enum.pdf


--------------------------------------------------------------------------------
/enum/doc/enum.rst:
--------------------------------------------------------------------------------
  1 | ``enum`` --- support for enumerations
  2 | ========================================
  3 | 
  4 | .. :synopsis: enumerations are sets of symbolic names bound to unique, constant
  5 |   values.
  6 | .. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
  7 | .. :sectionauthor:: Barry Warsaw <barry@python.org>,
  8 | .. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
  9 | .. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
 10 | 
 11 | ----------------
 12 | 
 13 | An enumeration is a set of symbolic names (members) bound to unique, constant
 14 | values.  Within an enumeration, the members can be compared by identity, and
 15 | the enumeration itself can be iterated over.
 16 | 
 17 | 
 18 | Module Contents
 19 | ---------------
 20 | 
 21 | This module defines two enumeration classes that can be used to define unique
 22 | sets of names and values: ``Enum`` and ``IntEnum``.  It also defines
 23 | one decorator, ``unique``.
 24 | 
 25 | ``Enum``
 26 | 
 27 | Base class for creating enumerated constants.  See section `Functional API`_
 28 | for an alternate construction syntax.
 29 | 
 30 | ``IntEnum``
 31 | 
 32 | Base class for creating enumerated constants that are also subclasses of ``int``.
 33 | 
 34 | ``unique``
 35 | 
 36 | Enum class decorator that ensures only one name is bound to any one value.
 37 | 
 38 | 
 39 | Creating an Enum
 40 | ----------------
 41 | 
 42 | Enumerations are created using the ``class`` syntax, which makes them
 43 | easy to read and write.  An alternative creation method is described in
 44 | `Functional API`_.  To define an enumeration, subclass ``Enum`` as
 45 | follows::
 46 | 
 47 |     >>> from enum import Enum
 48 |     >>> class Color(Enum):
 49 |     ...     red = 1
 50 |     ...     green = 2
 51 |     ...     blue = 3
 52 | 
 53 | Note: Nomenclature
 54 | 
 55 |   - The class ``Color`` is an *enumeration* (or *enum*)
 56 |   - The attributes ``Color.red``, ``Color.green``, etc., are
 57 |     *enumeration members* (or *enum members*).
 58 |   - The enum members have *names* and *values* (the name of
 59 |     ``Color.red`` is ``red``, the value of ``Color.blue`` is
 60 |     ``3``, etc.)
 61 |     
 62 | Note:
 63 | 
 64 |     Even though we use the ``class`` syntax to create Enums, Enums
 65 |     are not normal Python classes.  See `How are Enums different?`_ for
 66 |     more details.
 67 | 
 68 | Enumeration members have human readable string representations::
 69 | 
 70 |     >>> print(Color.red)
 71 |     Color.red
 72 | 
 73 | ...while their ``repr`` has more information::
 74 | 
 75 |     >>> print(repr(Color.red))
 76 |     <Color.red: 1>
 77 | 
 78 | The *type* of an enumeration member is the enumeration it belongs to::
 79 | 
 80 |     >>> type(Color.red)
 81 |     <enum 'Color'>
 82 |     >>> isinstance(Color.green, Color)
 83 |     True
 84 |     >>>
 85 | 
 86 | Enum members also have a property that contains just their item name::
 87 | 
 88 |     >>> print(Color.red.name)
 89 |     red
 90 | 
 91 | Enumerations support iteration.  In Python 3.x definition order is used; in
 92 | Python 2.x the definition order is not available, but class attribute
 93 | ``__order__`` is supported;  otherwise, value order is used::
 94 | 
 95 |     >>> class Shake(Enum):
 96 |     ...   __order__ = 'vanilla chocolate cookies mint'  # only needed in 2.x
 97 |     ...   vanilla = 7
 98 |     ...   chocolate = 4
 99 |     ...   cookies = 9
100 |     ...   mint = 3
101 |     ...
102 |     >>> for shake in Shake:
103 |     ...   print(shake)
104 |     ...
105 |     Shake.vanilla
106 |     Shake.chocolate
107 |     Shake.cookies
108 |     Shake.mint
109 | 
110 | The ``__order__`` attribute is always removed, and in 3.x it is also ignored
111 | (order is definition order); however, in the stdlib version it will be ignored
112 | but not removed.
113 | 
114 | Enumeration members are hashable, so they can be used in dictionaries and sets::
115 | 
116 |     >>> apples = {}
117 |     >>> apples[Color.red] = 'red delicious'
118 |     >>> apples[Color.green] = 'granny smith'
119 |     >>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
120 |     True
121 | 
122 | 
123 | Programmatic access to enumeration members and their attributes
124 | ---------------------------------------------------------------
125 | 
126 | Sometimes it's useful to access members in enumerations programmatically (i.e.
127 | situations where ``Color.red`` won't do because the exact color is not known
128 | at program-writing time).  ``Enum`` allows such access::
129 | 
130 |     >>> Color(1)
131 |     <Color.red: 1>
132 |     >>> Color(3)
133 |     <Color.blue: 3>
134 | 
135 | If you want to access enum members by *name*, use item access::
136 | 
137 |     >>> Color['red']
138 |     <Color.red: 1>
139 |     >>> Color['green']
140 |     <Color.green: 2>
141 | 
142 | If have an enum member and need its ``name`` or ``value``::
143 | 
144 |     >>> member = Color.red
145 |     >>> member.name
146 |     'red'
147 |     >>> member.value
148 |     1
149 | 
150 | 
151 | Duplicating enum members and values
152 | -----------------------------------
153 | 
154 | Having two enum members (or any other attribute) with the same name is invalid;
155 | in Python 3.x this would raise an error, but in Python 2.x the second member
156 | simply overwrites the first::
157 | 
158 |     >>> # python 2.x
159 |     >>> class Shape(Enum):
160 |     ...   square = 2
161 |     ...   square = 3
162 |     ...
163 |     >>> Shape.square
164 |     <Shape.square: 3>
165 | 
166 |     >>> # python 3.x
167 |     >>> class Shape(Enum):
168 |     ...   square = 2
169 |     ...   square = 3
170 |     Traceback (most recent call last):
171 |     ...
172 |     TypeError: Attempted to reuse key: 'square'
173 | 
174 | However, two enum members are allowed to have the same value.  Given two members
175 | A and B with the same value (and A defined first), B is an alias to A.  By-value
176 | lookup of the value of A and B will return A.  By-name lookup of B will also
177 | return A::
178 | 
179 |     >>> class Shape(Enum):
180 |     ...   __order__ = 'square diamond circle alias_for_square'  # only needed in 2.x
181 |     ...   square = 2
182 |     ...   diamond = 1
183 |     ...   circle = 3
184 |     ...   alias_for_square = 2
185 |     ...
186 |     >>> Shape.square
187 |     <Shape.square: 2>
188 |     >>> Shape.alias_for_square
189 |     <Shape.square: 2>
190 |     >>> Shape(2)
191 |     <Shape.square: 2>
192 | 
193 | 
194 | Allowing aliases is not always desirable.  ``unique`` can be used to ensure
195 | that none exist in a particular enumeration::
196 | 
197 |     >>> from enum import unique
198 |     >>> @unique
199 |     ... class Mistake(Enum):
200 |     ...   __order__ = 'one two three four'  # only needed in 2.x
201 |     ...   one = 1
202 |     ...   two = 2
203 |     ...   three = 3
204 |     ...   four = 3
205 |     Traceback (most recent call last):
206 |     ...
207 |     ValueError: duplicate names found in <enum 'Mistake'>: four -> three
208 | 
209 | Iterating over the members of an enum does not provide the aliases::
210 | 
211 |     >>> list(Shape)
212 |     [<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
213 | 
214 | The special attribute ``__members__`` is a dictionary mapping names to members.
215 | It includes all names defined in the enumeration, including the aliases::
216 | 
217 |     >>> for name, member in sorted(Shape.__members__.items()):
218 |     ...   name, member
219 |     ...
220 |     ('alias_for_square', <Shape.square: 2>)
221 |     ('circle', <Shape.circle: 3>)
222 |     ('diamond', <Shape.diamond: 1>)
223 |     ('square', <Shape.square: 2>)
224 | 
225 | The ``__members__`` attribute can be used for detailed programmatic access to
226 | the enumeration members.  For example, finding all the aliases::
227 | 
228 |     >>> [name for name, member in Shape.__members__.items() if member.name != name]
229 |     ['alias_for_square']
230 | 
231 | Comparisons
232 | -----------
233 | 
234 | Enumeration members are compared by identity::
235 | 
236 |     >>> Color.red is Color.red
237 |     True
238 |     >>> Color.red is Color.blue
239 |     False
240 |     >>> Color.red is not Color.blue
241 |     True
242 | 
243 | Ordered comparisons between enumeration values are *not* supported.  Enum
244 | members are not integers (but see `IntEnum`_ below)::
245 | 
246 |     >>> Color.red < Color.blue
247 |     Traceback (most recent call last):
248 |       File "<stdin>", line 1, in <module>
249 |     TypeError: unorderable types: Color() < Color()
250 | 
251 | .. warning::
252 | 
253 |     In Python 2 *everything* is ordered, even though the ordering may not
254 |     make sense.  If you want your enumerations to have a sensible ordering
255 |     check out the `OrderedEnum`_ recipe below.
256 | 
257 | 
258 | Equality comparisons are defined though::
259 | 
260 |     >>> Color.blue == Color.red
261 |     False
262 |     >>> Color.blue != Color.red
263 |     True
264 |     >>> Color.blue == Color.blue
265 |     True
266 | 
267 | Comparisons against non-enumeration values will always compare not equal
268 | (again, ``IntEnum`` was explicitly designed to behave differently, see
269 | below)::
270 | 
271 |     >>> Color.blue == 2
272 |     False
273 | 
274 | 
275 | Allowed members and attributes of enumerations
276 | ----------------------------------------------
277 | 
278 | The examples above use integers for enumeration values.  Using integers is
279 | short and handy (and provided by default by the `Functional API`_), but not
280 | strictly enforced.  In the vast majority of use-cases, one doesn't care what
281 | the actual value of an enumeration is.  But if the value *is* important,
282 | enumerations can have arbitrary values.
283 | 
284 | Enumerations are Python classes, and can have methods and special methods as
285 | usual.  If we have this enumeration::
286 | 
287 |     >>> class Mood(Enum):
288 |     ...   funky = 1
289 |     ...   happy = 3
290 |     ... 
291 |     ...   def describe(self):
292 |     ...     # self is the member here
293 |     ...     return self.name, self.value
294 |     ... 
295 |     ...   def __str__(self):
296 |     ...     return 'my custom str! {0}'.format(self.value)
297 |     ... 
298 |     ...   @classmethod
299 |     ...   def favorite_mood(cls):
300 |     ...     # cls here is the enumeration
301 |     ...     return cls.happy
302 | 
303 | Then::
304 | 
305 |     >>> Mood.favorite_mood()
306 |     <Mood.happy: 3>
307 |     >>> Mood.happy.describe()
308 |     ('happy', 3)
309 |     >>> str(Mood.funky)
310 |     'my custom str! 1'
311 | 
312 | The rules for what is allowed are as follows: _sunder_ names (starting and
313 | ending with a single underscore) are reserved by enum and cannot be used;
314 | all other attributes defined within an enumeration will become members of this
315 | enumeration, with the exception of *__dunder__* names and descriptors (methods
316 | are also descriptors).
317 | 
318 | Note:
319 | 
320 |     If your enumeration defines ``__new__`` and/or ``__init__`` then
321 |     whatever value(s) were given to the enum member will be passed into
322 |     those methods.  See `Planet`_ for an example.
323 | 
324 | 
325 | Restricted subclassing of enumerations
326 | --------------------------------------
327 | 
328 | Subclassing an enumeration is allowed only if the enumeration does not define
329 | any members.  So this is forbidden::
330 | 
331 |     >>> class MoreColor(Color):
332 |     ...   pink = 17
333 |     Traceback (most recent call last):
334 |     ...
335 |     TypeError: Cannot extend enumerations
336 | 
337 | But this is allowed::
338 | 
339 |     >>> class Foo(Enum):
340 |     ...   def some_behavior(self):
341 |     ...     pass
342 |     ...
343 |     >>> class Bar(Foo):
344 |     ...   happy = 1
345 |     ...   sad = 2
346 |     ...
347 | 
348 | Allowing subclassing of enums that define members would lead to a violation of
349 | some important invariants of types and instances.  On the other hand, it makes
350 | sense to allow sharing some common behavior between a group of enumerations.
351 | (See `OrderedEnum`_ for an example.)
352 | 
353 | 
354 | Pickling
355 | --------
356 | 
357 | Enumerations can be pickled and unpickled::
358 | 
359 |     >>> from enum.test_enum import Fruit
360 |     >>> from pickle import dumps, loads
361 |     >>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
362 |     True
363 | 
364 | The usual restrictions for pickling apply: picklable enums must be defined in
365 | the top level of a module, since unpickling requires them to be importable
366 | from that module.
367 | 
368 | Note:
369 | 
370 |     With pickle protocol version 4 (introduced in Python 3.4) it is possible
371 |     to easily pickle enums nested in other classes.
372 | 
373 | 
374 | 
375 | Functional API
376 | --------------
377 | 
378 | The ``Enum`` class is callable, providing the following functional API::
379 | 
380 |     >>> Animal = Enum('Animal', 'ant bee cat dog')
381 |     >>> Animal
382 |     <enum 'Animal'>
383 |     >>> Animal.ant
384 |     <Animal.ant: 1>
385 |     >>> Animal.ant.value
386 |     1
387 |     >>> list(Animal)
388 |     [<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
389 | 
390 | The semantics of this API resemble ``namedtuple``. The first argument
391 | of the call to ``Enum`` is the name of the enumeration. 
392 | 
393 | The second argument is the *source* of enumeration member names.  It can be a
394 | whitespace-separated string of names, a sequence of names, a sequence of
395 | 2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
396 | values.  The last two options enable assigning arbitrary values to
397 | enumerations; the others auto-assign increasing integers starting with 1.  A
398 | new class derived from ``Enum`` is returned.  In other words, the above
399 | assignment to ``Animal`` is equivalent to::
400 | 
401 |     >>> class Animals(Enum):
402 |     ...   ant = 1
403 |     ...   bee = 2
404 |     ...   cat = 3
405 |     ...   dog = 4
406 | 
407 | Pickling enums created with the functional API can be tricky as frame stack
408 | implementation details are used to try and figure out which module the
409 | enumeration is being created in (e.g. it will fail if you use a utility
410 | function in separate module, and also may not work on IronPython or Jython).
411 | The solution is to specify the module name explicitly as follows::
412 | 
413 |     >>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
414 | 
415 | Derived Enumerations
416 | --------------------
417 | 
418 | IntEnum
419 | ^^^^^^^
420 | 
421 | A variation of ``Enum`` is provided which is also a subclass of
422 | ``int``.  Members of an ``IntEnum`` can be compared to integers;
423 | by extension, integer enumerations of different types can also be compared
424 | to each other::
425 | 
426 |     >>> from enum import IntEnum
427 |     >>> class Shape(IntEnum):
428 |     ...   circle = 1
429 |     ...   square = 2
430 |     ...
431 |     >>> class Request(IntEnum):
432 |     ...   post = 1
433 |     ...   get = 2
434 |     ...
435 |     >>> Shape == 1
436 |     False
437 |     >>> Shape.circle == 1
438 |     True
439 |     >>> Shape.circle == Request.post
440 |     True
441 | 
442 | However, they still can't be compared to standard ``Enum`` enumerations::
443 | 
444 |     >>> class Shape(IntEnum):
445 |     ...   circle = 1
446 |     ...   square = 2
447 |     ...
448 |     >>> class Color(Enum):
449 |     ...   red = 1
450 |     ...   green = 2
451 |     ...
452 |     >>> Shape.circle == Color.red
453 |     False
454 | 
455 | ``IntEnum`` values behave like integers in other ways you'd expect::
456 | 
457 |     >>> int(Shape.circle)
458 |     1
459 |     >>> ['a', 'b', 'c'][Shape.circle]
460 |     'b'
461 |     >>> [i for i in range(Shape.square)]
462 |     [0, 1]
463 | 
464 | For the vast majority of code, ``Enum`` is strongly recommended,
465 | since ``IntEnum`` breaks some semantic promises of an enumeration (by
466 | being comparable to integers, and thus by transitivity to other
467 | unrelated enumerations).  It should be used only in special cases where
468 | there's no other choice; for example, when integer constants are
469 | replaced with enumerations and backwards compatibility is required with code
470 | that still expects integers.
471 | 
472 | 
473 | Others
474 | ^^^^^^
475 | 
476 | While ``IntEnum`` is part of the ``enum`` module, it would be very
477 | simple to implement independently::
478 | 
479 |     class IntEnum(int, Enum):
480 |         pass
481 | 
482 | This demonstrates how similar derived enumerations can be defined; for example
483 | a ``StrEnum`` that mixes in ``str`` instead of ``int``.
484 | 
485 | Some rules:
486 | 
487 | 1. When subclassing ``Enum``, mix-in types must appear before
488 |    ``Enum`` itself in the sequence of bases, as in the ``IntEnum``
489 |    example above.
490 | 2. While ``Enum`` can have members of any type, once you mix in an
491 |    additional type, all the members must have values of that type, e.g.
492 |    ``int`` above.  This restriction does not apply to mix-ins which only
493 |    add methods and don't specify another data type such as ``int`` or
494 |    ``str``.
495 | 3. When another data type is mixed in, the ``value`` attribute is *not the
496 |    same* as the enum member itself, although it is equivalant and will compare
497 |    equal.
498 | 4. %-style formatting:  ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
499 |    ``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
500 |    IntEnum) treat the enum member as its mixed-in type.
501 | 
502 |    Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
503 |    subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
504 |    or ``%u`` codes are used.
505 | 5. ``str.__format__`` (or ``format``) will use the mixed-in
506 |    type's ``__format__``.  If the ``Enum``'s ``str`` or
507 |    ``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes.
508 | 
509 | 
510 | Decorators
511 | ----------
512 | 
513 | unique
514 | ^^^^^^
515 | 
516 | A ``class`` decorator specifically for enumerations.  It searches an
517 | enumeration's ``__members__`` gathering any aliases it finds; if any are
518 | found ``ValueError`` is raised with the details::
519 | 
520 |     >>> @unique
521 |     ... class NoDupes(Enum):
522 |     ...    first = 'one'
523 |     ...    second = 'two'
524 |     ...    third = 'two'
525 |     Traceback (most recent call last):
526 |     ...
527 |     ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
528 | 
529 | 
530 | Interesting examples
531 | --------------------
532 | 
533 | While ``Enum`` and ``IntEnum`` are expected to cover the majority of
534 | use-cases, they cannot cover them all.  Here are recipes for some different
535 | types of enumerations that can be used directly, or as examples for creating
536 | one's own.
537 | 
538 | 
539 | AutoNumber
540 | ^^^^^^^^^^
541 | 
542 | Avoids having to specify the value for each enumeration member::
543 | 
544 |     >>> class AutoNumber(Enum):
545 |     ...     def __new__(cls):
546 |     ...         value = len(cls.__members__) + 1
547 |     ...         obj = object.__new__(cls)
548 |     ...         obj._value_ = value
549 |     ...         return obj
550 |     ...
551 |     >>> class Color(AutoNumber):
552 |     ...     __order__ = "red green blue"  # only needed in 2.x
553 |     ...     red = ()
554 |     ...     green = ()
555 |     ...     blue = ()
556 |     ...
557 |     >>> Color.green.value == 2
558 |     True
559 | 
560 | Note:
561 | 
562 |     The `__new__` method, if defined, is used during creation of the Enum
563 |     members; it is then replaced by Enum's `__new__` which is used after
564 |     class creation for lookup of existing members.  Due to the way Enums are
565 |     supposed to behave, there is no way to customize Enum's `__new__`.
566 | 
567 | 
568 | UniqueEnum
569 | ^^^^^^^^^^
570 | 
571 | Raises an error if a duplicate member name is found instead of creating an
572 | alias::
573 | 
574 |     >>> class UniqueEnum(Enum):
575 |     ...     def __init__(self, *args):
576 |     ...         cls = self.__class__
577 |     ...         if any(self.value == e.value for e in cls):
578 |     ...             a = self.name
579 |     ...             e = cls(self.value).name
580 |     ...             raise ValueError(
581 |     ...                     "aliases not allowed in UniqueEnum:  %r --> %r"
582 |     ...                     % (a, e))
583 |     ... 
584 |     >>> class Color(UniqueEnum):
585 |     ...     red = 1
586 |     ...     green = 2
587 |     ...     blue = 3
588 |     ...     grene = 2
589 |     Traceback (most recent call last):
590 |     ...
591 |     ValueError: aliases not allowed in UniqueEnum:  'grene' --> 'green'
592 |     
593 | 
594 | OrderedEnum
595 | ^^^^^^^^^^^
596 | 
597 | An ordered enumeration that is not based on ``IntEnum`` and so maintains
598 | the normal ``Enum`` invariants (such as not being comparable to other
599 | enumerations)::
600 | 
601 |     >>> class OrderedEnum(Enum):
602 |     ...     def __ge__(self, other):
603 |     ...         if self.__class__ is other.__class__:
604 |     ...             return self._value_ >= other._value_
605 |     ...         return NotImplemented
606 |     ...     def __gt__(self, other):
607 |     ...         if self.__class__ is other.__class__:
608 |     ...             return self._value_ > other._value_
609 |     ...         return NotImplemented
610 |     ...     def __le__(self, other):
611 |     ...         if self.__class__ is other.__class__:
612 |     ...             return self._value_ <= other._value_
613 |     ...         return NotImplemented
614 |     ...     def __lt__(self, other):
615 |     ...         if self.__class__ is other.__class__:
616 |     ...             return self._value_ < other._value_
617 |     ...         return NotImplemented
618 |     ...
619 |     >>> class Grade(OrderedEnum):
620 |     ...     __ordered__ = 'A B C D F'
621 |     ...     A = 5
622 |     ...     B = 4
623 |     ...     C = 3
624 |     ...     D = 2
625 |     ...     F = 1
626 |     ...
627 |     >>> Grade.C < Grade.A
628 |     True
629 | 
630 | 
631 | Planet
632 | ^^^^^^
633 | 
634 | If ``__new__`` or ``__init__`` is defined the value of the enum member
635 | will be passed to those methods::
636 | 
637 |     >>> class Planet(Enum):
638 |     ...     MERCURY = (3.303e+23, 2.4397e6)
639 |     ...     VENUS   = (4.869e+24, 6.0518e6)
640 |     ...     EARTH   = (5.976e+24, 6.37814e6)
641 |     ...     MARS    = (6.421e+23, 3.3972e6)
642 |     ...     JUPITER = (1.9e+27,   7.1492e7)
643 |     ...     SATURN  = (5.688e+26, 6.0268e7)
644 |     ...     URANUS  = (8.686e+25, 2.5559e7)
645 |     ...     NEPTUNE = (1.024e+26, 2.4746e7)
646 |     ...     def __init__(self, mass, radius):
647 |     ...         self.mass = mass       # in kilograms
648 |     ...         self.radius = radius   # in meters
649 |     ...     @property
650 |     ...     def surface_gravity(self):
651 |     ...         # universal gravitational constant  (m3 kg-1 s-2)
652 |     ...         G = 6.67300E-11
653 |     ...         return G * self.mass / (self.radius * self.radius)
654 |     ... 
655 |     >>> Planet.EARTH.value
656 |     (5.976e+24, 6378140.0)
657 |     >>> Planet.EARTH.surface_gravity
658 |     9.802652743337129
659 | 
660 | 
661 | How are Enums different?
662 | ------------------------
663 | 
664 | Enums have a custom metaclass that affects many aspects of both derived Enum
665 | classes and their instances (members).
666 | 
667 | 
668 | Enum Classes
669 | ^^^^^^^^^^^^
670 | 
671 | The ``EnumMeta`` metaclass is responsible for providing the
672 | ``__contains__``, ``__dir__``, ``__iter__`` and other methods that
673 | allow one to do things with an ``Enum`` class that fail on a typical
674 | class, such as ``list(Color)`` or ``some_var in Color``.  ``EnumMeta`` is
675 | responsible for ensuring that various other methods on the final ``Enum``
676 | class are correct (such as ``__new__``, ``__getnewargs__``,
677 | ``__str__`` and ``__repr__``).
678 | 
679 | .. note::
680 | 
681 |     ``__dir__`` is not changed in the Python 2 line as it messes up some
682 |     of the decorators included in the stdlib.
683 | 
684 | 
685 | Enum Members (aka instances)
686 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
687 | 
688 | The most interesting thing about Enum members is that they are singletons.
689 | ``EnumMeta`` creates them all while it is creating the ``Enum``
690 | class itself, and then puts a custom ``__new__`` in place to ensure
691 | that no new ones are ever instantiated by returning only the existing
692 | member instances.
693 | 
694 | 
695 | Finer Points
696 | ^^^^^^^^^^^^
697 | 
698 | ``Enum`` members are instances of an ``Enum`` class, and even though they
699 | are accessible as `EnumClass.member1.member2`, they should not be
700 | accessed directly from the member as that lookup may fail or, worse,
701 | return something besides the ``Enum`` member you were looking for
702 | (changed in version 1.1.1)::
703 | 
704 |     >>> class FieldTypes(Enum):
705 |     ...     name = 1
706 |     ...     value = 2
707 |     ...     size = 3
708 |     ...
709 |     >>> FieldTypes.value.size
710 |     <FieldTypes.size: 3>
711 |     >>> FieldTypes.size.value
712 |     3
713 | 
714 | The ``__members__`` attribute is only available on the class.
715 | 
716 | In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
717 | the definition order.  In Python 2.7 ``__members__`` is an ``OrderedDict`` if
718 | ``__order__`` was specified, and a plain ``dict`` otherwise.  In all other Python
719 | 2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified
720 | as the ``OrderedDict`` type didn't exist yet.
721 | 
722 | If you give your ``Enum`` subclass extra methods, like the `Planet`_
723 | class above, those methods will show up in a `dir` of the member,
724 | but not of the class::
725 | 
726 |     >>> dir(Planet)
727 |     ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
728 |     'VENUS', '__class__', '__doc__', '__members__', '__module__']
729 |     >>> dir(Planet.EARTH)
730 |     ['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
731 | 
732 | A ``__new__`` method will only be used for the creation of the
733 | ``Enum`` members -- after that it is replaced.  This means if you wish to
734 | change how ``Enum`` members are looked up you either have to write a
735 | helper function or a ``classmethod``.
736 | 


--------------------------------------------------------------------------------
/sqlparse/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
 2 | #
 3 | # This module is part of python-sqlparse and is released under
 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
 5 | 
 6 | """Parse SQL statements."""
 7 | 
 8 | 
 9 | __version__ = '0.1.19'
10 | 
11 | 
12 | # Setup namespace
13 | from sqlparse import engine
14 | from sqlparse import filters
15 | from sqlparse import formatter
16 | 
17 | # Deprecated in 0.1.5. Will be removed in 0.2.0
18 | from sqlparse.exceptions import SQLParseError
19 | 
20 | 
21 | def parse(sql, encoding=None):
22 |     """Parse sql and return a list of statements.
23 | 
24 |     :param sql: A string containting one or more SQL statements.
25 |     :param encoding: The encoding of the statement (optional).
26 |     :returns: A tuple of :class:`~sqlparse.sql.Statement` instances.
27 |     """
28 |     return tuple(parsestream(sql, encoding))
29 | 
30 | 
31 | def parsestream(stream, encoding=None):
32 |     """Parses sql statements from file-like object.
33 | 
34 |     :param stream: A file-like object.
35 |     :param encoding: The encoding of the stream contents (optional).
36 |     :returns: A generator of :class:`~sqlparse.sql.Statement` instances.
37 |     """
38 |     stack = engine.FilterStack()
39 |     stack.full_analyze()
40 |     return stack.run(stream, encoding)
41 | 
42 | 
43 | def format(sql, **options):
44 |     """Format *sql* according to *options*.
45 | 
46 |     Available options are documented in :ref:`formatting`.
47 | 
48 |     In addition to the formatting options this function accepts the
49 |     keyword "encoding" which determines the encoding of the statement.
50 | 
51 |     :returns: The formatted SQL statement as string.
52 |     """
53 |     encoding = options.pop('encoding', None)
54 |     stack = engine.FilterStack()
55 |     options = formatter.validate_options(options)
56 |     stack = formatter.build_filter_stack(stack, options)
57 |     stack.postprocess.append(filters.SerializerUnicode())
58 |     return ''.join(stack.run(sql, encoding))
59 | 
60 | 
61 | def split(sql, encoding=None):
62 |     """Split *sql* into single statements.
63 | 
64 |     :param sql: A string containting one or more SQL statements.
65 |     :param encoding: The encoding of the statement (optional).
66 |     :returns: A list of strings.
67 |     """
68 |     stack = engine.FilterStack()
69 |     stack.split_statements = True
70 |     return [str(stmt).strip() for stmt in stack.run(sql, encoding)]
71 | 
72 | 
73 | from sqlparse.engine.filter import StatementFilter
74 | 
75 | 
76 | def split2(stream):
77 |     splitter = StatementFilter()
78 |     return list(splitter.process(None, stream))
79 | 


--------------------------------------------------------------------------------
/sqlparse/engine/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
 2 | #
 3 | # This module is part of python-sqlparse and is released under
 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
 5 | 
 6 | """filter"""
 7 | 
 8 | from sqlparse import lexer
 9 | from sqlparse.engine import grouping
10 | from sqlparse.engine.filter import StatementFilter
11 | 
12 | # XXX remove this when cleanup is complete
13 | Filter = object
14 | 
15 | 
16 | class FilterStack(object):
17 | 
18 |     def __init__(self):
19 |         self.preprocess = []
20 |         self.stmtprocess = []
21 |         self.postprocess = []
22 |         self.split_statements = False
23 |         self._grouping = False
24 | 
25 |     def _flatten(self, stream):
26 |         for token in stream:
27 |             if token.is_group():
28 |                 for t in self._flatten(token.tokens):
29 |                     yield t
30 |             else:
31 |                 yield token
32 | 
33 |     def enable_grouping(self):
34 |         self._grouping = True
35 | 
36 |     def full_analyze(self):
37 |         self.enable_grouping()
38 | 
39 |     def run(self, sql, encoding=None):
40 |         stream = lexer.tokenize(sql, encoding)
41 |         # Process token stream
42 |         if self.preprocess:
43 |             for filter_ in self.preprocess:
44 |                 stream = filter_.process(self, stream)
45 | 
46 |         if (self.stmtprocess or self.postprocess or self.split_statements
47 |             or self._grouping):
48 |             splitter = StatementFilter()
49 |             stream = splitter.process(self, stream)
50 | 
51 |         if self._grouping:
52 | 
53 |             def _group(stream):
54 |                 for stmt in stream:
55 |                     grouping.group(stmt)
56 |                     yield stmt
57 |             stream = _group(stream)
58 | 
59 |         if self.stmtprocess:
60 | 
61 |             def _run1(stream):
62 |                 ret = []
63 |                 for stmt in stream:
64 |                     for filter_ in self.stmtprocess:
65 |                         filter_.process(self, stmt)
66 |                     ret.append(stmt)
67 |                 return ret
68 |             stream = _run1(stream)
69 | 
70 |         if self.postprocess:
71 | 
72 |             def _run2(stream):
73 |                 for stmt in stream:
74 |                     stmt.tokens = list(self._flatten(stmt.tokens))
75 |                     for filter_ in self.postprocess:
76 |                         stmt = filter_.process(self, stmt)
77 |                     yield stmt
78 |             stream = _run2(stream)
79 | 
80 |         return stream
81 | 


--------------------------------------------------------------------------------
/sqlparse/engine/filter.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | from sqlparse.sql import Statement, Token
  4 | from sqlparse import tokens as T
  5 | 
  6 | 
  7 | class StatementFilter:
  8 |     "Filter that split stream at individual statements"
  9 | 
 10 |     def __init__(self):
 11 |         self._in_declare = False
 12 |         self._in_dbldollar = False
 13 |         self._is_create = False
 14 |         self._begin_depth = 0
 15 | 
 16 |     def _reset(self):
 17 |         "Set the filter attributes to its default values"
 18 |         self._in_declare = False
 19 |         self._in_dbldollar = False
 20 |         self._is_create = False
 21 |         self._begin_depth = 0
 22 | 
 23 |     def _change_splitlevel(self, ttype, value):
 24 |         "Get the new split level (increase, decrease or remain equal)"
 25 |         # PostgreSQL
 26 |         if (ttype == T.Name.Builtin
 27 |             and value.startswith('$') and value.endswith('$')):
 28 |             if self._in_dbldollar:
 29 |                 self._in_dbldollar = False
 30 |                 return -1
 31 |             else:
 32 |                 self._in_dbldollar = True
 33 |                 return 1
 34 |         elif self._in_dbldollar:
 35 |             return 0
 36 | 
 37 |         # ANSI
 38 |         if ttype not in T.Keyword:
 39 |             return 0
 40 | 
 41 |         unified = value.upper()
 42 | 
 43 |         if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
 44 |             self._in_declare = True
 45 |             return 1
 46 | 
 47 |         if unified == 'BEGIN':
 48 |             self._begin_depth += 1
 49 |             if self._in_declare or self._is_create:
 50 |                 # FIXME(andi): This makes no sense.
 51 |                 return 1
 52 |             return 0
 53 | 
 54 |         if unified in ('END IF', 'END FOR'):
 55 |             return -1
 56 | 
 57 |         if unified == 'END':
 58 |             # Should this respect a preceeding BEGIN?
 59 |             # In CASE ... WHEN ... END this results in a split level -1.
 60 |             self._begin_depth = max(0, self._begin_depth - 1)
 61 |             return -1
 62 | 
 63 |         if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
 64 |             self._is_create = True
 65 |             return 0
 66 | 
 67 |         if (unified in ('IF', 'FOR')
 68 |             and self._is_create and self._begin_depth > 0):
 69 |             return 1
 70 | 
 71 |         # Default
 72 |         return 0
 73 | 
 74 |     def process(self, stack, stream):
 75 |         "Process the stream"
 76 |         consume_ws = False
 77 |         splitlevel = 0
 78 |         stmt = None
 79 |         stmt_tokens = []
 80 | 
 81 |         # Run over all stream tokens
 82 |         for ttype, value in stream:
 83 |             # Yield token if we finished a statement and there's no whitespaces
 84 |             if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
 85 |                 stmt.tokens = stmt_tokens
 86 |                 yield stmt
 87 | 
 88 |                 # Reset filter and prepare to process next statement
 89 |                 self._reset()
 90 |                 consume_ws = False
 91 |                 splitlevel = 0
 92 |                 stmt = None
 93 | 
 94 |             # Create a new statement if we are not currently in one of them
 95 |             if stmt is None:
 96 |                 stmt = Statement()
 97 |                 stmt_tokens = []
 98 | 
 99 |             # Change current split level (increase, decrease or remain equal)
100 |             splitlevel += self._change_splitlevel(ttype, value)
101 | 
102 |             # Append the token to the current statement
103 |             stmt_tokens.append(Token(ttype, value))
104 | 
105 |             # Check if we get the end of a statement
106 |             if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
107 |                 consume_ws = True
108 | 
109 |         # Yield pending statement (if any)
110 |         if stmt is not None:
111 |             stmt.tokens = stmt_tokens
112 |             yield stmt
113 | 


--------------------------------------------------------------------------------
/sqlparse/engine/grouping.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import itertools
  4 | 
  5 | from sqlparse import sql
  6 | from sqlparse import tokens as T
  7 | import sys
  8 | 
  9 | try:
 10 |     next
 11 | except NameError:  # Python < 2.6
 12 |     next = lambda i: i.__next__()
 13 | 
 14 | 
 15 | def _group_left_right(tlist, ttype, value, cls,
 16 |                       check_right=lambda t: True,
 17 |                       check_left=lambda t: True,
 18 |                       include_semicolon=False):
 19 |     [_group_left_right(sgroup, ttype, value, cls, check_right, check_left,
 20 |                        include_semicolon) for sgroup in tlist.get_sublists()
 21 |      if not isinstance(sgroup, cls)]
 22 |     idx = 0
 23 |     token = tlist.token_next_match(idx, ttype, value)
 24 |     while token:
 25 |         right = tlist.token_next(tlist.token_index(token))
 26 |         left = tlist.token_prev(tlist.token_index(token))
 27 |         if right is None or not check_right(right):
 28 |             token = tlist.token_next_match(tlist.token_index(token) + 1,
 29 |                                            ttype, value)
 30 |         elif left is None or not check_left(left):
 31 |             token = tlist.token_next_match(tlist.token_index(token) + 1,
 32 |                                            ttype, value)
 33 |         else:
 34 |             if include_semicolon:
 35 |                 sright = tlist.token_next_match(tlist.token_index(right),
 36 |                                                 T.Punctuation, ';')
 37 |                 if sright is not None:
 38 |                     # only overwrite "right" if a semicolon is actually
 39 |                     # present.
 40 |                     right = sright
 41 |             tokens = tlist.tokens_between(left, right)[1:]
 42 |             if not isinstance(left, cls):
 43 |                 new = cls([left])
 44 |                 new_idx = tlist.token_index(left)
 45 |                 tlist.tokens.remove(left)
 46 |                 tlist.tokens.insert(new_idx, new)
 47 |                 left = new
 48 |             left.tokens.extend(tokens)
 49 |             for t in tokens:
 50 |                 tlist.tokens.remove(t)
 51 |             token = tlist.token_next_match(tlist.token_index(left) + 1,
 52 |                                            ttype, value)
 53 | 
 54 | 
 55 | def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value):
 56 |     depth = 1
 57 |     for tok in tlist.tokens[idx:]:
 58 |         if tok.match(start_ttype, start_value):
 59 |             depth += 1
 60 |         elif tok.match(end_ttype, end_value):
 61 |             depth -= 1
 62 |             if depth == 1:
 63 |                 return tok
 64 |     return None
 65 | 
 66 | 
 67 | def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
 68 |                     cls, include_semicolon=False, recurse=False):
 69 | 
 70 |     #bugfix recurse
 71 |     # [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
 72 |     #                  cls, include_semicolon) for sgroup in tlist.get_sublists()
 73 |     [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
 74 |                      cls, include_semicolon, recurse) for sgroup in tlist.get_sublists()
 75 |      if recurse]
 76 |     if isinstance(tlist, cls):
 77 |         idx = 1
 78 |     else:
 79 |         idx = 0
 80 |     token = tlist.token_next_match(idx, start_ttype, start_value)
 81 |     while token:
 82 |         tidx = tlist.token_index(token)
 83 |         end = _find_matching(tidx, tlist, start_ttype, start_value,
 84 |                              end_ttype, end_value)
 85 |         if end is None:
 86 |             idx = tidx + 1
 87 |         else:
 88 |             if include_semicolon:
 89 |                 next_ = tlist.token_next(tlist.token_index(end))
 90 |                 if next_ and next_.match(T.Punctuation, ';'):
 91 |                     end = next_
 92 |             group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
 93 |             _group_matching(group, start_ttype, start_value,
 94 |                             end_ttype, end_value, cls, include_semicolon)
 95 |             idx = tlist.token_index(group) + 1
 96 |         token = tlist.token_next_match(idx, start_ttype, start_value)
 97 | 
 98 | 
 99 | def group_if(tlist):
100 |     _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True)
101 | 
102 | 
103 | def group_for(tlist):
104 |     _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP',
105 |                     sql.For, True)
106 | 
107 | 
108 | def group_foreach(tlist):
109 |     _group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP',
110 |                     sql.For, True)
111 | 
112 | 
113 | def group_begin(tlist):
114 |     _group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END',
115 |                     sql.Begin, True)
116 | 
117 | 
118 | def group_as(tlist):
119 | 
120 |     def _right_valid(token):
121 |         # Currently limited to DML/DDL. Maybe additional more non SQL reserved
122 |         # keywords should appear here (see issue8).
123 |         return not token.ttype in (T.DML, T.DDL)
124 | 
125 |     def _left_valid(token):
126 |         if token.ttype is T.Keyword and token.value in ('NULL',):
127 |             return True
128 |         return token.ttype is not T.Keyword
129 | 
130 |     _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier,
131 |                       check_right=_right_valid,
132 |                       check_left=_left_valid)
133 | 
134 | 
135 | def group_assignment(tlist):
136 |     _group_left_right(tlist, T.Assignment, ':=', sql.Assignment,
137 |                       include_semicolon=True)
138 | 
139 | 
140 | def group_comparison(tlist):
141 | 
142 |     def _parts_valid(token):
143 |         return (token.ttype in (T.String.Symbol, T.String.Single,
144 |                                 T.Name, T.Number, T.Number.Float,
145 |                                 T.Number.Integer, T.Literal,
146 |                                 T.Literal.Number.Integer, T.Name.Placeholder)
147 |                 or isinstance(token, (sql.Identifier, sql.Parenthesis))
148 |                 or (token.ttype is T.Keyword
149 |                     and token.value.upper() in ['NULL', ]))
150 |     _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
151 |                       check_left=_parts_valid, check_right=_parts_valid)
152 | 
153 | 
154 | def group_case(tlist):
155 |     _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case,
156 |                     include_semicolon=True, recurse=True)
157 | 
158 | 
159 | def group_identifier(tlist):
160 |     def _consume_cycle(tl, i):
161 |         # TODO: Usage of Wildcard token is ambivalent here.
162 |         x = itertools.cycle((
163 |             lambda y: (y.match(T.Punctuation, '.')
164 |                        or y.ttype in (T.Operator,
165 |                                       T.Wildcard,
166 |                                       T.Name)
167 |                        or isinstance(y, sql.SquareBrackets)),
168 |             lambda y: (y.ttype in (T.String.Symbol,
169 |                                    T.Name,
170 |                                    T.Wildcard,
171 |                                    T.Literal.String.Single,
172 |                                    T.Literal.Number.Integer,
173 |                                    T.Literal.Number.Float)
174 |                        or isinstance(y, (sql.Parenthesis,
175 |                                          sql.SquareBrackets,
176 |                                          sql.Function)))))
177 |         for t in tl.tokens[i:]:
178 |             # Don't take whitespaces into account.
179 |             if t.ttype is T.Whitespace:
180 |                 yield t
181 |                 continue
182 |             if next(x)(t):
183 |                 yield t
184 |             else:
185 |                 if isinstance(t, sql.Comment) and t.is_multiline():
186 |                     yield t
187 |                 return
188 | 
189 |     def _next_token(tl, i):
190 |         # chooses the next token. if two tokens are found then the
191 |         # first is returned.
192 |         t1 = tl.token_next_by_type(
193 |             i, (T.String.Symbol, T.Name, T.Literal.Number.Integer,
194 |                 T.Literal.Number.Float))
195 | 
196 |         i1 = tl.token_index(t1, start=i) if t1 else None
197 |         t2_end = None if i1 is None else i1 + 1
198 |         t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis), end=t2_end)
199 | 
200 |         if t1 and t2:
201 |             i2 = tl.token_index(t2, start=i)
202 |             if i1 > i2:
203 |                 return t2
204 |             else:
205 |                 return t1
206 |         elif t1:
207 |             return t1
208 |         else:
209 |             return t2
210 | 
211 |     # bottom up approach: group subgroups first
212 |     [group_identifier(sgroup) for sgroup in tlist.get_sublists()
213 |      if not isinstance(sgroup, sql.Identifier)]
214 | 
215 |     # real processing
216 |     idx = 0
217 |     token = _next_token(tlist, idx)
218 |     while token:
219 |         identifier_tokens = [token] + list(
220 |             _consume_cycle(tlist,
221 |                            tlist.token_index(token, start=idx) + 1))
222 |         # remove trailing whitespace
223 |         if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace:
224 |             identifier_tokens = identifier_tokens[:-1]
225 |         if not (len(identifier_tokens) == 1
226 |                 and (isinstance(identifier_tokens[0], (sql.Function, sql.Parenthesis))
227 |                      or identifier_tokens[0].ttype in (T.Literal.Number.Integer,
228 |                                                        T.Literal.Number.Float))):
229 |             group = tlist.group_tokens(sql.Identifier, identifier_tokens)
230 |             idx = tlist.token_index(group, start=idx) + 1
231 |         else:
232 |             idx += 1
233 |         token = _next_token(tlist, idx)
234 | 
235 | 
236 | def group_identifier_list(tlist):
237 |     [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
238 |      if not isinstance(sgroup, sql.IdentifierList)]
239 |     # Allowed list items
240 |     fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
241 |                                             sql.Case)),
242 |                    lambda t: t.is_whitespace(),
243 |                    lambda t: t.ttype == T.Name,
244 |                    lambda t: t.ttype == T.Wildcard,
245 |                    lambda t: t.match(T.Keyword, 'null'),
246 |                    lambda t: t.match(T.Keyword, 'role'),
247 |                    lambda t: t.ttype == T.Number.Integer,
248 |                    lambda t: t.ttype == T.String.Single,
249 |                    lambda t: t.ttype == T.Name.Placeholder,
250 |                    lambda t: t.ttype == T.Keyword,
251 |                    lambda t: isinstance(t, sql.Comparison),
252 |                    lambda t: isinstance(t, sql.Comment),
253 |                    lambda t: t.ttype == T.Comment.Multiline,
254 |                    ]
255 |     tcomma = tlist.token_next_match(0, T.Punctuation, ',')
256 |     start = None
257 |     while tcomma is not None:
258 |         # Go back one idx to make sure to find the correct tcomma
259 |         idx = tlist.token_index(tcomma)
260 |         before = tlist.token_prev(idx)
261 |         after = tlist.token_next(idx)
262 |         # Check if the tokens around tcomma belong to a list
263 |         bpassed = apassed = False
264 |         for func in fend1_funcs:
265 |             if before is not None and func(before):
266 |                 bpassed = True
267 |             if after is not None and func(after):
268 |                 apassed = True
269 |         if not bpassed or not apassed:
270 |             # Something's wrong here, skip ahead to next ","
271 |             start = None
272 |             tcomma = tlist.token_next_match(idx + 1,
273 |                                             T.Punctuation, ',')
274 |         else:
275 |             if start is None:
276 |                 start = before
277 |             after_idx = tlist.token_index(after, start=idx)
278 |             next_ = tlist.token_next(after_idx)
279 |             if next_ is None or not next_.match(T.Punctuation, ','):
280 |                 # Reached the end of the list
281 |                 tokens = tlist.tokens_between(start, after)
282 |                 group = tlist.group_tokens(sql.IdentifierList, tokens)
283 |                 start = None
284 |                 tcomma = tlist.token_next_match(tlist.token_index(group) + 1,
285 |                                                 T.Punctuation, ',')
286 |             else:
287 |                 tcomma = next_
288 | 
289 | 
290 | def group_brackets(tlist):
291 |     """Group parentheses () or square brackets []
292 | 
293 |         This is just like _group_matching, but complicated by the fact that
294 |         round brackets can contain square bracket groups and vice versa
295 |     """
296 | 
297 |     if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)):
298 |         idx = 1
299 |     else:
300 |         idx = 0
301 | 
302 |     # Find the first opening bracket
303 |     token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
304 | 
305 |     while token:
306 |         start_val = token.value  # either '(' or '['
307 |         if start_val == '(':
308 |             end_val = ')'
309 |             group_class = sql.Parenthesis
310 |         else:
311 |             end_val = ']'
312 |             group_class = sql.SquareBrackets
313 | 
314 |         tidx = tlist.token_index(token)
315 | 
316 |         # Find the corresponding closing bracket
317 |         end = _find_matching(tidx, tlist, T.Punctuation, start_val,
318 |                              T.Punctuation, end_val)
319 | 
320 |         if end is None:
321 |             idx = tidx + 1
322 |         else:
323 |             group = tlist.group_tokens(group_class,
324 |                                        tlist.tokens_between(token, end))
325 | 
326 |             # Check for nested bracket groups within this group
327 |             group_brackets(group)
328 |             idx = tlist.token_index(group) + 1
329 | 
330 |         # Find the next opening bracket
331 |         token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
332 | 
333 | 
334 | def group_comments(tlist):
335 |     [group_comments(sgroup) for sgroup in tlist.get_sublists()
336 |      if not isinstance(sgroup, sql.Comment)]
337 |     idx = 0
338 |     token = tlist.token_next_by_type(idx, T.Comment)
339 |     while token:
340 |         tidx = tlist.token_index(token)
341 |         end = tlist.token_not_matching(tidx + 1,
342 |                                        [lambda t: t.ttype in T.Comment,
343 |                                         lambda t: t.is_whitespace()])
344 |         if end is None:
345 |             idx = tidx + 1
346 |         else:
347 |             eidx = tlist.token_index(end)
348 |             grp_tokens = tlist.tokens_between(token,
349 |                                               tlist.token_prev(eidx, False))
350 |             group = tlist.group_tokens(sql.Comment, grp_tokens)
351 |             idx = tlist.token_index(group)
352 |         token = tlist.token_next_by_type(idx, T.Comment)
353 | 
354 | 
355 | # bugfix Oracle10g merge and Oracle connect by
356 | def group_where(tlist):
357 |     def end_match(token):
358 |         stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING',
359 |                      'WHEN', # for Oracle10g merge
360 |                      'CONNECT', # for Oracle connect by
361 |                      )
362 |         if token.match(T.Keyword, stopwords):
363 |             return True
364 |         if token.match(T.DML, ('DELETE')): # for Oracle10g merge
365 |             return True
366 |         if token.match(T.DML, ('START')): # for Oracle connect by
367 |             return True
368 | 
369 |         return False
370 | 
371 |     [group_where(sgroup) for sgroup in tlist.get_sublists()
372 |      if not isinstance(sgroup, sql.Where)]
373 |     idx = 0
374 |     token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
375 |     while token:
376 |         tidx = tlist.token_index(token)
377 |         end = tlist.token_matching(tidx + 1, (end_match, ))
378 |         if end is None:
379 |             end = tlist._groupable_tokens[-1]
380 |         else:
381 |             end = tlist.tokens[tlist.token_index(end) - 1]
382 |         group = tlist.group_tokens(sql.Where,
383 |                                    tlist.tokens_between(token, end),
384 |                                    ignore_ws=True)
385 |         idx = tlist.token_index(group)
386 |         token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
387 | 
388 | 
389 | def group_aliased(tlist):
390 |     clss = (sql.Identifier, sql.Function, sql.Case)
391 |     [group_aliased(sgroup) for sgroup in tlist.get_sublists()
392 |      if not isinstance(sgroup, clss)]
393 |     idx = 0
394 |     token = tlist.token_next_by_instance(idx, clss)
395 |     while token:
396 |         next_ = tlist.token_next(tlist.token_index(token))
397 |         if next_ is not None and isinstance(next_, clss):
398 |             # for jython str.upper()
399 |             # if not next_.value.upper().startswith('VARCHAR'):
400 |             text = next_.value
401 |             if sys.version_info[0] < 3 and isinstance(text, str):
402 |                 text = text.decode('utf-8').upper().encode('utf-8')
403 |             if not text.startswith('VARCHAR'):
404 |                 grp = tlist.tokens_between(token, next_)[1:]
405 |                 token.tokens.extend(grp)
406 |                 for t in grp:
407 |                     tlist.tokens.remove(t)
408 |         idx = tlist.token_index(token) + 1
409 |         token = tlist.token_next_by_instance(idx, clss)
410 | 
411 | 
412 | def group_typecasts(tlist):
413 |     _group_left_right(tlist, T.Punctuation, '::', sql.Identifier)
414 | 
415 | 
416 | def group_functions(tlist):
417 |     [group_functions(sgroup) for sgroup in tlist.get_sublists()
418 |      if not isinstance(sgroup, sql.Function)]
419 |     idx = 0
420 |     token = tlist.token_next_by_type(idx, T.Name)
421 |     while token:
422 |         next_ = tlist.token_next(token)
423 |         if not isinstance(next_, sql.Parenthesis):
424 |             idx = tlist.token_index(token) + 1
425 |         else:
426 |             func = tlist.group_tokens(sql.Function,
427 |                                       tlist.tokens_between(token, next_))
428 |             idx = tlist.token_index(func) + 1
429 |         token = tlist.token_next_by_type(idx, T.Name)
430 | 
431 | 
432 | def group_order(tlist):
433 |     idx = 0
434 |     token = tlist.token_next_by_type(idx, T.Keyword.Order)
435 |     while token:
436 |         prev = tlist.token_prev(token)
437 |         if isinstance(prev, sql.Identifier):
438 |             ido = tlist.group_tokens(sql.Identifier,
439 |                                      tlist.tokens_between(prev, token))
440 |             idx = tlist.token_index(ido) + 1
441 |         else:
442 |             idx = tlist.token_index(token) + 1
443 |         token = tlist.token_next_by_type(idx, T.Keyword.Order)
444 | 
445 | 
446 | def align_comments(tlist):
447 |     [align_comments(sgroup) for sgroup in tlist.get_sublists()]
448 |     idx = 0
449 |     token = tlist.token_next_by_instance(idx, sql.Comment)
450 |     while token:
451 |         before = tlist.token_prev(tlist.token_index(token))
452 |         if isinstance(before, sql.TokenList):
453 |             grp = tlist.tokens_between(before, token)[1:]
454 |             before.tokens.extend(grp)
455 |             for t in grp:
456 |                 tlist.tokens.remove(t)
457 |             idx = tlist.token_index(before) + 1
458 |         else:
459 |             idx = tlist.token_index(token) + 1
460 |         token = tlist.token_next_by_instance(idx, sql.Comment)
461 | 
462 | 
463 | def group(tlist):
464 |     for func in [
465 |             group_comments,
466 |             group_brackets,
467 |             group_functions,
468 |             # bugfix Oracle10g merge
469 |             # group_where,
470 |             # group_case,
471 |             group_case,
472 |             group_where,
473 | 
474 |             group_identifier,
475 |             group_order,
476 |             group_typecasts,
477 |             group_as,
478 |             group_aliased,
479 |             group_assignment,
480 |             group_comparison,
481 |             align_comments,
482 |             group_identifier_list,
483 |             group_if,
484 |             group_for,
485 |             group_foreach,
486 |             group_begin,
487 |             ]:
488 |         func(tlist)
489 | 


--------------------------------------------------------------------------------
/sqlparse/exceptions.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2012 Andi Albrecht, albrecht.andi@gmail.com
 2 | #
 3 | # This module is part of python-sqlparse and is released under
 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
 5 | 
 6 | """Exceptions used in this package."""
 7 | 
 8 | 
 9 | class SQLParseError(Exception):
10 |     """Base class for exceptions in this module."""
11 | 


--------------------------------------------------------------------------------
/sqlparse/filters.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import re
  4 | 
  5 | from os.path import abspath, join
  6 | 
  7 | from sqlparse import sql, tokens as T
  8 | from sqlparse.engine import FilterStack
  9 | from sqlparse.lexer import tokenize
 10 | from sqlparse.pipeline import Pipeline
 11 | from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation,
 12 |                              String, Whitespace)
 13 | from sqlparse.utils import memoize_generator
 14 | from sqlparse.utils import split_unquoted_newlines
 15 | 
 16 | 
 17 | # --------------------------
 18 | # token process
 19 | 
 20 | class _CaseFilter:
 21 | 
 22 |     ttype = None
 23 | 
 24 |     def __init__(self, case=None):
 25 |         if case is None:
 26 |             case = 'upper'
 27 |         assert case in ['lower', 'upper', 'capitalize']
 28 |         # for jython str.upper()
 29 |         # self.convert = getattr(str, case)
 30 |         def get_convert():
 31 |             import sys
 32 |             if sys.version_info[0] < 3:
 33 |                 unicodecase = getattr(unicode, case)
 34 |                 def convert(s):
 35 |                     if isinstance(s, str):
 36 |                         return unicodecase(s.decode('utf-8')).encode('utf-8')
 37 |                     else:
 38 |                         return unicodecase(s)
 39 |                 return convert
 40 |             else:
 41 |                 return getattr(str, case)
 42 |         self.convert = get_convert()
 43 | 
 44 |     def process(self, stack, stream):
 45 |         for ttype, value in stream:
 46 |             if ttype in self.ttype:
 47 |                 value = self.convert(value)
 48 |             yield ttype, value
 49 | 
 50 | 
 51 | class KeywordCaseFilter(_CaseFilter):
 52 |     ttype = T.Keyword
 53 | 
 54 | 
 55 | class IdentifierCaseFilter(_CaseFilter):
 56 |     ttype = (T.Name, T.String.Symbol)
 57 | 
 58 |     def process(self, stack, stream):
 59 |         for ttype, value in stream:
 60 |             if ttype in self.ttype and not value.strip()[0] == '"':
 61 |                 value = self.convert(value)
 62 |             yield ttype, value
 63 | 
 64 | 
 65 | class TruncateStringFilter:
 66 | 
 67 |     def __init__(self, width, char):
 68 |         self.width = max(width, 1)
 69 |         self.char = str(char)
 70 | 
 71 |     def process(self, stack, stream):
 72 |         for ttype, value in stream:
 73 |             if ttype is T.Literal.String.Single:
 74 |                 if value[:2] == '\'\'':
 75 |                     inner = value[2:-2]
 76 |                     quote = '\'\''
 77 |                 else:
 78 |                     inner = value[1:-1]
 79 |                     quote = '\''
 80 |                 if len(inner) > self.width:
 81 |                     value = ''.join((quote, inner[:self.width], self.char,
 82 |                                       quote))
 83 |             yield ttype, value
 84 | 
 85 | 
 86 | class GetComments:
 87 |     """Get the comments from a stack"""
 88 |     def process(self, stack, stream):
 89 |         for token_type, value in stream:
 90 |             if token_type in Comment:
 91 |                 yield token_type, value
 92 | 
 93 | 
 94 | class StripComments:
 95 |     """Strip the comments from a stack"""
 96 |     def process(self, stack, stream):
 97 |         for token_type, value in stream:
 98 |             if token_type not in Comment:
 99 |                 yield token_type, value
100 | 
101 | 
102 | def StripWhitespace(stream):
103 |     "Strip the useless whitespaces from a stream leaving only the minimal ones"
104 |     last_type = None
105 |     has_space = False
106 |     ignore_group = frozenset((Comparison, Punctuation))
107 | 
108 |     for token_type, value in stream:
109 |         # We got a previous token (not empty first ones)
110 |         if last_type:
111 |             if token_type in Whitespace:
112 |                 has_space = True
113 |                 continue
114 | 
115 |         # Ignore first empty spaces and dot-commas
116 |         elif token_type in (Whitespace, Whitespace.Newline, ignore_group):
117 |             continue
118 | 
119 |         # Yield a whitespace if it can't be ignored
120 |         if has_space:
121 |             if not ignore_group.intersection((last_type, token_type)):
122 |                 yield Whitespace, ' '
123 |             has_space = False
124 | 
125 |         # Yield the token and set its type for checking with the next one
126 |         yield token_type, value
127 |         last_type = token_type
128 | 
129 | 
130 | class IncludeStatement:
131 |     """Filter that enable a INCLUDE statement"""
132 | 
133 |     def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False):
134 |         if maxrecursive <= 0:
135 |             raise ValueError('Max recursion limit reached')
136 | 
137 |         self.dirpath = abspath(dirpath)
138 |         self.maxRecursive = maxrecursive
139 |         self.raiseexceptions = raiseexceptions
140 | 
141 |         self.detected = False
142 | 
143 |     @memoize_generator
144 |     def process(self, stack, stream):
145 |         # Run over all tokens in the stream
146 |         for token_type, value in stream:
147 |             # INCLUDE statement found, set detected mode
148 |             if token_type in Name and value.upper() == 'INCLUDE':
149 |                 self.detected = True
150 |                 continue
151 | 
152 |             # INCLUDE statement was found, parse it
153 |             elif self.detected:
154 |                 # Omit whitespaces
155 |                 if token_type in Whitespace:
156 |                     continue
157 | 
158 |                 # Found file path to include
159 |                 if token_type in String.Symbol:
160 | #                if token_type in tokens.String.Symbol:
161 | 
162 |                     # Get path of file to include
163 |                     path = join(self.dirpath, value[1:-1])
164 | 
165 |                     try:
166 |                         f = open(path)
167 |                         raw_sql = f.read()
168 |                         f.close()
169 | 
170 |                     # There was a problem loading the include file
171 |                     except IOError as err:
172 |                         # Raise the exception to the interpreter
173 |                         if self.raiseexceptions:
174 |                             raise
175 | 
176 |                         # Put the exception as a comment on the SQL code
177 |                         yield Comment, '-- IOError: %s\n' % err
178 | 
179 |                     else:
180 |                         # Create new FilterStack to parse readed file
181 |                         # and add all its tokens to the main stack recursively
182 |                         try:
183 |                             filtr = IncludeStatement(self.dirpath,
184 |                                                      self.maxRecursive - 1,
185 |                                                      self.raiseexceptions)
186 | 
187 |                         # Max recursion limit reached
188 |                         except ValueError as err:
189 |                             # Raise the exception to the interpreter
190 |                             if self.raiseexceptions:
191 |                                 raise
192 | 
193 |                             # Put the exception as a comment on the SQL code
194 |                             yield Comment, '-- ValueError: %s\n' % err
195 | 
196 |                         stack = FilterStack()
197 |                         stack.preprocess.append(filtr)
198 | 
199 |                         for tv in stack.run(raw_sql):
200 |                             yield tv
201 | 
202 |                     # Set normal mode
203 |                     self.detected = False
204 | 
205 |                 # Don't include any token while in detected mode
206 |                 continue
207 | 
208 |             # Normal token
209 |             yield token_type, value
210 | 
211 | 
212 | # ----------------------
213 | # statement process
214 | 
215 | class StripCommentsFilter:
216 | 
217 |     def _get_next_comment(self, tlist):
218 |         # TODO(andi) Comment types should be unified, see related issue38
219 |         token = tlist.token_next_by_instance(0, sql.Comment)
220 |         if token is None:
221 |             token = tlist.token_next_by_type(0, T.Comment)
222 |         return token
223 | 
224 |     def _process(self, tlist):
225 |         token = self._get_next_comment(tlist)
226 |         while token:
227 |             tidx = tlist.token_index(token)
228 |             prev = tlist.token_prev(tidx, False)
229 |             next_ = tlist.token_next(tidx, False)
230 |             # Replace by whitespace if prev and next exist and if they're not
231 |             # whitespaces. This doesn't apply if prev or next is a paranthesis.
232 |             if (prev is not None and next_ is not None
233 |                 and not prev.is_whitespace() and not next_.is_whitespace()
234 |                 and not (prev.match(T.Punctuation, '(')
235 |                          or next_.match(T.Punctuation, ')'))):
236 |                 tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
237 |             else:
238 |                 tlist.tokens.pop(tidx)
239 |             token = self._get_next_comment(tlist)
240 | 
241 |     def process(self, stack, stmt):
242 |         [self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
243 |         self._process(stmt)
244 | 
245 | 
246 | # for jython (object)
247 | class StripWhitespaceFilter(object):
248 | 
249 |     def _stripws(self, tlist):
250 |         func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
251 |         func = getattr(self, func_name, self._stripws_default)
252 |         func(tlist)
253 | 
254 |     def _stripws_default(self, tlist):
255 |         last_was_ws = False
256 |         for token in tlist.tokens:
257 |             if token.is_whitespace():
258 |                 if last_was_ws:
259 |                     token.value = ''
260 |                 else:
261 |                     token.value = ' '
262 |             last_was_ws = token.is_whitespace()
263 | 
264 |     def _stripws_identifierlist(self, tlist):
265 |         # Removes newlines before commas, see issue140
266 |         last_nl = None
267 |         for token in tlist.tokens[:]:
268 |             if (token.ttype is T.Punctuation
269 |                 and token.value == ','
270 |                 and last_nl is not None):
271 |                 tlist.tokens.remove(last_nl)
272 |             if token.is_whitespace():
273 |                 last_nl = token
274 |             else:
275 |                 last_nl = None
276 |         return self._stripws_default(tlist)
277 | 
278 |     def _stripws_parenthesis(self, tlist):
279 |         if tlist.tokens[1].is_whitespace():
280 |             tlist.tokens.pop(1)
281 |         if tlist.tokens[-2].is_whitespace():
282 |             tlist.tokens.pop(-2)
283 |         self._stripws_default(tlist)
284 | 
285 |     def process(self, stack, stmt, depth=0):
286 |         [self.process(stack, sgroup, depth + 1)
287 |          for sgroup in stmt.get_sublists()]
288 |         self._stripws(stmt)
289 |         if (
290 |             depth == 0
291 |             and stmt.tokens
292 |             and stmt.tokens[-1].is_whitespace()
293 |         ):
294 |             stmt.tokens.pop(-1)
295 | 
296 | 
297 | # for jython (object)
298 | class ReindentFilter(object):
299 | 
300 |     def __init__(self, width=2, char=' ', line_width=None):
301 |         self.width = width
302 |         self.char = char
303 |         self.indent = 0
304 |         self.offset = 0
305 |         self.line_width = line_width
306 |         self._curr_stmt = None
307 |         self._last_stmt = None
308 | 
309 |     def _flatten_up_to_token(self, token):
310 |         """Yields all tokens up to token plus the next one."""
311 |         # helper for _get_offset
312 |         iterator = self._curr_stmt.flatten()
313 |         for t in iterator:
314 |             yield t
315 |             if t == token:
316 |                 raise StopIteration
317 | 
318 |     def _get_offset(self, token):
319 |         raw = ''.join(map(str, self._flatten_up_to_token(token)))
320 |         line = raw.splitlines()[-1]
321 |         # Now take current offset into account and return relative offset.
322 |         full_offset = len(line) - len(self.char * (self.width * self.indent))
323 |         return full_offset - self.offset
324 | 
325 |     def nl(self):
326 |         # TODO: newline character should be configurable
327 |         space = (self.char * ((self.indent * self.width) + self.offset))
328 |         # Detect runaway indenting due to parsing errors
329 |         if len(space) > 200:
330 |             # something seems to be wrong, flip back
331 |             self.indent = self.offset = 0
332 |             space = (self.char * ((self.indent * self.width) + self.offset))
333 |         ws = '\n' + space
334 |         return sql.Token(T.Whitespace, ws)
335 | 
336 |     def _split_kwds(self, tlist):
337 |         split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
338 |                        'GROUP', 'ORDER', 'UNION', 'VALUES',
339 |                        'SET', 'BETWEEN', 'EXCEPT', 'HAVING')
340 | 
341 |         def _next_token(i):
342 |             t = tlist.token_next_match(i, T.Keyword, split_words,
343 |                                        regex=True)
344 |             if t and t.value.upper() == 'BETWEEN':
345 |                 t = _next_token(tlist.token_index(t) + 1)
346 |                 if t and t.value.upper() == 'AND':
347 |                     t = _next_token(tlist.token_index(t) + 1)
348 |             return t
349 | 
350 |         idx = 0
351 |         token = _next_token(idx)
352 |         added = set()
353 |         while token:
354 |             prev = tlist.token_prev(tlist.token_index(token), False)
355 |             offset = 1
356 |             if prev and prev.is_whitespace() and prev not in added:
357 |                 tlist.tokens.pop(tlist.token_index(prev))
358 |                 offset += 1
359 |             uprev = str(prev)
360 |             if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))):
361 |                 nl = tlist.token_next(token)
362 |             else:
363 |                 nl = self.nl()
364 |                 added.add(nl)
365 |                 tlist.insert_before(token, nl)
366 |                 offset += 1
367 |             token = _next_token(tlist.token_index(nl) + offset)
368 | 
369 |     def _split_statements(self, tlist):
370 |         idx = 0
371 |         token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
372 |         while token:
373 |             prev = tlist.token_prev(tlist.token_index(token), False)
374 |             if prev and prev.is_whitespace():
375 |                 tlist.tokens.pop(tlist.token_index(prev))
376 |             # only break if it's not the first token
377 |             if prev:
378 |                 nl = self.nl()
379 |                 tlist.insert_before(token, nl)
380 |             token = tlist.token_next_by_type(tlist.token_index(token) + 1,
381 |                                              (T.Keyword.DDL, T.Keyword.DML))
382 | 
383 |     def _process(self, tlist):
384 |         func_name = '_process_%s' % tlist.__class__.__name__.lower()
385 |         func = getattr(self, func_name, self._process_default)
386 |         func(tlist)
387 | 
388 |     def _process_where(self, tlist):
389 |         token = tlist.token_next_match(0, T.Keyword, 'WHERE')
390 |         try:
391 |             tlist.insert_before(token, self.nl())
392 |         except ValueError:  # issue121, errors in statement
393 |             pass
394 |         self.indent += 1
395 |         self._process_default(tlist)
396 |         self.indent -= 1
397 | 
398 |     def _process_having(self, tlist):
399 |         token = tlist.token_next_match(0, T.Keyword, 'HAVING')
400 |         try:
401 |             tlist.insert_before(token, self.nl())
402 |         except ValueError:  # issue121, errors in statement
403 |             pass
404 |         self.indent += 1
405 |         self._process_default(tlist)
406 |         self.indent -= 1
407 | 
408 |     def _process_parenthesis(self, tlist):
409 |         first = tlist.token_next(0)
410 |         indented = False
411 |         if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
412 |             self.indent += 1
413 |             tlist.tokens.insert(0, self.nl())
414 |             indented = True
415 |         num_offset = self._get_offset(
416 |             tlist.token_next_match(0, T.Punctuation, '('))
417 |         self.offset += num_offset
418 |         self._process_default(tlist, stmts=not indented)
419 |         if indented:
420 |             self.indent -= 1
421 |         self.offset -= num_offset
422 | 
423 |     def _process_identifierlist(self, tlist):
424 |         identifiers = list(tlist.get_identifiers())
425 |         if len(identifiers) > 1 and not tlist.within(sql.Function):
426 |             first = list(identifiers[0].flatten())[0]
427 |             if self.char == '\t':
428 |                 # when using tabs we don't count the actual word length
429 |                 # in spaces.
430 |                 num_offset = 1
431 |             else:
432 |                 num_offset = self._get_offset(first) - len(first.value)
433 |             self.offset += num_offset
434 |             for token in identifiers[1:]:
435 |                 tlist.insert_before(token, self.nl())
436 |             self.offset -= num_offset
437 |         self._process_default(tlist)
438 | 
439 |     def _process_case(self, tlist):
440 |         is_first = True
441 |         num_offset = None
442 |         case = tlist.tokens[0]
443 |         outer_offset = self._get_offset(case) - len(case.value)
444 |         self.offset += outer_offset
445 |         for cond, value in tlist.get_cases():
446 |             if is_first:
447 |                 tcond = list(cond[0].flatten())[0]
448 |                 is_first = False
449 |                 num_offset = self._get_offset(tcond) - len(tcond.value)
450 |                 self.offset += num_offset
451 |                 continue
452 |             if cond is None:
453 |                 token = value[0]
454 |             else:
455 |                 token = cond[0]
456 |             tlist.insert_before(token, self.nl())
457 |         # Line breaks on group level are done. Now let's add an offset of
458 |         # 5 (=length of "when", "then", "else") and process subgroups.
459 |         self.offset += 5
460 |         self._process_default(tlist)
461 |         self.offset -= 5
462 |         if num_offset is not None:
463 |             self.offset -= num_offset
464 |         end = tlist.token_next_match(0, T.Keyword, 'END')
465 |         tlist.insert_before(end, self.nl())
466 |         self.offset -= outer_offset
467 | 
468 |     def _process_default(self, tlist, stmts=True, kwds=True):
469 |         if stmts:
470 |             self._split_statements(tlist)
471 |         if kwds:
472 |             self._split_kwds(tlist)
473 |         [self._process(sgroup) for sgroup in tlist.get_sublists()]
474 | 
475 |     def process(self, stack, stmt):
476 |         if isinstance(stmt, sql.Statement):
477 |             self._curr_stmt = stmt
478 |         self._process(stmt)
479 |         if isinstance(stmt, sql.Statement):
480 |             if self._last_stmt is not None:
481 |                 if str(self._last_stmt).endswith('\n'):
482 |                     nl = '\n'
483 |                 else:
484 |                     nl = '\n\n'
485 |                 stmt.tokens.insert(
486 |                     0, sql.Token(T.Whitespace, nl))
487 |             if self._last_stmt != stmt:
488 |                 self._last_stmt = stmt
489 | 
490 | 
491 | # FIXME: Doesn't work ;)
492 | class RightMarginFilter:
493 | 
494 |     keep_together = (
495 |         # sql.TypeCast, sql.Identifier, sql.Alias,
496 |     )
497 | 
498 |     def __init__(self, width=79):
499 |         self.width = width
500 |         self.line = ''
501 | 
502 |     def _process(self, stack, group, stream):
503 |         for token in stream:
504 |             if token.is_whitespace() and '\n' in token.value:
505 |                 if token.value.endswith('\n'):
506 |                     self.line = ''
507 |                 else:
508 |                     self.line = token.value.splitlines()[-1]
509 |             elif (token.is_group()
510 |                   and not token.__class__ in self.keep_together):
511 |                 token.tokens = self._process(stack, token, token.tokens)
512 |             else:
513 |                 val = str(token)
514 |                 if len(self.line) + len(val) > self.width:
515 |                     match = re.search('^ +', self.line)
516 |                     if match is not None:
517 |                         indent = match.group()
518 |                     else:
519 |                         indent = ''
520 |                     yield sql.Token(T.Whitespace, '\n%s' % indent)
521 |                     self.line = indent
522 |                 self.line += val
523 |             yield token
524 | 
525 |     def process(self, stack, group):
526 |         return
527 |         group.tokens = self._process(stack, group, group.tokens)
528 | 
529 | 
530 | class ColumnsSelect:
531 |     """Get the columns names of a SELECT query"""
532 |     def process(self, stack, stream):
533 |         mode = 0
534 |         oldValue = ""
535 |         parenthesis = 0
536 | 
537 |         for token_type, value in stream:
538 |             # Ignore comments
539 |             if token_type in Comment:
540 |                 continue
541 | 
542 |             # We have not detected a SELECT statement
543 |             if mode == 0:
544 |                 if token_type in Keyword and value == 'SELECT':
545 |                     mode = 1
546 | 
547 |             # We have detected a SELECT statement
548 |             elif mode == 1:
549 |                 if value == 'FROM':
550 |                     if oldValue:
551 |                         yield oldValue
552 | 
553 |                     mode = 3    # Columns have been checked
554 | 
555 |                 elif value == 'AS':
556 |                     oldValue = ""
557 |                     mode = 2
558 | 
559 |                 elif (token_type == Punctuation
560 |                       and value == ',' and not parenthesis):
561 |                     if oldValue:
562 |                         yield oldValue
563 |                     oldValue = ""
564 | 
565 |                 elif token_type not in Whitespace:
566 |                     if value == '(':
567 |                         parenthesis += 1
568 |                     elif value == ')':
569 |                         parenthesis -= 1
570 | 
571 |                     oldValue += value
572 | 
573 |             # We are processing an AS keyword
574 |             elif mode == 2:
575 |                 # We check also for Keywords because a bug in SQLParse
576 |                 if token_type == Name or token_type == Keyword:
577 |                     yield value
578 |                     mode = 1
579 | 
580 | 
581 | # ---------------------------
582 | # postprocess
583 | 
584 | class SerializerUnicode:
585 | 
586 |     def process(self, stack, stmt):
587 |         raw = str(stmt)
588 |         lines = split_unquoted_newlines(raw)
589 |         res = '\n'.join(line.rstrip() for line in lines)
590 |         return res
591 | 
592 | 
593 | def Tokens2Unicode(stream):
594 |     result = ""
595 | 
596 |     for _, value in stream:
597 |         result += str(value)
598 | 
599 |     return result
600 | 
601 | 
602 | class OutputFilter:
603 |     varname_prefix = ''
604 | 
605 |     def __init__(self, varname='sql'):
606 |         self.varname = self.varname_prefix + varname
607 |         self.count = 0
608 | 
609 |     def _process(self, stream, varname, has_nl):
610 |         raise NotImplementedError
611 | 
612 |     def process(self, stack, stmt):
613 |         self.count += 1
614 |         if self.count > 1:
615 |             varname = '%s%d' % (self.varname, self.count)
616 |         else:
617 |             varname = self.varname
618 | 
619 |         has_nl = len(str(stmt).strip().splitlines()) > 1
620 |         stmt.tokens = self._process(stmt.tokens, varname, has_nl)
621 |         return stmt
622 | 
623 | 
624 | class OutputPythonFilter(OutputFilter):
625 |     def _process(self, stream, varname, has_nl):
626 |         # SQL query asignation to varname
627 |         if self.count > 1:
628 |             yield sql.Token(T.Whitespace, '\n')
629 |         yield sql.Token(T.Name, varname)
630 |         yield sql.Token(T.Whitespace, ' ')
631 |         yield sql.Token(T.Operator, '=')
632 |         yield sql.Token(T.Whitespace, ' ')
633 |         if has_nl:
634 |             yield sql.Token(T.Operator, '(')
635 |         yield sql.Token(T.Text, "'")
636 | 
637 |         # Print the tokens on the quote
638 |         for token in stream:
639 |             # Token is a new line separator
640 |             if token.is_whitespace() and '\n' in token.value:
641 |                 # Close quote and add a new line
642 |                 yield sql.Token(T.Text, " '")
643 |                 yield sql.Token(T.Whitespace, '\n')
644 | 
645 |                 # Quote header on secondary lines
646 |                 yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
647 |                 yield sql.Token(T.Text, "'")
648 | 
649 |                 # Indentation
650 |                 after_lb = token.value.split('\n', 1)[1]
651 |                 if after_lb:
652 |                     yield sql.Token(T.Whitespace, after_lb)
653 |                 continue
654 | 
655 |             # Token has escape chars
656 |             elif "'" in token.value:
657 |                 token.value = token.value.replace("'", "\\'")
658 | 
659 |             # Put the token
660 |             yield sql.Token(T.Text, token.value)
661 | 
662 |         # Close quote
663 |         yield sql.Token(T.Text, "'")
664 |         if has_nl:
665 |             yield sql.Token(T.Operator, ')')
666 | 
667 | 
668 | class OutputPHPFilter(OutputFilter):
669 |     varname_prefix = '$'
670 | 
671 |     def _process(self, stream, varname, has_nl):
672 |         # SQL query asignation to varname (quote header)
673 |         if self.count > 1:
674 |             yield sql.Token(T.Whitespace, '\n')
675 |         yield sql.Token(T.Name, varname)
676 |         yield sql.Token(T.Whitespace, ' ')
677 |         if has_nl:
678 |             yield sql.Token(T.Whitespace, ' ')
679 |         yield sql.Token(T.Operator, '=')
680 |         yield sql.Token(T.Whitespace, ' ')
681 |         yield sql.Token(T.Text, '"')
682 | 
683 |         # Print the tokens on the quote
684 |         for token in stream:
685 |             # Token is a new line separator
686 |             if token.is_whitespace() and '\n' in token.value:
687 |                 # Close quote and add a new line
688 |                 yield sql.Token(T.Text, ' ";')
689 |                 yield sql.Token(T.Whitespace, '\n')
690 | 
691 |                 # Quote header on secondary lines
692 |                 yield sql.Token(T.Name, varname)
693 |                 yield sql.Token(T.Whitespace, ' ')
694 |                 yield sql.Token(T.Operator, '.=')
695 |                 yield sql.Token(T.Whitespace, ' ')
696 |                 yield sql.Token(T.Text, '"')
697 | 
698 |                 # Indentation
699 |                 after_lb = token.value.split('\n', 1)[1]
700 |                 if after_lb:
701 |                     yield sql.Token(T.Whitespace, after_lb)
702 |                 continue
703 | 
704 |             # Token has escape chars
705 |             elif '"' in token.value:
706 |                 token.value = token.value.replace('"', '\\"')
707 | 
708 |             # Put the token
709 |             yield sql.Token(T.Text, token.value)
710 | 
711 |         # Close quote
712 |         yield sql.Token(T.Text, '"')
713 |         yield sql.Token(T.Punctuation, ';')
714 | 
715 | 
716 | class Limit:
717 |     """Get the LIMIT of a query.
718 | 
719 |     If not defined, return -1 (SQL specification for no LIMIT query)
720 |     """
721 |     def process(self, stack, stream):
722 |         index = 7
723 |         stream = list(stream)
724 |         stream.reverse()
725 | 
726 |         # Run over all tokens in the stream from the end
727 |         for token_type, value in stream:
728 |             index -= 1
729 | 
730 | #            if index and token_type in Keyword:
731 |             if index and token_type in Keyword and value == 'LIMIT':
732 |                 return stream[4 - index][1]
733 | 
734 |         return -1
735 | 
736 | 
737 | def compact(stream):
738 |     """Function that return a compacted version of the stream"""
739 |     pipe = Pipeline()
740 | 
741 |     pipe.append(StripComments())
742 |     pipe.append(StripWhitespace)
743 | 
744 |     return pipe(stream)
745 | 


--------------------------------------------------------------------------------
/sqlparse/formatter.py:
--------------------------------------------------------------------------------
  1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
  2 | #
  3 | # This module is part of python-sqlparse and is released under
  4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
  5 | 
  6 | """SQL formatter"""
  7 | 
  8 | from sqlparse import filters
  9 | from sqlparse.exceptions import SQLParseError
 10 | 
 11 | 
 12 | def validate_options(options):
 13 |     """Validates options."""
 14 |     kwcase = options.get('keyword_case', None)
 15 |     if kwcase not in [None, 'upper', 'lower', 'capitalize']:
 16 |         raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
 17 | 
 18 |     idcase = options.get('identifier_case', None)
 19 |     if idcase not in [None, 'upper', 'lower', 'capitalize']:
 20 |         raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
 21 | 
 22 |     ofrmt = options.get('output_format', None)
 23 |     if ofrmt not in [None, 'sql', 'python', 'php']:
 24 |         raise SQLParseError('Unknown output format: %r' % ofrmt)
 25 | 
 26 |     strip_comments = options.get('strip_comments', False)
 27 |     if strip_comments not in [True, False]:
 28 |         raise SQLParseError('Invalid value for strip_comments: %r'
 29 |                             % strip_comments)
 30 | 
 31 |     strip_ws = options.get('strip_whitespace', False)
 32 |     if strip_ws not in [True, False]:
 33 |         raise SQLParseError('Invalid value for strip_whitespace: %r'
 34 |                             % strip_ws)
 35 | 
 36 |     truncate_strings = options.get('truncate_strings', None)
 37 |     if truncate_strings is not None:
 38 |         try:
 39 |             truncate_strings = int(truncate_strings)
 40 |         except (ValueError, TypeError):
 41 |             raise SQLParseError('Invalid value for truncate_strings: %r'
 42 |                                 % truncate_strings)
 43 |         if truncate_strings <= 1:
 44 |             raise SQLParseError('Invalid value for truncate_strings: %r'
 45 |                                 % truncate_strings)
 46 |         options['truncate_strings'] = truncate_strings
 47 |         options['truncate_char'] = options.get('truncate_char', '[...]')
 48 | 
 49 |     reindent = options.get('reindent', False)
 50 |     if reindent not in [True, False]:
 51 |         raise SQLParseError('Invalid value for reindent: %r'
 52 |                             % reindent)
 53 |     elif reindent:
 54 |         options['strip_whitespace'] = True
 55 |     indent_tabs = options.get('indent_tabs', False)
 56 |     if indent_tabs not in [True, False]:
 57 |         raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
 58 |     elif indent_tabs:
 59 |         options['indent_char'] = '\t'
 60 |     else:
 61 |         options['indent_char'] = ' '
 62 |     indent_width = options.get('indent_width', 2)
 63 |     try:
 64 |         indent_width = int(indent_width)
 65 |     except (TypeError, ValueError):
 66 |         raise SQLParseError('indent_width requires an integer')
 67 |     if indent_width < 1:
 68 |         raise SQLParseError('indent_width requires an positive integer')
 69 |     options['indent_width'] = indent_width
 70 | 
 71 |     right_margin = options.get('right_margin', None)
 72 |     if right_margin is not None:
 73 |         try:
 74 |             right_margin = int(right_margin)
 75 |         except (TypeError, ValueError):
 76 |             raise SQLParseError('right_margin requires an integer')
 77 |         if right_margin < 10:
 78 |             raise SQLParseError('right_margin requires an integer > 10')
 79 |     options['right_margin'] = right_margin
 80 | 
 81 |     return options
 82 | 
 83 | 
 84 | def build_filter_stack(stack, options):
 85 |     """Setup and return a filter stack.
 86 | 
 87 |     Args:
 88 |       stack: :class:`~sqlparse.filters.FilterStack` instance
 89 |       options: Dictionary with options validated by validate_options.
 90 |     """
 91 |     # Token filter
 92 |     if options.get('keyword_case', None):
 93 |         stack.preprocess.append(
 94 |             filters.KeywordCaseFilter(options['keyword_case']))
 95 | 
 96 |     if options.get('identifier_case', None):
 97 |         stack.preprocess.append(
 98 |             filters.IdentifierCaseFilter(options['identifier_case']))
 99 | 
100 |     if options.get('truncate_strings', None) is not None:
101 |         stack.preprocess.append(filters.TruncateStringFilter(
102 |             width=options['truncate_strings'], char=options['truncate_char']))
103 | 
104 |     # After grouping
105 |     if options.get('strip_comments', False):
106 |         stack.enable_grouping()
107 |         stack.stmtprocess.append(filters.StripCommentsFilter())
108 | 
109 |     if (options.get('strip_whitespace', False)
110 |         or options.get('reindent', False)):
111 |         stack.enable_grouping()
112 |         stack.stmtprocess.append(filters.StripWhitespaceFilter())
113 | 
114 |     if options.get('reindent', False):
115 |         stack.enable_grouping()
116 |         stack.stmtprocess.append(
117 |             filters.ReindentFilter(char=options['indent_char'],
118 |                                    width=options['indent_width']))
119 | 
120 |     if options.get('right_margin', False):
121 |         stack.enable_grouping()
122 |         stack.stmtprocess.append(
123 |             filters.RightMarginFilter(width=options['right_margin']))
124 | 
125 |     # Serializer
126 |     if options.get('output_format'):
127 |         frmt = options['output_format']
128 |         if frmt.lower() == 'php':
129 |             fltr = filters.OutputPHPFilter()
130 |         elif frmt.lower() == 'python':
131 |             fltr = filters.OutputPythonFilter()
132 |         else:
133 |             fltr = None
134 |         if fltr is not None:
135 |             stack.postprocess.append(fltr)
136 | 
137 |     return stack
138 | 


--------------------------------------------------------------------------------
/sqlparse/functions.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Created on 17/05/2012
 3 | 
 4 | @author: piranna
 5 | 
 6 | Several utility functions to extract info from the SQL sentences
 7 | '''
 8 | 
 9 | from sqlparse.filters import ColumnsSelect, Limit
10 | from sqlparse.pipeline import Pipeline
11 | from sqlparse.tokens import Keyword, Whitespace
12 | 
13 | 
14 | def getlimit(stream):
15 |     """Function that return the LIMIT of a input SQL """
16 |     pipe = Pipeline()
17 | 
18 |     pipe.append(Limit())
19 | 
20 |     result = pipe(stream)
21 |     try:
22 |         return int(result)
23 |     except ValueError:
24 |         return result
25 | 
26 | 
27 | def getcolumns(stream):
28 |     """Function that return the colums of a SELECT query"""
29 |     pipe = Pipeline()
30 | 
31 |     pipe.append(ColumnsSelect())
32 | 
33 |     return pipe(stream)
34 | 
35 | 
36 | class IsType(object):
37 |     """Functor that return is the statement is of a specific type"""
38 |     def __init__(self, type):
39 |         self.type = type
40 | 
41 |     def __call__(self, stream):
42 |         for token_type, value in stream:
43 |             if token_type not in Whitespace:
44 |                 return token_type in Keyword and value == self.type
45 | 


--------------------------------------------------------------------------------
/sqlparse/keywords.py:
--------------------------------------------------------------------------------
  1 | from sqlparse import tokens
  2 | 
  3 | KEYWORDS = {
  4 |     'ABORT': tokens.Keyword,
  5 |     'ABS': tokens.Keyword,
  6 |     'ABSOLUTE': tokens.Keyword,
  7 |     'ACCESS': tokens.Keyword,
  8 |     'ADA': tokens.Keyword,
  9 |     'ADD': tokens.Keyword,
 10 |     'ADMIN': tokens.Keyword,
 11 |     'AFTER': tokens.Keyword,
 12 |     'AGGREGATE': tokens.Keyword,
 13 |     'ALIAS': tokens.Keyword,
 14 |     'ALL': tokens.Keyword,
 15 |     'ALLOCATE': tokens.Keyword,
 16 |     'ANALYSE': tokens.Keyword,
 17 |     'ANALYZE': tokens.Keyword,
 18 |     'ANY': tokens.Keyword,
 19 |     'ARE': tokens.Keyword,
 20 |     'ASC': tokens.Keyword.Order,
 21 |     'ASENSITIVE': tokens.Keyword,
 22 |     'ASSERTION': tokens.Keyword,
 23 |     'ASSIGNMENT': tokens.Keyword,
 24 |     'ASYMMETRIC': tokens.Keyword,
 25 |     'AT': tokens.Keyword,
 26 |     'ATOMIC': tokens.Keyword,
 27 |     'AUTHORIZATION': tokens.Keyword,
 28 |     'AVG': tokens.Keyword,
 29 | 
 30 |     'BACKWARD': tokens.Keyword,
 31 |     'BEFORE': tokens.Keyword,
 32 |     'BEGIN': tokens.Keyword,
 33 |     'BETWEEN': tokens.Keyword,
 34 |     'BITVAR': tokens.Keyword,
 35 |     'BIT_LENGTH': tokens.Keyword,
 36 |     'BOTH': tokens.Keyword,
 37 |     'BREADTH': tokens.Keyword,
 38 | 
 39 |     # 'C': tokens.Keyword,  # most likely this is an alias
 40 |     'CACHE': tokens.Keyword,
 41 |     'CALL': tokens.Keyword,
 42 |     'CALLED': tokens.Keyword,
 43 |     'CARDINALITY': tokens.Keyword,
 44 |     'CASCADE': tokens.Keyword,
 45 |     'CASCADED': tokens.Keyword,
 46 |     'CAST': tokens.Keyword,
 47 |     'CATALOG': tokens.Keyword,
 48 |     'CATALOG_NAME': tokens.Keyword,
 49 |     'CHAIN': tokens.Keyword,
 50 |     'CHARACTERISTICS': tokens.Keyword,
 51 |     'CHARACTER_LENGTH': tokens.Keyword,
 52 |     'CHARACTER_SET_CATALOG': tokens.Keyword,
 53 |     'CHARACTER_SET_NAME': tokens.Keyword,
 54 |     'CHARACTER_SET_SCHEMA': tokens.Keyword,
 55 |     'CHAR_LENGTH': tokens.Keyword,
 56 |     'CHECK': tokens.Keyword,
 57 |     'CHECKED': tokens.Keyword,
 58 |     'CHECKPOINT': tokens.Keyword,
 59 |     'CLASS': tokens.Keyword,
 60 |     'CLASS_ORIGIN': tokens.Keyword,
 61 |     'CLOB': tokens.Keyword,
 62 |     'CLOSE': tokens.Keyword,
 63 |     'CLUSTER': tokens.Keyword,
 64 |     'COALESCE': tokens.Keyword,
 65 |     'COBOL': tokens.Keyword,
 66 |     'COLLATE': tokens.Keyword,
 67 |     'COLLATION': tokens.Keyword,
 68 |     'COLLATION_CATALOG': tokens.Keyword,
 69 |     'COLLATION_NAME': tokens.Keyword,
 70 |     'COLLATION_SCHEMA': tokens.Keyword,
 71 |     'COLLECT': tokens.Keyword,
 72 |     'COLUMN': tokens.Keyword,
 73 |     'COLUMN_NAME': tokens.Keyword,
 74 |     'COMMAND_FUNCTION': tokens.Keyword,
 75 |     'COMMAND_FUNCTION_CODE': tokens.Keyword,
 76 |     'COMMENT': tokens.Keyword,
 77 |     'COMMIT': tokens.Keyword.DML,
 78 |     'COMMITTED': tokens.Keyword,
 79 |     'COMPLETION': tokens.Keyword,
 80 |     'CONDITION_NUMBER': tokens.Keyword,
 81 |     'CONNECT': tokens.Keyword,
 82 |     'CONNECTION': tokens.Keyword,
 83 |     'CONNECTION_NAME': tokens.Keyword,
 84 |     'CONSTRAINT': tokens.Keyword,
 85 |     'CONSTRAINTS': tokens.Keyword,
 86 |     'CONSTRAINT_CATALOG': tokens.Keyword,
 87 |     'CONSTRAINT_NAME': tokens.Keyword,
 88 |     'CONSTRAINT_SCHEMA': tokens.Keyword,
 89 |     'CONSTRUCTOR': tokens.Keyword,
 90 |     'CONTAINS': tokens.Keyword,
 91 |     'CONTINUE': tokens.Keyword,
 92 |     'CONVERSION': tokens.Keyword,
 93 |     'CONVERT': tokens.Keyword,
 94 |     'COPY': tokens.Keyword,
 95 |     'CORRESPONTING': tokens.Keyword,
 96 |     'COUNT': tokens.Keyword,
 97 |     'CREATEDB': tokens.Keyword,
 98 |     'CREATEUSER': tokens.Keyword,
 99 |     'CROSS': tokens.Keyword,
100 |     'CUBE': tokens.Keyword,
101 |     'CURRENT': tokens.Keyword,
102 |     'CURRENT_DATE': tokens.Keyword,
103 |     'CURRENT_PATH': tokens.Keyword,
104 |     'CURRENT_ROLE': tokens.Keyword,
105 |     'CURRENT_TIME': tokens.Keyword,
106 |     'CURRENT_TIMESTAMP': tokens.Keyword,
107 |     'CURRENT_USER': tokens.Keyword,
108 |     'CURSOR': tokens.Keyword,
109 |     'CURSOR_NAME': tokens.Keyword,
110 |     'CYCLE': tokens.Keyword,
111 | 
112 |     'DATA': tokens.Keyword,
113 |     'DATABASE': tokens.Keyword,
114 |     'DATETIME_INTERVAL_CODE': tokens.Keyword,
115 |     'DATETIME_INTERVAL_PRECISION': tokens.Keyword,
116 |     'DAY': tokens.Keyword,
117 |     'DEALLOCATE': tokens.Keyword,
118 |     'DECLARE': tokens.Keyword,
119 |     'DEFAULT': tokens.Keyword,
120 |     'DEFAULTS': tokens.Keyword,
121 |     'DEFERRABLE': tokens.Keyword,
122 |     'DEFERRED': tokens.Keyword,
123 |     'DEFINED': tokens.Keyword,
124 |     'DEFINER': tokens.Keyword,
125 |     'DELIMITER': tokens.Keyword,
126 |     'DELIMITERS': tokens.Keyword,
127 |     'DEREF': tokens.Keyword,
128 |     'DESC': tokens.Keyword.Order,
129 |     'DESCRIBE': tokens.Keyword,
130 |     'DESCRIPTOR': tokens.Keyword,
131 |     'DESTROY': tokens.Keyword,
132 |     'DESTRUCTOR': tokens.Keyword,
133 |     'DETERMINISTIC': tokens.Keyword,
134 |     'DIAGNOSTICS': tokens.Keyword,
135 |     'DICTIONARY': tokens.Keyword,
136 |     'DISCONNECT': tokens.Keyword,
137 |     'DISPATCH': tokens.Keyword,
138 |     'DO': tokens.Keyword,
139 |     'DOMAIN': tokens.Keyword,
140 |     'DYNAMIC': tokens.Keyword,
141 |     'DYNAMIC_FUNCTION': tokens.Keyword,
142 |     'DYNAMIC_FUNCTION_CODE': tokens.Keyword,
143 | 
144 |     'EACH': tokens.Keyword,
145 |     'ENCODING': tokens.Keyword,
146 |     'ENCRYPTED': tokens.Keyword,
147 |     'END-EXEC': tokens.Keyword,
148 |     'EQUALS': tokens.Keyword,
149 |     'ESCAPE': tokens.Keyword,
150 |     'EVERY': tokens.Keyword,
151 |     'EXCEPT': tokens.Keyword,
152 |     'ESCEPTION': tokens.Keyword,
153 |     'EXCLUDING': tokens.Keyword,
154 |     'EXCLUSIVE': tokens.Keyword,
155 |     'EXEC': tokens.Keyword,
156 |     'EXECUTE': tokens.Keyword,
157 |     'EXISTING': tokens.Keyword,
158 |     'EXISTS': tokens.Keyword,
159 |     'EXTERNAL': tokens.Keyword,
160 |     'EXTRACT': tokens.Keyword,
161 | 
162 |     'FALSE': tokens.Keyword,
163 |     'FETCH': tokens.Keyword,
164 |     'FINAL': tokens.Keyword,
165 |     'FIRST': tokens.Keyword,
166 |     'FORCE': tokens.Keyword,
167 |     'FOREACH': tokens.Keyword,
168 |     'FOREIGN': tokens.Keyword,
169 |     'FORTRAN': tokens.Keyword,
170 |     'FORWARD': tokens.Keyword,
171 |     'FOUND': tokens.Keyword,
172 |     'FREE': tokens.Keyword,
173 |     'FREEZE': tokens.Keyword,
174 |     'FULL': tokens.Keyword,
175 |     'FUNCTION': tokens.Keyword,
176 | 
177 |     # 'G': tokens.Keyword,
178 |     'GENERAL': tokens.Keyword,
179 |     'GENERATED': tokens.Keyword,
180 |     'GET': tokens.Keyword,
181 |     'GLOBAL': tokens.Keyword,
182 |     'GO': tokens.Keyword,
183 |     'GOTO': tokens.Keyword,
184 |     'GRANT': tokens.Keyword,
185 |     'GRANTED': tokens.Keyword,
186 |     'GROUPING': tokens.Keyword,
187 | 
188 |     'HANDLER': tokens.Keyword,
189 |     'HAVING': tokens.Keyword,
190 |     'HIERARCHY': tokens.Keyword,
191 |     'HOLD': tokens.Keyword,
192 |     'HOST': tokens.Keyword,
193 | 
194 |     'IDENTITY': tokens.Keyword,
195 |     'IGNORE': tokens.Keyword,
196 |     'ILIKE': tokens.Keyword,
197 |     'IMMEDIATE': tokens.Keyword,
198 |     'IMMUTABLE': tokens.Keyword,
199 | 
200 |     'IMPLEMENTATION': tokens.Keyword,
201 |     'IMPLICIT': tokens.Keyword,
202 |     'INCLUDING': tokens.Keyword,
203 |     'INCREMENT': tokens.Keyword,
204 |     'INDEX': tokens.Keyword,
205 | 
206 |     'INDITCATOR': tokens.Keyword,
207 |     'INFIX': tokens.Keyword,
208 |     'INHERITS': tokens.Keyword,
209 |     'INITIALIZE': tokens.Keyword,
210 |     'INITIALLY': tokens.Keyword,
211 |     'INOUT': tokens.Keyword,
212 |     'INPUT': tokens.Keyword,
213 |     'INSENSITIVE': tokens.Keyword,
214 |     'INSTANTIABLE': tokens.Keyword,
215 |     'INSTEAD': tokens.Keyword,
216 |     'INTERSECT': tokens.Keyword,
217 |     'INTO': tokens.Keyword,
218 |     'INVOKER': tokens.Keyword,
219 |     'IS': tokens.Keyword,
220 |     'ISNULL': tokens.Keyword,
221 |     'ISOLATION': tokens.Keyword,
222 |     'ITERATE': tokens.Keyword,
223 | 
224 |     # 'K': tokens.Keyword,
225 |     'KEY': tokens.Keyword,
226 |     'KEY_MEMBER': tokens.Keyword,
227 |     'KEY_TYPE': tokens.Keyword,
228 | 
229 |     'LANCOMPILER': tokens.Keyword,
230 |     'LANGUAGE': tokens.Keyword,
231 |     'LARGE': tokens.Keyword,
232 |     'LAST': tokens.Keyword,
233 |     'LATERAL': tokens.Keyword,
234 |     'LEADING': tokens.Keyword,
235 |     'LENGTH': tokens.Keyword,
236 |     'LESS': tokens.Keyword,
237 |     'LEVEL': tokens.Keyword,
238 |     'LIMIT': tokens.Keyword,
239 |     'LISTEN': tokens.Keyword,
240 |     'LOAD': tokens.Keyword,
241 |     'LOCAL': tokens.Keyword,
242 |     'LOCALTIME': tokens.Keyword,
243 |     'LOCALTIMESTAMP': tokens.Keyword,
244 |     'LOCATION': tokens.Keyword,
245 |     'LOCATOR': tokens.Keyword,
246 |     'LOCK': tokens.Keyword,
247 |     'LOWER': tokens.Keyword,
248 | 
249 |     # 'M': tokens.Keyword,
250 |     'MAP': tokens.Keyword,
251 |     'MATCH': tokens.Keyword,
252 |     'MAXVALUE': tokens.Keyword,
253 |     'MESSAGE_LENGTH': tokens.Keyword,
254 |     'MESSAGE_OCTET_LENGTH': tokens.Keyword,
255 |     'MESSAGE_TEXT': tokens.Keyword,
256 |     'METHOD': tokens.Keyword,
257 |     'MINUTE': tokens.Keyword,
258 |     'MINVALUE': tokens.Keyword,
259 |     'MOD': tokens.Keyword,
260 |     'MODE': tokens.Keyword,
261 |     'MODIFIES': tokens.Keyword,
262 |     'MODIFY': tokens.Keyword,
263 |     'MONTH': tokens.Keyword,
264 |     'MORE': tokens.Keyword,
265 |     'MOVE': tokens.Keyword,
266 |     'MUMPS': tokens.Keyword,
267 | 
268 |     'NAMES': tokens.Keyword,
269 |     'NATIONAL': tokens.Keyword,
270 |     'NATURAL': tokens.Keyword,
271 |     'NCHAR': tokens.Keyword,
272 |     'NCLOB': tokens.Keyword,
273 |     'NEW': tokens.Keyword,
274 |     'NEXT': tokens.Keyword,
275 |     'NO': tokens.Keyword,
276 |     'NOCREATEDB': tokens.Keyword,
277 |     'NOCREATEUSER': tokens.Keyword,
278 |     'NONE': tokens.Keyword,
279 |     'NOT': tokens.Keyword,
280 |     'NOTHING': tokens.Keyword,
281 |     'NOTIFY': tokens.Keyword,
282 |     'NOTNULL': tokens.Keyword,
283 |     'NULL': tokens.Keyword,
284 |     'NULLABLE': tokens.Keyword,
285 |     'NULLIF': tokens.Keyword,
286 | 
287 |     'OBJECT': tokens.Keyword,
288 |     'OCTET_LENGTH': tokens.Keyword,
289 |     'OF': tokens.Keyword,
290 |     'OFF': tokens.Keyword,
291 |     'OFFSET': tokens.Keyword,
292 |     'OIDS': tokens.Keyword,
293 |     'OLD': tokens.Keyword,
294 |     'ONLY': tokens.Keyword,
295 |     'OPEN': tokens.Keyword,
296 |     'OPERATION': tokens.Keyword,
297 |     'OPERATOR': tokens.Keyword,
298 |     'OPTION': tokens.Keyword,
299 |     'OPTIONS': tokens.Keyword,
300 |     'ORDINALITY': tokens.Keyword,
301 |     'OUT': tokens.Keyword,
302 |     'OUTPUT': tokens.Keyword,
303 |     'OVERLAPS': tokens.Keyword,
304 |     'OVERLAY': tokens.Keyword,
305 |     'OVERRIDING': tokens.Keyword,
306 |     'OWNER': tokens.Keyword,
307 | 
308 |     'PAD': tokens.Keyword,
309 |     'PARAMETER': tokens.Keyword,
310 |     'PARAMETERS': tokens.Keyword,
311 |     'PARAMETER_MODE': tokens.Keyword,
312 |     'PARAMATER_NAME': tokens.Keyword,
313 |     'PARAMATER_ORDINAL_POSITION': tokens.Keyword,
314 |     'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
315 |     'PARAMETER_SPECIFIC_NAME': tokens.Keyword,
316 |     'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword,
317 |     'PARTIAL': tokens.Keyword,
318 |     'PASCAL': tokens.Keyword,
319 |     'PENDANT': tokens.Keyword,
320 |     'PLACING': tokens.Keyword,
321 |     'PLI': tokens.Keyword,
322 |     'POSITION': tokens.Keyword,
323 |     'POSTFIX': tokens.Keyword,
324 |     'PRECISION': tokens.Keyword,
325 |     'PREFIX': tokens.Keyword,
326 |     'PREORDER': tokens.Keyword,
327 |     'PREPARE': tokens.Keyword,
328 |     'PRESERVE': tokens.Keyword,
329 |     'PRIMARY': tokens.Keyword,
330 |     'PRIOR': tokens.Keyword,
331 |     'PRIVILEGES': tokens.Keyword,
332 |     'PROCEDURAL': tokens.Keyword,
333 |     'PROCEDURE': tokens.Keyword,
334 |     'PUBLIC': tokens.Keyword,
335 | 
336 |     'RAISE': tokens.Keyword,
337 |     'READ': tokens.Keyword,
338 |     'READS': tokens.Keyword,
339 |     'RECHECK': tokens.Keyword,
340 |     'RECURSIVE': tokens.Keyword,
341 |     'REF': tokens.Keyword,
342 |     'REFERENCES': tokens.Keyword,
343 |     'REFERENCING': tokens.Keyword,
344 |     'REINDEX': tokens.Keyword,
345 |     'RELATIVE': tokens.Keyword,
346 |     'RENAME': tokens.Keyword,
347 |     'REPEATABLE': tokens.Keyword,
348 |     'RESET': tokens.Keyword,
349 |     'RESTART': tokens.Keyword,
350 |     'RESTRICT': tokens.Keyword,
351 |     'RESULT': tokens.Keyword,
352 |     'RETURN': tokens.Keyword,
353 |     'RETURNED_LENGTH': tokens.Keyword,
354 |     'RETURNED_OCTET_LENGTH': tokens.Keyword,
355 |     'RETURNED_SQLSTATE': tokens.Keyword,
356 |     'RETURNS': tokens.Keyword,
357 |     'REVOKE': tokens.Keyword,
358 |     'RIGHT': tokens.Keyword,
359 |     'ROLE': tokens.Keyword,
360 |     'ROLLBACK': tokens.Keyword.DML,
361 |     'ROLLUP': tokens.Keyword,
362 |     'ROUTINE': tokens.Keyword,
363 |     'ROUTINE_CATALOG': tokens.Keyword,
364 |     'ROUTINE_NAME': tokens.Keyword,
365 |     'ROUTINE_SCHEMA': tokens.Keyword,
366 |     'ROW': tokens.Keyword,
367 |     'ROWS': tokens.Keyword,
368 |     'ROW_COUNT': tokens.Keyword,
369 |     'RULE': tokens.Keyword,
370 | 
371 |     'SAVE_POINT': tokens.Keyword,
372 |     'SCALE': tokens.Keyword,
373 |     'SCHEMA': tokens.Keyword,
374 |     'SCHEMA_NAME': tokens.Keyword,
375 |     'SCOPE': tokens.Keyword,
376 |     'SCROLL': tokens.Keyword,
377 |     'SEARCH': tokens.Keyword,
378 |     'SECOND': tokens.Keyword,
379 |     'SECURITY': tokens.Keyword,
380 |     'SELF': tokens.Keyword,
381 |     'SENSITIVE': tokens.Keyword,
382 |     'SERIALIZABLE': tokens.Keyword,
383 |     'SERVER_NAME': tokens.Keyword,
384 |     'SESSION': tokens.Keyword,
385 |     'SESSION_USER': tokens.Keyword,
386 |     'SETOF': tokens.Keyword,
387 |     'SETS': tokens.Keyword,
388 |     'SHARE': tokens.Keyword,
389 |     'SHOW': tokens.Keyword,
390 |     'SIMILAR': tokens.Keyword,
391 |     'SIMPLE': tokens.Keyword,
392 |     'SIZE': tokens.Keyword,
393 |     'SOME': tokens.Keyword,
394 |     'SOURCE': tokens.Keyword,
395 |     'SPACE': tokens.Keyword,
396 |     'SPECIFIC': tokens.Keyword,
397 |     'SPECIFICTYPE': tokens.Keyword,
398 |     'SPECIFIC_NAME': tokens.Keyword,
399 |     'SQL': tokens.Keyword,
400 |     'SQLCODE': tokens.Keyword,
401 |     'SQLERROR': tokens.Keyword,
402 |     'SQLEXCEPTION': tokens.Keyword,
403 |     'SQLSTATE': tokens.Keyword,
404 |     'SQLWARNING': tokens.Keyword,
405 |     'STABLE': tokens.Keyword,
406 |     'START': tokens.Keyword.DML,
407 |     'STATE': tokens.Keyword,
408 |     'STATEMENT': tokens.Keyword,
409 |     'STATIC': tokens.Keyword,
410 |     'STATISTICS': tokens.Keyword,
411 |     'STDIN': tokens.Keyword,
412 |     'STDOUT': tokens.Keyword,
413 |     'STORAGE': tokens.Keyword,
414 |     'STRICT': tokens.Keyword,
415 |     'STRUCTURE': tokens.Keyword,
416 |     'STYPE': tokens.Keyword,
417 |     'SUBCLASS_ORIGIN': tokens.Keyword,
418 |     'SUBLIST': tokens.Keyword,
419 |     'SUBSTRING': tokens.Keyword,
420 |     'SUM': tokens.Keyword,
421 |     'SYMMETRIC': tokens.Keyword,
422 |     'SYSID': tokens.Keyword,
423 |     'SYSTEM': tokens.Keyword,
424 |     'SYSTEM_USER': tokens.Keyword,
425 | 
426 |     'TABLE': tokens.Keyword,
427 |     'TABLE_NAME': tokens.Keyword,
428 |     'TEMP': tokens.Keyword,
429 |     'TEMPLATE': tokens.Keyword,
430 |     'TEMPORARY': tokens.Keyword,
431 |     'TERMINATE': tokens.Keyword,
432 |     'THAN': tokens.Keyword,
433 |     'TIMESTAMP': tokens.Keyword,
434 |     'TIMEZONE_HOUR': tokens.Keyword,
435 |     'TIMEZONE_MINUTE': tokens.Keyword,
436 |     'TO': tokens.Keyword,
437 |     'TOAST': tokens.Keyword,
438 |     'TRAILING': tokens.Keyword,
439 |     'TRANSATION': tokens.Keyword,
440 |     'TRANSACTIONS_COMMITTED': tokens.Keyword,
441 |     'TRANSACTIONS_ROLLED_BACK': tokens.Keyword,
442 |     'TRANSATION_ACTIVE': tokens.Keyword,
443 |     'TRANSFORM': tokens.Keyword,
444 |     'TRANSFORMS': tokens.Keyword,
445 |     'TRANSLATE': tokens.Keyword,
446 |     'TRANSLATION': tokens.Keyword,
447 |     'TREAT': tokens.Keyword,
448 |     'TRIGGER': tokens.Keyword,
449 |     'TRIGGER_CATALOG': tokens.Keyword,
450 |     'TRIGGER_NAME': tokens.Keyword,
451 |     'TRIGGER_SCHEMA': tokens.Keyword,
452 |     'TRIM': tokens.Keyword,
453 |     'TRUE': tokens.Keyword,
454 |     'TRUNCATE': tokens.Keyword,
455 |     'TRUSTED': tokens.Keyword,
456 |     'TYPE': tokens.Keyword,
457 | 
458 |     'UNCOMMITTED': tokens.Keyword,
459 |     'UNDER': tokens.Keyword,
460 |     'UNENCRYPTED': tokens.Keyword,
461 |     'UNION': tokens.Keyword,
462 |     'UNIQUE': tokens.Keyword,
463 |     'UNKNOWN': tokens.Keyword,
464 |     'UNLISTEN': tokens.Keyword,
465 |     'UNNAMED': tokens.Keyword,
466 |     'UNNEST': tokens.Keyword,
467 |     'UNTIL': tokens.Keyword,
468 |     'UPPER': tokens.Keyword,
469 |     'USAGE': tokens.Keyword,
470 |     'USE': tokens.Keyword,
471 |     'USER': tokens.Keyword,
472 |     'USER_DEFINED_TYPE_CATALOG': tokens.Keyword,
473 |     'USER_DEFINED_TYPE_NAME': tokens.Keyword,
474 |     'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword,
475 |     'USING': tokens.Keyword,
476 | 
477 |     'VACUUM': tokens.Keyword,
478 |     'VALID': tokens.Keyword,
479 |     'VALIDATOR': tokens.Keyword,
480 |     'VALUES': tokens.Keyword,
481 |     'VARIABLE': tokens.Keyword,
482 |     'VERBOSE': tokens.Keyword,
483 |     'VERSION': tokens.Keyword,
484 |     'VIEW': tokens.Keyword,
485 |     'VOLATILE': tokens.Keyword,
486 | 
487 |     'WHENEVER': tokens.Keyword,
488 |     'WITH': tokens.Keyword,
489 |     'WITHOUT': tokens.Keyword,
490 |     'WORK': tokens.Keyword,
491 |     'WRITE': tokens.Keyword,
492 | 
493 |     'YEAR': tokens.Keyword,
494 | 
495 |     'ZONE': tokens.Keyword,
496 | 
497 |     # Name.Builtin
498 |     'ARRAY': tokens.Name.Builtin,
499 |     'BIGINT': tokens.Name.Builtin,
500 |     'BINARY': tokens.Name.Builtin,
501 |     'BIT': tokens.Name.Builtin,
502 |     'BLOB': tokens.Name.Builtin,
503 |     'BOOLEAN': tokens.Name.Builtin,
504 |     'CHAR': tokens.Name.Builtin,
505 |     'CHARACTER': tokens.Name.Builtin,
506 |     'DATE': tokens.Name.Builtin,
507 |     'DEC': tokens.Name.Builtin,
508 |     'DECIMAL': tokens.Name.Builtin,
509 |     'FLOAT': tokens.Name.Builtin,
510 |     'INT': tokens.Name.Builtin,
511 |     'INT8': tokens.Name.Builtin,
512 |     'INTEGER': tokens.Name.Builtin,
513 |     'INTERVAL': tokens.Name.Builtin,
514 |     'LONG': tokens.Name.Builtin,
515 |     'NUMBER': tokens.Name.Builtin,
516 |     'NUMERIC': tokens.Name.Builtin,
517 |     'REAL': tokens.Name.Builtin,
518 |     'SERIAL': tokens.Name.Builtin,
519 |     'SERIAL8': tokens.Name.Builtin,
520 |     'SIGNED': tokens.Name.Builtin,
521 |     'SMALLINT': tokens.Name.Builtin,
522 |     'TEXT': tokens.Name.Builtin,
523 |     'TINYINT': tokens.Name.Builtin,
524 |     'UNSIGNED': tokens.Name.Builtin,
525 |     'VARCHAR': tokens.Name.Builtin,
526 |     'VARCHAR2': tokens.Name.Builtin,
527 |     'VARYING': tokens.Name.Builtin,
528 | }
529 | 
530 | 
531 | KEYWORDS_COMMON = {
532 |     'SELECT': tokens.Keyword.DML,
533 |     'INSERT': tokens.Keyword.DML,
534 |     'DELETE': tokens.Keyword.DML,
535 |     'UPDATE': tokens.Keyword.DML,
536 |     'REPLACE': tokens.Keyword.DML,
537 |     'MERGE': tokens.Keyword.DML,
538 |     'DROP': tokens.Keyword.DDL,
539 |     'CREATE': tokens.Keyword.DDL,
540 |     'ALTER': tokens.Keyword.DDL,
541 | 
542 |     'WHERE': tokens.Keyword,
543 |     'FROM': tokens.Keyword,
544 |     'INNER': tokens.Keyword,
545 |     'JOIN': tokens.Keyword,
546 |     'STRAIGHT_JOIN': tokens.Keyword,
547 |     'AND': tokens.Keyword,
548 |     'OR': tokens.Keyword,
549 |     'LIKE': tokens.Keyword,
550 |     'ON': tokens.Keyword,
551 |     'IN': tokens.Keyword,
552 |     'SET': tokens.Keyword,
553 | 
554 |     'BY': tokens.Keyword,
555 |     'GROUP': tokens.Keyword,
556 |     'ORDER': tokens.Keyword,
557 |     'LEFT': tokens.Keyword,
558 |     'OUTER': tokens.Keyword,
559 |     'FULL': tokens.Keyword,
560 | 
561 |     'IF': tokens.Keyword,
562 |     'END': tokens.Keyword,
563 |     'THEN': tokens.Keyword,
564 |     'LOOP': tokens.Keyword,
565 |     'AS': tokens.Keyword,
566 |     'ELSE': tokens.Keyword,
567 |     'FOR': tokens.Keyword,
568 | 
569 |     'CASE': tokens.Keyword,
570 |     'WHEN': tokens.Keyword,
571 |     'MIN': tokens.Keyword,
572 |     'MAX': tokens.Keyword,
573 |     'DISTINCT': tokens.Keyword,
574 | }
575 | 


--------------------------------------------------------------------------------
/sqlparse/lexer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
  4 | #
  5 | # This module is part of python-sqlparse and is released under
  6 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
  7 | 
  8 | """SQL Lexer"""
  9 | 
 10 | # This code is based on the SqlLexer in pygments.
 11 | # http://pygments.org/
 12 | # It's separated from the rest of pygments to increase performance
 13 | # and to allow some customizations.
 14 | 
 15 | import re
 16 | import sys
 17 | 
 18 | from sqlparse import tokens
 19 | from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
 20 | import collections
 21 | 
 22 | 
 23 | class include(str):
 24 |     pass
 25 | 
 26 | 
 27 | class combined(tuple):
 28 |     """Indicates a state combined from multiple states."""
 29 | 
 30 |     def __new__(cls, *args):
 31 |         return tuple.__new__(cls, args)
 32 | 
 33 |     def __init__(self, *args):
 34 |         # tuple.__init__ doesn't do anything
 35 |         pass
 36 | 
 37 | 
 38 | def is_keyword(value):
 39 |     test = value.upper()
 40 |     return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
 41 | 
 42 | 
 43 | def apply_filters(stream, filters, lexer=None):
 44 |     """
 45 |     Use this method to apply an iterable of filters to
 46 |     a stream. If lexer is given it's forwarded to the
 47 |     filter, otherwise the filter receives `None`.
 48 |     """
 49 | 
 50 |     def _apply(filter_, stream):
 51 |         for token in filter_.filter(lexer, stream):
 52 |             yield token
 53 | 
 54 |     for filter_ in filters:
 55 |         stream = _apply(filter_, stream)
 56 |     return stream
 57 | 
 58 | 
 59 | class LexerMeta(type):
 60 |     """
 61 |     Metaclass for Lexer, creates the self._tokens attribute from
 62 |     self.tokens on the first instantiation.
 63 |     """
 64 | 
 65 |     def _process_state(cls, unprocessed, processed, state):
 66 |         assert type(state) is str, "wrong state name %r" % state
 67 |         assert state[0] != '#', "invalid state name %r" % state
 68 |         if state in processed:
 69 |             return processed[state]
 70 |         tokenlist = processed[state] = []
 71 |         rflags = cls.flags
 72 |         for tdef in unprocessed[state]:
 73 |             if isinstance(tdef, include):
 74 |                 # it's a state reference
 75 |                 assert tdef != state, "circular state reference %r" % state
 76 |                 tokenlist.extend(cls._process_state(
 77 |                     unprocessed, processed, str(tdef)))
 78 |                 continue
 79 | 
 80 |             assert type(tdef) is tuple, "wrong rule def %r" % tdef
 81 | 
 82 |             try:
 83 |                 rex = re.compile(tdef[0], rflags).match
 84 |             except Exception as err:
 85 |                 raise ValueError(("uncompilable regex %r in state"
 86 |                                   " %r of %r: %s"
 87 |                                   % (tdef[0], state, cls, err)))
 88 | 
 89 |             assert type(tdef[1]) is tokens._TokenType or isinstance(tdef[1], collections.Callable), \
 90 |                    ('token type must be simple type or callable, not %r'
 91 |                     % (tdef[1],))
 92 | 
 93 |             if len(tdef) == 2:
 94 |                 new_state = None
 95 |             else:
 96 |                 tdef2 = tdef[2]
 97 |                 if isinstance(tdef2, str):
 98 |                     # an existing state
 99 |                     if tdef2 == '#pop':
100 |                         new_state = -1
101 |                     elif tdef2 in unprocessed:
102 |                         new_state = (tdef2,)
103 |                     elif tdef2 == '#push':
104 |                         new_state = tdef2
105 |                     elif tdef2[:5] == '#pop:':
106 |                         new_state = -int(tdef2[5:])
107 |                     else:
108 |                         assert False, 'unknown new state %r' % tdef2
109 |                 elif isinstance(tdef2, combined):
110 |                     # combine a new state from existing ones
111 |                     new_state = '_tmp_%d' % cls._tmpname
112 |                     cls._tmpname += 1
113 |                     itokens = []
114 |                     for istate in tdef2:
115 |                         assert istate != state, \
116 |                                'circular state ref %r' % istate
117 |                         itokens.extend(cls._process_state(unprocessed,
118 |                                                           processed, istate))
119 |                     processed[new_state] = itokens
120 |                     new_state = (new_state,)
121 |                 elif isinstance(tdef2, tuple):
122 |                     # push more than one state
123 |                     for state in tdef2:
124 |                         assert (state in unprocessed or
125 |                                 state in ('#pop', '#push')), \
126 |                                'unknown new state ' + state
127 |                     new_state = tdef2
128 |                 else:
129 |                     assert False, 'unknown new state def %r' % tdef2
130 |             tokenlist.append((rex, tdef[1], new_state))
131 |         return tokenlist
132 | 
133 |     def process_tokendef(cls):
134 |         cls._all_tokens = {}
135 |         cls._tmpname = 0
136 |         processed = cls._all_tokens[cls.__name__] = {}
137 |         #tokendefs = tokendefs or cls.tokens[name]
138 |         for state in list(cls.tokens.keys()):
139 |             cls._process_state(cls.tokens, processed, state)
140 |         return processed
141 | 
142 |     def __call__(cls, *args, **kwds):
143 |         if not hasattr(cls, '_tokens'):
144 |             cls._all_tokens = {}
145 |             cls._tmpname = 0
146 |             if hasattr(cls, 'token_variants') and cls.token_variants:
147 |                 # don't process yet
148 |                 pass
149 |             else:
150 |                 cls._tokens = cls.process_tokendef()
151 | 
152 |         return type.__call__(cls, *args, **kwds)
153 | 
154 | # for jython metaclass
155 | LexerMetaHasVersion = LexerMeta("Lexer", (object, ), {"__doc__": LexerMeta.__doc__})
156 | 
157 | class Lexer(LexerMetaHasVersion):
158 | 
159 |     encoding = 'utf-8'
160 |     stripall = False
161 |     stripnl = False
162 |     tabsize = 0
163 |     flags = re.IGNORECASE | re.UNICODE
164 | 
165 |     tokens = {
166 |         'root': [
167 |             (r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single),
168 |             # $ matches *before* newline, therefore we have two patterns
169 |             # to match Comment.Single
170 |             (r'(--|# ).*?$', tokens.Comment.Single),
171 |             (r'(\r\n|\r|\n)', tokens.Newline),
172 |             (r'\s+', tokens.Whitespace),
173 |             (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
174 |             (r':=', tokens.Assignment),
175 |             (r'::', tokens.Punctuation),
176 |             (r'[*]', tokens.Wildcard),
177 |             (r'CASE\b', tokens.Keyword),  # extended CASE(foo)
178 |             (r"`(``|[^`])*`", tokens.Name),
179 |             (r"´(´´|[^´])*´", tokens.Name),
180 |             (r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
181 |             (r'\?{1}', tokens.Name.Placeholder),
182 |             (r'%\(\w+\)s', tokens.Name.Placeholder),
183 |             (r'%s', tokens.Name.Placeholder),
184 |             (r'[$:?]\w+', tokens.Name.Placeholder),
185 |             # FIXME(andi): VALUES shouldn't be listed here
186 |             # see https://github.com/andialbrecht/sqlparse/pull/64
187 |             (r'VALUES', tokens.Keyword),
188 |             (r'(@|##|#)[^\W\d_]\w+', tokens.Name),
189 |             # IN is special, it may be followed by a parenthesis, but
190 |             # is never a functino, see issue183
191 |             (r'in\b(?=[ (])?', tokens.Keyword),
192 |             (r'[^\W\d_]\w*(?=[.(])', tokens.Name),  # see issue39
193 |             (r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
194 |             (r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
195 |             (r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
196 |             (r'[-]?[0-9]+', tokens.Number.Integer),
197 |             (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
198 |             # not a real string literal in ANSI SQL:
199 |             (r'(""|".*?[^\\]")', tokens.String.Symbol),
200 |             # sqlite names can be escaped with [square brackets]. left bracket
201 |             # cannot be preceded by word character or a right bracket --
202 |             # otherwise it's probably an array index
203 |             (r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
204 |             (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
205 |             (r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
206 |             (r'NOT NULL\b', tokens.Keyword),
207 |             (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
208 |             (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
209 |             (r'(?<=\.)[^\W\d_]\w*', tokens.Name),
210 |             (r'[^\W\d]\w*', is_keyword),
211 |             (r'[;:()\[\],\.]', tokens.Punctuation),
212 |             (r'[<>=~!]+', tokens.Operator.Comparison),
213 |             (r'[+/@#%^&|`?^-]+', tokens.Operator),
214 |         ],
215 |         'multiline-comments': [
216 |             (r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
217 |             (r'\*/', tokens.Comment.Multiline, '#pop'),
218 |             (r'[^/\*]+', tokens.Comment.Multiline),
219 |             (r'[/*]', tokens.Comment.Multiline),
220 |         ]}
221 | 
222 |     def __init__(self):
223 |         self.filters = []
224 | 
225 |     def add_filter(self, filter_, **options):
226 |         from sqlparse.filters import Filter
227 |         if not isinstance(filter_, Filter):
228 |             filter_ = filter_(**options)
229 |         self.filters.append(filter_)
230 | 
231 |     def _decode(self, text):
232 |         if sys.version_info[0] == 3:
233 |             if isinstance(text, str):
234 |                 return text
235 |         # for jython
236 |         if isinstance(text, unicode):
237 |             return text
238 |         if self.encoding == 'guess':
239 |             try:
240 |                 text = text.decode('utf-8')
241 |                 if text.startswith('\ufeff'):
242 |                     text = text[len('\ufeff'):]
243 |             except UnicodeDecodeError:
244 |                 text = text.decode('latin1')
245 |         else:
246 |             try:
247 |                 text = text.decode(self.encoding)
248 |             except UnicodeDecodeError:
249 |                 text = text.decode('unicode-escape')
250 | 
251 |         if self.tabsize > 0:
252 |             text = text.expandtabs(self.tabsize)
253 |         return text
254 | 
255 |     def get_tokens(self, text, unfiltered=False):
256 |         """
257 |         Return an iterable of (tokentype, value) pairs generated from
258 |         `text`. If `unfiltered` is set to `True`, the filtering mechanism
259 |         is bypassed even if filters are defined.
260 | 
261 |         Also preprocess the text, i.e. expand tabs and strip it if
262 |         wanted and applies registered filters.
263 |         """
264 |         if isinstance(text, str):
265 |             if self.stripall:
266 |                 text = text.strip()
267 |             elif self.stripnl:
268 |                 text = text.strip('\n')
269 | 
270 |             if sys.version_info[0] < 3 and isinstance(text, str):
271 |                 # for jython
272 |                 import cStringIO
273 |                 text = cStringIO.StringIO(text)
274 |                 self.encoding = 'utf-8'
275 |             else:
276 |                 # for jython move import section
277 |                 from _io import StringIO
278 |                 text = StringIO(text)
279 | 
280 |         def streamer():
281 |             for i, t, v in self.get_tokens_unprocessed(text):
282 |                 # for jython encode
283 |                 if sys.version_info[0] < 3 and isinstance(v, unicode):
284 |                     v = v.encode('utf-8')
285 |                 yield t, v
286 |         stream = streamer()
287 |         if not unfiltered:
288 |             stream = apply_filters(stream, self.filters, self)
289 |         return stream
290 | 
291 |     def get_tokens_unprocessed(self, stream, stack=('root',)):
292 |         """
293 |         Split ``text`` into (tokentype, text) pairs.
294 | 
295 |         ``stack`` is the inital stack (default: ``['root']``)
296 |         """
297 |         pos = 0
298 |         tokendefs = self._tokens  # see __call__, pylint:disable=E1101
299 |         statestack = list(stack)
300 |         statetokens = tokendefs[statestack[-1]]
301 |         known_names = {}
302 | 
303 |         text = stream.read()
304 |         text = self._decode(text)
305 | 
306 |         while 1:
307 |             for rexmatch, action, new_state in statetokens:
308 |                 m = rexmatch(text, pos)
309 |                 if m:
310 |                     value = m.group()
311 |                     # bugfix -> change if order
312 |                     if type(action) is tokens._TokenType:
313 |                         yield pos, action, value
314 |                     elif value in known_names:
315 |                         yield pos, known_names[value], value
316 |                     elif hasattr(action, '__call__'):
317 |                         ttype, value = action(value)
318 |                         known_names[value] = ttype
319 |                         yield pos, ttype, value
320 |                     else:
321 |                         for item in action(self, m):
322 |                             yield item
323 |                     pos = m.end()
324 |                     if new_state is not None:
325 |                         # state transition
326 |                         if isinstance(new_state, tuple):
327 |                             for state in new_state:
328 |                                 if state == '#pop':
329 |                                     statestack.pop()
330 |                                 elif state == '#push':
331 |                                     statestack.append(statestack[-1])
332 |                                 elif (
333 |                                     # Ugly hack - multiline-comments
334 |                                     # are not stackable
335 |                                     state != 'multiline-comments'
336 |                                     or not statestack
337 |                                     or statestack[-1] != 'multiline-comments'
338 |                                 ):
339 |                                     statestack.append(state)
340 |                         elif isinstance(new_state, int):
341 |                             # pop
342 |                             del statestack[new_state:]
343 |                         elif new_state == '#push':
344 |                             statestack.append(statestack[-1])
345 |                         else:
346 |                             assert False, "wrong state def: %r" % new_state
347 |                         statetokens = tokendefs[statestack[-1]]
348 |                     break
349 |             else:
350 |                 try:
351 |                     if text[pos] == '\n':
352 |                         # at EOL, reset state to "root"
353 |                         pos += 1
354 |                         statestack = ['root']
355 |                         statetokens = tokendefs['root']
356 |                         yield pos, tokens.Text, '\n'
357 |                         continue
358 |                     yield pos, tokens.Error, text[pos]
359 |                     pos += 1
360 |                 except IndexError:
361 |                     break
362 | 
363 | 
364 | def tokenize(sql, encoding=None):
365 |     """Tokenize sql.
366 | 
367 |     Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
368 |     of ``(token type, value)`` items.
369 |     """
370 |     lexer = Lexer()
371 |     if encoding is not None:
372 |         lexer.encoding = encoding
373 |     return lexer.get_tokens(sql)
374 | 


--------------------------------------------------------------------------------
/sqlparse/pipeline.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com
 2 | #
 3 | # This module is part of python-sqlparse and is released under
 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
 5 | 
 6 | from types import GeneratorType
 7 | import collections
 8 | 
 9 | 
10 | class Pipeline(list):
11 |     """Pipeline to process filters sequentially"""
12 | 
13 |     def __call__(self, stream):
14 |         """Run the pipeline
15 | 
16 |         Return a static (non generator) version of the result
17 |         """
18 | 
19 |         # Run the stream over all the filters on the pipeline
20 |         for filter in self:
21 |             # Functions and callable objects (objects with '__call__' method)
22 |             if isinstance(filter, collections.Callable):
23 |                 stream = list(filter(stream))
24 | 
25 |             # Normal filters (objects with 'process' method)
26 |             else:
27 |                 stream = filter.process(None, stream)
28 | 
29 |         # If last filter return a generator, staticalize it inside a list
30 |         if isinstance(stream, GeneratorType):
31 |             return list(stream)
32 |         return stream
33 | 


--------------------------------------------------------------------------------
/sqlparse/sql.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """This module contains classes representing syntactical elements of SQL."""
  4 | 
  5 | import re
  6 | import sys
  7 | 
  8 | from sqlparse import tokens as T
  9 | 
 10 | 
 11 | class Token(object):
 12 |     """Base class for all other classes in this module.
 13 | 
 14 |     It represents a single token and has two instance attributes:
 15 |     ``value`` is the unchange value of the token and ``ttype`` is
 16 |     the type of the token.
 17 |     """
 18 | 
 19 |     __slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword')
 20 | 
 21 |     def __init__(self, ttype, value):
 22 |         self.value = value
 23 |         if ttype in T.Keyword:
 24 |             self.normalized = value.upper()
 25 |         else:
 26 |             self.normalized = value
 27 |         self.ttype = ttype
 28 |         self.is_keyword = ttype in T.Keyword
 29 |         self.parent = None
 30 | 
 31 |     def __str__(self):
 32 |         if sys.version_info[0] == 3:
 33 |             return self.value
 34 |         else:
 35 |             # for jython bug
 36 |             # return str(self).encode('utf-8'))
 37 |             return self.value
 38 | 
 39 |     def __repr__(self):
 40 |         short = self._get_repr_value()
 41 |         if sys.version_info[0] < 3:
 42 |             short = short.encode('utf-8')
 43 |         return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
 44 |                                           short, id(self))
 45 | 
 46 |     def __unicode__(self):
 47 |         """Returns a unicode representation of this object."""
 48 |         return self.value or ''
 49 | 
 50 |     def to_unicode(self):
 51 |         """Returns a unicode representation of this object.
 52 | 
 53 |         .. deprecated:: 0.1.5
 54 |            Use ``unicode(token)`` (for Python 3: ``str(token)``) instead.
 55 |         """
 56 |         return str(self)
 57 | 
 58 |     def _get_repr_name(self):
 59 |         return str(self.ttype).split('.')[-1]
 60 | 
 61 |     def _get_repr_value(self):
 62 |         raw = str(self)
 63 |         if len(raw) > 7:
 64 |             raw = raw[:6] + '...'
 65 |         return re.sub('\s+', ' ', raw)
 66 | 
 67 |     def flatten(self):
 68 |         """Resolve subgroups."""
 69 |         yield self
 70 | 
 71 |     def match(self, ttype, values, regex=False):
 72 |         """Checks whether the token matches the given arguments.
 73 | 
 74 |         *ttype* is a token type. If this token doesn't match the given token
 75 |         type.
 76 |         *values* is a list of possible values for this token. The values
 77 |         are OR'ed together so if only one of the values matches ``True``
 78 |         is returned. Except for keyword tokens the comparison is
 79 |         case-sensitive. For convenience it's ok to pass in a single string.
 80 |         If *regex* is ``True`` (default is ``False``) the given values are
 81 |         treated as regular expressions.
 82 |         """
 83 |         type_matched = self.ttype is ttype
 84 |         if not type_matched or values is None:
 85 |             return type_matched
 86 | 
 87 |         if regex:
 88 |             if isinstance(values, str):
 89 |                 values = set([values])
 90 | 
 91 |             if self.ttype is T.Keyword:
 92 |                 values = set(re.compile(v, re.IGNORECASE) for v in values)
 93 |             else:
 94 |                 values = set(re.compile(v) for v in values)
 95 | 
 96 |             for pattern in values:
 97 |                 if pattern.search(self.value):
 98 |                     return True
 99 |             return False
100 | 
101 |         if isinstance(values, str):
102 |             if self.is_keyword:
103 |                 return values.upper() == self.normalized
104 |             return values == self.value
105 | 
106 |         if self.is_keyword:
107 |             for v in values:
108 |                 if v.upper() == self.normalized:
109 |                     return True
110 |             return False
111 | 
112 |         return self.value in values
113 | 
114 |     def is_group(self):
115 |         """Returns ``True`` if this object has children."""
116 |         return False
117 | 
118 |     def is_whitespace(self):
119 |         """Return ``True`` if this token is a whitespace token."""
120 |         return self.ttype and self.ttype in T.Whitespace
121 | 
122 |     def within(self, group_cls):
123 |         """Returns ``True`` if this token is within *group_cls*.
124 | 
125 |         Use this method for example to check if an identifier is within
126 |         a function: ``t.within(sql.Function)``.
127 |         """
128 |         parent = self.parent
129 |         while parent:
130 |             if isinstance(parent, group_cls):
131 |                 return True
132 |             parent = parent.parent
133 |         return False
134 | 
135 |     def is_child_of(self, other):
136 |         """Returns ``True`` if this token is a direct child of *other*."""
137 |         return self.parent == other
138 | 
139 |     def has_ancestor(self, other):
140 |         """Returns ``True`` if *other* is in this tokens ancestry."""
141 |         parent = self.parent
142 |         while parent:
143 |             if parent == other:
144 |                 return True
145 |             parent = parent.parent
146 |         return False
147 | 
148 | 
149 | class TokenList(Token):
150 |     """A group of tokens.
151 | 
152 |     It has an additional instance attribute ``tokens`` which holds a
153 |     list of child-tokens.
154 |     """
155 | 
156 |     __slots__ = ('value', 'ttype', 'tokens')
157 | 
158 |     def __init__(self, tokens=None):
159 |         if tokens is None:
160 |             tokens = []
161 |         self.tokens = tokens
162 |         Token.__init__(self, None, self._to_string())
163 | 
164 |     def __unicode__(self):
165 |         return self._to_string()
166 | 
167 |     def __str__(self):
168 |         str_ = self._to_string()
169 |         if sys.version_info[0] < 2:
170 |             str_ = str_.encode('utf-8')
171 |         return str_
172 | 
173 |     def _to_string(self):
174 |         if sys.version_info[0] == 3:
175 |             return ''.join(x.value for x in self.flatten())
176 |         else:
177 |             return ''.join(str(x) for x in self.flatten())
178 | 
179 |     def _get_repr_name(self):
180 |         return self.__class__.__name__
181 | 
182 |     def _pprint_tree(self, max_depth=None, depth=0):
183 |         """Pretty-print the object tree."""
184 |         indent = ' ' * (depth * 2)
185 |         for idx, token in enumerate(self.tokens):
186 |             if token.is_group():
187 |                 pre = ' +-'
188 |             else:
189 |                 pre = ' | '
190 |             print('%s%s%d %s \'%s\'' % (indent, pre, idx,
191 |                                         token._get_repr_name(),
192 |                                         token._get_repr_value()))
193 |             if (token.is_group() and (max_depth is None or depth < max_depth)):
194 |                 token._pprint_tree(max_depth, depth + 1)
195 | 
196 |     def _remove_quotes(self, val):
197 |         """Helper that removes surrounding quotes from strings."""
198 |         if not val:
199 |             return val
200 |         if val[0] in ('"', '\'') and val[-1] == val[0]:
201 |             val = val[1:-1]
202 |         return val
203 | 
204 |     def get_token_at_offset(self, offset):
205 |         """Returns the token that is on position offset."""
206 |         idx = 0
207 |         for token in self.flatten():
208 |             end = idx + len(token.value)
209 |             if idx <= offset <= end:
210 |                 return token
211 |             idx = end
212 | 
213 |     def flatten(self):
214 |         """Generator yielding ungrouped tokens.
215 | 
216 |         This method is recursively called for all child tokens.
217 |         """
218 |         for token in self.tokens:
219 |             if isinstance(token, TokenList):
220 |                 for item in token.flatten():
221 |                     yield item
222 |             else:
223 |                 yield token
224 | 
225 | #    def __iter__(self):
226 | #        return self
227 | #
228 | #    def next(self):
229 | #        for token in self.tokens:
230 | #            yield token
231 | 
232 |     def is_group(self):
233 |         return True
234 | 
235 |     def get_sublists(self):
236 | #        return [x for x in self.tokens if isinstance(x, TokenList)]
237 |         for x in self.tokens:
238 |             if isinstance(x, TokenList):
239 |                 yield x
240 | 
241 |     @property
242 |     def _groupable_tokens(self):
243 |         return self.tokens
244 | 
245 |     def token_first(self, ignore_whitespace=True, ignore_comments=False):
246 |         """Returns the first child token.
247 | 
248 |         If *ignore_whitespace* is ``True`` (the default), whitespace
249 |         tokens are ignored.
250 | 
251 |         if *ignore_comments* is ``True`` (default: ``False``), comments are
252 |         ignored too.
253 |         """
254 |         for token in self.tokens:
255 |             if ignore_whitespace and token.is_whitespace():
256 |                 continue
257 |             if ignore_comments and isinstance(token, Comment):
258 |                 continue
259 |             return token
260 | 
261 |     def token_next_by_instance(self, idx, clss, end=None):
262 |         """Returns the next token matching a class.
263 | 
264 |         *idx* is where to start searching in the list of child tokens.
265 |         *clss* is a list of classes the token should be an instance of.
266 | 
267 |         If no matching token can be found ``None`` is returned.
268 |         """
269 |         if not isinstance(clss, (list, tuple)):
270 |             clss = (clss,)
271 | 
272 |         for token in self.tokens[idx:end]:
273 |             if isinstance(token, clss):
274 |                 return token
275 | 
276 |     def token_next_by_type(self, idx, ttypes):
277 |         """Returns next matching token by it's token type."""
278 |         if not isinstance(ttypes, (list, tuple)):
279 |             ttypes = [ttypes]
280 | 
281 |         for token in self.tokens[idx:]:
282 |             if token.ttype in ttypes:
283 |                 return token
284 | 
285 |     def token_next_match(self, idx, ttype, value, regex=False):
286 |         """Returns next token where it's ``match`` method returns ``True``."""
287 |         if not isinstance(idx, int):
288 |             idx = self.token_index(idx)
289 | 
290 |         for n in range(idx, len(self.tokens)):
291 |             token = self.tokens[n]
292 |             if token.match(ttype, value, regex):
293 |                 return token
294 | 
295 |     def token_not_matching(self, idx, funcs):
296 |         for token in self.tokens[idx:]:
297 |             passed = False
298 |             for func in funcs:
299 |                 if func(token):
300 |                     passed = True
301 |                     break
302 | 
303 |             if not passed:
304 |                 return token
305 | 
306 |     def token_matching(self, idx, funcs):
307 |         for token in self.tokens[idx:]:
308 |             for func in funcs:
309 |                 if func(token):
310 |                     return token
311 | 
312 |     def token_prev(self, idx, skip_ws=True):
313 |         """Returns the previous token relative to *idx*.
314 | 
315 |         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
316 |         ``None`` is returned if there's no previous token.
317 |         """
318 |         if idx is None:
319 |             return None
320 | 
321 |         if not isinstance(idx, int):
322 |             idx = self.token_index(idx)
323 | 
324 |         while idx:
325 |             idx -= 1
326 |             if self.tokens[idx].is_whitespace() and skip_ws:
327 |                 continue
328 |             return self.tokens[idx]
329 | 
330 |     def token_next(self, idx, skip_ws=True):
331 |         """Returns the next token relative to *idx*.
332 | 
333 |         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
334 |         ``None`` is returned if there's no next token.
335 |         """
336 |         if idx is None:
337 |             return None
338 | 
339 |         if not isinstance(idx, int):
340 |             idx = self.token_index(idx)
341 | 
342 |         while idx < len(self.tokens) - 1:
343 |             idx += 1
344 |             if self.tokens[idx].is_whitespace() and skip_ws:
345 |                 continue
346 |             return self.tokens[idx]
347 | 
348 |     def token_index(self, token, start=0):
349 |         """Return list index of token."""
350 |         if start > 0:
351 |             # Performing `index` manually is much faster when starting in the middle
352 |             # of the list of tokens and expecting to find the token near to the starting
353 |             # index.
354 |             for i in range(start, len(self.tokens)):
355 |                 if self.tokens[i] == token:
356 |                     return i
357 |             return -1
358 |         return self.tokens.index(token)
359 | 
360 |     def tokens_between(self, start, end, exclude_end=False):
361 |         """Return all tokens between (and including) start and end.
362 | 
363 |         If *exclude_end* is ``True`` (default is ``False``) the end token
364 |         is included too.
365 |         """
366 |         # FIXME(andi): rename exclude_end to inlcude_end
367 |         if exclude_end:
368 |             offset = 0
369 |         else:
370 |             offset = 1
371 |         end_idx = self.token_index(end) + offset
372 |         start_idx = self.token_index(start)
373 |         return self.tokens[start_idx:end_idx]
374 | 
375 |     def group_tokens(self, grp_cls, tokens, ignore_ws=False):
376 |         """Replace tokens by an instance of *grp_cls*."""
377 |         idx = self.token_index(tokens[0])
378 |         if ignore_ws:
379 |             while tokens and tokens[-1].is_whitespace():
380 |                 tokens = tokens[:-1]
381 |         for t in tokens:
382 |             self.tokens.remove(t)
383 |         grp = grp_cls(tokens)
384 |         for token in tokens:
385 |             token.parent = grp
386 |         grp.parent = self
387 |         self.tokens.insert(idx, grp)
388 |         return grp
389 | 
390 |     def insert_before(self, where, token):
391 |         """Inserts *token* before *where*."""
392 |         self.tokens.insert(self.token_index(where), token)
393 | 
394 |     def insert_after(self, where, token, skip_ws=True):
395 |         """Inserts *token* after *where*."""
396 |         next_token = self.token_next(where, skip_ws=skip_ws)
397 |         if next_token is None:
398 |             self.tokens.append(token)
399 |         else:
400 |             self.tokens.insert(self.token_index(next_token), token)
401 | 
402 |     def has_alias(self):
403 |         """Returns ``True`` if an alias is present."""
404 |         return self.get_alias() is not None
405 | 
406 |     def get_alias(self):
407 |         """Returns the alias for this identifier or ``None``."""
408 | 
409 |         # "name AS alias"
410 |         kw = self.token_next_match(0, T.Keyword, 'AS')
411 |         if kw is not None:
412 |             return self._get_first_name(kw, keywords=True)
413 | 
414 |         # "name alias" or "complicated column expression alias"
415 |         if len(self.tokens) > 2 \
416 |            and self.token_next_by_type(0, T.Whitespace) is not None:
417 |             return self._get_first_name(reverse=True)
418 | 
419 |         return None
420 | 
421 |     def get_name(self):
422 |         """Returns the name of this identifier.
423 | 
424 |         This is either it's alias or it's real name. The returned valued can
425 |         be considered as the name under which the object corresponding to
426 |         this identifier is known within the current statement.
427 |         """
428 |         alias = self.get_alias()
429 |         if alias is not None:
430 |             return alias
431 |         return self.get_real_name()
432 | 
433 |     def get_real_name(self):
434 |         """Returns the real name (object name) of this identifier."""
435 |         # a.b
436 |         dot = self.token_next_match(0, T.Punctuation, '.')
437 |         if dot is not None:
438 |             return self._get_first_name(self.token_index(dot))
439 | 
440 |         return self._get_first_name()
441 | 
442 |     def get_parent_name(self):
443 |         """Return name of the parent object if any.
444 | 
445 |         A parent object is identified by the first occuring dot.
446 |         """
447 |         dot = self.token_next_match(0, T.Punctuation, '.')
448 |         if dot is None:
449 |             return None
450 |         prev_ = self.token_prev(self.token_index(dot))
451 |         if prev_ is None:  # something must be verry wrong here..
452 |             return None
453 |         return self._remove_quotes(prev_.value)
454 | 
455 |     def _get_first_name(self, idx=None, reverse=False, keywords=False):
456 |         """Returns the name of the first token with a name"""
457 | 
458 |         if idx and not isinstance(idx, int):
459 |             idx = self.token_index(idx) + 1
460 | 
461 |         tokens = self.tokens[idx:] if idx else self.tokens
462 |         tokens = reversed(tokens) if reverse else tokens
463 |         types = [T.Name, T.Wildcard, T.String.Symbol]
464 | 
465 |         if keywords:
466 |             types.append(T.Keyword)
467 | 
468 |         for tok in tokens:
469 |             if tok.ttype in types:
470 |                 return self._remove_quotes(tok.value)
471 |             elif isinstance(tok, Identifier) or isinstance(tok, Function):
472 |                 return tok.get_name()
473 |         return None
474 | 
475 | class Statement(TokenList):
476 |     """Represents a SQL statement."""
477 | 
478 |     __slots__ = ('value', 'ttype', 'tokens')
479 | 
480 |     def get_type(self):
481 |         """Returns the type of a statement.
482 | 
483 |         The returned value is a string holding an upper-cased reprint of
484 |         the first DML or DDL keyword. If the first token in this group
485 |         isn't a DML or DDL keyword "UNKNOWN" is returned.
486 | 
487 |         Whitespaces and comments at the beginning of the statement
488 |         are ignored.
489 |         """
490 |         first_token = self.token_first(ignore_comments=True)
491 |         if first_token is None:
492 |             # An "empty" statement that either has not tokens at all
493 |             # or only whitespace tokens.
494 |             return 'UNKNOWN'
495 | 
496 |         elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
497 |             return first_token.normalized
498 | 
499 |         return 'UNKNOWN'
500 | 
501 | 
502 | class Identifier(TokenList):
503 |     """Represents an identifier.
504 | 
505 |     Identifiers may have aliases or typecasts.
506 |     """
507 | 
508 |     __slots__ = ('value', 'ttype', 'tokens')
509 | 
510 |     def is_wildcard(self):
511 |         """Return ``True`` if this identifier contains a wildcard."""
512 |         token = self.token_next_by_type(0, T.Wildcard)
513 |         return token is not None
514 | 
515 |     def get_typecast(self):
516 |         """Returns the typecast or ``None`` of this object as a string."""
517 |         marker = self.token_next_match(0, T.Punctuation, '::')
518 |         if marker is None:
519 |             return None
520 |         next_ = self.token_next(self.token_index(marker), False)
521 |         if next_ is None:
522 |             return None
523 |         return str(next_)
524 | 
525 |     def get_ordering(self):
526 |         """Returns the ordering or ``None`` as uppercase string."""
527 |         ordering = self.token_next_by_type(0, T.Keyword.Order)
528 |         if ordering is None:
529 |             return None
530 |         return ordering.value.upper()
531 | 
532 |     def get_array_indices(self):
533 |         """Returns an iterator of index token lists"""
534 | 
535 |         for tok in self.tokens:
536 |             if isinstance(tok, SquareBrackets):
537 |                 # Use [1:-1] index to discard the square brackets
538 |                 yield tok.tokens[1:-1]
539 | 
540 | 
541 | class IdentifierList(TokenList):
542 |     """A list of :class:`~sqlparse.sql.Identifier`\'s."""
543 | 
544 |     __slots__ = ('value', 'ttype', 'tokens')
545 | 
546 |     def get_identifiers(self):
547 |         """Returns the identifiers.
548 | 
549 |         Whitespaces and punctuations are not included in this generator.
550 |         """
551 |         for x in self.tokens:
552 |             if not x.is_whitespace() and not x.match(T.Punctuation, ','):
553 |                 yield x
554 | 
555 | 
556 | class Parenthesis(TokenList):
557 |     """Tokens between parenthesis."""
558 |     __slots__ = ('value', 'ttype', 'tokens')
559 | 
560 |     @property
561 |     def _groupable_tokens(self):
562 |         return self.tokens[1:-1]
563 | 
564 | 
565 | class SquareBrackets(TokenList):
566 |     """Tokens between square brackets"""
567 | 
568 |     __slots__ = ('value', 'ttype', 'tokens')
569 | 
570 |     @property
571 |     def _groupable_tokens(self):
572 |         return self.tokens[1:-1]
573 | 
574 | class Assignment(TokenList):
575 |     """An assignment like 'var := val;'"""
576 |     __slots__ = ('value', 'ttype', 'tokens')
577 | 
578 | 
579 | class If(TokenList):
580 |     """An 'if' clause with possible 'else if' or 'else' parts."""
581 |     __slots__ = ('value', 'ttype', 'tokens')
582 | 
583 | 
584 | class For(TokenList):
585 |     """A 'FOR' loop."""
586 |     __slots__ = ('value', 'ttype', 'tokens')
587 | 
588 | 
589 | class Comparison(TokenList):
590 |     """A comparison used for example in WHERE clauses."""
591 |     __slots__ = ('value', 'ttype', 'tokens')
592 | 
593 |     @property
594 |     def left(self):
595 |         return self.tokens[0]
596 | 
597 |     @property
598 |     def right(self):
599 |         return self.tokens[-1]
600 | 
601 | 
602 | class Comment(TokenList):
603 |     """A comment."""
604 |     __slots__ = ('value', 'ttype', 'tokens')
605 | 
606 |     def is_multiline(self):
607 |         return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
608 | 
609 | 
610 | class Where(TokenList):
611 |     """A WHERE clause."""
612 |     __slots__ = ('value', 'ttype', 'tokens')
613 | 
614 | 
615 | class Case(TokenList):
616 |     """A CASE statement with one or more WHEN and possibly an ELSE part."""
617 | 
618 |     __slots__ = ('value', 'ttype', 'tokens')
619 | 
620 |     def get_cases(self):
621 |         """Returns a list of 2-tuples (condition, value).
622 | 
623 |         If an ELSE exists condition is None.
624 |         """
625 |         CONDITION = 1
626 |         VALUE = 2
627 | 
628 |         ret = []
629 |         mode = CONDITION
630 | 
631 |         for token in self.tokens:
632 |             # Set mode from the current statement
633 |             if token.match(T.Keyword, 'CASE'):
634 |                 continue
635 | 
636 |             elif token.match(T.Keyword, 'WHEN'):
637 |                 ret.append(([], []))
638 |                 mode = CONDITION
639 | 
640 |             elif token.match(T.Keyword, 'THEN'):
641 |                 mode = VALUE
642 | 
643 |             elif token.match(T.Keyword, 'ELSE'):
644 |                 ret.append((None, []))
645 |                 mode = VALUE
646 | 
647 |             elif token.match(T.Keyword, 'END'):
648 |                 mode = None
649 | 
650 |             # First condition without preceding WHEN
651 |             if mode and not ret:
652 |                 ret.append(([], []))
653 | 
654 |             # Append token depending of the current mode
655 |             if mode == CONDITION:
656 |                 ret[-1][0].append(token)
657 | 
658 |             elif mode == VALUE:
659 |                 ret[-1][1].append(token)
660 | 
661 |         # Return cases list
662 |         return ret
663 | 
664 | 
665 | class Function(TokenList):
666 |     """A function or procedure call."""
667 | 
668 |     __slots__ = ('value', 'ttype', 'tokens')
669 | 
670 |     def get_parameters(self):
671 |         """Return a list of parameters."""
672 |         parenthesis = self.tokens[-1]
673 |         for t in parenthesis.tokens:
674 |             if isinstance(t, IdentifierList):
675 |                 return t.get_identifiers()
676 |             elif isinstance(t, Identifier) or \
677 |                 isinstance(t, Function) or \
678 |                 t.ttype in T.Literal:
679 |                 return [t,]
680 |         return []
681 | 
682 | 
683 | class Begin(TokenList):
684 |     """A BEGIN/END block."""
685 | 
686 |     __slots__ = ('value', 'ttype', 'tokens')
687 | 


--------------------------------------------------------------------------------
/sqlparse/tokens.py:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
 2 | #
 3 | # This module is part of python-sqlparse and is released under
 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
 5 | 
 6 | # The Token implementation is based on pygment's token system written
 7 | # by Georg Brandl.
 8 | # http://pygments.org/
 9 | 
10 | """Tokens"""
11 | 
12 | 
13 | class _TokenType(tuple):
14 |     parent = None
15 | 
16 |     def split(self):
17 |         buf = []
18 |         node = self
19 |         while node is not None:
20 |             buf.append(node)
21 |             node = node.parent
22 |         buf.reverse()
23 |         return buf
24 | 
25 |     def __contains__(self, val):
26 |         return val is not None and (self is val or val[:len(self)] == self)
27 | 
28 |     def __getattr__(self, val):
29 |         if not val or not val[0].isupper():
30 |             return tuple.__getattribute__(self, val)
31 |         new = _TokenType(self + (val,))
32 |         setattr(self, val, new)
33 |         new.parent = self
34 |         return new
35 | 
36 |     def __hash__(self):
37 |         return hash(tuple(self))
38 | 
39 |     def __repr__(self):
40 |         return 'Token' + (self and '.' or '') + '.'.join(self)
41 | 
42 | 
43 | Token = _TokenType()
44 | 
45 | # Special token types
46 | Text = Token.Text
47 | Whitespace = Text.Whitespace
48 | Newline = Whitespace.Newline
49 | Error = Token.Error
50 | # Text that doesn't belong to this lexer (e.g. HTML in PHP)
51 | Other = Token.Other
52 | 
53 | # Common token types for source code
54 | Keyword = Token.Keyword
55 | Name = Token.Name
56 | Literal = Token.Literal
57 | String = Literal.String
58 | Number = Literal.Number
59 | Punctuation = Token.Punctuation
60 | Operator = Token.Operator
61 | Comparison = Operator.Comparison
62 | Wildcard = Token.Wildcard
63 | Comment = Token.Comment
64 | Assignment = Token.Assignement
65 | 
66 | # Generic types for non-source code
67 | Generic = Token.Generic
68 | 
69 | # String and some others are not direct childs of Token.
70 | # alias them:
71 | Token.Token = Token
72 | Token.String = String
73 | Token.Number = Number
74 | 
75 | # SQL specific tokens
76 | DML = Keyword.DML
77 | DDL = Keyword.DDL
78 | Command = Keyword.Command
79 | 
80 | Group = Token.Group
81 | Group.Parenthesis = Token.Group.Parenthesis
82 | Group.Comment = Token.Group.Comment
83 | Group.Where = Token.Group.Where
84 | 


--------------------------------------------------------------------------------
/sqlparse/utils.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Created on 17/05/2012
  3 | 
  4 | @author: piranna
  5 | '''
  6 | 
  7 | import re
  8 | 
  9 | try:
 10 |     from collections import OrderedDict
 11 | except ImportError:
 12 |     OrderedDict = None
 13 | 
 14 | 
 15 | if OrderedDict:
 16 |     class Cache(OrderedDict):
 17 |         """Cache with LRU algorithm using an OrderedDict as basis
 18 |         """
 19 |         def __init__(self, maxsize=100):
 20 |             OrderedDict.__init__(self)
 21 | 
 22 |             self._maxsize = maxsize
 23 | 
 24 |         def __getitem__(self, key, *args, **kwargs):
 25 |             # Get the key and remove it from the cache, or raise KeyError
 26 |             value = OrderedDict.__getitem__(self, key)
 27 |             del self[key]
 28 | 
 29 |             # Insert the (key, value) pair on the front of the cache
 30 |             OrderedDict.__setitem__(self, key, value)
 31 | 
 32 |             # Return the value from the cache
 33 |             return value
 34 | 
 35 |         def __setitem__(self, key, value, *args, **kwargs):
 36 |             # Key was inserted before, remove it so we put it at front later
 37 |             if key in self:
 38 |                 del self[key]
 39 | 
 40 |             # Too much items on the cache, remove the least recent used
 41 |             elif len(self) >= self._maxsize:
 42 |                 self.popitem(False)
 43 | 
 44 |             # Insert the (key, value) pair on the front of the cache
 45 |             OrderedDict.__setitem__(self, key, value, *args, **kwargs)
 46 | 
 47 | else:
 48 |     class Cache(dict):
 49 |         """Cache that reset when gets full
 50 |         """
 51 |         def __init__(self, maxsize=100):
 52 |             dict.__init__(self)
 53 | 
 54 |             self._maxsize = maxsize
 55 | 
 56 |         def __setitem__(self, key, value, *args, **kwargs):
 57 |             # Reset the cache if we have too much cached entries and start over
 58 |             if len(self) >= self._maxsize:
 59 |                 self.clear()
 60 | 
 61 |             # Insert the (key, value) pair on the front of the cache
 62 |             dict.__setitem__(self, key, value, *args, **kwargs)
 63 | 
 64 | 
 65 | def memoize_generator(func):
 66 |     """Memoize decorator for generators
 67 | 
 68 |     Store `func` results in a cache according to their arguments as 'memoize'
 69 |     does but instead this works on decorators instead of regular functions.
 70 |     Obviusly, this is only useful if the generator will always return the same
 71 |     values for each specific parameters...
 72 |     """
 73 |     cache = Cache()
 74 | 
 75 |     def wrapped_func(*args, **kwargs):
 76 | #        params = (args, kwargs)
 77 |         params = (args, tuple(sorted(kwargs.items())))
 78 | 
 79 |         # Look if cached
 80 |         try:
 81 |             cached = cache[params]
 82 | 
 83 |         # Not cached, exec and store it
 84 |         except KeyError:
 85 |             cached = []
 86 | 
 87 |             for item in func(*args, **kwargs):
 88 |                 cached.append(item)
 89 |                 yield item
 90 | 
 91 |             cache[params] = cached
 92 | 
 93 |         # Cached, yield its items
 94 |         else:
 95 |             for item in cached:
 96 |                 yield item
 97 | 
 98 |     return wrapped_func
 99 | 
100 | 
101 | # This regular expression replaces the home-cooked parser that was here before.
102 | # It is much faster, but requires an extra post-processing step to get the
103 | # desired results (that are compatible with what you would expect from the
104 | # str.splitlines() method).
105 | #
106 | # It matches groups of characters: newlines, quoted strings, or unquoted text,
107 | # and splits on that basis. The post-processing step puts those back together
108 | # into the actual lines of SQL.
109 | SPLIT_REGEX = re.compile(r"""
110 | (
111 |  (?:                     # Start of non-capturing group
112 |   (?:\r\n|\r|\n)      |  # Match any single newline, or
113 |   [^\r\n'"]+          |  # Match any character series without quotes or
114 |                          # newlines, or
115 |   "(?:[^"\\]|\\.)*"   |  # Match double-quoted strings, or
116 |   '(?:[^'\\]|\\.)*'      # Match single quoted strings
117 |  )
118 | )
119 | """, re.VERBOSE)
120 | 
121 | LINE_MATCH = re.compile(r'(\r\n|\r|\n)')
122 | 
123 | def split_unquoted_newlines(text):
124 |     """Split a string on all unquoted newlines.
125 | 
126 |     Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
127 |     character is inside of a string."""
128 |     lines = SPLIT_REGEX.split(text)
129 |     outputlines = ['']
130 |     for line in lines:
131 |         if not line:
132 |             continue
133 |         elif LINE_MATCH.match(line):
134 |             outputlines.append('')
135 |         else:
136 |             outputlines[-1] += line
137 |     return outputlines


--------------------------------------------------------------------------------
/uroborosqlfmt/__init__.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | '''
  3 | uroboroSQL formatter.
  4 | @author: ota
  5 | '''
  6 | 
  7 | 
  8 | __version__ = '0.0.1'
  9 | 
 10 | import sys
 11 | import re
 12 | from threading import Thread, Lock
 13 | import sqlparse
 14 | from sqlparse.lexer import Lexer
 15 | from sqlparse import tokens as T, utils
 16 | from uroborosqlfmt import filters, config
 17 | 
 18 | LOCK = Lock()
 19 | 
 20 | def format_sql(sql, local_config = config.LocalConfig()):
 21 | 
 22 |     LOCK.acquire()
 23 |     try:
 24 |         if not config.glb.escape_sequence_u005c:
 25 |             # Oracle等の場合リテラルの取り方が違うので置き換える
 26 |             for i, data in enumerate(Lexer.tokens["root"]):
 27 |                 single = getattr(T.String, "Single")
 28 |                 if data[0] == r"'(''|\\\\|\\'|[^'])*'" :
 29 |                     if data[1] == single:
 30 |                         Lexer.tokens["root"][i] = (r"'(''|[^'])*'", single)
 31 | 
 32 |                         # 初期化
 33 |                         if hasattr(Lexer, "_tokens"):
 34 |                             delattr(Lexer, "_tokens")
 35 |                         if hasattr(Lexer, "token_variants"):
 36 |                             delattr(Lexer, "token_variants")
 37 |                         break
 38 |             utils.SPLIT_REGEX = re.compile(r"""
 39 |             (
 40 |              (?:                     # Start of non-capturing group
 41 |               (?:\r\n|\r|\n)      |  # Match any single newline, or
 42 |               [^\r\n'"]+          |  # Match any character series without quotes or
 43 |                                      # newlines, or
 44 |               "(?:[^"]|\\.)*"   |  # Match double-quoted strings, or
 45 |               '(?:[^']|\\.)*'      # Match single quoted strings
 46 |              )
 47 |             )
 48 |             """, re.VERBOSE)
 49 |         else:
 50 |             # 元に戻す
 51 |             for i, data in enumerate(Lexer.tokens["root"]):
 52 |                 single = getattr(T.String, "Single")
 53 |                 if data[0] == r"'(''|[^'])*'" :
 54 |                     if data[1] == single:
 55 |                         Lexer.tokens["root"][i] = (r"'(''|\\\\|\\'|[^'])*'" , single)
 56 | 
 57 |                         # 初期化
 58 |                         if hasattr(Lexer, "_tokens"):
 59 |                             delattr(Lexer, "_tokens")
 60 |                         if hasattr(Lexer, "token_variants"):
 61 |                             delattr(Lexer, "token_variants")
 62 |                         break
 63 |             utils.SPLIT_REGEX = re.compile(r"""
 64 |             (
 65 |              (?:                     # Start of non-capturing group
 66 |               (?:\r\n|\r|\n)      |  # Match any single newline, or
 67 |               [^\r\n'"]+          |  # Match any character series without quotes or
 68 |                                      # newlines, or
 69 |               "(?:[^"\\]|\\.)*"   |  # Match double-quoted strings, or
 70 |               '(?:[^'\\]|\\.)*'      # Match single quoted strings
 71 |              )
 72 |             )
 73 |             """, re.VERBOSE)
 74 |     finally:
 75 |         LOCK.release()
 76 | 
 77 |     stack = sqlparse.engine.FilterStack()
 78 |     stack.enable_grouping()
 79 | 
 80 |     if local_config.case != None:
 81 |         stack.preprocess.append(sqlparse.filters.KeywordCaseFilter(local_config.case))
 82 |         stack.preprocess.append(sqlparse.filters.IdentifierCaseFilter(local_config.case))
 83 | 
 84 |     if local_config.reserved_case != None:
 85 |         stack.preprocess.append(filters.ReservedWordCaseFilter(local_config))
 86 | 
 87 |     stack.stmtprocess.append(filters.GroupFilter())
 88 |     stack.stmtprocess.append(filters.LineDescriptionLineCommentFilter(local_config))
 89 |     stack.stmtprocess.append(filters.MoveCommaFilter(local_config))
 90 |     stack.stmtprocess.append(filters.StripWhitespaceAndToTabFilter())
 91 |     stack.stmtprocess.append(filters.AdjustGroupFilter(local_config))
 92 | 
 93 |     stack.stmtprocess.append(filters.OperatorFilter())
 94 | 
 95 |     stack.stmtprocess.append(filters.CustomReindentFilter(local_config))
 96 |     stack.postprocess.append(sqlparse.filters.SerializerUnicode())
 97 | 
 98 |     if sys.version_info[0] < 3 and isinstance(sql, unicode):
 99 |         sql = sql.encode("utf-8")
100 |         formatted = "\n".join(stack.run(sql))
101 |         return formatted.decode("utf-8")
102 |     else:
103 |         return "\n".join(stack.run(sql))
104 | 


--------------------------------------------------------------------------------
/uroborosqlfmt/api.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | '''
  3 | uroboroSQL formatter API.
  4 | @author: ota
  5 | '''
  6 | 
  7 | import os
  8 | import traceback
  9 | import codecs
 10 | import sys
 11 | import argparse
 12 | import uroborosqlfmt
 13 | from uroborosqlfmt import config
 14 | from uroborosqlfmt import commentsyntax
 15 | from uroborosqlfmt.exceptions import SqlFormatterException
 16 | from uroborosqlfmt.config import LocalConfig
 17 | 
 18 | # pylint: disable=bare-except
 19 | 
 20 | 
 21 | def format_dir(indir, outdir, local_config):
 22 |     """
 23 |         [indir]フォルダ内のSQLファイルをフォーマットして指定フォルダ[outdir]に出力する
 24 |     """
 25 |     if indir.endswith("/") or indir.endswith("\\"):
 26 |         indir = indir[:-1]
 27 |     if outdir.endswith("/") or outdir.endswith("\\"):
 28 |         outdir = outdir[:-1]
 29 | 
 30 |     for file_name, full_path in find_all_sql_files(indir):
 31 |         try:
 32 |             sql = __read_file(full_path)
 33 |         except:
 34 |             print(full_path)
 35 |             print(traceback.format_exc())
 36 |             continue
 37 |         error = False
 38 |         try:
 39 |             out_sql = uroborosqlfmt.format_sql(sql, local_config)
 40 |         except SqlFormatterException as ex:
 41 |             exs = __decode(str(ex))
 42 |             trace = __decode(traceback.format_exc())
 43 |             out_sql = sql + "\n/*" + exs + "\n" + trace + "\n*/"
 44 |             error = True
 45 |         except:
 46 |             trace = __decode(traceback.format_exc())
 47 |             out_sql = sql + "\n/*" + trace + "\n*/"
 48 |             error = True
 49 | 
 50 |         if not error:
 51 |             out_path = os.path.join(outdir, file_name)
 52 |         else:
 53 |             out_path = os.path.join(outdir, "formaterror_" + file_name)
 54 |         out_dir = os.path.dirname(out_path)
 55 |         if not os.path.exists(out_dir):
 56 |             os.makedirs(out_dir)
 57 |         __write_file(out_path, out_sql)
 58 | 
 59 | 
 60 | def format_file(infile, outdir, local_config):
 61 |     """
 62 |         SQLファイル(infile)をフォーマットして指定フォルダ[outdir]に出力する
 63 |     """
 64 |     try:
 65 |         sql = __read_file(infile)
 66 |     except:
 67 |         print(infile)
 68 |         print(traceback.format_exc())
 69 | 
 70 |     error = False
 71 | 
 72 |     try:
 73 |         out_sql = uroborosqlfmt.format_sql(sql, local_config)
 74 |     except SqlFormatterException as ex:
 75 |         exs = __decode(str(ex))
 76 |         trace = __decode(traceback.format_exc())
 77 |         out_sql = sql + "\n/*" + exs + "\n" + trace + "\n*/"
 78 |         error = True
 79 |     except:
 80 |         trace = __decode(traceback.format_exc())
 81 |         out_sql = sql + "\n/*" + trace + "\n*/"
 82 |         error = True
 83 | 
 84 |     if not error:
 85 |         file_name = os.path.basename(infile)
 86 |         out_path = os.path.join(outdir, file_name)
 87 |     else:
 88 |         out_path = os.path.join(outdir, "formaterror_" + file_name)
 89 |     out_dir = os.path.dirname(out_path)
 90 |     if not os.path.exists(out_dir):
 91 |         os.makedirs(out_dir)
 92 |     __write_file(out_path, out_sql)
 93 | 
 94 | 
 95 | def find_all_sql_files(directory):
 96 |     for root, _, files in os.walk(directory):
 97 |         for file_name in files:
 98 |             if os.path.splitext(file_name)[1].lower() == ".sql":
 99 |                 path = os.path.join(root, file_name)
100 |                 yield path[len(directory) + 1:], path
101 | 
102 | 
103 | def __read_file(path):
104 |     target_file = codecs.open(path, "r", "utf-8")
105 |     ret = target_file.read()
106 |     target_file.close()
107 |     return ret
108 | 
109 | 
110 | def __write_file(path, value):
111 |     target_file = codecs.open(path, "w", "utf-8")
112 |     target_file.write(value)
113 |     target_file.close()
114 | 
115 | 
116 | def __decode(text):
117 |     text = str(text)
118 |     if sys.version_info[0] < 3:
119 |         return text.decode("utf-8")
120 |     else:
121 |         return text
122 | 
123 | def _parse_args(test_args=None):
124 |     parser = argparse.ArgumentParser(description='uroboroSQL formatter API', prog='usqlfmt')
125 | 
126 |     parser.add_argument('-v', '--version', action='version', version='%(prog)s 0.1.0')
127 |     parser.add_argument('input_path', \
128 |         action='store', \
129 |         type=str, \
130 |         help='input directory path or input file path', \
131 |         )
132 |     parser.add_argument('output_path', \
133 |         action='store', \
134 |         type=str, \
135 |         help='output path for formatted file(s).', \
136 |         )
137 |     parser.add_argument('-m', '--mode', \
138 |         action='store', \
139 |         default='file', \
140 |         type=str, \
141 |         choices=['file', 'directory'], \
142 |         help='format target. default "file"', \
143 |         )
144 |     parser.add_argument('-a', '--case', \
145 |         action='store', \
146 |         default=None, \
147 |         type=str, \
148 |         choices=['upper', 'lower', 'capitalize'], \
149 |         )
150 |     parser.add_argument('-e', '--reserved_case', \
151 |         action='store', \
152 |         default=None, \
153 |         type=str, \
154 |         choices=['upper', 'lower', 'capitalize'], \
155 |         )
156 |     parser.add_argument('-B', '--escapesequence_u005c', \
157 |         action='store_true', \
158 |         help='use backslash escapesequence.', \
159 |         )
160 |     parser.add_argument('-c', '--comment_syntax', \
161 |         action='store', \
162 |         default='uroborosql', \
163 |         type=str, \
164 |         choices=['uroborosql', 'doma2', 'uroboro', 'doma'], \
165 |         help='SQL comment out syntax type.', \
166 |         )
167 |     parser.add_argument('-r', '--reserved_words_file_path', \
168 |         action='store', \
169 |         default=None, \
170 |         type=str, \
171 |         help='input reserved words file path.', \
172 |         )
173 | 
174 |     return parser.parse_args(test_args)
175 | 
176 | 
177 | def __execute():
178 | 
179 |     """
180 |     Check the arguments.
181 |     If the number of arguments does not match the specified number,
182 |     quit the application with usage massages.
183 |     """
184 |     args = _parse_args()
185 | 
186 |     mode = args.mode
187 |     case = args.case
188 |     reserved_case = args.reserved_case
189 |     escapesequence_u005c = args.escapesequence_u005c
190 |     comment_syntax = args.comment_syntax
191 |     input_path = args.input_path
192 |     output_path = args.output_path
193 |     reserved_words_file_path = args.reserved_words_file_path
194 |     local_config = LocalConfig()
195 | 
196 |     set_case(local_config, case)
197 |     set_reserved_case(local_config, reserved_case)
198 |     set_escapesequence_u005c(escapesequence_u005c)
199 |     set_comment_syntax(local_config, comment_syntax)
200 |     set_reserved_words(local_config, reserved_words_file_path)
201 | 
202 |     """
203 |     The application requires either "file" or "directory"
204 |     as a command line argument. If the argument does not match them,
205 |     quit the application with usage massages.
206 |     """
207 |     if mode == "file":
208 |         format_file(input_path, output_path, local_config)
209 |     elif mode == "directory":
210 |         format_dir(input_path, output_path, local_config)
211 |     else:
212 |         sys.exit()
213 | 
214 |     print("\n===== Finish the application =====")
215 |     print("Output directory path : %s" % output_path)
216 |     print("=====           End          =====")
217 | 
218 | 
219 | def set_case(local_config, case):
220 |     local_config.set_case(case)
221 | 
222 | 
223 | def set_reserved_case(local_config, reserved_case):
224 |     if local_config.case == None and local_config.reserved_case != None:
225 |         print ("You have to set [case(-a)]option, "\
226 |                "when you set [reserved_case(-e)]option.\n" \
227 |                "The applocation does not accept the reserved_case option individually.")
228 |         sys.exit("Application quitting...")
229 |     else:
230 |         local_config.set_reserved_case(reserved_case)
231 | 
232 | 
233 | def set_escapesequence_u005c(escapesequence_u005c):
234 |     config.glb.escape_sequence_u005c = escapesequence_u005c
235 | 
236 | 
237 | def set_comment_syntax(local_config, comment_syntax):
238 |     if comment_syntax == "uroborosql" or comment_syntax == "uroboro":
239 |         local_config.set_commentsyntax(commentsyntax.UroboroSqlCommentSyntax())
240 |     elif comment_syntax == "doma2" or comment_syntax == "doma":
241 |         local_config.set_commentsyntax(commentsyntax.Doma2CommentSyntax())
242 |     else:
243 |         sys.exit()
244 | 
245 | 
246 | def set_reserved_words(local_config, reserved_words_file):
247 |     if reserved_words_file is not None:
248 |         try:
249 |             f = open(reserved_words_file, 'r')
250 |             lines = f.readlines()
251 |             f.close()
252 |         except IOError:
253 |             print("File I/O error: %s" % reserved_words_file)
254 |             print("Please check the file path.")
255 |             sys.exit("Application quitting...")
256 |         except:
257 |             print ("Unexpected error:", sys.exc_info()[0])
258 |             sys.exit("Application quitting...")
259 |         else:
260 |             reserved_words = []
261 | 
262 |             for line in lines:
263 |                 reserved_words.append(line.rstrip('\n').lower())  # Eliminate newline code.
264 | 
265 |             local_config.set_input_reserved_words(reserved_words)
266 | 
267 | 
268 | if __name__ == "__main__":
269 |     __execute()
270 | 


--------------------------------------------------------------------------------
/uroborosqlfmt/commentsyntax.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | '''
 3 | @author: ota
 4 | '''
 5 | import re
 6 | from sqlparse import tokens as T
 7 | from uroborosqlfmt.tokenutils import EngineComment
 8 | from uroborosqlfmt import tokenutils as tu
 9 | 
10 | 
11 | # pylint: disable=unused-argument
12 | class CommentSyntax(object):
13 |     def __init__(self):
14 |         pass
15 |     def get_block_comment_type(self, token):
16 |         return EngineComment.none
17 | 
18 |     def get_line_comment_type(self, token):
19 |         return EngineComment.none
20 | # pylint: enable=unused-argument
21 | 
22 | class UroboroSqlCommentSyntax(CommentSyntax):
23 | 
24 |     def get_block_comment_type(self, token):
25 |         tokens = token.tokens
26 |         if len(tokens) >= 3 :
27 |             comment = tokens[1].value
28 |             if comment.strip() == "_SQL_ID_" or comment.strip() == "_SQL_IDENTIFIER_":
29 |                 return EngineComment.sql_identifier # _SQL_IDENTIFIER_
30 |             if tu.startswith_ignore_case(comment, ("IF", "ELIF", "ELSE", "END", "BEGIN")):
31 |                 return EngineComment.syntax
32 |             if comment.strip() == comment and (not comment.startswith("*")) and (not comment.startswith("+")):
33 |                 return EngineComment.param # param
34 |         return EngineComment.none
35 | 
36 |     def get_line_comment_type(self, token):
37 |         cmm = token.token_next_by_type(0, T.Comment)
38 |         comment = cmm.value[2:]
39 |         if tu.startswith_ignore_case(comment.strip(), "ELSE"):
40 |             return EngineComment.syntax
41 |         return EngineComment.none
42 | 
43 | 
44 | class Doma2CommentSyntax(CommentSyntax):
45 | 
46 |     def get_block_comment_type(self, token):
47 |         tokens = token.tokens
48 |         if len(tokens) >= 3 :
49 |             comment = tokens[1].value
50 |             first_char = comment[0]
51 |             if first_char == " " or \
52 |                 re.compile("[a-z]", re.IGNORECASE).match(first_char) or \
53 |                 first_char == "^" or \
54 |                 first_char == "$" or \
55 |                 first_char == "#" or \
56 |                 first_char == "@" or \
57 |                 first_char == '"' or \
58 |                 first_char == "'":
59 |                 return EngineComment.param
60 |             if first_char == "%":
61 |                 if tu.startswith_ignore_case(comment[1:], "expand"):
62 |                     return EngineComment.param
63 |                 return EngineComment.syntax
64 |         return EngineComment.none
65 | 
66 | # /* ～*/ ...--3文字目が空白であるため式コメントです。
67 | # /*a～*/ ...--3文字目がJavaの識別子の先頭で使用可能な文字であるため式コメントです。
68 | # /*$～*/ ...--3文字目がJavaの識別子の先頭で使用可能な文字であるため式コメントです。
69 | # /*%～*/ ...--3文字目が条件コメントや繰り返しコメントの始まりを表す「%」であるため式コメントです。
70 | # /*#～*/ ...--3文字目が埋め込み変数コメントを表す「#」であるため式コメントです。
71 | # /*@～*/ ...--3文字目が組み込み関数もしくはクラス名を表す「@」であるため式コメントです。
72 | # /*"～*/ ...--3文字目が文字列リテラルの引用符を表す「"」であるため式コメントです。
73 | # /*'～*/ ...--3文字目が文字リテラルの引用符を表す「'」であるため式コメントです。
74 | 


--------------------------------------------------------------------------------
/uroborosqlfmt/config.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | '''
 3 | @author: ota
 4 | '''
 5 | from uroborosqlfmt.commentsyntax import UroboroSqlCommentSyntax
 6 | 
 7 | 
 8 | class _GlobalConfig(object):
 9 |     def __init__(self):
10 |         self.escape_sequence_u005c = False  # バックスラッシュによるエスケープシーケンス
11 | 
12 |     def set_escape_sequence_u005c(self, escape_sequence):
13 |         self.escape_sequence_u005c = escape_sequence
14 |         return self
15 | 
16 | 
17 | class LocalConfig(object):
18 | 
19 |     __slots__ = ('comment_syntax', 'case', 'reserved_case','input_reserved_words')
20 | 
21 |     def __init__(self):
22 |         self.comment_syntax = UroboroSqlCommentSyntax()
23 |         self.case = None
24 |         self.reserved_case = None
25 |         self.input_reserved_words = None
26 | 
27 |     def set_commentsyntax(self, comment_syntax):
28 |         self.comment_syntax = comment_syntax
29 |         return self
30 | 
31 |     def set_case(self, case):
32 |         self.case = case
33 |         return self
34 | 
35 |     def set_reserved_case(self, reserved_case):
36 |         self.reserved_case = reserved_case
37 |         return self
38 | 
39 |     def set_input_reserved_words(self, input_reserved_words):
40 |         self.input_reserved_words = input_reserved_words
41 |         return self
42 | 
43 | 
44 | # グローバル設定
45 | glb = _GlobalConfig()  # pylint: disable=invalid-name
46 | 


--------------------------------------------------------------------------------
/uroborosqlfmt/exceptions.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | '''
 3 | Created on 2016/07/09
 4 | 
 5 | @author: ota
 6 | '''
 7 | import sys
 8 | import traceback
 9 | 
10 | class SqlFormatterException(Exception):
11 |     '''
12 |         SqlFormatter用のExceptionクラス
13 |     '''
14 | 
15 |     def __init__(self, tlist, ex, trace):
16 |         super(SqlFormatterException, self).__init__(ex.message if hasattr(ex, "message") else "")
17 |         self.tlist = self.__decode(tlist)
18 |         self.e = ex
19 |         self.trace = self.__decode(trace)
20 |         self.message = ex.message
21 | 
22 |     def __decode(self, text):
23 |         text = str(text)
24 |         if sys.version_info[0] < 3:
25 |             return text.decode("utf-8")
26 |         else:
27 |             return text
28 | 
29 |     def __encode(self, text):
30 |         if sys.version_info[0] < 3 and isinstance(text, unicode):
31 |             return text.encode("utf-8")
32 |         else:
33 |             return text
34 | 
35 |     def __str__(self, *args):
36 |         return self.message \
37 |                 + "\ntoken:"  + self.__encode(self.tlist) \
38 |                 + "\ntrace:"  + self.__encode(self.trace) \
39 |                 + "\noriginal:"  + str(self.e)
40 | 
41 |     @staticmethod
42 |     def wrap_try_except(fnc, token, *args):
43 |         try:
44 |             if args:
45 |                 return fnc(*args)
46 |             else:
47 |                 return fnc(token)
48 |         except Exception as ex:
49 |             if not isinstance(ex, SqlFormatterException):
50 |                 raise SqlFormatterException(token, ex, traceback.format_exc())
51 |             raise
52 | 
53 |     @staticmethod
54 |     def to_wrap_try_except(fnc, token_arg_index):
55 |         def call(*args):
56 |             try:
57 |                 return fnc(*args)
58 |             except Exception as ex:
59 |                 if not isinstance(ex, SqlFormatterException):
60 |                     raise SqlFormatterException(args[token_arg_index], ex, traceback.format_exc())
61 |                 raise
62 |         return call
63 | 


--------------------------------------------------------------------------------
/uroborosqlfmt/sql.py:
--------------------------------------------------------------------------------
  1 | # coding:utf-8
  2 | '''
  3 | @author: ota
  4 | '''
  5 | from sqlparse import sql, tokens as T
  6 | from uroborosqlfmt import tokenutils as tu
  7 | 
  8 | class Having(sql.TokenList):
  9 | 
 10 |     __slots__ = ('value', 'ttype', 'tokens')
 11 | 
 12 | class When(sql.TokenList):
 13 | 
 14 |     __slots__ = ('value', 'ttype', 'tokens')
 15 | 
 16 | class On(sql.TokenList):
 17 | 
 18 |     __slots__ = ('value', 'ttype', 'tokens')
 19 | 
 20 | class _BaseWords(sql.TokenList):
 21 | 
 22 |     def __init__(self, tokens=None):
 23 |         super(_BaseWords, self).__init__(tokens=tokens)
 24 |         self.__target_tokens = None
 25 | 
 26 |     def _setupinit(self, target_tokens):
 27 |         self.__target_tokens = target_tokens
 28 | 
 29 |     def _token_word(self, hit_index):
 30 |         return self.__target_tokens[hit_index]
 31 | 
 32 |     def tokens_words(self):
 33 |         start = self._token_word(0)
 34 |         end = tu.token_prev_enable(self)
 35 |         return self.tokens_between(start, end)
 36 | 
 37 |     def get_target_tokens(self):
 38 |         return self.__target_tokens[:]
 39 | 
 40 | class WithinGroupFunctions(_BaseWords):
 41 |     """
 42 |         LISTAGG ( expr [, delimiter] ) WITHIN GROUP ( order_by )
 43 |         等のWITHIN GROUPのつく関数
 44 |     """
 45 | 
 46 |     __slots__ = ('value', 'ttype', 'tokens', '_main_function', '_within', '_group')
 47 | 
 48 |     def __init__(self, tokens=None):
 49 |         super(WithinGroupFunctions, self).__init__(tokens=tokens)
 50 |         self._main_function = None
 51 |         self._within = None
 52 |         self._group = None
 53 | 
 54 |     def get_main_function(self):
 55 |         return self._main_function
 56 | 
 57 |     def get_within(self):
 58 |         return self._within
 59 | 
 60 |     def get_group(self):
 61 |         return self._group
 62 | 
 63 | class Phrase(_BaseWords):
 64 |     """
 65 |         ORDER BY、GROUP BYなど
 66 |     """
 67 | 
 68 |     __slots__ = ('value', 'ttype', 'tokens')
 69 | 
 70 | 
 71 |     def match_phrase(self, values):
 72 |         tokens = self.get_target_tokens()
 73 |         if len(tokens) != len(values):
 74 |             return False
 75 | 
 76 |         for i, token in enumerate(tokens):
 77 |             if not tu.equals_ignore_case(token.value, values[i]):
 78 |                 return False
 79 | 
 80 |         return True
 81 | 
 82 | class AscDesc(_BaseWords):
 83 |     """
 84 |         ASC DESCなど
 85 |     """
 86 | 
 87 |     __slots__ = ('value', 'ttype', 'tokens')
 88 | 
 89 | class OffsetFetch(_BaseWords):
 90 |     """
 91 |         OFFSET句、FETCH句
 92 |     """
 93 | 
 94 |     __slots__ = ('value', 'ttype', 'tokens')
 95 | 
 96 | class LimitOffset(_BaseWords):
 97 |     """
 98 |         LIMIT・OFFSET句
 99 |     """
100 | 
101 |     __slots__ = ('value', 'ttype', 'tokens')
102 | 
103 | class OverFunctions(_BaseWords):
104 |     """
105 |         ROW_NUMBER句など
106 |     """
107 | 
108 |     __slots__ = ('value', 'ttype', 'tokens')
109 | 
110 |     def get_main_function(self):
111 |         return self._token_word(0)
112 | 
113 |     def get_over(self):
114 |         return self._token_word(1)
115 | 
116 | class KeepFunctions(_BaseWords):
117 |     """
118 |         KEEPのつく関数
119 |     """
120 | 
121 |     __slots__ = ('value', 'ttype', 'tokens')
122 | 
123 | 
124 |     def get_main_function(self):
125 |         return self._token_word(0)
126 | 
127 |     def get_keep(self):
128 |         return self._token_word(1)
129 | 
130 | class ForUpdate(_BaseWords):
131 |     """
132 |         FOR UPDATE
133 |     """
134 | 
135 |     __slots__ = ('value', 'ttype', 'tokens')
136 | 
137 |     def get_for(self):
138 |         return self._token_word(0)
139 | 
140 |     def get_update(self):
141 |         return self._token_word(1)
142 | 
143 |     def get_of(self):
144 |         for tkn in self.tokens:
145 |             if tu.equals_ignore_case(tkn.value, "OF"):
146 |                 return tkn
147 |         return None
148 | 
149 |     def get_wait_or_nowait(self):
150 |         tokens = self.get_target_tokens()
151 |         if len(tokens) < 3:
152 |             return False
153 | 
154 |         for tkn in tokens[::-1]:
155 |             if tu.is_waitornowait(tkn):
156 |                 return tkn
157 |         return None
158 | 
159 |     def is_in_identifier(self):
160 |         return self.get_of()
161 | 
162 | class WaitOrNowait(_BaseWords):
163 |     """
164 |         WAIT / NOWAIT
165 |     """
166 | 
167 |     __slots__ = ('value', 'ttype', 'tokens')
168 | 
169 |     def get_wait_or_nowait(self):
170 |         return self._token_word(0)
171 | 
172 | class Union(_BaseWords):
173 |     """
174 |         UNION系
175 |     """
176 | 
177 |     __slots__ = ('value', 'ttype', 'tokens')
178 | 
179 | 
180 | class Join(_BaseWords):
181 |     """
182 |         JOIN系
183 |     """
184 | 
185 |     __slots__ = ('value', 'ttype', 'tokens', 'jointoken', 'identifiertoken', 'usingtoken', 'usingparenthesistoken')
186 | 
187 | class MergeWhen(_BaseWords):
188 |     """
189 |         MERGEのWHEN句
190 |     """
191 | 
192 |     __slots__ = ('value', 'ttype', 'tokens')
193 | 
194 | class MergeUpdateInsertClause(_BaseWords):
195 |     """
196 |         MERGEの内のUPDATE・INSERT句
197 |     """
198 | 
199 |     __slots__ = ('value', 'ttype', 'tokens')
200 | 
201 | class ConnectBy(_BaseWords):
202 |     """
203 |         ORACLEのCONNECT BY句
204 |     """
205 | 
206 |     __slots__ = ('value', 'ttype', 'tokens')
207 | 
208 | class StartWith(_BaseWords):
209 |     """
210 |         ORACLEのSTART WITH句
211 |     """
212 | 
213 |     __slots__ = ('value', 'ttype', 'tokens')
214 | 
215 | class With(_BaseWords):
216 |     """
217 |         ORACLEのWITH句
218 |     """
219 | 
220 |     __slots__ = ('value', 'ttype', 'tokens')
221 | 
222 | 
223 |     def token_with(self):
224 |         return self.token_next_match(0, T.Keyword, 'WITH')
225 | 
226 | class SpecialFunctionParameter(_BaseWords):
227 |     """
228 |         ANSIのTRIM
229 |         ORACLEのTRIM
230 |         PostgreSQLのSUBSTRING
231 |         等のパラメータ
232 |     """
233 | 
234 |     __slots__ = ('value', 'ttype', 'tokens')
235 | 
236 | class Calculation(_BaseWords):
237 |     """
238 |         計算値
239 |     """
240 | 
241 |     __slots__ = ('value', 'ttype', 'tokens')
242 | 


--------------------------------------------------------------------------------
/urobosql_formatter.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import sys
  4 | import sublime
  5 | import sublime_plugin
  6 | import threading
  7 | sys.path.append(os.path.dirname(__file__))
  8 | import uroborosqlfmt
  9 | from uroborosqlfmt.config import LocalConfig
 10 | from uroborosqlfmt.commentsyntax import Doma2CommentSyntax
 11 | 
 12 | 
 13 | class UroborosqlFormatCommand(sublime_plugin.TextCommand):
 14 | 
 15 |     """SQL format command class"""
 16 | 
 17 |     def run(self, edit, sync=False):
 18 |         view = self.view
 19 |         view.window().status_message('formatting...')
 20 |         self.settings = view.settings()
 21 |         self.user_settings = sublime.load_settings(
 22 |             'sublime-uroborosql-formatter.sublime-settings')
 23 |         # set syntax
 24 |         if self.settings.get('syntax') == \
 25 |                 "Packages/Text/Plain text.tmLanguage":
 26 |             view.set_syntax_file("Packages/SQL/SQL.tmLanguage")
 27 |         # settings
 28 |         self.settings.set("tab_size", self.getval("uf_tab_size"))
 29 |         self.settings.set("translate_tabs_to_spaces",
 30 |                           self.getval("uf_translate_tabs_to_spaces"))
 31 |         config = LocalConfig()
 32 | 
 33 |         if self.getval("uf_case") == 'nochange':
 34 |             config.set_case(None)
 35 |         else:
 36 |             config.set_case(self.getval("uf_case"))
 37 | 
 38 |         if self.getval("uf_reserved_case") == 'nochange':
 39 |             config.set_reserved_case(None)
 40 |         else:
 41 |             config.set_reserved_case(self.getval("uf_reserved_case"))
 42 | 
 43 |         # ↓for backward compatibility↓
 44 |         if self.user_settings.get("uf_uppercase") == False \
 45 |                 and self.user_settings.get("uf_case") == None:
 46 |             config.set_case(None)
 47 | 
 48 |         if self.user_settings.get("uf_uppercase") == False \
 49 |                 and self.user_settings.get("uf_reserved_case") == None:
 50 |             config.set_reserved_case(None)
 51 |         # ↑for backward compatibility ↑
 52 | 
 53 |         # set reserved words
 54 |         if self.getval("uf_reserved_words") != None:
 55 |             input_reserved_words_list = self.getval("uf_reserved_words")
 56 |             reserved_words = input_reserved_words_list.split(",")
 57 |             config.set_input_reserved_words(reserved_words)
 58 | 
 59 |         uroborosqlfmt.config.glb.escape_sequence_u005c = self.getval(
 60 |             "uf_escapesequence_u005c")
 61 |         if str(self.getval("uf_comment_syntax")).upper() == "DOMA2":
 62 |             config.set_commentsyntax(Doma2CommentSyntax())
 63 | 
 64 |         raw_text = ""
 65 |         regions = view.sel()
 66 |         # format selection
 67 |         if len(regions) > 1 or not regions[0].empty():
 68 |             for region in view.sel():
 69 |                 if not region.empty():
 70 |                     raw_text = view.substr(region)
 71 |         else:  # format all
 72 |             region = sublime.Region(0, view.size())
 73 |             raw_text = view.substr(region)
 74 | 
 75 |         if sync:
 76 |             self.run_format(edit, raw_text, config, region.a, region.b)
 77 |         else:
 78 |             threading.Thread(target=self.run_format, args=(
 79 |                 edit, raw_text, config, region.a, region.b)).start()
 80 | 
 81 |     def run_format(self, edit, raw_text, config, region_a, region_b):
 82 |         formatted_text = uroborosqlfmt.format_sql(raw_text, config)
 83 |         self.view.run_command("uroborosql_format_replace", {
 84 |             "region_a": region_a,
 85 |             "region_b": region_b,
 86 |             "formatted_text": formatted_text
 87 |         })
 88 | 
 89 |     def getval(self, key):
 90 |         val = self.user_settings.get(key)
 91 |         return val if val != None else self.settings.get(key)
 92 | 
 93 | 
 94 | class UroborosqlFormatReplaceCommand(sublime_plugin.TextCommand):
 95 | 
 96 |     """SQL format replace command class"""
 97 | 
 98 |     def run(self, edit, **args):
 99 |         region = sublime.Region(args["region_a"], args["region_b"])
100 |         self.view.replace(edit, region, args["formatted_text"])
101 |         self.view.window().status_message('formatting... complete!!')
102 | 
103 | 
104 | class UroborosqlFormatListener(sublime_plugin.EventListener):
105 | 
106 |     """Event listner"""
107 | 
108 |     def on_pre_save(self, view):
109 |         self.settings = view.settings()
110 |         self.user_settings = sublime.load_settings(
111 |             'sublime-uroborosql-formatter.sublime-settings')
112 |         if self.getval("uf_save_on_format") != True:
113 |             return
114 | 
115 |         filepath = view.file_name()
116 |         if filepath.endswith(tuple(self.getval("uf_save_on_format_extensions"))):
117 |             view.run_command("uroborosql_format", {"sync": True})
118 | 
119 |     def getval(self, key):
120 |         val = self.user_settings.get(key)
121 |         return val if val != None else self.settings.get(key)
122 | 


--------------------------------------------------------------------------------