├── .gitignore ├── Default.sublime-commands ├── FormatSQL.py ├── LICENSE.txt ├── Main.sublime-menu ├── README.md └── sqlparse ├── __init__.py ├── engine ├── __init__.py ├── filter.py └── grouping.py ├── filters.py ├── formatter.py ├── keywords.py ├── lexer.py ├── pipeline.py ├── sql.py └── tokens.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.cache 3 | *.sublime-project 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /Default.sublime-commands: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "caption": "FormatSQL: Format SQL statement", 4 | "command": "format_sql" 5 | } 6 | ] 7 | 8 | -------------------------------------------------------------------------------- /FormatSQL.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import sublime 3 | import sublime_plugin 4 | 5 | try: 6 | from .sqlparse import format 7 | except ValueError: 8 | from sqlparse import format 9 | 10 | 11 | class FormatSqlCommand(sublime_plugin.TextCommand): 12 | def run(self, edit): 13 | view = self.view 14 | regions = view.sel() 15 | # if there is more than one region, or the single region is not empty, format the selection(s) 16 | if len(regions) > 1 or not regions[0].empty(): 17 | for region in view.sel(): 18 | if not region.empty(): 19 | s = view.substr(region) 20 | s = self._run(s) 21 | view.replace(edit, region, s) 22 | else: # otherwise format the whole buffer 23 | alltextreg = sublime.Region(0, view.size()) 24 | s = view.substr(alltextreg) 25 | s = self._run(s) 26 | view.replace(edit, alltextreg, s) 27 | 28 | def _run(self, s): 29 | settings = self.view.settings() 30 | #indent_char = " " if settings.get("translate_tabs_to_spaces") else "\t" 31 | indent_char = " " # TODO: indent with tabs (currently not supported by python-sqlparse) 32 | indent_size = int(settings.get("tab_size")) if indent_char == " " else 1 33 | s = s.encode("utf-8") 34 | return format( 35 | s, keyword_case="upper", reindent=True, indent_width=indent_size 36 | ) 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | python-sqlparse and this code are released under the 2-clause BSD license: http://www.opensource.org/licenses/bsd-license.php -------------------------------------------------------------------------------- /Main.sublime-menu: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "selection", 4 | "caption": "Selection", 5 | "children": 6 | [ 7 | { 8 | "id": "format", 9 | "caption": "Format", 10 | "children": 11 | [ 12 | { 13 | "caption": "Format SQL Statement", 14 | "command": "format_sql" 15 | } 16 | ] 17 | } 18 | ] 19 | } 20 | ] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | FormatSQL formats long SQL statements into a more readable form using the [python-sqlparse library](http://code.google.com/p/python-sqlparse/).
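Under the hood the plugin simply passes the selected text to `sqlparse.format` (see `FormatSQL.py` above). A minimal sketch of the equivalent call, for illustration only; it assumes the bundled `sqlparse` package is importable and uses a fixed indent width of 4 in place of the editor's tab size:

    import sqlparse

    raw = "select a,b from foo join bar on val1 = val2 where id = 123 and cd = 99;"
    # Uppercase keywords and reindent, as FormatSQL.py does for each selection.
    print(sqlparse.format(raw, keyword_case="upper", reindent=True, indent_width=4))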
4 | 5 | 6 | ## How to Use 7 | 8 | Select the SQL and choose the menu item Selection -> Format -> Format SQL Statement 9 | 10 | 11 | ### Configure key binding 12 | 13 | Add the following line to your keymap settings: 14 | 15 | { "keys": ["super+k", "super+s"], "command": "format_sql" }, 16 | 17 | 18 | ## Example 19 | 20 | Original: 21 | 22 | select a,b from foo join bar on val1 = val2 where id = 123 and cd = 99; 23 | 24 | Formatted: 25 | 26 | SELECT a, 27 | b 28 | FROM foo 29 | JOIN bar ON val1 = val2 30 | WHERE id = 123 31 | AND cd = 99; 32 | 33 | ## License 34 | 35 | The [python-sqlparse library](http://code.google.com/p/python-sqlparse/) and this code are both released under the [2-clause BSD license](http://www.opensource.org/licenses/bsd-license.php) -------------------------------------------------------------------------------- /sqlparse/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | """Parse SQL statements.""" 7 | from __future__ import absolute_import 8 | 9 | __version__ = '0.1.3' 10 | 11 | 12 | class SQLParseError(Exception): 13 | """Base class for exceptions in this module.""" 14 | 15 | 16 | # Setup namespace 17 | from . import engine 18 | from . import filters 19 | from . import formatter 20 | 21 | 22 | def parse(sql): 23 | """Parse sql and return a list of statements. 24 | 25 | *sql* is a single string containing one or more SQL statements. 26 | 27 | Returns a tuple of :class:`~sqlparse.sql.Statement` instances. 28 | """ 29 | stack = engine.FilterStack() 30 | stack.full_analyze() 31 | return tuple(stack.run(sql)) 32 | 33 | 34 | def format(sql, **options): 35 | """Format *sql* according to *options*. 36 | 37 | Available options are documented in :ref:`formatting`. 38 | 39 | Returns the formatted SQL statement as a string. 40 | """ 41 | stack = engine.FilterStack() 42 | options = formatter.validate_options(options) 43 | stack = formatter.build_filter_stack(stack, options) 44 | stack.postprocess.append(filters.SerializerUnicode()) 45 | return ''.join(stack.run(sql)) 46 | 47 | 48 | def split(sql): 49 | """Split *sql* into single statements. 50 | 51 | Returns a list of strings. 52 | """ 53 | stack = engine.FilterStack() 54 | stack.split_statements = True 55 | return [unicode(stmt) for stmt in stack.run(sql)] 56 | 57 | 58 | from .engine.filter import StatementFilter 59 | def split2(stream): 60 | splitter = StatementFilter() 61 | return list(splitter.process(None, stream)) -------------------------------------------------------------------------------- /sqlparse/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | """filter""" 7 | from __future__ import absolute_import 8 | from .. import lexer 9 | from . 
import grouping 10 | from .filter import StatementFilter 11 | 12 | # XXX remove this when cleanup is complete 13 | Filter = object 14 | 15 | 16 | class FilterStack(object): 17 | 18 | def __init__(self): 19 | self.preprocess = [] 20 | self.stmtprocess = [] 21 | self.postprocess = [] 22 | self.split_statements = False 23 | self._grouping = False 24 | 25 | def _flatten(self, stream): 26 | for token in stream: 27 | if token.is_group(): 28 | for t in self._flatten(token.tokens): 29 | yield t 30 | else: 31 | yield token 32 | 33 | def enable_grouping(self): 34 | self._grouping = True 35 | 36 | def full_analyze(self): 37 | self.enable_grouping() 38 | 39 | def run(self, sql): 40 | stream = lexer.tokenize(sql) 41 | # Process token stream 42 | if self.preprocess: 43 | for filter_ in self.preprocess: 44 | stream = filter_.process(self, stream) 45 | 46 | if (self.stmtprocess or self.postprocess or self.split_statements 47 | or self._grouping): 48 | splitter = StatementFilter() 49 | stream = splitter.process(self, stream) 50 | 51 | if self._grouping: 52 | 53 | def _group(stream): 54 | for stmt in stream: 55 | grouping.group(stmt) 56 | yield stmt 57 | stream = _group(stream) 58 | 59 | if self.stmtprocess: 60 | 61 | def _run1(stream): 62 | ret = [] 63 | for stmt in stream: 64 | for filter_ in self.stmtprocess: 65 | filter_.process(self, stmt) 66 | ret.append(stmt) 67 | return ret 68 | stream = _run1(stream) 69 | 70 | if self.postprocess: 71 | 72 | def _run2(stream): 73 | for stmt in stream: 74 | stmt.tokens = list(self._flatten(stmt.tokens)) 75 | for filter_ in self.postprocess: 76 | stmt = filter_.process(self, stmt) 77 | yield stmt 78 | stream = _run2(stream) 79 | 80 | return stream 81 | -------------------------------------------------------------------------------- /sqlparse/engine/filter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from ..sql import Statement, Token 4 | from .. import tokens as T 5 | 6 | 7 | class TokenFilter(object): 8 | 9 | def __init__(self, **options): 10 | self.options = options 11 | 12 | def process(self, stack, stream): 13 | """Process token stream.""" 14 | raise NotImplementedError 15 | 16 | 17 | class StatementFilter(TokenFilter): 18 | 19 | def __init__(self): 20 | TokenFilter.__init__(self) 21 | self._in_declare = False 22 | self._in_dbldollar = False 23 | self._is_create = False 24 | self._begin_depth = 0 25 | 26 | def _reset(self): 27 | self._in_declare = False 28 | self._in_dbldollar = False 29 | self._is_create = False 30 | self._begin_depth = 0 31 | 32 | def _change_splitlevel(self, ttype, value): 33 | # PostgreSQL 34 | if (ttype == T.Name.Builtin 35 | and value.startswith('$') and value.endswith('$')): 36 | if self._in_dbldollar: 37 | self._in_dbldollar = False 38 | return -1 39 | else: 40 | self._in_dbldollar = True 41 | return 1 42 | elif self._in_dbldollar: 43 | return 0 44 | 45 | # ANSI 46 | if ttype not in T.Keyword: 47 | return 0 48 | 49 | unified = value.upper() 50 | 51 | if unified == 'DECLARE' and self._is_create: 52 | self._in_declare = True 53 | return 1 54 | 55 | if unified == 'BEGIN': 56 | self._begin_depth += 1 57 | if self._in_declare: # FIXME(andi): This makes no sense. 58 | return 0 59 | return 0 60 | 61 | if unified == 'END': 62 | # Should this respect a preceeding BEGIN? 63 | # In CASE ... WHEN ... END this results in a split level -1. 
64 | self._begin_depth = max(0, self._begin_depth - 1) 65 | return -1 66 | 67 | if ttype is T.Keyword.DDL and unified.startswith('CREATE'): 68 | self._is_create = True 69 | return 0 70 | 71 | if (unified in ('IF', 'FOR') 72 | and self._is_create and self._begin_depth > 0): 73 | return 1 74 | 75 | # Default 76 | return 0 77 | 78 | def process(self, stack, stream): 79 | splitlevel = 0 80 | stmt = None 81 | consume_ws = False 82 | stmt_tokens = [] 83 | for ttype, value in stream: 84 | # Before appending the token 85 | if (consume_ws and ttype is not T.Whitespace 86 | and ttype is not T.Comment.Single): 87 | consume_ws = False 88 | stmt.tokens = stmt_tokens 89 | yield stmt 90 | self._reset() 91 | stmt = None 92 | splitlevel = 0 93 | if stmt is None: 94 | stmt = Statement() 95 | stmt_tokens = [] 96 | splitlevel += self._change_splitlevel(ttype, value) 97 | # Append the token 98 | stmt_tokens.append(Token(ttype, value)) 99 | # After appending the token 100 | if (splitlevel <= 0 and ttype is T.Punctuation 101 | and value == ';'): 102 | consume_ws = True 103 | if stmt is not None: 104 | stmt.tokens = stmt_tokens 105 | yield stmt 106 | -------------------------------------------------------------------------------- /sqlparse/engine/grouping.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import itertools 4 | 5 | from .. import sql 6 | from .. import tokens as T 7 | 8 | try: 9 | next 10 | except NameError: # Python < 2.6 11 | next = lambda i: i.next() 12 | 13 | 14 | def _group_left_right(tlist, ttype, value, cls, 15 | check_right=lambda t: True, 16 | check_left=lambda t: True, 17 | include_semicolon=False): 18 | [_group_left_right(sgroup, ttype, value, cls, check_right, 19 | include_semicolon) for sgroup in tlist.get_sublists() 20 | if not isinstance(sgroup, cls)] 21 | idx = 0 22 | token = tlist.token_next_match(idx, ttype, value) 23 | while token: 24 | right = tlist.token_next(tlist.token_index(token)) 25 | left = tlist.token_prev(tlist.token_index(token)) 26 | if right is None or not check_right(right): 27 | token = tlist.token_next_match(tlist.token_index(token) + 1, 28 | ttype, value) 29 | elif left is None or not check_right(left): 30 | token = tlist.token_next_match(tlist.token_index(token) + 1, 31 | ttype, value) 32 | else: 33 | if include_semicolon: 34 | sright = tlist.token_next_match(tlist.token_index(right), 35 | T.Punctuation, ';') 36 | if sright is not None: 37 | # only overwrite "right" if a semicolon is actually 38 | # present. 
39 | right = sright 40 | tokens = tlist.tokens_between(left, right)[1:] 41 | if not isinstance(left, cls): 42 | new = cls([left]) 43 | new_idx = tlist.token_index(left) 44 | tlist.tokens.remove(left) 45 | tlist.tokens.insert(new_idx, new) 46 | left = new 47 | left.tokens.extend(tokens) 48 | for t in tokens: 49 | tlist.tokens.remove(t) 50 | token = tlist.token_next_match(tlist.token_index(left) + 1, 51 | ttype, value) 52 | 53 | 54 | def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, 55 | cls, include_semicolon=False, recurse=False): 56 | def _find_matching(i, tl, stt, sva, ett, eva): 57 | depth = 1 58 | for t in tl.tokens[i:]: 59 | if t.match(stt, sva): 60 | depth += 1 61 | elif t.match(ett, eva): 62 | depth -= 1 63 | if depth == 1: 64 | return t 65 | return None 66 | [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, 67 | cls, include_semicolon) for sgroup in tlist.get_sublists() 68 | if recurse] 69 | if isinstance(tlist, cls): 70 | idx = 1 71 | else: 72 | idx = 0 73 | token = tlist.token_next_match(idx, start_ttype, start_value) 74 | while token: 75 | tidx = tlist.token_index(token) 76 | end = _find_matching(tidx, tlist, start_ttype, start_value, 77 | end_ttype, end_value) 78 | if end is None: 79 | idx = tidx + 1 80 | else: 81 | if include_semicolon: 82 | next_ = tlist.token_next(tlist.token_index(end)) 83 | if next_ and next_.match(T.Punctuation, ';'): 84 | end = next_ 85 | group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) 86 | _group_matching(group, start_ttype, start_value, 87 | end_ttype, end_value, cls, include_semicolon) 88 | idx = tlist.token_index(group) + 1 89 | token = tlist.token_next_match(idx, start_ttype, start_value) 90 | 91 | 92 | def group_if(tlist): 93 | _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True) 94 | 95 | 96 | def group_for(tlist): 97 | _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', 98 | sql.For, True) 99 | 100 | 101 | def group_as(tlist): 102 | 103 | def _right_valid(token): 104 | # Currently limited to DML/DDL. Maybe additional more non SQL reserved 105 | # keywords should appear here (see issue8). 106 | return not token.ttype in (T.DML, T.DDL) 107 | _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier, 108 | check_right=_right_valid) 109 | 110 | 111 | def group_assignment(tlist): 112 | _group_left_right(tlist, T.Assignment, ':=', sql.Assignment, 113 | include_semicolon=True) 114 | 115 | 116 | def group_comparison(tlist): 117 | 118 | def _parts_valid(token): 119 | return (token.ttype in (T.String.Symbol, T.Name, T.Number, 120 | T.Number.Integer, T.Literal, 121 | T.Literal.Number.Integer) 122 | or isinstance(token, (sql.Identifier,))) 123 | _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison, 124 | check_left=_parts_valid, check_right=_parts_valid) 125 | 126 | 127 | def group_case(tlist): 128 | _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case, 129 | include_semicolon=True, recurse=True) 130 | 131 | 132 | def group_identifier(tlist): 133 | def _consume_cycle(tl, i): 134 | x = itertools.cycle(( 135 | lambda y: (y.match(T.Punctuation, '.') 136 | or y.ttype is T.Operator), 137 | lambda y: (y.ttype in (T.String.Symbol, 138 | T.Name, 139 | T.Wildcard, 140 | T.Literal.Number.Integer)))) 141 | for t in tl.tokens[i:]: 142 | if next(x)(t): 143 | yield t 144 | else: 145 | raise StopIteration 146 | 147 | def _next_token(tl, i): 148 | # chooses the next token. if two tokens are found then the 149 | # first is returned. 
150 | t1 = tl.token_next_by_type(i, (T.String.Symbol, T.Name)) 151 | t2 = tl.token_next_by_instance(i, sql.Function) 152 | if t1 and t2: 153 | i1 = tl.token_index(t1) 154 | i2 = tl.token_index(t2) 155 | if i1 > i2: 156 | return t2 157 | else: 158 | return t1 159 | elif t1: 160 | return t1 161 | else: 162 | return t2 163 | 164 | # bottom up approach: group subgroups first 165 | [group_identifier(sgroup) for sgroup in tlist.get_sublists() 166 | if not isinstance(sgroup, sql.Identifier)] 167 | 168 | # real processing 169 | idx = 0 170 | token = _next_token(tlist, idx) 171 | while token: 172 | identifier_tokens = [token] + list( 173 | _consume_cycle(tlist, 174 | tlist.token_index(token) + 1)) 175 | if not (len(identifier_tokens) == 1 176 | and isinstance(identifier_tokens[0], sql.Function)): 177 | group = tlist.group_tokens(sql.Identifier, identifier_tokens) 178 | idx = tlist.token_index(group) + 1 179 | else: 180 | idx += 1 181 | token = _next_token(tlist, idx) 182 | 183 | 184 | def group_identifier_list(tlist): 185 | [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() 186 | if not isinstance(sgroup, sql.IdentifierList)] 187 | idx = 0 188 | # Allowed list items 189 | fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function, 190 | sql.Case)), 191 | lambda t: t.is_whitespace(), 192 | lambda t: t.ttype == T.Name, 193 | lambda t: t.ttype == T.Wildcard, 194 | lambda t: t.match(T.Keyword, 'null'), 195 | lambda t: t.ttype == T.Number.Integer, 196 | lambda t: t.ttype == T.String.Single, 197 | lambda t: isinstance(t, sql.Comparison), 198 | ] 199 | tcomma = tlist.token_next_match(idx, T.Punctuation, ',') 200 | start = None 201 | while tcomma is not None: 202 | before = tlist.token_prev(tcomma) 203 | after = tlist.token_next(tcomma) 204 | # Check if the tokens around tcomma belong to a list 205 | bpassed = apassed = False 206 | for func in fend1_funcs: 207 | if before is not None and func(before): 208 | bpassed = True 209 | if after is not None and func(after): 210 | apassed = True 211 | if not bpassed or not apassed: 212 | # Something's wrong here, skip ahead to next "," 213 | start = None 214 | tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1, 215 | T.Punctuation, ',') 216 | else: 217 | if start is None: 218 | start = before 219 | next_ = tlist.token_next(after) 220 | if next_ is None or not next_.match(T.Punctuation, ','): 221 | # Reached the end of the list 222 | tokens = tlist.tokens_between(start, after) 223 | group = tlist.group_tokens(sql.IdentifierList, tokens) 224 | start = None 225 | tcomma = tlist.token_next_match(tlist.token_index(group) + 1, 226 | T.Punctuation, ',') 227 | else: 228 | tcomma = next_ 229 | 230 | 231 | def group_parenthesis(tlist): 232 | _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', 233 | sql.Parenthesis) 234 | 235 | 236 | def group_comments(tlist): 237 | [group_comments(sgroup) for sgroup in tlist.get_sublists() 238 | if not isinstance(sgroup, sql.Comment)] 239 | idx = 0 240 | token = tlist.token_next_by_type(idx, T.Comment) 241 | while token: 242 | tidx = tlist.token_index(token) 243 | end = tlist.token_not_matching(tidx + 1, 244 | [lambda t: t.ttype in T.Comment, 245 | lambda t: t.is_whitespace()]) 246 | if end is None: 247 | idx = tidx + 1 248 | else: 249 | eidx = tlist.token_index(end) 250 | grp_tokens = tlist.tokens_between(token, 251 | tlist.token_prev(eidx, False)) 252 | group = tlist.group_tokens(sql.Comment, grp_tokens) 253 | idx = tlist.token_index(group) 254 | token = tlist.token_next_by_type(idx, T.Comment) 
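# Illustrative sketch (not part of upstream python-sqlparse): the grouping passes in
# this module are normally driven through group() below, which sqlparse.parse() runs
# on every statement, e.g.
#
#     import sqlparse
#     stmt = sqlparse.parse("select a, b from foo where id = 1")[0]
#     for tok in stmt.tokens:
#         if tok.is_group():
#             print("%s: %s" % (type(tok).__name__, str(tok)))
#
# which lists the grouped nodes built by the group_* functions here (typically an
# IdentifierList, an Identifier and a Where clause for this input).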
255 | 256 | 257 | def group_where(tlist): 258 | [group_where(sgroup) for sgroup in tlist.get_sublists() 259 | if not isinstance(sgroup, sql.Where)] 260 | idx = 0 261 | token = tlist.token_next_match(idx, T.Keyword, 'WHERE') 262 | stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION') 263 | while token: 264 | tidx = tlist.token_index(token) 265 | end = tlist.token_next_match(tidx + 1, T.Keyword, stopwords) 266 | if end is None: 267 | end = tlist._groupable_tokens[-1] 268 | else: 269 | end = tlist.tokens[tlist.token_index(end) - 1] 270 | group = tlist.group_tokens(sql.Where, 271 | tlist.tokens_between(token, end), 272 | ignore_ws=True) 273 | idx = tlist.token_index(group) 274 | token = tlist.token_next_match(idx, T.Keyword, 'WHERE') 275 | 276 | 277 | def group_aliased(tlist): 278 | clss = (sql.Identifier, sql.Function, sql.Case) 279 | [group_aliased(sgroup) for sgroup in tlist.get_sublists() 280 | if not isinstance(sgroup, clss)] 281 | idx = 0 282 | token = tlist.token_next_by_instance(idx, clss) 283 | while token: 284 | next_ = tlist.token_next(tlist.token_index(token)) 285 | if next_ is not None and isinstance(next_, clss): 286 | grp = tlist.tokens_between(token, next_)[1:] 287 | token.tokens.extend(grp) 288 | for t in grp: 289 | tlist.tokens.remove(t) 290 | idx = tlist.token_index(token) + 1 291 | token = tlist.token_next_by_instance(idx, clss) 292 | 293 | 294 | def group_typecasts(tlist): 295 | _group_left_right(tlist, T.Punctuation, '::', sql.Identifier) 296 | 297 | 298 | def group_functions(tlist): 299 | [group_functions(sgroup) for sgroup in tlist.get_sublists() 300 | if not isinstance(sgroup, sql.Function)] 301 | idx = 0 302 | token = tlist.token_next_by_type(idx, T.Name) 303 | while token: 304 | next_ = tlist.token_next(token) 305 | if not isinstance(next_, sql.Parenthesis): 306 | idx = tlist.token_index(token) + 1 307 | else: 308 | func = tlist.group_tokens(sql.Function, 309 | tlist.tokens_between(token, next_)) 310 | idx = tlist.token_index(func) + 1 311 | token = tlist.token_next_by_type(idx, T.Name) 312 | 313 | 314 | def group(tlist): 315 | for func in [group_parenthesis, 316 | group_functions, 317 | group_comments, 318 | group_where, 319 | group_case, 320 | group_identifier, 321 | group_typecasts, 322 | group_as, 323 | group_aliased, 324 | group_assignment, 325 | group_comparison, 326 | group_identifier_list, 327 | group_if, 328 | group_for]: 329 | func(tlist) 330 | -------------------------------------------------------------------------------- /sqlparse/filters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import re 6 | 7 | from os.path import abspath, join 8 | 9 | from . import sql 10 | from . 
import tokens as T 11 | from .engine import FilterStack 12 | from .tokens import ( 13 | Comment, Keyword, Name, 14 | Punctuation, String, Whitespace, 15 | ) 16 | 17 | 18 | class Filter(object): 19 | 20 | def process(self, *args): 21 | raise NotImplementedError 22 | 23 | 24 | class TokenFilter(Filter): 25 | 26 | def process(self, stack, stream): 27 | raise NotImplementedError 28 | 29 | 30 | # -------------------------- 31 | # token process 32 | 33 | class _CaseFilter(TokenFilter): 34 | 35 | ttype = None 36 | 37 | def __init__(self, case=None): 38 | if case is None: 39 | case = 'upper' 40 | assert case in ['lower', 'upper', 'capitalize'] 41 | self.convert = getattr(str, case) 42 | 43 | def process(self, stack, stream): 44 | for ttype, value in stream: 45 | if ttype in self.ttype: 46 | value = self.convert(value) 47 | yield ttype, value 48 | 49 | 50 | class KeywordCaseFilter(_CaseFilter): 51 | ttype = T.Keyword 52 | 53 | 54 | class IdentifierCaseFilter(_CaseFilter): 55 | ttype = (T.Name, T.String.Symbol) 56 | 57 | def process(self, stack, stream): 58 | for ttype, value in stream: 59 | if ttype in self.ttype and not value.strip()[0] == '"': 60 | value = self.convert(value) 61 | yield ttype, value 62 | 63 | 64 | class GetComments(Filter): 65 | """Get the comments from a stack""" 66 | def process(self, stack, stream): 67 | for token_type, value in stream: 68 | if token_type in Comment: 69 | yield token_type, value 70 | 71 | 72 | class StripComments(Filter): 73 | """Strip the comments from a stack""" 74 | def process(self, stack, stream): 75 | for token_type, value in stream: 76 | if token_type not in Comment: 77 | yield token_type, value 78 | 79 | 80 | class IncludeStatement(Filter): 81 | """Filter that enable a INCLUDE statement""" 82 | 83 | def __init__(self, dirpath=".", maxRecursive=10): 84 | self.dirpath = abspath(dirpath) 85 | self.maxRecursive = maxRecursive 86 | 87 | self.detected = False 88 | 89 | def process(self, stack, stream): 90 | # Run over all tokens in the stream 91 | for token_type, value in stream: 92 | # INCLUDE statement found, set detected mode 93 | if token_type in Name and value.upper() == 'INCLUDE': 94 | self.detected = True 95 | continue 96 | 97 | # INCLUDE statement was found, parse it 98 | elif self.detected: 99 | # Omit whitespaces 100 | if token_type in Whitespace: 101 | pass 102 | 103 | # Get path of file to include 104 | path = None 105 | 106 | if token_type in String.Symbol: 107 | # if token_type in tokens.String.Symbol: 108 | path = join(self.dirpath, value[1:-1]) 109 | 110 | # Include file if path was found 111 | if path: 112 | try: 113 | f = open(path) 114 | raw_sql = f.read() 115 | f.close() 116 | except IOError as err: 117 | yield Comment, u'-- IOError: %s\n' % err 118 | 119 | else: 120 | # Create new FilterStack to parse readed file 121 | # and add all its tokens to the main stack recursively 122 | # [ToDo] Add maximum recursive iteration value 123 | stack = FilterStack() 124 | stack.preprocess.append(IncludeStatement(self.dirpath)) 125 | 126 | for tv in stack.run(raw_sql): 127 | yield tv 128 | 129 | # Set normal mode 130 | self.detected = False 131 | 132 | # Don't include any token while in detected mode 133 | continue 134 | 135 | # Normal token 136 | yield token_type, value 137 | 138 | 139 | # ---------------------- 140 | # statement process 141 | 142 | class StripCommentsFilter(Filter): 143 | 144 | def _get_next_comment(self, tlist): 145 | # TODO(andi) Comment types should be unified, see related issue38 146 | token = tlist.token_next_by_instance(0, 
sql.Comment) 147 | if token is None: 148 | token = tlist.token_next_by_type(0, T.Comment) 149 | return token 150 | 151 | def _process(self, tlist): 152 | token = self._get_next_comment(tlist) 153 | while token: 154 | tidx = tlist.token_index(token) 155 | prev = tlist.token_prev(tidx, False) 156 | next_ = tlist.token_next(tidx, False) 157 | # Replace by whitespace if prev and next exist and if they're not 158 | # whitespaces. This doesn't apply if prev or next is a paranthesis. 159 | if (prev is not None and next_ is not None 160 | and not prev.is_whitespace() and not next_.is_whitespace() 161 | and not (prev.match(T.Punctuation, '(') 162 | or next_.match(T.Punctuation, ')'))): 163 | tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') 164 | else: 165 | tlist.tokens.pop(tidx) 166 | token = self._get_next_comment(tlist) 167 | 168 | def process(self, stack, stmt): 169 | [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] 170 | self._process(stmt) 171 | 172 | 173 | class StripWhitespaceFilter(Filter): 174 | 175 | def _stripws(self, tlist): 176 | func_name = '_stripws_%s' % tlist.__class__.__name__.lower() 177 | func = getattr(self, func_name, self._stripws_default) 178 | func(tlist) 179 | 180 | def _stripws_default(self, tlist): 181 | last_was_ws = False 182 | for token in tlist.tokens: 183 | if token.is_whitespace(): 184 | if last_was_ws: 185 | token.value = '' 186 | else: 187 | token.value = ' ' 188 | last_was_ws = token.is_whitespace() 189 | 190 | def _stripws_parenthesis(self, tlist): 191 | if tlist.tokens[1].is_whitespace(): 192 | tlist.tokens.pop(1) 193 | if tlist.tokens[-2].is_whitespace(): 194 | tlist.tokens.pop(-2) 195 | self._stripws_default(tlist) 196 | 197 | def process(self, stack, stmt): 198 | [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] 199 | self._stripws(stmt) 200 | if stmt.tokens[-1].is_whitespace(): 201 | stmt.tokens.pop(-1) 202 | 203 | 204 | class ReindentFilter(Filter): 205 | 206 | def __init__(self, width=2, char=' ', line_width=None): 207 | self.width = width 208 | self.char = char 209 | self.indent = 0 210 | self.offset = 0 211 | self.line_width = line_width 212 | self._curr_stmt = None 213 | self._last_stmt = None 214 | 215 | def _get_offset(self, token): 216 | all_ = list(self._curr_stmt.flatten()) 217 | idx = all_.index(token) 218 | raw = ''.join(str(x) for x in all_[:idx + 1]) 219 | line = raw.splitlines()[-1] 220 | # Now take current offset into account and return relative offset. 
221 | full_offset = len(line) - len(self.char * (self.width * self.indent)) 222 | return full_offset - self.offset 223 | 224 | def nl(self): 225 | # TODO: newline character should be configurable 226 | ws = '\n' + (self.char * ((self.indent * self.width) + self.offset)) 227 | return sql.Token(T.Whitespace, ws) 228 | 229 | def _split_kwds(self, tlist): 230 | split_words = ('FROM', 'JOIN$', 'AND', 'OR', 231 | 'GROUP', 'ORDER', 'UNION', 'VALUES', 232 | 'SET', 'BETWEEN') 233 | 234 | def _next_token(i): 235 | t = tlist.token_next_match(i, T.Keyword, split_words, 236 | regex=True) 237 | if t and t.value.upper() == 'BETWEEN': 238 | t = _next_token(tlist.token_index(t) + 1) 239 | if t and t.value.upper() == 'AND': 240 | t = _next_token(tlist.token_index(t) + 1) 241 | return t 242 | 243 | idx = 0 244 | token = _next_token(idx) 245 | while token: 246 | prev = tlist.token_prev(tlist.token_index(token), False) 247 | offset = 1 248 | if prev and prev.is_whitespace(): 249 | tlist.tokens.pop(tlist.token_index(prev)) 250 | offset += 1 251 | if (prev 252 | and isinstance(prev, sql.Comment) 253 | and (str(prev).endswith('\n') 254 | or str(prev).endswith('\r'))): 255 | nl = tlist.token_next(token) 256 | else: 257 | nl = self.nl() 258 | tlist.insert_before(token, nl) 259 | token = _next_token(tlist.token_index(nl) + offset) 260 | 261 | def _split_statements(self, tlist): 262 | idx = 0 263 | token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) 264 | while token: 265 | prev = tlist.token_prev(tlist.token_index(token), False) 266 | if prev and prev.is_whitespace(): 267 | tlist.tokens.pop(tlist.token_index(prev)) 268 | # only break if it's not the first token 269 | if prev: 270 | nl = self.nl() 271 | tlist.insert_before(token, nl) 272 | token = tlist.token_next_by_type(tlist.token_index(token) + 1, 273 | (T.Keyword.DDL, T.Keyword.DML)) 274 | 275 | def _process(self, tlist): 276 | func_name = '_process_%s' % tlist.__class__.__name__.lower() 277 | func = getattr(self, func_name, self._process_default) 278 | func(tlist) 279 | 280 | def _process_where(self, tlist): 281 | token = tlist.token_next_match(0, T.Keyword, 'WHERE') 282 | tlist.insert_before(token, self.nl()) 283 | self.indent += 1 284 | self._process_default(tlist) 285 | self.indent -= 1 286 | 287 | def _process_parenthesis(self, tlist): 288 | first = tlist.token_next(0) 289 | indented = False 290 | if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): 291 | self.indent += 1 292 | tlist.tokens.insert(0, self.nl()) 293 | indented = True 294 | num_offset = self._get_offset(tlist.token_next_match(0, 295 | T.Punctuation, '(')) 296 | self.offset += num_offset 297 | self._process_default(tlist, stmts=not indented) 298 | if indented: 299 | self.indent -= 1 300 | self.offset -= num_offset 301 | 302 | def _process_identifierlist(self, tlist): 303 | identifiers = tlist.get_identifiers() 304 | if len(identifiers) > 1 and not tlist.within(sql.Function): 305 | first = list(identifiers[0].flatten())[0] 306 | num_offset = self._get_offset(first) - len(first.value) 307 | self.offset += num_offset 308 | for token in identifiers[1:]: 309 | tlist.insert_before(token, self.nl()) 310 | self.offset -= num_offset 311 | self._process_default(tlist) 312 | 313 | def _process_case(self, tlist): 314 | is_first = True 315 | num_offset = None 316 | case = tlist.tokens[0] 317 | outer_offset = self._get_offset(case) - len(case.value) 318 | self.offset += outer_offset 319 | for cond, value in tlist.get_cases(): 320 | if is_first: 321 | tcond = 
list(cond[0].flatten())[0] 322 | is_first = False 323 | num_offset = self._get_offset(tcond) - len(tcond.value) 324 | self.offset += num_offset 325 | continue 326 | if cond is None: 327 | token = value[0] 328 | else: 329 | token = cond[0] 330 | tlist.insert_before(token, self.nl()) 331 | # Line breaks on group level are done. Now let's add an offset of 332 | # 5 (=length of "when", "then", "else") and process subgroups. 333 | self.offset += 5 334 | self._process_default(tlist) 335 | self.offset -= 5 336 | if num_offset is not None: 337 | self.offset -= num_offset 338 | end = tlist.token_next_match(0, T.Keyword, 'END') 339 | tlist.insert_before(end, self.nl()) 340 | self.offset -= outer_offset 341 | 342 | def _process_default(self, tlist, stmts=True, kwds=True): 343 | if stmts: 344 | self._split_statements(tlist) 345 | if kwds: 346 | self._split_kwds(tlist) 347 | [self._process(sgroup) for sgroup in tlist.get_sublists()] 348 | 349 | def process(self, stack, stmt): 350 | if isinstance(stmt, sql.Statement): 351 | self._curr_stmt = stmt 352 | self._process(stmt) 353 | if isinstance(stmt, sql.Statement): 354 | if self._last_stmt is not None: 355 | if self._last_stmt.to_unicode().endswith('\n'): 356 | nl = '\n' 357 | else: 358 | nl = '\n\n' 359 | stmt.tokens.insert(0, 360 | sql.Token(T.Whitespace, nl)) 361 | if self._last_stmt != stmt: 362 | self._last_stmt = stmt 363 | 364 | 365 | # FIXME: Doesn't work ;) 366 | class RightMarginFilter(Filter): 367 | 368 | keep_together = ( 369 | # sql.TypeCast, sql.Identifier, sql.Alias, 370 | ) 371 | 372 | def __init__(self, width=79): 373 | self.width = width 374 | self.line = '' 375 | 376 | def _process(self, stack, group, stream): 377 | for token in stream: 378 | if token.is_whitespace() and '\n' in token.value: 379 | if token.value.endswith('\n'): 380 | self.line = '' 381 | else: 382 | self.line = token.value.splitlines()[-1] 383 | elif (token.is_group() 384 | and not token.__class__ in self.keep_together): 385 | token.tokens = self._process(stack, token, token.tokens) 386 | else: 387 | val = token.to_unicode() 388 | if len(self.line) + len(val) > self.width: 389 | match = re.search('^ +', self.line) 390 | if match is not None: 391 | indent = match.group() 392 | else: 393 | indent = '' 394 | yield sql.Token(T.Whitespace, '\n%s' % indent) 395 | self.line = indent 396 | self.line += val 397 | yield token 398 | 399 | def process(self, stack, group): 400 | return 401 | group.tokens = self._process(stack, group, group.tokens) 402 | 403 | 404 | class ColumnsSelect(Filter): 405 | """Get the columns names of a SELECT query""" 406 | def process(self, stack, stream): 407 | mode = 0 408 | oldValue = "" 409 | parenthesis = 0 410 | 411 | for token_type, value in stream: 412 | # Ignore comments 413 | if token_type in Comment: 414 | continue 415 | 416 | # We have not detected a SELECT statement 417 | if mode == 0: 418 | if token_type in Keyword and value == 'SELECT': 419 | mode = 1 420 | 421 | # We have detected a SELECT statement 422 | elif mode == 1: 423 | if value == 'FROM': 424 | if oldValue: 425 | yield oldValue 426 | 427 | mode = 3 # Columns have been checked 428 | 429 | elif value == 'AS': 430 | oldValue = "" 431 | mode = 2 432 | 433 | elif (token_type == Punctuation 434 | and value == ',' and not parenthesis): 435 | if oldValue: 436 | yield oldValue 437 | oldValue = "" 438 | 439 | elif token_type not in Whitespace: 440 | if value == '(': 441 | parenthesis += 1 442 | elif value == ')': 443 | parenthesis -= 1 444 | 445 | oldValue += value 446 | 447 | # We are 
processing an AS keyword 448 | elif mode == 2: 449 | # We check also for Keywords because a bug in SQLParse 450 | if token_type == Name or token_type == Keyword: 451 | yield value 452 | mode = 1 453 | 454 | 455 | # --------------------------- 456 | # postprocess 457 | 458 | class SerializerUnicode(Filter): 459 | 460 | def process(self, stack, stmt): 461 | raw = stmt.to_unicode() 462 | add_nl = raw.endswith('\n') 463 | res = '\n'.join(line.rstrip() for line in raw.splitlines()) 464 | if add_nl: 465 | res += '\n' 466 | return res 467 | 468 | def Tokens2Unicode(stream): 469 | result = "" 470 | 471 | for _, value in stream: 472 | result += str(value) 473 | 474 | return result 475 | 476 | 477 | class OutputPythonFilter(Filter): 478 | 479 | def __init__(self, varname='sql'): 480 | self.varname = varname 481 | self.cnt = 0 482 | 483 | def _process(self, stream, varname, count, has_nl): 484 | if count > 1: 485 | yield sql.Token(T.Whitespace, '\n') 486 | yield sql.Token(T.Name, varname) 487 | yield sql.Token(T.Whitespace, ' ') 488 | yield sql.Token(T.Operator, '=') 489 | yield sql.Token(T.Whitespace, ' ') 490 | if has_nl: 491 | yield sql.Token(T.Operator, '(') 492 | yield sql.Token(T.Text, "'") 493 | cnt = 0 494 | for token in stream: 495 | cnt += 1 496 | if token.is_whitespace() and '\n' in token.value: 497 | if cnt == 1: 498 | continue 499 | after_lb = token.value.split('\n', 1)[1] 500 | yield sql.Token(T.Text, " '") 501 | yield sql.Token(T.Whitespace, '\n') 502 | for i in range(len(varname) + 4): 503 | yield sql.Token(T.Whitespace, ' ') 504 | yield sql.Token(T.Text, "'") 505 | if after_lb: # it's the indendation 506 | yield sql.Token(T.Whitespace, after_lb) 507 | continue 508 | elif token.value and "'" in token.value: 509 | token.value = token.value.replace("'", "\\'") 510 | yield sql.Token(T.Text, token.value or '') 511 | yield sql.Token(T.Text, "'") 512 | if has_nl: 513 | yield sql.Token(T.Operator, ')') 514 | 515 | def process(self, stack, stmt): 516 | self.cnt += 1 517 | if self.cnt > 1: 518 | varname = '%s%d' % (self.varname, self.cnt) 519 | else: 520 | varname = self.varname 521 | has_nl = len(stmt.to_unicode().strip().splitlines()) > 1 522 | stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl) 523 | return stmt 524 | 525 | 526 | class OutputPHPFilter(Filter): 527 | 528 | def __init__(self, varname='sql'): 529 | self.varname = '$%s' % varname 530 | self.count = 0 531 | 532 | def _process(self, stream, varname): 533 | if self.count > 1: 534 | yield sql.Token(T.Whitespace, '\n') 535 | yield sql.Token(T.Name, varname) 536 | yield sql.Token(T.Whitespace, ' ') 537 | yield sql.Token(T.Operator, '=') 538 | yield sql.Token(T.Whitespace, ' ') 539 | yield sql.Token(T.Text, '"') 540 | for token in stream: 541 | if token.is_whitespace() and '\n' in token.value: 542 | after_lb = token.value.split('\n', 1)[1] 543 | yield sql.Token(T.Text, ' "') 544 | yield sql.Token(T.Operator, ';') 545 | yield sql.Token(T.Whitespace, '\n') 546 | yield sql.Token(T.Name, varname) 547 | yield sql.Token(T.Whitespace, ' ') 548 | yield sql.Token(T.Punctuation, '.') 549 | yield sql.Token(T.Operator, '=') 550 | yield sql.Token(T.Whitespace, ' ') 551 | yield sql.Token(T.Text, '"') 552 | if after_lb: 553 | yield sql.Token(T.Text, after_lb) 554 | continue 555 | elif '"' in token.value: 556 | token.value = token.value.replace('"', '\\"') 557 | yield sql.Token(T.Text, token.value) 558 | yield sql.Token(T.Text, '"') 559 | yield sql.Token(T.Punctuation, ';') 560 | 561 | def process(self, stack, stmt): 562 | self.count 
+= 1 563 | if self.count > 1: 564 | varname = '%s%d' % (self.varname, self.count) 565 | else: 566 | varname = self.varname 567 | stmt.tokens = tuple(self._process(stmt.tokens, varname)) 568 | return stmt 569 | 570 | 571 | class Limit(Filter): 572 | """Get the LIMIT of a query. 573 | 574 | If not defined, return -1 (SQL specification for no LIMIT query) 575 | """ 576 | def process(self, stack, stream): 577 | index = 7 578 | stream = list(stream) 579 | stream.reverse() 580 | 581 | # Run over all tokens in the stream from the end 582 | for token_type, value in stream: 583 | index -= 1 584 | 585 | # if index and token_type in Keyword: 586 | if index and token_type in Keyword and value == 'LIMIT': 587 | return stream[4 - index][1] 588 | 589 | return -1 -------------------------------------------------------------------------------- /sqlparse/formatter.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | """SQL formatter""" 7 | from __future__ import absolute_import 8 | from . import SQLParseError 9 | from . import filters 10 | 11 | 12 | def validate_options(options): 13 | """Validates options.""" 14 | kwcase = options.get('keyword_case', None) 15 | if kwcase not in [None, 'upper', 'lower', 'capitalize']: 16 | raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) 17 | 18 | idcase = options.get('identifier_case', None) 19 | if idcase not in [None, 'upper', 'lower', 'capitalize']: 20 | raise SQLParseError('Invalid value for identifier_case: %r' % idcase) 21 | 22 | ofrmt = options.get('output_format', None) 23 | if ofrmt not in [None, 'sql', 'python', 'php']: 24 | raise SQLParseError('Unknown output format: %r' % ofrmt) 25 | 26 | strip_comments = options.get('strip_comments', False) 27 | if strip_comments not in [True, False]: 28 | raise SQLParseError('Invalid value for strip_comments: %r' 29 | % strip_comments) 30 | 31 | strip_ws = options.get('strip_whitespace', False) 32 | if strip_ws not in [True, False]: 33 | raise SQLParseError('Invalid value for strip_whitespace: %r' 34 | % strip_ws) 35 | 36 | reindent = options.get('reindent', False) 37 | if reindent not in [True, False]: 38 | raise SQLParseError('Invalid value for reindent: %r' 39 | % reindent) 40 | elif reindent: 41 | options['strip_whitespace'] = True 42 | indent_tabs = options.get('indent_tabs', False) 43 | if indent_tabs not in [True, False]: 44 | raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) 45 | elif indent_tabs: 46 | options['indent_char'] = '\t' 47 | else: 48 | options['indent_char'] = ' ' 49 | indent_width = options.get('indent_width', 2) 50 | try: 51 | indent_width = int(indent_width) 52 | except (TypeError, ValueError): 53 | raise SQLParseError('indent_width requires an integer') 54 | if indent_width < 1: 55 | raise SQLParseError('indent_width requires an positive integer') 56 | options['indent_width'] = indent_width 57 | 58 | right_margin = options.get('right_margin', None) 59 | if right_margin is not None: 60 | try: 61 | right_margin = int(right_margin) 62 | except (TypeError, ValueError): 63 | raise SQLParseError('right_margin requires an integer') 64 | if right_margin < 10: 65 | raise SQLParseError('right_margin requires an integer > 10') 66 | options['right_margin'] = right_margin 67 | 68 | return options 69 | 70 | 71 | def 
build_filter_stack(stack, options): 72 | """Setup and return a filter stack. 73 | 74 | Args: 75 | stack: :class:`~sqlparse.filters.FilterStack` instance 76 | options: Dictionary with options validated by validate_options. 77 | """ 78 | # Token filter 79 | if options.get('keyword_case', None): 80 | stack.preprocess.append( 81 | filters.KeywordCaseFilter(options['keyword_case'])) 82 | 83 | if options.get('identifier_case', None): 84 | stack.preprocess.append( 85 | filters.IdentifierCaseFilter(options['identifier_case'])) 86 | 87 | # After grouping 88 | if options.get('strip_comments', False): 89 | stack.enable_grouping() 90 | stack.stmtprocess.append(filters.StripCommentsFilter()) 91 | 92 | if (options.get('strip_whitespace', False) 93 | or options.get('reindent', False)): 94 | stack.enable_grouping() 95 | stack.stmtprocess.append(filters.StripWhitespaceFilter()) 96 | 97 | if options.get('reindent', False): 98 | stack.enable_grouping() 99 | stack.stmtprocess.append( 100 | filters.ReindentFilter(char=options['indent_char'], 101 | width=options['indent_width'])) 102 | 103 | if options.get('right_margin', False): 104 | stack.enable_grouping() 105 | stack.stmtprocess.append( 106 | filters.RightMarginFilter(width=options['right_margin'])) 107 | 108 | # Serializer 109 | if options.get('output_format'): 110 | frmt = options['output_format'] 111 | if frmt.lower() == 'php': 112 | fltr = filters.OutputPHPFilter() 113 | elif frmt.lower() == 'python': 114 | fltr = filters.OutputPythonFilter() 115 | else: 116 | fltr = None 117 | if fltr is not None: 118 | stack.postprocess.append(fltr) 119 | 120 | return stack 121 | -------------------------------------------------------------------------------- /sqlparse/keywords.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . 
import tokens 3 | 4 | KEYWORDS = { 5 | 'ABORT': tokens.Keyword, 6 | 'ABS': tokens.Keyword, 7 | 'ABSOLUTE': tokens.Keyword, 8 | 'ACCESS': tokens.Keyword, 9 | 'ADA': tokens.Keyword, 10 | 'ADD': tokens.Keyword, 11 | 'ADMIN': tokens.Keyword, 12 | 'AFTER': tokens.Keyword, 13 | 'AGGREGATE': tokens.Keyword, 14 | 'ALIAS': tokens.Keyword, 15 | 'ALL': tokens.Keyword, 16 | 'ALLOCATE': tokens.Keyword, 17 | 'ANALYSE': tokens.Keyword, 18 | 'ANALYZE': tokens.Keyword, 19 | 'ANY': tokens.Keyword, 20 | 'ARE': tokens.Keyword, 21 | 'ASC': tokens.Keyword, 22 | 'ASENSITIVE': tokens.Keyword, 23 | 'ASSERTION': tokens.Keyword, 24 | 'ASSIGNMENT': tokens.Keyword, 25 | 'ASYMMETRIC': tokens.Keyword, 26 | 'AT': tokens.Keyword, 27 | 'ATOMIC': tokens.Keyword, 28 | 'AUTHORIZATION': tokens.Keyword, 29 | 'AVG': tokens.Keyword, 30 | 31 | 'BACKWARD': tokens.Keyword, 32 | 'BEFORE': tokens.Keyword, 33 | 'BEGIN': tokens.Keyword, 34 | 'BETWEEN': tokens.Keyword, 35 | 'BITVAR': tokens.Keyword, 36 | 'BIT_LENGTH': tokens.Keyword, 37 | 'BOTH': tokens.Keyword, 38 | 'BREADTH': tokens.Keyword, 39 | 40 | # 'C': tokens.Keyword, # most likely this is an alias 41 | 'CACHE': tokens.Keyword, 42 | 'CALL': tokens.Keyword, 43 | 'CALLED': tokens.Keyword, 44 | 'CARDINALITY': tokens.Keyword, 45 | 'CASCADE': tokens.Keyword, 46 | 'CASCADED': tokens.Keyword, 47 | 'CAST': tokens.Keyword, 48 | 'CATALOG': tokens.Keyword, 49 | 'CATALOG_NAME': tokens.Keyword, 50 | 'CHAIN': tokens.Keyword, 51 | 'CHARACTERISTICS': tokens.Keyword, 52 | 'CHARACTER_LENGTH': tokens.Keyword, 53 | 'CHARACTER_SET_CATALOG': tokens.Keyword, 54 | 'CHARACTER_SET_NAME': tokens.Keyword, 55 | 'CHARACTER_SET_SCHEMA': tokens.Keyword, 56 | 'CHAR_LENGTH': tokens.Keyword, 57 | 'CHECK': tokens.Keyword, 58 | 'CHECKED': tokens.Keyword, 59 | 'CHECKPOINT': tokens.Keyword, 60 | 'CLASS': tokens.Keyword, 61 | 'CLASS_ORIGIN': tokens.Keyword, 62 | 'CLOB': tokens.Keyword, 63 | 'CLOSE': tokens.Keyword, 64 | 'CLUSTER': tokens.Keyword, 65 | 'COALSECE': tokens.Keyword, 66 | 'COBOL': tokens.Keyword, 67 | 'COLLATE': tokens.Keyword, 68 | 'COLLATION': tokens.Keyword, 69 | 'COLLATION_CATALOG': tokens.Keyword, 70 | 'COLLATION_NAME': tokens.Keyword, 71 | 'COLLATION_SCHEMA': tokens.Keyword, 72 | 'COLUMN': tokens.Keyword, 73 | 'COLUMN_NAME': tokens.Keyword, 74 | 'COMMAND_FUNCTION': tokens.Keyword, 75 | 'COMMAND_FUNCTION_CODE': tokens.Keyword, 76 | 'COMMENT': tokens.Keyword, 77 | 'COMMIT': tokens.Keyword, 78 | 'COMMITTED': tokens.Keyword, 79 | 'COMPLETION': tokens.Keyword, 80 | 'CONDITION_NUMBER': tokens.Keyword, 81 | 'CONNECT': tokens.Keyword, 82 | 'CONNECTION': tokens.Keyword, 83 | 'CONNECTION_NAME': tokens.Keyword, 84 | 'CONSTRAINT': tokens.Keyword, 85 | 'CONSTRAINTS': tokens.Keyword, 86 | 'CONSTRAINT_CATALOG': tokens.Keyword, 87 | 'CONSTRAINT_NAME': tokens.Keyword, 88 | 'CONSTRAINT_SCHEMA': tokens.Keyword, 89 | 'CONSTRUCTOR': tokens.Keyword, 90 | 'CONTAINS': tokens.Keyword, 91 | 'CONTINUE': tokens.Keyword, 92 | 'CONVERSION': tokens.Keyword, 93 | 'CONVERT': tokens.Keyword, 94 | 'COPY': tokens.Keyword, 95 | 'CORRESPONTING': tokens.Keyword, 96 | 'COUNT': tokens.Keyword, 97 | 'CREATEDB': tokens.Keyword, 98 | 'CREATEUSER': tokens.Keyword, 99 | 'CROSS': tokens.Keyword, 100 | 'CUBE': tokens.Keyword, 101 | 'CURRENT': tokens.Keyword, 102 | 'CURRENT_DATE': tokens.Keyword, 103 | 'CURRENT_PATH': tokens.Keyword, 104 | 'CURRENT_ROLE': tokens.Keyword, 105 | 'CURRENT_TIME': tokens.Keyword, 106 | 'CURRENT_TIMESTAMP': tokens.Keyword, 107 | 'CURRENT_USER': tokens.Keyword, 108 | 'CURSOR': tokens.Keyword, 109 | 'CURSOR_NAME': 
tokens.Keyword, 110 | 'CYCLE': tokens.Keyword, 111 | 112 | 'DATA': tokens.Keyword, 113 | 'DATABASE': tokens.Keyword, 114 | 'DATETIME_INTERVAL_CODE': tokens.Keyword, 115 | 'DATETIME_INTERVAL_PRECISION': tokens.Keyword, 116 | 'DAY': tokens.Keyword, 117 | 'DEALLOCATE': tokens.Keyword, 118 | 'DECLARE': tokens.Keyword, 119 | 'DEFAULT': tokens.Keyword, 120 | 'DEFAULTS': tokens.Keyword, 121 | 'DEFERRABLE': tokens.Keyword, 122 | 'DEFERRED': tokens.Keyword, 123 | 'DEFINED': tokens.Keyword, 124 | 'DEFINER': tokens.Keyword, 125 | 'DELIMITER': tokens.Keyword, 126 | 'DELIMITERS': tokens.Keyword, 127 | 'DEREF': tokens.Keyword, 128 | 'DESC': tokens.Keyword, 129 | 'DESCRIBE': tokens.Keyword, 130 | 'DESCRIPTOR': tokens.Keyword, 131 | 'DESTROY': tokens.Keyword, 132 | 'DESTRUCTOR': tokens.Keyword, 133 | 'DETERMINISTIC': tokens.Keyword, 134 | 'DIAGNOSTICS': tokens.Keyword, 135 | 'DICTIONARY': tokens.Keyword, 136 | 'DISCONNECT': tokens.Keyword, 137 | 'DISPATCH': tokens.Keyword, 138 | 'DO': tokens.Keyword, 139 | 'DOMAIN': tokens.Keyword, 140 | 'DYNAMIC': tokens.Keyword, 141 | 'DYNAMIC_FUNCTION': tokens.Keyword, 142 | 'DYNAMIC_FUNCTION_CODE': tokens.Keyword, 143 | 144 | 'EACH': tokens.Keyword, 145 | 'ENCODING': tokens.Keyword, 146 | 'ENCRYPTED': tokens.Keyword, 147 | 'END-EXEC': tokens.Keyword, 148 | 'EQUALS': tokens.Keyword, 149 | 'ESCAPE': tokens.Keyword, 150 | 'EVERY': tokens.Keyword, 151 | 'EXCEPT': tokens.Keyword, 152 | 'ESCEPTION': tokens.Keyword, 153 | 'EXCLUDING': tokens.Keyword, 154 | 'EXCLUSIVE': tokens.Keyword, 155 | 'EXEC': tokens.Keyword, 156 | 'EXECUTE': tokens.Keyword, 157 | 'EXISTING': tokens.Keyword, 158 | 'EXISTS': tokens.Keyword, 159 | 'EXTERNAL': tokens.Keyword, 160 | 'EXTRACT': tokens.Keyword, 161 | 162 | 'FALSE': tokens.Keyword, 163 | 'FETCH': tokens.Keyword, 164 | 'FINAL': tokens.Keyword, 165 | 'FIRST': tokens.Keyword, 166 | 'FORCE': tokens.Keyword, 167 | 'FOREIGN': tokens.Keyword, 168 | 'FORTRAN': tokens.Keyword, 169 | 'FORWARD': tokens.Keyword, 170 | 'FOUND': tokens.Keyword, 171 | 'FREE': tokens.Keyword, 172 | 'FREEZE': tokens.Keyword, 173 | 'FULL': tokens.Keyword, 174 | 'FUNCTION': tokens.Keyword, 175 | 176 | # 'G': tokens.Keyword, 177 | 'GENERAL': tokens.Keyword, 178 | 'GENERATED': tokens.Keyword, 179 | 'GET': tokens.Keyword, 180 | 'GLOBAL': tokens.Keyword, 181 | 'GO': tokens.Keyword, 182 | 'GOTO': tokens.Keyword, 183 | 'GRANT': tokens.Keyword, 184 | 'GRANTED': tokens.Keyword, 185 | 'GROUPING': tokens.Keyword, 186 | 187 | 'HANDLER': tokens.Keyword, 188 | 'HAVING': tokens.Keyword, 189 | 'HIERARCHY': tokens.Keyword, 190 | 'HOLD': tokens.Keyword, 191 | 'HOST': tokens.Keyword, 192 | 193 | 'IDENTITY': tokens.Keyword, 194 | 'IGNORE': tokens.Keyword, 195 | 'ILIKE': tokens.Keyword, 196 | 'IMMEDIATE': tokens.Keyword, 197 | 'IMMUTABLE': tokens.Keyword, 198 | 199 | 'IMPLEMENTATION': tokens.Keyword, 200 | 'IMPLICIT': tokens.Keyword, 201 | 'INCLUDING': tokens.Keyword, 202 | 'INCREMENT': tokens.Keyword, 203 | 'INDEX': tokens.Keyword, 204 | 205 | 'INDITCATOR': tokens.Keyword, 206 | 'INFIX': tokens.Keyword, 207 | 'INHERITS': tokens.Keyword, 208 | 'INITIALIZE': tokens.Keyword, 209 | 'INITIALLY': tokens.Keyword, 210 | 'INOUT': tokens.Keyword, 211 | 'INPUT': tokens.Keyword, 212 | 'INSENSITIVE': tokens.Keyword, 213 | 'INSTANTIABLE': tokens.Keyword, 214 | 'INSTEAD': tokens.Keyword, 215 | 'INTERSECT': tokens.Keyword, 216 | 'INTO': tokens.Keyword, 217 | 'INVOKER': tokens.Keyword, 218 | 'IS': tokens.Keyword, 219 | 'ISNULL': tokens.Keyword, 220 | 'ISOLATION': tokens.Keyword, 221 | 'ITERATE': tokens.Keyword, 
222 | 223 | # 'K': tokens.Keyword, 224 | 'KEY': tokens.Keyword, 225 | 'KEY_MEMBER': tokens.Keyword, 226 | 'KEY_TYPE': tokens.Keyword, 227 | 228 | 'LANCOMPILER': tokens.Keyword, 229 | 'LANGUAGE': tokens.Keyword, 230 | 'LARGE': tokens.Keyword, 231 | 'LAST': tokens.Keyword, 232 | 'LATERAL': tokens.Keyword, 233 | 'LEADING': tokens.Keyword, 234 | 'LENGTH': tokens.Keyword, 235 | 'LESS': tokens.Keyword, 236 | 'LEVEL': tokens.Keyword, 237 | 'LIMIT': tokens.Keyword, 238 | 'LISTEN': tokens.Keyword, 239 | 'LOAD': tokens.Keyword, 240 | 'LOCAL': tokens.Keyword, 241 | 'LOCALTIME': tokens.Keyword, 242 | 'LOCALTIMESTAMP': tokens.Keyword, 243 | 'LOCATION': tokens.Keyword, 244 | 'LOCATOR': tokens.Keyword, 245 | 'LOCK': tokens.Keyword, 246 | 'LOWER': tokens.Keyword, 247 | 248 | # 'M': tokens.Keyword, 249 | 'MAP': tokens.Keyword, 250 | 'MATCH': tokens.Keyword, 251 | 'MAXVALUE': tokens.Keyword, 252 | 'MESSAGE_LENGTH': tokens.Keyword, 253 | 'MESSAGE_OCTET_LENGTH': tokens.Keyword, 254 | 'MESSAGE_TEXT': tokens.Keyword, 255 | 'METHOD': tokens.Keyword, 256 | 'MINUTE': tokens.Keyword, 257 | 'MINVALUE': tokens.Keyword, 258 | 'MOD': tokens.Keyword, 259 | 'MODE': tokens.Keyword, 260 | 'MODIFIES': tokens.Keyword, 261 | 'MODIFY': tokens.Keyword, 262 | 'MONTH': tokens.Keyword, 263 | 'MORE': tokens.Keyword, 264 | 'MOVE': tokens.Keyword, 265 | 'MUMPS': tokens.Keyword, 266 | 267 | 'NAMES': tokens.Keyword, 268 | 'NATIONAL': tokens.Keyword, 269 | 'NATURAL': tokens.Keyword, 270 | 'NCHAR': tokens.Keyword, 271 | 'NCLOB': tokens.Keyword, 272 | 'NEW': tokens.Keyword, 273 | 'NEXT': tokens.Keyword, 274 | 'NO': tokens.Keyword, 275 | 'NOCREATEDB': tokens.Keyword, 276 | 'NOCREATEUSER': tokens.Keyword, 277 | 'NONE': tokens.Keyword, 278 | 'NOT': tokens.Keyword, 279 | 'NOTHING': tokens.Keyword, 280 | 'NOTIFY': tokens.Keyword, 281 | 'NOTNULL': tokens.Keyword, 282 | 'NULL': tokens.Keyword, 283 | 'NULLABLE': tokens.Keyword, 284 | 'NULLIF': tokens.Keyword, 285 | 286 | 'OBJECT': tokens.Keyword, 287 | 'OCTET_LENGTH': tokens.Keyword, 288 | 'OF': tokens.Keyword, 289 | 'OFF': tokens.Keyword, 290 | 'OFFSET': tokens.Keyword, 291 | 'OIDS': tokens.Keyword, 292 | 'OLD': tokens.Keyword, 293 | 'ONLY': tokens.Keyword, 294 | 'OPEN': tokens.Keyword, 295 | 'OPERATION': tokens.Keyword, 296 | 'OPERATOR': tokens.Keyword, 297 | 'OPTION': tokens.Keyword, 298 | 'OPTIONS': tokens.Keyword, 299 | 'ORDINALITY': tokens.Keyword, 300 | 'OUT': tokens.Keyword, 301 | 'OUTPUT': tokens.Keyword, 302 | 'OVERLAPS': tokens.Keyword, 303 | 'OVERLAY': tokens.Keyword, 304 | 'OVERRIDING': tokens.Keyword, 305 | 'OWNER': tokens.Keyword, 306 | 307 | 'PAD': tokens.Keyword, 308 | 'PARAMETER': tokens.Keyword, 309 | 'PARAMETERS': tokens.Keyword, 310 | 'PARAMETER_MODE': tokens.Keyword, 311 | 'PARAMATER_NAME': tokens.Keyword, 312 | 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, 313 | 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, 314 | 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, 315 | 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, 316 | 'PARTIAL': tokens.Keyword, 317 | 'PASCAL': tokens.Keyword, 318 | 'PENDANT': tokens.Keyword, 319 | 'PLACING': tokens.Keyword, 320 | 'PLI': tokens.Keyword, 321 | 'POSITION': tokens.Keyword, 322 | 'POSTFIX': tokens.Keyword, 323 | 'PRECISION': tokens.Keyword, 324 | 'PREFIX': tokens.Keyword, 325 | 'PREORDER': tokens.Keyword, 326 | 'PREPARE': tokens.Keyword, 327 | 'PRESERVE': tokens.Keyword, 328 | 'PRIMARY': tokens.Keyword, 329 | 'PRIOR': tokens.Keyword, 330 | 'PRIVILEGES': tokens.Keyword, 331 | 'PROCEDURAL': tokens.Keyword, 332 | 'PROCEDURE': tokens.Keyword, 333 | 
'PUBLIC': tokens.Keyword, 334 | 335 | 'RAISE': tokens.Keyword, 336 | 'READ': tokens.Keyword, 337 | 'READS': tokens.Keyword, 338 | 'RECHECK': tokens.Keyword, 339 | 'RECURSIVE': tokens.Keyword, 340 | 'REF': tokens.Keyword, 341 | 'REFERENCES': tokens.Keyword, 342 | 'REFERENCING': tokens.Keyword, 343 | 'REINDEX': tokens.Keyword, 344 | 'RELATIVE': tokens.Keyword, 345 | 'RENAME': tokens.Keyword, 346 | 'REPEATABLE': tokens.Keyword, 347 | 'RESET': tokens.Keyword, 348 | 'RESTART': tokens.Keyword, 349 | 'RESTRICT': tokens.Keyword, 350 | 'RESULT': tokens.Keyword, 351 | 'RETURN': tokens.Keyword, 352 | 'RETURNED_LENGTH': tokens.Keyword, 353 | 'RETURNED_OCTET_LENGTH': tokens.Keyword, 354 | 'RETURNED_SQLSTATE': tokens.Keyword, 355 | 'RETURNS': tokens.Keyword, 356 | 'REVOKE': tokens.Keyword, 357 | 'RIGHT': tokens.Keyword, 358 | 'ROLE': tokens.Keyword, 359 | 'ROLLBACK': tokens.Keyword, 360 | 'ROLLUP': tokens.Keyword, 361 | 'ROUTINE': tokens.Keyword, 362 | 'ROUTINE_CATALOG': tokens.Keyword, 363 | 'ROUTINE_NAME': tokens.Keyword, 364 | 'ROUTINE_SCHEMA': tokens.Keyword, 365 | 'ROW': tokens.Keyword, 366 | 'ROWS': tokens.Keyword, 367 | 'ROW_COUNT': tokens.Keyword, 368 | 'RULE': tokens.Keyword, 369 | 370 | 'SAVE_POINT': tokens.Keyword, 371 | 'SCALE': tokens.Keyword, 372 | 'SCHEMA': tokens.Keyword, 373 | 'SCHEMA_NAME': tokens.Keyword, 374 | 'SCOPE': tokens.Keyword, 375 | 'SCROLL': tokens.Keyword, 376 | 'SEARCH': tokens.Keyword, 377 | 'SECOND': tokens.Keyword, 378 | 'SECURITY': tokens.Keyword, 379 | 'SELF': tokens.Keyword, 380 | 'SENSITIVE': tokens.Keyword, 381 | 'SERIALIZABLE': tokens.Keyword, 382 | 'SERVER_NAME': tokens.Keyword, 383 | 'SESSION': tokens.Keyword, 384 | 'SESSION_USER': tokens.Keyword, 385 | 'SETOF': tokens.Keyword, 386 | 'SETS': tokens.Keyword, 387 | 'SHARE': tokens.Keyword, 388 | 'SHOW': tokens.Keyword, 389 | 'SIMILAR': tokens.Keyword, 390 | 'SIMPLE': tokens.Keyword, 391 | 'SIZE': tokens.Keyword, 392 | 'SOME': tokens.Keyword, 393 | 'SOURCE': tokens.Keyword, 394 | 'SPACE': tokens.Keyword, 395 | 'SPECIFIC': tokens.Keyword, 396 | 'SPECIFICTYPE': tokens.Keyword, 397 | 'SPECIFIC_NAME': tokens.Keyword, 398 | 'SQL': tokens.Keyword, 399 | 'SQLCODE': tokens.Keyword, 400 | 'SQLERROR': tokens.Keyword, 401 | 'SQLEXCEPTION': tokens.Keyword, 402 | 'SQLSTATE': tokens.Keyword, 403 | 'SQLWARNING': tokens.Keyword, 404 | 'STABLE': tokens.Keyword, 405 | 'START': tokens.Keyword, 406 | 'STATE': tokens.Keyword, 407 | 'STATEMENT': tokens.Keyword, 408 | 'STATIC': tokens.Keyword, 409 | 'STATISTICS': tokens.Keyword, 410 | 'STDIN': tokens.Keyword, 411 | 'STDOUT': tokens.Keyword, 412 | 'STORAGE': tokens.Keyword, 413 | 'STRICT': tokens.Keyword, 414 | 'STRUCTURE': tokens.Keyword, 415 | 'STYPE': tokens.Keyword, 416 | 'SUBCLASS_ORIGIN': tokens.Keyword, 417 | 'SUBLIST': tokens.Keyword, 418 | 'SUBSTRING': tokens.Keyword, 419 | 'SUM': tokens.Keyword, 420 | 'SYMMETRIC': tokens.Keyword, 421 | 'SYSID': tokens.Keyword, 422 | 'SYSTEM': tokens.Keyword, 423 | 'SYSTEM_USER': tokens.Keyword, 424 | 425 | 'TABLE': tokens.Keyword, 426 | 'TABLE_NAME': tokens.Keyword, 427 | ' TEMP': tokens.Keyword, 428 | 'TEMPLATE': tokens.Keyword, 429 | 'TEMPORARY': tokens.Keyword, 430 | 'TERMINATE': tokens.Keyword, 431 | 'THAN': tokens.Keyword, 432 | 'TIMESTAMP': tokens.Keyword, 433 | 'TIMEZONE_HOUR': tokens.Keyword, 434 | 'TIMEZONE_MINUTE': tokens.Keyword, 435 | 'TO': tokens.Keyword, 436 | 'TOAST': tokens.Keyword, 437 | 'TRAILING': tokens.Keyword, 438 | 'TRANSATION': tokens.Keyword, 439 | 'TRANSACTIONS_COMMITTED': tokens.Keyword, 440 | 
'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, 441 | 'TRANSATION_ACTIVE': tokens.Keyword, 442 | 'TRANSFORM': tokens.Keyword, 443 | 'TRANSFORMS': tokens.Keyword, 444 | 'TRANSLATE': tokens.Keyword, 445 | 'TRANSLATION': tokens.Keyword, 446 | 'TREAT': tokens.Keyword, 447 | 'TRIGGER': tokens.Keyword, 448 | 'TRIGGER_CATALOG': tokens.Keyword, 449 | 'TRIGGER_NAME': tokens.Keyword, 450 | 'TRIGGER_SCHEMA': tokens.Keyword, 451 | 'TRIM': tokens.Keyword, 452 | 'TRUE': tokens.Keyword, 453 | 'TRUNCATE': tokens.Keyword, 454 | 'TRUSTED': tokens.Keyword, 455 | 'TYPE': tokens.Keyword, 456 | 457 | 'UNCOMMITTED': tokens.Keyword, 458 | 'UNDER': tokens.Keyword, 459 | 'UNENCRYPTED': tokens.Keyword, 460 | 'UNION': tokens.Keyword, 461 | 'UNIQUE': tokens.Keyword, 462 | 'UNKNOWN': tokens.Keyword, 463 | 'UNLISTEN': tokens.Keyword, 464 | 'UNNAMED': tokens.Keyword, 465 | 'UNNEST': tokens.Keyword, 466 | 'UNTIL': tokens.Keyword, 467 | 'UPPER': tokens.Keyword, 468 | 'USAGE': tokens.Keyword, 469 | 'USER': tokens.Keyword, 470 | 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword, 471 | 'USER_DEFINED_TYPE_NAME': tokens.Keyword, 472 | 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword, 473 | 'USING': tokens.Keyword, 474 | 475 | 'VACUUM': tokens.Keyword, 476 | 'VALID': tokens.Keyword, 477 | 'VALIDATOR': tokens.Keyword, 478 | 'VALUES': tokens.Keyword, 479 | 'VARIABLE': tokens.Keyword, 480 | 'VERBOSE': tokens.Keyword, 481 | 'VERSION': tokens.Keyword, 482 | 'VIEW': tokens.Keyword, 483 | 'VOLATILE': tokens.Keyword, 484 | 485 | 'WHENEVER': tokens.Keyword, 486 | 'WITH': tokens.Keyword, 487 | 'WITHOUT': tokens.Keyword, 488 | 'WORK': tokens.Keyword, 489 | 'WRITE': tokens.Keyword, 490 | 491 | 'YEAR': tokens.Keyword, 492 | 493 | 'ZONE': tokens.Keyword, 494 | 495 | 496 | 'ARRAY': tokens.Name.Builtin, 497 | 'BIGINT': tokens.Name.Builtin, 498 | 'BINARY': tokens.Name.Builtin, 499 | 'BIT': tokens.Name.Builtin, 500 | 'BLOB': tokens.Name.Builtin, 501 | 'BOOLEAN': tokens.Name.Builtin, 502 | 'CHAR': tokens.Name.Builtin, 503 | 'CHARACTER': tokens.Name.Builtin, 504 | 'DATE': tokens.Name.Builtin, 505 | 'DEC': tokens.Name.Builtin, 506 | 'DECIMAL': tokens.Name.Builtin, 507 | 'FLOAT': tokens.Name.Builtin, 508 | 'INT': tokens.Name.Builtin, 509 | 'INTEGER': tokens.Name.Builtin, 510 | 'INTERVAL': tokens.Name.Builtin, 511 | 'LONG': tokens.Name.Builtin, 512 | 'NUMBER': tokens.Name.Builtin, 513 | 'NUMERIC': tokens.Name.Builtin, 514 | 'REAL': tokens.Name.Builtin, 515 | 'SERIAL': tokens.Name.Builtin, 516 | 'SMALLINT': tokens.Name.Builtin, 517 | 'VARCHAR': tokens.Name.Builtin, 518 | 'VARCHAR2': tokens.Name.Builtin, 519 | 'VARYING': tokens.Name.Builtin, 520 | 'INT8': tokens.Name.Builtin, 521 | 'SERIAL8': tokens.Name.Builtin, 522 | 'TEXT': tokens.Name.Builtin, 523 | } 524 | 525 | 526 | KEYWORDS_COMMON = { 527 | 'SELECT': tokens.Keyword.DML, 528 | 'INSERT': tokens.Keyword.DML, 529 | 'DELETE': tokens.Keyword.DML, 530 | 'UPDATE': tokens.Keyword.DML, 531 | 'REPLACE': tokens.Keyword.DML, 532 | 'DROP': tokens.Keyword.DDL, 533 | 'CREATE': tokens.Keyword.DDL, 534 | 'ALTER': tokens.Keyword.DDL, 535 | 536 | 'WHERE': tokens.Keyword, 537 | 'FROM': tokens.Keyword, 538 | 'INNER': tokens.Keyword, 539 | 'JOIN': tokens.Keyword, 540 | 'AND': tokens.Keyword, 541 | 'OR': tokens.Keyword, 542 | 'LIKE': tokens.Keyword, 543 | 'ON': tokens.Keyword, 544 | 'IN': tokens.Keyword, 545 | 'SET': tokens.Keyword, 546 | 547 | 'BY': tokens.Keyword, 548 | 'GROUP': tokens.Keyword, 549 | 'ORDER': tokens.Keyword, 550 | 'LEFT': tokens.Keyword, 551 | 'OUTER': tokens.Keyword, 552 | 553 | 'IF': tokens.Keyword, 554 | 'END': 
tokens.Keyword, 555 | 'THEN': tokens.Keyword, 556 | 'LOOP': tokens.Keyword, 557 | 'AS': tokens.Keyword, 558 | 'ELSE': tokens.Keyword, 559 | 'FOR': tokens.Keyword, 560 | 561 | 'CASE': tokens.Keyword, 562 | 'WHEN': tokens.Keyword, 563 | 'MIN': tokens.Keyword, 564 | 'MAX': tokens.Keyword, 565 | 'DISTINCT': tokens.Keyword, 566 | } 567 | -------------------------------------------------------------------------------- /sqlparse/lexer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 4 | # 5 | # This module is part of python-sqlparse and is released under 6 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 7 | 8 | """SQL Lexer""" 9 | 10 | # This code is based on the SqlLexer in pygments. 11 | # http://pygments.org/ 12 | # It's separated from the rest of pygments to increase performance 13 | # and to allow some customizations. 14 | from __future__ import absolute_import 15 | from __future__ import unicode_literals 16 | import re 17 | 18 | from . import tokens 19 | from .keywords import KEYWORDS, KEYWORDS_COMMON 20 | 21 | 22 | class include(str): 23 | pass 24 | 25 | 26 | class combined(tuple): 27 | """Indicates a state combined from multiple states.""" 28 | 29 | def __new__(cls, *args): 30 | return tuple.__new__(cls, args) 31 | 32 | def __init__(self, *args): 33 | # tuple.__init__ doesn't do anything 34 | pass 35 | 36 | 37 | def is_keyword(value): 38 | test = value.upper() 39 | return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value 40 | 41 | 42 | def apply_filters(stream, filters, lexer=None): 43 | """ 44 | Use this method to apply an iterable of filters to 45 | a stream. If lexer is given it's forwarded to the 46 | filter, otherwise the filter receives `None`. 47 | """ 48 | 49 | def _apply(filter_, stream): 50 | for token in filter_.filter(lexer, stream): 51 | yield token 52 | 53 | for filter_ in filters: 54 | stream = _apply(filter_, stream) 55 | return stream 56 | 57 | 58 | class LexerMeta(type): 59 | """ 60 | Metaclass for Lexer, creates the self._tokens attribute from 61 | self.tokens on the first instantiation. 
62 | """ 63 | 64 | def _process_state(cls, unprocessed, processed, state): 65 | assert type(state) is str, "wrong state name %r" % state 66 | assert state[0] != '#', "invalid state name %r" % state 67 | if state in processed: 68 | return processed[state] 69 | tokenlist = processed[state] = [] 70 | rflags = cls.flags 71 | for tdef in unprocessed[state]: 72 | if isinstance(tdef, include): 73 | # it's a state reference 74 | assert tdef != state, "circular state reference %r" % state 75 | tokenlist.extend(cls._process_state( 76 | unprocessed, processed, str(tdef))) 77 | continue 78 | 79 | assert type(tdef) is tuple, "wrong rule def %r" % tdef 80 | 81 | try: 82 | rex = re.compile(tdef[0], rflags).match 83 | except Exception as err: 84 | raise ValueError(("uncompilable regex %r in state" 85 | " %r of %r: %s" 86 | % (tdef[0], state, cls, err))) 87 | 88 | assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \ 89 | ('token type must be simple type or callable, not %r' 90 | % (tdef[1],)) 91 | 92 | if len(tdef) == 2: 93 | new_state = None 94 | else: 95 | tdef2 = tdef[2] 96 | if isinstance(tdef2, str): 97 | # an existing state 98 | if tdef2 == '#pop': 99 | new_state = -1 100 | elif tdef2 in unprocessed: 101 | new_state = (tdef2,) 102 | elif tdef2 == '#push': 103 | new_state = tdef2 104 | elif tdef2[:5] == '#pop:': 105 | new_state = -int(tdef2[5:]) 106 | else: 107 | assert False, 'unknown new state %r' % tdef2 108 | elif isinstance(tdef2, combined): 109 | # combine a new state from existing ones 110 | new_state = '_tmp_%d' % cls._tmpname 111 | cls._tmpname += 1 112 | itokens = [] 113 | for istate in tdef2: 114 | assert istate != state, \ 115 | 'circular state ref %r' % istate 116 | itokens.extend(cls._process_state(unprocessed, 117 | processed, istate)) 118 | processed[new_state] = itokens 119 | new_state = (new_state,) 120 | elif isinstance(tdef2, tuple): 121 | # push more than one state 122 | for state in tdef2: 123 | assert (state in unprocessed or 124 | state in ('#pop', '#push')), \ 125 | 'unknown new state ' + state 126 | new_state = tdef2 127 | else: 128 | assert False, 'unknown new state def %r' % tdef2 129 | tokenlist.append((rex, tdef[1], new_state)) 130 | return tokenlist 131 | 132 | def process_tokendef(cls): 133 | cls._all_tokens = {} 134 | cls._tmpname = 0 135 | processed = cls._all_tokens[cls.__name__] = {} 136 | #tokendefs = tokendefs or cls.tokens[name] 137 | for state in cls.tokens.keys(): 138 | cls._process_state(cls.tokens, processed, state) 139 | return processed 140 | 141 | def __call__(cls, *args, **kwds): 142 | if not hasattr(cls, '_tokens'): 143 | cls._all_tokens = {} 144 | cls._tmpname = 0 145 | if hasattr(cls, 'token_variants') and cls.token_variants: 146 | # don't process yet 147 | pass 148 | else: 149 | cls._tokens = cls.process_tokendef() 150 | 151 | return type.__call__(cls, *args, **kwds) 152 | 153 | 154 | class Lexer(object, metaclass=LexerMeta): 155 | 156 | encoding = 'utf-8' 157 | stripall = False 158 | stripnl = False 159 | tabsize = 0 160 | flags = re.IGNORECASE 161 | 162 | tokens = { 163 | 'root': [ 164 | (r'--.*?(\r\n|\r|\n)', tokens.Comment.Single), 165 | # $ matches *before* newline, therefore we have two patterns 166 | # to match Comment.Single 167 | (r'--.*?$', tokens.Comment.Single), 168 | (r'(\r|\n|\r\n)', tokens.Newline), 169 | (r'\s+', tokens.Whitespace), 170 | (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), 171 | (r':=', tokens.Assignment), 172 | (r'::', tokens.Punctuation), 173 | (r'[*]', tokens.Wildcard), 174 | (r'CASE\b', 
tokens.Keyword), # extended CASE(foo) 175 | (r"`(``|[^`])*`", tokens.Name), 176 | (r"´(´´|[^´])*´", tokens.Name), 177 | (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', tokens.Name.Builtin), 178 | (r'\?{1}', tokens.Name.Placeholder), 179 | (r'[$:?%][a-zA-Z0-9_]+[^$:?%]?', tokens.Name.Placeholder), 180 | (r'@[a-zA-Z_][a-zA-Z0-9_]+', tokens.Name), 181 | (r'[a-zA-Z_][a-zA-Z0-9_]*(?=[.(])', tokens.Name), # see issue39 182 | (r'[<>=~!]+', tokens.Operator.Comparison), 183 | (r'[+/@#%^&|`?^-]+', tokens.Operator), 184 | (r'0x[0-9a-fA-F]+', tokens.Number.Hexadecimal), 185 | (r'[0-9]*\.[0-9]+', tokens.Number.Float), 186 | (r'[0-9]+', tokens.Number.Integer), 187 | # TODO: Backslash escapes? 188 | (r"(''|'.*?[^\\]')", tokens.String.Single), 189 | # not a real string literal in ANSI SQL: 190 | (r'(""|".*?[^\\]")', tokens.String.Symbol), 191 | (r'(\[.*[^\]]\])', tokens.Name), 192 | (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN\b', tokens.Keyword), 193 | (r'END( IF| LOOP)?\b', tokens.Keyword), 194 | (r'NOT NULL\b', tokens.Keyword), 195 | (r'CREATE( OR REPLACE)?\b', tokens.Keyword.DDL), 196 | (r'(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*', tokens.Name), 197 | (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword), 198 | (r'[;:()\[\],\.]', tokens.Punctuation), 199 | ], 200 | 'multiline-comments': [ 201 | (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), 202 | (r'\*/', tokens.Comment.Multiline, '#pop'), 203 | (r'[^/\*]+', tokens.Comment.Multiline), 204 | (r'[/*]', tokens.Comment.Multiline) 205 | ]} 206 | 207 | def __init__(self): 208 | self.filters = [] 209 | 210 | def add_filter(self, filter_, **options): 211 | from .filters import Filter 212 | if not isinstance(filter_, Filter): 213 | filter_ = filter_(**options) 214 | self.filters.append(filter_) 215 | 216 | def get_tokens(self, text, unfiltered=False): 217 | """ 218 | Return an iterable of (tokentype, value) pairs generated from 219 | `text`. If `unfiltered` is set to `True`, the filtering mechanism 220 | is bypassed even if filters are defined. 221 | 222 | Also preprocess the text, i.e. expand tabs and strip it if 223 | wanted and applies registered filters. 224 | """ 225 | if not isinstance(text, str): 226 | if self.encoding == 'guess': 227 | try: 228 | text = text.decode('utf-8') 229 | if text.startswith(u'\ufeff'): 230 | text = text[len(u'\ufeff'):] 231 | except UnicodeDecodeError: 232 | text = text.decode('latin1') 233 | else: 234 | text = text.decode(self.encoding) 235 | if self.stripall: 236 | text = text.strip() 237 | elif self.stripnl: 238 | text = text.strip('\n') 239 | if self.tabsize > 0: 240 | text = text.expandtabs(self.tabsize) 241 | # if not text.endswith('\n'): 242 | # text += '\n' 243 | 244 | def streamer(): 245 | for i, t, v in self.get_tokens_unprocessed(text): 246 | yield t, v 247 | stream = streamer() 248 | if not unfiltered: 249 | stream = apply_filters(stream, self.filters, self) 250 | return stream 251 | 252 | def get_tokens_unprocessed(self, text, stack=('root',)): 253 | """ 254 | Split ``text`` into (tokentype, text) pairs. 
255 | 256 | ``stack`` is the inital stack (default: ``['root']``) 257 | """ 258 | pos = 0 259 | tokendefs = self._tokens # see __call__, pylint:disable=E1101 260 | statestack = list(stack) 261 | statetokens = tokendefs[statestack[-1]] 262 | known_names = {} 263 | while 1: 264 | for rexmatch, action, new_state in statetokens: 265 | m = rexmatch(text, pos) 266 | if m: 267 | # print rex.pattern 268 | value = m.group() 269 | if value in known_names: 270 | yield pos, known_names[value], value 271 | elif type(action) is tokens._TokenType: 272 | yield pos, action, value 273 | elif hasattr(action, '__call__'): 274 | ttype, value = action(value) 275 | known_names[value] = ttype 276 | yield pos, ttype, value 277 | else: 278 | for item in action(self, m): 279 | yield item 280 | pos = m.end() 281 | if new_state is not None: 282 | # state transition 283 | if isinstance(new_state, tuple): 284 | for state in new_state: 285 | if state == '#pop': 286 | statestack.pop() 287 | elif state == '#push': 288 | statestack.append(statestack[-1]) 289 | else: 290 | statestack.append(state) 291 | elif isinstance(new_state, int): 292 | # pop 293 | del statestack[new_state:] 294 | elif new_state == '#push': 295 | statestack.append(statestack[-1]) 296 | else: 297 | assert False, "wrong state def: %r" % new_state 298 | statetokens = tokendefs[statestack[-1]] 299 | break 300 | else: 301 | try: 302 | if text[pos] == '\n': 303 | # at EOL, reset state to "root" 304 | pos += 1 305 | statestack = ['root'] 306 | statetokens = tokendefs['root'] 307 | yield pos, tokens.Text, u'\n' 308 | continue 309 | yield pos, tokens.Error, text[pos] 310 | pos += 1 311 | except IndexError: 312 | break 313 | 314 | 315 | def tokenize(sql): 316 | """Tokenize sql. 317 | 318 | Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream 319 | of ``(token type, value)`` items. 320 | """ 321 | lexer = Lexer() 322 | return lexer.get_tokens(sql) 323 | -------------------------------------------------------------------------------- /sqlparse/pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
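# Editor's note, not part of the upstream file: a minimal usage sketch for the
# Pipeline class defined below.  It assumes the sibling lexer module and a
# KeywordCaseFilter in filters.py are importable as shown; both names come from
# the bundled library, not from this file.
#
#     from sqlparse import lexer
#     from sqlparse.filters import KeywordCaseFilter
#     from sqlparse.pipeline import Pipeline
#
#     pipe = Pipeline()
#     pipe.append(lexer.tokenize)               # plain callables are invoked directly
#     pipe.append(KeywordCaseFilter('upper'))   # filters are run via their process() method
#     tokens = pipe('select 1')                 # -> list of (token type, value) pairs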
5 | from __future__ import absolute_import 6 | from types import GeneratorType 7 | 8 | 9 | class Pipeline(list): 10 | """Pipeline to process filters sequentially""" 11 | 12 | def __call__(self, stream): 13 | """Run the pipeline 14 | 15 | Return a static (non generator) version of the result 16 | """ 17 | 18 | # Run the stream over all the filters on the pipeline 19 | for filter in self: 20 | # Functions and callable objects (objects with '__call__' method) 21 | if callable(filter): 22 | stream = filter(stream) 23 | 24 | # Normal filters (objects with 'process' method) 25 | else: 26 | stream = filter.process(None, stream) 27 | 28 | # If last filter return a generator, staticalize it inside a list 29 | if isinstance(stream, GeneratorType): 30 | return list(stream) 31 | return stream 32 | -------------------------------------------------------------------------------- /sqlparse/sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """This module contains classes representing syntactical elements of SQL.""" 4 | from __future__ import absolute_import 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | import re 8 | import sys 9 | 10 | from . import tokens as T 11 | 12 | 13 | class Token(object): 14 | """Base class for all other classes in this module. 15 | 16 | It represents a single token and has two instance attributes: 17 | ``value`` is the unchange value of the token and ``ttype`` is 18 | the type of the token. 19 | """ 20 | 21 | __slots__ = ('value', 'ttype', 'parent') 22 | 23 | def __init__(self, ttype, value): 24 | self.value = value 25 | self.ttype = ttype 26 | self.parent = None 27 | 28 | def __str__(self): 29 | if sys.version_info > (3, 0): 30 | return self.__unicode__() 31 | else: 32 | return unicode(self).encode('utf-8') 33 | 34 | def __repr__(self): 35 | short = self._get_repr_value() 36 | return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), 37 | short, id(self)) 38 | 39 | def __unicode__(self): 40 | return self.value or '' 41 | 42 | def to_unicode(self): 43 | """Returns a unicode representation of this object.""" 44 | return str(self) 45 | 46 | def _get_repr_name(self): 47 | return str(self.ttype).split('.')[-1] 48 | 49 | def _get_repr_value(self): 50 | raw = str(self) 51 | if len(raw) > 7: 52 | short = raw[:6] + u'...' 53 | else: 54 | short = raw 55 | return re.sub('\s+', ' ', short) 56 | 57 | def flatten(self): 58 | """Resolve subgroups.""" 59 | yield self 60 | 61 | def match(self, ttype, values, regex=False): 62 | """Checks whether the token matches the given arguments. 63 | 64 | *ttype* is a token type. If this token doesn't match the given token 65 | type. 66 | *values* is a list of possible values for this token. The values 67 | are OR'ed together so if only one of the values matches ``True`` 68 | is returned. Except for keyword tokens the comparison is 69 | case-sensitive. For convenience it's ok to pass in a single string. 70 | If *regex* is ``True`` (default is ``False``) the given values are 71 | treated as regular expressions. 
72 | """ 73 | type_matched = self.ttype is ttype 74 | if not type_matched or values is None: 75 | return type_matched 76 | if isinstance(values, str): 77 | values = set([values]) 78 | if regex: 79 | if self.ttype is T.Keyword: 80 | values = set([re.compile(v, re.IGNORECASE) for v in values]) 81 | else: 82 | values = set([re.compile(v) for v in values]) 83 | for pattern in values: 84 | if pattern.search(self.value): 85 | return True 86 | return False 87 | else: 88 | if self.ttype in T.Keyword: 89 | values = set([v.upper() for v in values]) 90 | return self.value.upper() in values 91 | else: 92 | return self.value in values 93 | 94 | def is_group(self): 95 | """Returns ``True`` if this object has children.""" 96 | return False 97 | 98 | def is_whitespace(self): 99 | """Return ``True`` if this token is a whitespace token.""" 100 | return self.ttype and self.ttype in T.Whitespace 101 | 102 | def within(self, group_cls): 103 | """Returns ``True`` if this token is within *group_cls*. 104 | 105 | Use this method for example to check if an identifier is within 106 | a function: ``t.within(sql.Function)``. 107 | """ 108 | parent = self.parent 109 | while parent: 110 | if isinstance(parent, group_cls): 111 | return True 112 | parent = parent.parent 113 | return False 114 | 115 | def is_child_of(self, other): 116 | """Returns ``True`` if this token is a direct child of *other*.""" 117 | return self.parent == other 118 | 119 | def has_ancestor(self, other): 120 | """Returns ``True`` if *other* is in this tokens ancestry.""" 121 | parent = self.parent 122 | while parent: 123 | if parent == other: 124 | return True 125 | parent = parent.parent 126 | return False 127 | 128 | 129 | class TokenList(Token): 130 | """A group of tokens. 131 | 132 | It has an additional instance attribute ``tokens`` which holds a 133 | list of child-tokens. 134 | """ 135 | 136 | __slots__ = ('value', 'ttype', 'tokens') 137 | 138 | def __init__(self, tokens=None): 139 | if tokens is None: 140 | tokens = [] 141 | self.tokens = tokens 142 | Token.__init__(self, None, None) 143 | 144 | def __unicode__(self): 145 | return ''.join(str(x) for x in self.flatten()) 146 | 147 | def __str__(self): 148 | if sys.version_info > (3, 0): 149 | return self.__unicode__() 150 | else: 151 | return unicode(self).encode('utf-8') 152 | 153 | def _get_repr_name(self): 154 | return self.__class__.__name__ 155 | 156 | def _pprint_tree(self, max_depth=None, depth=0): 157 | """Pretty-print the object tree.""" 158 | indent = ' ' * (depth * 2) 159 | for idx, token in enumerate(self.tokens): 160 | if token.is_group(): 161 | pre = ' +-' 162 | else: 163 | pre = ' | ' 164 | print("%s%s%d %s '%s'" % ( 165 | indent, 166 | pre, 167 | idx, 168 | token._get_repr_name(), 169 | token._get_repr_value() 170 | )) 171 | if (token.is_group() and (max_depth is None or depth < max_depth)): 172 | token._pprint_tree(max_depth, depth + 1) 173 | 174 | def flatten(self): 175 | """Generator yielding ungrouped tokens. 176 | 177 | This method is recursively called for all child tokens. 178 | """ 179 | for token in self.tokens: 180 | if isinstance(token, TokenList): 181 | for item in token.flatten(): 182 | yield item 183 | else: 184 | yield token 185 | 186 | def is_group(self): 187 | return True 188 | 189 | def get_sublists(self): 190 | return [x for x in self.tokens if isinstance(x, TokenList)] 191 | 192 | @property 193 | def _groupable_tokens(self): 194 | return self.tokens 195 | 196 | def token_first(self, ignore_whitespace=True): 197 | """Returns the first child token. 
198 | 199 | If *ignore_whitespace* is ``True`` (the default), whitespace 200 | tokens are ignored. 201 | """ 202 | for token in self.tokens: 203 | if ignore_whitespace and token.is_whitespace(): 204 | continue 205 | return token 206 | return None 207 | 208 | def token_next_by_instance(self, idx, clss): 209 | """Returns the next token matching a class. 210 | 211 | *idx* is where to start searching in the list of child tokens. 212 | *clss* is a list of classes the token should be an instance of. 213 | 214 | If no matching token can be found ``None`` is returned. 215 | """ 216 | if isinstance(clss, (list, tuple)): 217 | clss = (clss,) 218 | if isinstance(clss, tuple): 219 | clss = tuple(clss) 220 | for token in self.tokens[idx:]: 221 | if isinstance(token, clss): 222 | return token 223 | return None 224 | 225 | def token_next_by_type(self, idx, ttypes): 226 | """Returns next matching token by it's token type.""" 227 | if not isinstance(ttypes, (list, tuple)): 228 | ttypes = [ttypes] 229 | for token in self.tokens[idx:]: 230 | if token.ttype in ttypes: 231 | return token 232 | return None 233 | 234 | def token_next_match(self, idx, ttype, value, regex=False): 235 | """Returns next token where it's ``match`` method returns ``True``.""" 236 | if not isinstance(idx, int): 237 | idx = self.token_index(idx) 238 | for token in self.tokens[idx:]: 239 | if token.match(ttype, value, regex): 240 | return token 241 | return None 242 | 243 | def token_not_matching(self, idx, funcs): 244 | for token in self.tokens[idx:]: 245 | passed = False 246 | for func in funcs: 247 | if func(token): 248 | passed = True 249 | break 250 | if not passed: 251 | return token 252 | return None 253 | 254 | def token_matching(self, idx, funcs): 255 | for token in self.tokens[idx:]: 256 | for i, func in enumerate(funcs): 257 | if func(token): 258 | return token 259 | return None 260 | 261 | def token_prev(self, idx, skip_ws=True): 262 | """Returns the previous token relative to *idx*. 263 | 264 | If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. 265 | ``None`` is returned if there's no previous token. 266 | """ 267 | if idx is None: 268 | return None 269 | if not isinstance(idx, int): 270 | idx = self.token_index(idx) 271 | while idx != 0: 272 | idx -= 1 273 | if self.tokens[idx].is_whitespace() and skip_ws: 274 | continue 275 | return self.tokens[idx] 276 | 277 | def token_next(self, idx, skip_ws=True): 278 | """Returns the next token relative to *idx*. 279 | 280 | If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. 281 | ``None`` is returned if there's no next token. 282 | """ 283 | if idx is None: 284 | return None 285 | if not isinstance(idx, int): 286 | idx = self.token_index(idx) 287 | while idx < len(self.tokens) - 1: 288 | idx += 1 289 | if self.tokens[idx].is_whitespace() and skip_ws: 290 | continue 291 | return self.tokens[idx] 292 | 293 | def token_index(self, token): 294 | """Return list index of token.""" 295 | return self.tokens.index(token) 296 | 297 | def tokens_between(self, start, end, exclude_end=False): 298 | """Return all tokens between (and including) start and end. 299 | 300 | If *exclude_end* is ``True`` (default is ``False``) the end token 301 | is included too. 
302 | """ 303 | # FIXME(andi): rename exclude_end to inlcude_end 304 | if exclude_end: 305 | offset = 0 306 | else: 307 | offset = 1 308 | end_idx = self.token_index(end) + offset 309 | start_idx = self.token_index(start) 310 | return self.tokens[start_idx:end_idx] 311 | 312 | def group_tokens(self, grp_cls, tokens, ignore_ws=False): 313 | """Replace tokens by an instance of *grp_cls*.""" 314 | idx = self.token_index(tokens[0]) 315 | if ignore_ws: 316 | while tokens and tokens[-1].is_whitespace(): 317 | tokens = tokens[:-1] 318 | for t in tokens: 319 | self.tokens.remove(t) 320 | grp = grp_cls(tokens) 321 | for token in tokens: 322 | token.parent = grp 323 | grp.parent = self 324 | self.tokens.insert(idx, grp) 325 | return grp 326 | 327 | def insert_before(self, where, token): 328 | """Inserts *token* before *where*.""" 329 | self.tokens.insert(self.token_index(where), token) 330 | 331 | def has_alias(self): 332 | """Returns ``True`` if an alias is present.""" 333 | return self.get_alias() is not None 334 | 335 | def get_alias(self): 336 | """Returns the alias for this identifier or ``None``.""" 337 | kw = self.token_next_match(0, T.Keyword, 'AS') 338 | if kw is not None: 339 | alias = self.token_next(self.token_index(kw)) 340 | if alias is None: 341 | return None 342 | else: 343 | next_ = self.token_next_by_instance(0, Identifier) 344 | if next_ is None: 345 | return None 346 | alias = next_ 347 | if isinstance(alias, Identifier): 348 | return alias.get_name() 349 | else: 350 | return alias.to_unicode() 351 | 352 | def get_name(self): 353 | """Returns the name of this identifier. 354 | 355 | This is either it's alias or it's real name. The returned valued can 356 | be considered as the name under which the object corresponding to 357 | this identifier is known within the current statement. 358 | """ 359 | alias = self.get_alias() 360 | if alias is not None: 361 | return alias 362 | return self.get_real_name() 363 | 364 | def get_real_name(self): 365 | """Returns the real name (object name) of this identifier.""" 366 | # a.b 367 | dot = self.token_next_match(0, T.Punctuation, '.') 368 | if dot is None: 369 | return self.token_next_by_type(0, T.Name).value 370 | else: 371 | next_ = self.token_next_by_type(self.token_index(dot), 372 | (T.Name, T.Wildcard)) 373 | if next_ is None: # invalid identifier, e.g. "a." 374 | return None 375 | return next_.value 376 | 377 | 378 | 379 | class Statement(TokenList): 380 | """Represents a SQL statement.""" 381 | 382 | __slots__ = ('value', 'ttype', 'tokens') 383 | 384 | def get_type(self): 385 | """Returns the type of a statement. 386 | 387 | The returned value is a string holding an upper-cased reprint of 388 | the first DML or DDL keyword. If the first token in this group 389 | isn't a DML or DDL keyword "UNKNOWN" is returned. 390 | """ 391 | first_token = self.token_first() 392 | if first_token is None: 393 | # An "empty" statement that either has not tokens at all 394 | # or only whitespace tokens. 395 | return 'UNKNOWN' 396 | elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): 397 | return first_token.value.upper() 398 | else: 399 | return 'UNKNOWN' 400 | 401 | 402 | class Identifier(TokenList): 403 | """Represents an identifier. 404 | 405 | Identifiers may have aliases or typecasts. 406 | """ 407 | 408 | __slots__ = ('value', 'ttype', 'tokens') 409 | 410 | def get_parent_name(self): 411 | """Return name of the parent object if any. 412 | 413 | A parent object is identified by the first occuring dot. 
414 | """ 415 | dot = self.token_next_match(0, T.Punctuation, '.') 416 | if dot is None: 417 | return None 418 | prev_ = self.token_prev(self.token_index(dot)) 419 | if prev_ is None: # something must be verry wrong here.. 420 | return None 421 | return prev_.value 422 | 423 | def is_wildcard(self): 424 | """Return ``True`` if this identifier contains a wildcard.""" 425 | token = self.token_next_by_type(0, T.Wildcard) 426 | return token is not None 427 | 428 | def get_typecast(self): 429 | """Returns the typecast or ``None`` of this object as a string.""" 430 | marker = self.token_next_match(0, T.Punctuation, '::') 431 | if marker is None: 432 | return None 433 | next_ = self.token_next(self.token_index(marker), False) 434 | if next_ is None: 435 | return None 436 | return next_.to_unicode() 437 | 438 | 439 | class IdentifierList(TokenList): 440 | """A list of :class:`~sqlparse.sql.Identifier`\'s.""" 441 | 442 | __slots__ = ('value', 'ttype', 'tokens') 443 | 444 | def get_identifiers(self): 445 | """Returns the identifiers. 446 | 447 | Whitespaces and punctuations are not included in this list. 448 | """ 449 | return [x for x in self.tokens 450 | if not x.is_whitespace() and not x.match(T.Punctuation, ',')] 451 | 452 | 453 | class Parenthesis(TokenList): 454 | """Tokens between parenthesis.""" 455 | __slots__ = ('value', 'ttype', 'tokens') 456 | 457 | @property 458 | def _groupable_tokens(self): 459 | return self.tokens[1:-1] 460 | 461 | 462 | class Assignment(TokenList): 463 | """An assignment like 'var := val;'""" 464 | __slots__ = ('value', 'ttype', 'tokens') 465 | 466 | 467 | class If(TokenList): 468 | """An 'if' clause with possible 'else if' or 'else' parts.""" 469 | __slots__ = ('value', 'ttype', 'tokens') 470 | 471 | 472 | class For(TokenList): 473 | """A 'FOR' loop.""" 474 | __slots__ = ('value', 'ttype', 'tokens') 475 | 476 | 477 | class Comparison(TokenList): 478 | """A comparison used for example in WHERE clauses.""" 479 | __slots__ = ('value', 'ttype', 'tokens') 480 | 481 | 482 | class Comment(TokenList): 483 | """A comment.""" 484 | __slots__ = ('value', 'ttype', 'tokens') 485 | 486 | 487 | class Where(TokenList): 488 | """A WHERE clause.""" 489 | __slots__ = ('value', 'ttype', 'tokens') 490 | 491 | 492 | class Case(TokenList): 493 | """A CASE statement with one or more WHEN and possibly an ELSE part.""" 494 | 495 | __slots__ = ('value', 'ttype', 'tokens') 496 | 497 | def get_cases(self): 498 | """Returns a list of 2-tuples (condition, value). 499 | 500 | If an ELSE exists condition is None. 
501 | """ 502 | ret = [] 503 | in_value = False 504 | in_condition = True 505 | for token in self.tokens: 506 | if token.match(T.Keyword, 'CASE'): 507 | continue 508 | elif token.match(T.Keyword, 'WHEN'): 509 | ret.append(([], [])) 510 | in_condition = True 511 | in_value = False 512 | elif token.match(T.Keyword, 'ELSE'): 513 | ret.append((None, [])) 514 | in_condition = False 515 | in_value = True 516 | elif token.match(T.Keyword, 'THEN'): 517 | in_condition = False 518 | in_value = True 519 | elif token.match(T.Keyword, 'END'): 520 | in_condition = False 521 | in_value = False 522 | if (in_condition or in_value) and not ret: 523 | # First condition withou preceding WHEN 524 | ret.append(([], [])) 525 | if in_condition: 526 | ret[-1][0].append(token) 527 | elif in_value: 528 | ret[-1][1].append(token) 529 | return ret 530 | 531 | 532 | class Function(TokenList): 533 | """A function or procedure call.""" 534 | 535 | __slots__ = ('value', 'ttype', 'tokens') 536 | 537 | def get_parameters(self): 538 | """Return a list of parameters.""" 539 | parenthesis = self.tokens[-1] 540 | for t in parenthesis.tokens: 541 | if isinstance(t, IdentifierList): 542 | return t.get_identifiers() 543 | return [] 544 | -------------------------------------------------------------------------------- /sqlparse/tokens.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | # The Token implementation is based on pygment's token system written 7 | # by Georg Brandl. 8 | # http://pygments.org/ 9 | 10 | """Tokens""" 11 | from __future__ import absolute_import 12 | 13 | 14 | class _TokenType(tuple): 15 | parent = None 16 | 17 | def split(self): 18 | buf = [] 19 | node = self 20 | while node is not None: 21 | buf.append(node) 22 | node = node.parent 23 | buf.reverse() 24 | return buf 25 | 26 | def __contains__(self, val): 27 | return val is not None and (self is val or val[:len(self)] == self) 28 | 29 | def __getattr__(self, val): 30 | if not val or not val[0].isupper(): 31 | return tuple.__getattribute__(self, val) 32 | new = _TokenType(self + (val,)) 33 | setattr(self, val, new) 34 | new.parent = self 35 | return new 36 | 37 | def __hash__(self): 38 | return hash(tuple(self)) 39 | 40 | def __repr__(self): 41 | return 'Token' + (self and '.' or '') + '.'.join(self) 42 | 43 | 44 | Token = _TokenType() 45 | 46 | # Special token types 47 | Text = Token.Text 48 | Whitespace = Text.Whitespace 49 | Newline = Whitespace.Newline 50 | Error = Token.Error 51 | # Text that doesn't belong to this lexer (e.g. HTML in PHP) 52 | Other = Token.Other 53 | 54 | # Common token types for source code 55 | Keyword = Token.Keyword 56 | Name = Token.Name 57 | Literal = Token.Literal 58 | String = Literal.String 59 | Number = Literal.Number 60 | Punctuation = Token.Punctuation 61 | Operator = Token.Operator 62 | Comparison = Operator.Comparison 63 | Wildcard = Token.Wildcard 64 | Comment = Token.Comment 65 | Assignment = Token.Assignement 66 | 67 | # Generic types for non-source code 68 | Generic = Token.Generic 69 | 70 | # String and some others are not direct childs of Token. 
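# Editor's note, not part of the upstream file: attribute access on a token type
# creates (and caches) a child type, and ``x in y`` is true when ``y`` is ``x``
# or one of its ancestors (see __contains__ above).  This is what membership
# tests such as ``self.ttype in T.Keyword`` in sql.py rely on:
#
#     >>> from sqlparse.tokens import Keyword, Name
#     >>> Keyword.DML in Keyword
#     True
#     >>> Keyword in Keyword.DML
#     False
#     >>> Name.Builtin in Name
#     True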
71 | # alias them: 72 | Token.Token = Token 73 | Token.String = String 74 | Token.Number = Number 75 | 76 | # SQL specific tokens 77 | DML = Keyword.DML 78 | DDL = Keyword.DDL 79 | Command = Keyword.Command 80 | 81 | Group = Token.Group 82 | Group.Parenthesis = Token.Group.Parenthesis 83 | Group.Comment = Token.Group.Comment 84 | Group.Where = Token.Group.Where 85 | --------------------------------------------------------------------------------
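Editor's note: everything above is the plugin's bundled copy of the
python-sqlparse library, so its public entry points can also be used directly.
A rough, untested sketch of the parsing side, assuming the package imports
cleanly on the Python version Sublime Text ships:

    >>> import sqlparse
    >>> stmt = sqlparse.parse('select a, b from foo')[0]
    >>> stmt.get_type()
    'SELECT'
    >>> stmt.token_first().value
    'select'

The Statement objects returned by parse() are TokenList subclasses (see
sqlparse/sql.py above), so helpers such as token_first(), flatten() and
get_type() are available on them.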