├── .gitignore ├── Default.sublime-commands ├── FormatSQL.py ├── LICENSE.txt ├── Main.sublime-menu ├── README.md └── sqlparse ├── __init__.py ├── engine ├── __init__.py ├── filter.py └── grouping.py ├── filters.py ├── formatter.py ├── keywords.py ├── lexer.py ├── pipeline.py ├── sql.py └── tokens.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.cache 3 | *.sublime-project 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /Default.sublime-commands: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "caption": "FormatSQL: Format SQL statement", 4 | "command": "format_sql" 5 | } 6 | ] 7 | 8 | -------------------------------------------------------------------------------- /FormatSQL.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import sublime 3 | import sublime_plugin 4 | 5 | try: 6 | from .sqlparse import format 7 | except ValueError: 8 | from sqlparse import format 9 | 10 | 11 | class FormatSqlCommand(sublime_plugin.TextCommand): 12 | def run(self, edit): 13 | view = self.view 14 | regions = view.sel() 15 | # if there is more than one region, or the single region is not empty, format the selection(s) 16 | if len(regions) > 1 or not regions[0].empty(): 17 | for region in view.sel(): 18 | if not region.empty(): 19 | s = view.substr(region) 20 | s = self._run(s) 21 | view.replace(edit, region, s) 22 | else: # otherwise format the whole buffer 23 | alltextreg = sublime.Region(0, view.size()) 24 | s = view.substr(alltextreg) 25 | s = self._run(s) 26 | view.replace(edit, alltextreg, s) 27 | 28 | def _run(self, s): 29 | settings = self.view.settings() 30 | #indent_char = " " if settings.get("translate_tabs_to_spaces") else "\t" 31 | indent_char = " " # TODO: indent with tabs (currently not supported by python-sqlparse) 32 | indent_size = int(settings.get("tab_size")) if indent_char == " " else 1 33 | s = s.encode("utf-8") 34 | return format( 35 | s, keyword_case="upper", reindent=True, indent_width=indent_size 36 | ) 37 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | python-sqlparse and this code are released under the 2-clause BSD license: http://www.opensource.org/licenses/bsd-license.php -------------------------------------------------------------------------------- /Main.sublime-menu: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "selection", 4 | "caption": "Selection", 5 | "children": 6 | [ 7 | { 8 | "id": "format", 9 | "caption": "Format", 10 | "children": 11 | [ 12 | { 13 | "caption": "Format SQL Statement", 14 | "command": "format_sql" 15 | } 16 | ] 17 | } 18 | ] 19 | } 20 | ] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | FormatSQL formats long SQL statements into a more readable form using the [python-sqlparse library](http://code.google.com/p/python-sqlparse/).
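Under the hood the plugin simply passes the selected text to `sqlparse.format` (see `FormatSQL.py` above). A minimal sketch of the equivalent call, for illustration only; it assumes the bundled `sqlparse` package is importable and uses a fixed indent width of 4 in place of the editor's tab size:

    import sqlparse

    raw = "select a,b from foo join bar on val1 = val2 where id = 123 and cd = 99;"
    # Uppercase keywords and reindent, as FormatSQL.py does for each selection.
    print(sqlparse.format(raw, keyword_case="upper", reindent=True, indent_width=4))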
4 | 5 | 6 | ## How to Use 7 | 8 | Select the SQL and choose the menu item Selection -> Format -> Format SQL Statement 9 | 10 | 11 | ### Configure key binding 12 | 13 | Add the following line to your keymap settings: 14 | 15 | { "keys": ["super+k", "super+s"], "command": "format_sql" }, 16 | 17 | 18 | ## Example 19 | 20 | Original: 21 | 22 | select a,b from foo join bar on val1 = val2 where id = 123 and cd = 99; 23 | 24 | Formatted: 25 | 26 | SELECT a, 27 | b 28 | FROM foo 29 | JOIN bar ON val1 = val2 30 | WHERE id = 123 31 | AND cd = 99; 32 | 33 | ## License 34 | 35 | The [python-sqlparse library](http://code.google.com/p/python-sqlparse/) and this code are both released under the [2-clause BSD license](http://www.opensource.org/licenses/bsd-license.php) -------------------------------------------------------------------------------- /sqlparse/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | """Parse SQL statements.""" 7 | from __future__ import absolute_import 8 | 9 | __version__ = '0.1.3' 10 | 11 | 12 | class SQLParseError(Exception): 13 | """Base class for exceptions in this module.""" 14 | 15 | 16 | # Setup namespace 17 | from . import engine 18 | from . import filters 19 | from . import formatter 20 | 21 | 22 | def parse(sql): 23 | """Parse sql and return a list of statements. 24 | 25 | *sql* is a single string containing one or more SQL statements. 26 | 27 | Returns a tuple of :class:`~sqlparse.sql.Statement` instances. 28 | """ 29 | stack = engine.FilterStack() 30 | stack.full_analyze() 31 | return tuple(stack.run(sql)) 32 | 33 | 34 | def format(sql, **options): 35 | """Format *sql* according to *options*. 36 | 37 | Available options are documented in :ref:`formatting`. 38 | 39 | Returns the formatted SQL statement as a string. 40 | """ 41 | stack = engine.FilterStack() 42 | options = formatter.validate_options(options) 43 | stack = formatter.build_filter_stack(stack, options) 44 | stack.postprocess.append(filters.SerializerUnicode()) 45 | return ''.join(stack.run(sql)) 46 | 47 | 48 | def split(sql): 49 | """Split *sql* into single statements. 50 | 51 | Returns a list of strings. 52 | """ 53 | stack = engine.FilterStack() 54 | stack.split_statements = True 55 | return [unicode(stmt) for stmt in stack.run(sql)] 56 | 57 | 58 | from .engine.filter import StatementFilter 59 | def split2(stream): 60 | splitter = StatementFilter() 61 | return list(splitter.process(None, stream)) -------------------------------------------------------------------------------- /sqlparse/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | """filter""" 7 | from __future__ import absolute_import 8 | from .. import lexer 9 | from . 
import grouping 10 | from .filter import StatementFilter 11 | 12 | # XXX remove this when cleanup is complete 13 | Filter = object 14 | 15 | 16 | class FilterStack(object): 17 | 18 | def __init__(self): 19 | self.preprocess = [] 20 | self.stmtprocess = [] 21 | self.postprocess = [] 22 | self.split_statements = False 23 | self._grouping = False 24 | 25 | def _flatten(self, stream): 26 | for token in stream: 27 | if token.is_group(): 28 | for t in self._flatten(token.tokens): 29 | yield t 30 | else: 31 | yield token 32 | 33 | def enable_grouping(self): 34 | self._grouping = True 35 | 36 | def full_analyze(self): 37 | self.enable_grouping() 38 | 39 | def run(self, sql): 40 | stream = lexer.tokenize(sql) 41 | # Process token stream 42 | if self.preprocess: 43 | for filter_ in self.preprocess: 44 | stream = filter_.process(self, stream) 45 | 46 | if (self.stmtprocess or self.postprocess or self.split_statements 47 | or self._grouping): 48 | splitter = StatementFilter() 49 | stream = splitter.process(self, stream) 50 | 51 | if self._grouping: 52 | 53 | def _group(stream): 54 | for stmt in stream: 55 | grouping.group(stmt) 56 | yield stmt 57 | stream = _group(stream) 58 | 59 | if self.stmtprocess: 60 | 61 | def _run1(stream): 62 | ret = [] 63 | for stmt in stream: 64 | for filter_ in self.stmtprocess: 65 | filter_.process(self, stmt) 66 | ret.append(stmt) 67 | return ret 68 | stream = _run1(stream) 69 | 70 | if self.postprocess: 71 | 72 | def _run2(stream): 73 | for stmt in stream: 74 | stmt.tokens = list(self._flatten(stmt.tokens)) 75 | for filter_ in self.postprocess: 76 | stmt = filter_.process(self, stmt) 77 | yield stmt 78 | stream = _run2(stream) 79 | 80 | return stream 81 | -------------------------------------------------------------------------------- /sqlparse/engine/filter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from ..sql import Statement, Token 4 | from .. import tokens as T 5 | 6 | 7 | class TokenFilter(object): 8 | 9 | def __init__(self, **options): 10 | self.options = options 11 | 12 | def process(self, stack, stream): 13 | """Process token stream.""" 14 | raise NotImplementedError 15 | 16 | 17 | class StatementFilter(TokenFilter): 18 | 19 | def __init__(self): 20 | TokenFilter.__init__(self) 21 | self._in_declare = False 22 | self._in_dbldollar = False 23 | self._is_create = False 24 | self._begin_depth = 0 25 | 26 | def _reset(self): 27 | self._in_declare = False 28 | self._in_dbldollar = False 29 | self._is_create = False 30 | self._begin_depth = 0 31 | 32 | def _change_splitlevel(self, ttype, value): 33 | # PostgreSQL 34 | if (ttype == T.Name.Builtin 35 | and value.startswith('$') and value.endswith('$')): 36 | if self._in_dbldollar: 37 | self._in_dbldollar = False 38 | return -1 39 | else: 40 | self._in_dbldollar = True 41 | return 1 42 | elif self._in_dbldollar: 43 | return 0 44 | 45 | # ANSI 46 | if ttype not in T.Keyword: 47 | return 0 48 | 49 | unified = value.upper() 50 | 51 | if unified == 'DECLARE' and self._is_create: 52 | self._in_declare = True 53 | return 1 54 | 55 | if unified == 'BEGIN': 56 | self._begin_depth += 1 57 | if self._in_declare: # FIXME(andi): This makes no sense. 58 | return 0 59 | return 0 60 | 61 | if unified == 'END': 62 | # Should this respect a preceeding BEGIN? 63 | # In CASE ... WHEN ... END this results in a split level -1. 
64 | self._begin_depth = max(0, self._begin_depth - 1) 65 | return -1 66 | 67 | if ttype is T.Keyword.DDL and unified.startswith('CREATE'): 68 | self._is_create = True 69 | return 0 70 | 71 | if (unified in ('IF', 'FOR') 72 | and self._is_create and self._begin_depth > 0): 73 | return 1 74 | 75 | # Default 76 | return 0 77 | 78 | def process(self, stack, stream): 79 | splitlevel = 0 80 | stmt = None 81 | consume_ws = False 82 | stmt_tokens = [] 83 | for ttype, value in stream: 84 | # Before appending the token 85 | if (consume_ws and ttype is not T.Whitespace 86 | and ttype is not T.Comment.Single): 87 | consume_ws = False 88 | stmt.tokens = stmt_tokens 89 | yield stmt 90 | self._reset() 91 | stmt = None 92 | splitlevel = 0 93 | if stmt is None: 94 | stmt = Statement() 95 | stmt_tokens = [] 96 | splitlevel += self._change_splitlevel(ttype, value) 97 | # Append the token 98 | stmt_tokens.append(Token(ttype, value)) 99 | # After appending the token 100 | if (splitlevel <= 0 and ttype is T.Punctuation 101 | and value == ';'): 102 | consume_ws = True 103 | if stmt is not None: 104 | stmt.tokens = stmt_tokens 105 | yield stmt 106 | -------------------------------------------------------------------------------- /sqlparse/engine/grouping.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import itertools 4 | 5 | from .. import sql 6 | from .. import tokens as T 7 | 8 | try: 9 | next 10 | except NameError: # Python < 2.6 11 | next = lambda i: i.next() 12 | 13 | 14 | def _group_left_right(tlist, ttype, value, cls, 15 | check_right=lambda t: True, 16 | check_left=lambda t: True, 17 | include_semicolon=False): 18 | [_group_left_right(sgroup, ttype, value, cls, check_right, 19 | include_semicolon) for sgroup in tlist.get_sublists() 20 | if not isinstance(sgroup, cls)] 21 | idx = 0 22 | token = tlist.token_next_match(idx, ttype, value) 23 | while token: 24 | right = tlist.token_next(tlist.token_index(token)) 25 | left = tlist.token_prev(tlist.token_index(token)) 26 | if right is None or not check_right(right): 27 | token = tlist.token_next_match(tlist.token_index(token) + 1, 28 | ttype, value) 29 | elif left is None or not check_right(left): 30 | token = tlist.token_next_match(tlist.token_index(token) + 1, 31 | ttype, value) 32 | else: 33 | if include_semicolon: 34 | sright = tlist.token_next_match(tlist.token_index(right), 35 | T.Punctuation, ';') 36 | if sright is not None: 37 | # only overwrite "right" if a semicolon is actually 38 | # present. 
39 | right = sright 40 | tokens = tlist.tokens_between(left, right)[1:] 41 | if not isinstance(left, cls): 42 | new = cls([left]) 43 | new_idx = tlist.token_index(left) 44 | tlist.tokens.remove(left) 45 | tlist.tokens.insert(new_idx, new) 46 | left = new 47 | left.tokens.extend(tokens) 48 | for t in tokens: 49 | tlist.tokens.remove(t) 50 | token = tlist.token_next_match(tlist.token_index(left) + 1, 51 | ttype, value) 52 | 53 | 54 | def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value, 55 | cls, include_semicolon=False, recurse=False): 56 | def _find_matching(i, tl, stt, sva, ett, eva): 57 | depth = 1 58 | for t in tl.tokens[i:]: 59 | if t.match(stt, sva): 60 | depth += 1 61 | elif t.match(ett, eva): 62 | depth -= 1 63 | if depth == 1: 64 | return t 65 | return None 66 | [_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value, 67 | cls, include_semicolon) for sgroup in tlist.get_sublists() 68 | if recurse] 69 | if isinstance(tlist, cls): 70 | idx = 1 71 | else: 72 | idx = 0 73 | token = tlist.token_next_match(idx, start_ttype, start_value) 74 | while token: 75 | tidx = tlist.token_index(token) 76 | end = _find_matching(tidx, tlist, start_ttype, start_value, 77 | end_ttype, end_value) 78 | if end is None: 79 | idx = tidx + 1 80 | else: 81 | if include_semicolon: 82 | next_ = tlist.token_next(tlist.token_index(end)) 83 | if next_ and next_.match(T.Punctuation, ';'): 84 | end = next_ 85 | group = tlist.group_tokens(cls, tlist.tokens_between(token, end)) 86 | _group_matching(group, start_ttype, start_value, 87 | end_ttype, end_value, cls, include_semicolon) 88 | idx = tlist.token_index(group) + 1 89 | token = tlist.token_next_match(idx, start_ttype, start_value) 90 | 91 | 92 | def group_if(tlist): 93 | _group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True) 94 | 95 | 96 | def group_for(tlist): 97 | _group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP', 98 | sql.For, True) 99 | 100 | 101 | def group_as(tlist): 102 | 103 | def _right_valid(token): 104 | # Currently limited to DML/DDL. Maybe additional more non SQL reserved 105 | # keywords should appear here (see issue8). 106 | return not token.ttype in (T.DML, T.DDL) 107 | _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier, 108 | check_right=_right_valid) 109 | 110 | 111 | def group_assignment(tlist): 112 | _group_left_right(tlist, T.Assignment, ':=', sql.Assignment, 113 | include_semicolon=True) 114 | 115 | 116 | def group_comparison(tlist): 117 | 118 | def _parts_valid(token): 119 | return (token.ttype in (T.String.Symbol, T.Name, T.Number, 120 | T.Number.Integer, T.Literal, 121 | T.Literal.Number.Integer) 122 | or isinstance(token, (sql.Identifier,))) 123 | _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison, 124 | check_left=_parts_valid, check_right=_parts_valid) 125 | 126 | 127 | def group_case(tlist): 128 | _group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case, 129 | include_semicolon=True, recurse=True) 130 | 131 | 132 | def group_identifier(tlist): 133 | def _consume_cycle(tl, i): 134 | x = itertools.cycle(( 135 | lambda y: (y.match(T.Punctuation, '.') 136 | or y.ttype is T.Operator), 137 | lambda y: (y.ttype in (T.String.Symbol, 138 | T.Name, 139 | T.Wildcard, 140 | T.Literal.Number.Integer)))) 141 | for t in tl.tokens[i:]: 142 | if next(x)(t): 143 | yield t 144 | else: 145 | raise StopIteration 146 | 147 | def _next_token(tl, i): 148 | # chooses the next token. if two tokens are found then the 149 | # first is returned. 
150 | t1 = tl.token_next_by_type(i, (T.String.Symbol, T.Name)) 151 | t2 = tl.token_next_by_instance(i, sql.Function) 152 | if t1 and t2: 153 | i1 = tl.token_index(t1) 154 | i2 = tl.token_index(t2) 155 | if i1 > i2: 156 | return t2 157 | else: 158 | return t1 159 | elif t1: 160 | return t1 161 | else: 162 | return t2 163 | 164 | # bottom up approach: group subgroups first 165 | [group_identifier(sgroup) for sgroup in tlist.get_sublists() 166 | if not isinstance(sgroup, sql.Identifier)] 167 | 168 | # real processing 169 | idx = 0 170 | token = _next_token(tlist, idx) 171 | while token: 172 | identifier_tokens = [token] + list( 173 | _consume_cycle(tlist, 174 | tlist.token_index(token) + 1)) 175 | if not (len(identifier_tokens) == 1 176 | and isinstance(identifier_tokens[0], sql.Function)): 177 | group = tlist.group_tokens(sql.Identifier, identifier_tokens) 178 | idx = tlist.token_index(group) + 1 179 | else: 180 | idx += 1 181 | token = _next_token(tlist, idx) 182 | 183 | 184 | def group_identifier_list(tlist): 185 | [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() 186 | if not isinstance(sgroup, sql.IdentifierList)] 187 | idx = 0 188 | # Allowed list items 189 | fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function, 190 | sql.Case)), 191 | lambda t: t.is_whitespace(), 192 | lambda t: t.ttype == T.Name, 193 | lambda t: t.ttype == T.Wildcard, 194 | lambda t: t.match(T.Keyword, 'null'), 195 | lambda t: t.ttype == T.Number.Integer, 196 | lambda t: t.ttype == T.String.Single, 197 | lambda t: isinstance(t, sql.Comparison), 198 | ] 199 | tcomma = tlist.token_next_match(idx, T.Punctuation, ',') 200 | start = None 201 | while tcomma is not None: 202 | before = tlist.token_prev(tcomma) 203 | after = tlist.token_next(tcomma) 204 | # Check if the tokens around tcomma belong to a list 205 | bpassed = apassed = False 206 | for func in fend1_funcs: 207 | if before is not None and func(before): 208 | bpassed = True 209 | if after is not None and func(after): 210 | apassed = True 211 | if not bpassed or not apassed: 212 | # Something's wrong here, skip ahead to next "," 213 | start = None 214 | tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1, 215 | T.Punctuation, ',') 216 | else: 217 | if start is None: 218 | start = before 219 | next_ = tlist.token_next(after) 220 | if next_ is None or not next_.match(T.Punctuation, ','): 221 | # Reached the end of the list 222 | tokens = tlist.tokens_between(start, after) 223 | group = tlist.group_tokens(sql.IdentifierList, tokens) 224 | start = None 225 | tcomma = tlist.token_next_match(tlist.token_index(group) + 1, 226 | T.Punctuation, ',') 227 | else: 228 | tcomma = next_ 229 | 230 | 231 | def group_parenthesis(tlist): 232 | _group_matching(tlist, T.Punctuation, '(', T.Punctuation, ')', 233 | sql.Parenthesis) 234 | 235 | 236 | def group_comments(tlist): 237 | [group_comments(sgroup) for sgroup in tlist.get_sublists() 238 | if not isinstance(sgroup, sql.Comment)] 239 | idx = 0 240 | token = tlist.token_next_by_type(idx, T.Comment) 241 | while token: 242 | tidx = tlist.token_index(token) 243 | end = tlist.token_not_matching(tidx + 1, 244 | [lambda t: t.ttype in T.Comment, 245 | lambda t: t.is_whitespace()]) 246 | if end is None: 247 | idx = tidx + 1 248 | else: 249 | eidx = tlist.token_index(end) 250 | grp_tokens = tlist.tokens_between(token, 251 | tlist.token_prev(eidx, False)) 252 | group = tlist.group_tokens(sql.Comment, grp_tokens) 253 | idx = tlist.token_index(group) 254 | token = tlist.token_next_by_type(idx, T.Comment) 
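# Illustrative sketch (not part of upstream python-sqlparse): the grouping passes in
# this module are normally driven through group() below, which sqlparse.parse() runs
# on every statement, e.g.
#
#     import sqlparse
#     stmt = sqlparse.parse("select a, b from foo where id = 1")[0]
#     for tok in stmt.tokens:
#         if tok.is_group():
#             print("%s: %s" % (type(tok).__name__, str(tok)))
#
# which lists the grouped nodes built by the group_* functions here (typically an
# IdentifierList, an Identifier and a Where clause for this input).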
255 | 256 | 257 | def group_where(tlist): 258 | [group_where(sgroup) for sgroup in tlist.get_sublists() 259 | if not isinstance(sgroup, sql.Where)] 260 | idx = 0 261 | token = tlist.token_next_match(idx, T.Keyword, 'WHERE') 262 | stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION') 263 | while token: 264 | tidx = tlist.token_index(token) 265 | end = tlist.token_next_match(tidx + 1, T.Keyword, stopwords) 266 | if end is None: 267 | end = tlist._groupable_tokens[-1] 268 | else: 269 | end = tlist.tokens[tlist.token_index(end) - 1] 270 | group = tlist.group_tokens(sql.Where, 271 | tlist.tokens_between(token, end), 272 | ignore_ws=True) 273 | idx = tlist.token_index(group) 274 | token = tlist.token_next_match(idx, T.Keyword, 'WHERE') 275 | 276 | 277 | def group_aliased(tlist): 278 | clss = (sql.Identifier, sql.Function, sql.Case) 279 | [group_aliased(sgroup) for sgroup in tlist.get_sublists() 280 | if not isinstance(sgroup, clss)] 281 | idx = 0 282 | token = tlist.token_next_by_instance(idx, clss) 283 | while token: 284 | next_ = tlist.token_next(tlist.token_index(token)) 285 | if next_ is not None and isinstance(next_, clss): 286 | grp = tlist.tokens_between(token, next_)[1:] 287 | token.tokens.extend(grp) 288 | for t in grp: 289 | tlist.tokens.remove(t) 290 | idx = tlist.token_index(token) + 1 291 | token = tlist.token_next_by_instance(idx, clss) 292 | 293 | 294 | def group_typecasts(tlist): 295 | _group_left_right(tlist, T.Punctuation, '::', sql.Identifier) 296 | 297 | 298 | def group_functions(tlist): 299 | [group_functions(sgroup) for sgroup in tlist.get_sublists() 300 | if not isinstance(sgroup, sql.Function)] 301 | idx = 0 302 | token = tlist.token_next_by_type(idx, T.Name) 303 | while token: 304 | next_ = tlist.token_next(token) 305 | if not isinstance(next_, sql.Parenthesis): 306 | idx = tlist.token_index(token) + 1 307 | else: 308 | func = tlist.group_tokens(sql.Function, 309 | tlist.tokens_between(token, next_)) 310 | idx = tlist.token_index(func) + 1 311 | token = tlist.token_next_by_type(idx, T.Name) 312 | 313 | 314 | def group(tlist): 315 | for func in [group_parenthesis, 316 | group_functions, 317 | group_comments, 318 | group_where, 319 | group_case, 320 | group_identifier, 321 | group_typecasts, 322 | group_as, 323 | group_aliased, 324 | group_assignment, 325 | group_comparison, 326 | group_identifier_list, 327 | group_if, 328 | group_for]: 329 | func(tlist) 330 | -------------------------------------------------------------------------------- /sqlparse/filters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import re 6 | 7 | from os.path import abspath, join 8 | 9 | from . import sql 10 | from . 
import tokens as T 11 | from .engine import FilterStack 12 | from .tokens import ( 13 | Comment, Keyword, Name, 14 | Punctuation, String, Whitespace, 15 | ) 16 | 17 | 18 | class Filter(object): 19 | 20 | def process(self, *args): 21 | raise NotImplementedError 22 | 23 | 24 | class TokenFilter(Filter): 25 | 26 | def process(self, stack, stream): 27 | raise NotImplementedError 28 | 29 | 30 | # -------------------------- 31 | # token process 32 | 33 | class _CaseFilter(TokenFilter): 34 | 35 | ttype = None 36 | 37 | def __init__(self, case=None): 38 | if case is None: 39 | case = 'upper' 40 | assert case in ['lower', 'upper', 'capitalize'] 41 | self.convert = getattr(str, case) 42 | 43 | def process(self, stack, stream): 44 | for ttype, value in stream: 45 | if ttype in self.ttype: 46 | value = self.convert(value) 47 | yield ttype, value 48 | 49 | 50 | class KeywordCaseFilter(_CaseFilter): 51 | ttype = T.Keyword 52 | 53 | 54 | class IdentifierCaseFilter(_CaseFilter): 55 | ttype = (T.Name, T.String.Symbol) 56 | 57 | def process(self, stack, stream): 58 | for ttype, value in stream: 59 | if ttype in self.ttype and not value.strip()[0] == '"': 60 | value = self.convert(value) 61 | yield ttype, value 62 | 63 | 64 | class GetComments(Filter): 65 | """Get the comments from a stack""" 66 | def process(self, stack, stream): 67 | for token_type, value in stream: 68 | if token_type in Comment: 69 | yield token_type, value 70 | 71 | 72 | class StripComments(Filter): 73 | """Strip the comments from a stack""" 74 | def process(self, stack, stream): 75 | for token_type, value in stream: 76 | if token_type not in Comment: 77 | yield token_type, value 78 | 79 | 80 | class IncludeStatement(Filter): 81 | """Filter that enable a INCLUDE statement""" 82 | 83 | def __init__(self, dirpath=".", maxRecursive=10): 84 | self.dirpath = abspath(dirpath) 85 | self.maxRecursive = maxRecursive 86 | 87 | self.detected = False 88 | 89 | def process(self, stack, stream): 90 | # Run over all tokens in the stream 91 | for token_type, value in stream: 92 | # INCLUDE statement found, set detected mode 93 | if token_type in Name and value.upper() == 'INCLUDE': 94 | self.detected = True 95 | continue 96 | 97 | # INCLUDE statement was found, parse it 98 | elif self.detected: 99 | # Omit whitespaces 100 | if token_type in Whitespace: 101 | pass 102 | 103 | # Get path of file to include 104 | path = None 105 | 106 | if token_type in String.Symbol: 107 | # if token_type in tokens.String.Symbol: 108 | path = join(self.dirpath, value[1:-1]) 109 | 110 | # Include file if path was found 111 | if path: 112 | try: 113 | f = open(path) 114 | raw_sql = f.read() 115 | f.close() 116 | except IOError as err: 117 | yield Comment, u'-- IOError: %s\n' % err 118 | 119 | else: 120 | # Create new FilterStack to parse readed file 121 | # and add all its tokens to the main stack recursively 122 | # [ToDo] Add maximum recursive iteration value 123 | stack = FilterStack() 124 | stack.preprocess.append(IncludeStatement(self.dirpath)) 125 | 126 | for tv in stack.run(raw_sql): 127 | yield tv 128 | 129 | # Set normal mode 130 | self.detected = False 131 | 132 | # Don't include any token while in detected mode 133 | continue 134 | 135 | # Normal token 136 | yield token_type, value 137 | 138 | 139 | # ---------------------- 140 | # statement process 141 | 142 | class StripCommentsFilter(Filter): 143 | 144 | def _get_next_comment(self, tlist): 145 | # TODO(andi) Comment types should be unified, see related issue38 146 | token = tlist.token_next_by_instance(0, 
sql.Comment) 147 | if token is None: 148 | token = tlist.token_next_by_type(0, T.Comment) 149 | return token 150 | 151 | def _process(self, tlist): 152 | token = self._get_next_comment(tlist) 153 | while token: 154 | tidx = tlist.token_index(token) 155 | prev = tlist.token_prev(tidx, False) 156 | next_ = tlist.token_next(tidx, False) 157 | # Replace by whitespace if prev and next exist and if they're not 158 | # whitespaces. This doesn't apply if prev or next is a paranthesis. 159 | if (prev is not None and next_ is not None 160 | and not prev.is_whitespace() and not next_.is_whitespace() 161 | and not (prev.match(T.Punctuation, '(') 162 | or next_.match(T.Punctuation, ')'))): 163 | tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') 164 | else: 165 | tlist.tokens.pop(tidx) 166 | token = self._get_next_comment(tlist) 167 | 168 | def process(self, stack, stmt): 169 | [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] 170 | self._process(stmt) 171 | 172 | 173 | class StripWhitespaceFilter(Filter): 174 | 175 | def _stripws(self, tlist): 176 | func_name = '_stripws_%s' % tlist.__class__.__name__.lower() 177 | func = getattr(self, func_name, self._stripws_default) 178 | func(tlist) 179 | 180 | def _stripws_default(self, tlist): 181 | last_was_ws = False 182 | for token in tlist.tokens: 183 | if token.is_whitespace(): 184 | if last_was_ws: 185 | token.value = '' 186 | else: 187 | token.value = ' ' 188 | last_was_ws = token.is_whitespace() 189 | 190 | def _stripws_parenthesis(self, tlist): 191 | if tlist.tokens[1].is_whitespace(): 192 | tlist.tokens.pop(1) 193 | if tlist.tokens[-2].is_whitespace(): 194 | tlist.tokens.pop(-2) 195 | self._stripws_default(tlist) 196 | 197 | def process(self, stack, stmt): 198 | [self.process(stack, sgroup) for sgroup in stmt.get_sublists()] 199 | self._stripws(stmt) 200 | if stmt.tokens[-1].is_whitespace(): 201 | stmt.tokens.pop(-1) 202 | 203 | 204 | class ReindentFilter(Filter): 205 | 206 | def __init__(self, width=2, char=' ', line_width=None): 207 | self.width = width 208 | self.char = char 209 | self.indent = 0 210 | self.offset = 0 211 | self.line_width = line_width 212 | self._curr_stmt = None 213 | self._last_stmt = None 214 | 215 | def _get_offset(self, token): 216 | all_ = list(self._curr_stmt.flatten()) 217 | idx = all_.index(token) 218 | raw = ''.join(str(x) for x in all_[:idx + 1]) 219 | line = raw.splitlines()[-1] 220 | # Now take current offset into account and return relative offset. 
221 | full_offset = len(line) - len(self.char * (self.width * self.indent)) 222 | return full_offset - self.offset 223 | 224 | def nl(self): 225 | # TODO: newline character should be configurable 226 | ws = '\n' + (self.char * ((self.indent * self.width) + self.offset)) 227 | return sql.Token(T.Whitespace, ws) 228 | 229 | def _split_kwds(self, tlist): 230 | split_words = ('FROM', 'JOIN$', 'AND', 'OR', 231 | 'GROUP', 'ORDER', 'UNION', 'VALUES', 232 | 'SET', 'BETWEEN') 233 | 234 | def _next_token(i): 235 | t = tlist.token_next_match(i, T.Keyword, split_words, 236 | regex=True) 237 | if t and t.value.upper() == 'BETWEEN': 238 | t = _next_token(tlist.token_index(t) + 1) 239 | if t and t.value.upper() == 'AND': 240 | t = _next_token(tlist.token_index(t) + 1) 241 | return t 242 | 243 | idx = 0 244 | token = _next_token(idx) 245 | while token: 246 | prev = tlist.token_prev(tlist.token_index(token), False) 247 | offset = 1 248 | if prev and prev.is_whitespace(): 249 | tlist.tokens.pop(tlist.token_index(prev)) 250 | offset += 1 251 | if (prev 252 | and isinstance(prev, sql.Comment) 253 | and (str(prev).endswith('\n') 254 | or str(prev).endswith('\r'))): 255 | nl = tlist.token_next(token) 256 | else: 257 | nl = self.nl() 258 | tlist.insert_before(token, nl) 259 | token = _next_token(tlist.token_index(nl) + offset) 260 | 261 | def _split_statements(self, tlist): 262 | idx = 0 263 | token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) 264 | while token: 265 | prev = tlist.token_prev(tlist.token_index(token), False) 266 | if prev and prev.is_whitespace(): 267 | tlist.tokens.pop(tlist.token_index(prev)) 268 | # only break if it's not the first token 269 | if prev: 270 | nl = self.nl() 271 | tlist.insert_before(token, nl) 272 | token = tlist.token_next_by_type(tlist.token_index(token) + 1, 273 | (T.Keyword.DDL, T.Keyword.DML)) 274 | 275 | def _process(self, tlist): 276 | func_name = '_process_%s' % tlist.__class__.__name__.lower() 277 | func = getattr(self, func_name, self._process_default) 278 | func(tlist) 279 | 280 | def _process_where(self, tlist): 281 | token = tlist.token_next_match(0, T.Keyword, 'WHERE') 282 | tlist.insert_before(token, self.nl()) 283 | self.indent += 1 284 | self._process_default(tlist) 285 | self.indent -= 1 286 | 287 | def _process_parenthesis(self, tlist): 288 | first = tlist.token_next(0) 289 | indented = False 290 | if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): 291 | self.indent += 1 292 | tlist.tokens.insert(0, self.nl()) 293 | indented = True 294 | num_offset = self._get_offset(tlist.token_next_match(0, 295 | T.Punctuation, '(')) 296 | self.offset += num_offset 297 | self._process_default(tlist, stmts=not indented) 298 | if indented: 299 | self.indent -= 1 300 | self.offset -= num_offset 301 | 302 | def _process_identifierlist(self, tlist): 303 | identifiers = tlist.get_identifiers() 304 | if len(identifiers) > 1 and not tlist.within(sql.Function): 305 | first = list(identifiers[0].flatten())[0] 306 | num_offset = self._get_offset(first) - len(first.value) 307 | self.offset += num_offset 308 | for token in identifiers[1:]: 309 | tlist.insert_before(token, self.nl()) 310 | self.offset -= num_offset 311 | self._process_default(tlist) 312 | 313 | def _process_case(self, tlist): 314 | is_first = True 315 | num_offset = None 316 | case = tlist.tokens[0] 317 | outer_offset = self._get_offset(case) - len(case.value) 318 | self.offset += outer_offset 319 | for cond, value in tlist.get_cases(): 320 | if is_first: 321 | tcond = 
list(cond[0].flatten())[0] 322 | is_first = False 323 | num_offset = self._get_offset(tcond) - len(tcond.value) 324 | self.offset += num_offset 325 | continue 326 | if cond is None: 327 | token = value[0] 328 | else: 329 | token = cond[0] 330 | tlist.insert_before(token, self.nl()) 331 | # Line breaks on group level are done. Now let's add an offset of 332 | # 5 (=length of "when", "then", "else") and process subgroups. 333 | self.offset += 5 334 | self._process_default(tlist) 335 | self.offset -= 5 336 | if num_offset is not None: 337 | self.offset -= num_offset 338 | end = tlist.token_next_match(0, T.Keyword, 'END') 339 | tlist.insert_before(end, self.nl()) 340 | self.offset -= outer_offset 341 | 342 | def _process_default(self, tlist, stmts=True, kwds=True): 343 | if stmts: 344 | self._split_statements(tlist) 345 | if kwds: 346 | self._split_kwds(tlist) 347 | [self._process(sgroup) for sgroup in tlist.get_sublists()] 348 | 349 | def process(self, stack, stmt): 350 | if isinstance(stmt, sql.Statement): 351 | self._curr_stmt = stmt 352 | self._process(stmt) 353 | if isinstance(stmt, sql.Statement): 354 | if self._last_stmt is not None: 355 | if self._last_stmt.to_unicode().endswith('\n'): 356 | nl = '\n' 357 | else: 358 | nl = '\n\n' 359 | stmt.tokens.insert(0, 360 | sql.Token(T.Whitespace, nl)) 361 | if self._last_stmt != stmt: 362 | self._last_stmt = stmt 363 | 364 | 365 | # FIXME: Doesn't work ;) 366 | class RightMarginFilter(Filter): 367 | 368 | keep_together = ( 369 | # sql.TypeCast, sql.Identifier, sql.Alias, 370 | ) 371 | 372 | def __init__(self, width=79): 373 | self.width = width 374 | self.line = '' 375 | 376 | def _process(self, stack, group, stream): 377 | for token in stream: 378 | if token.is_whitespace() and '\n' in token.value: 379 | if token.value.endswith('\n'): 380 | self.line = '' 381 | else: 382 | self.line = token.value.splitlines()[-1] 383 | elif (token.is_group() 384 | and not token.__class__ in self.keep_together): 385 | token.tokens = self._process(stack, token, token.tokens) 386 | else: 387 | val = token.to_unicode() 388 | if len(self.line) + len(val) > self.width: 389 | match = re.search('^ +', self.line) 390 | if match is not None: 391 | indent = match.group() 392 | else: 393 | indent = '' 394 | yield sql.Token(T.Whitespace, '\n%s' % indent) 395 | self.line = indent 396 | self.line += val 397 | yield token 398 | 399 | def process(self, stack, group): 400 | return 401 | group.tokens = self._process(stack, group, group.tokens) 402 | 403 | 404 | class ColumnsSelect(Filter): 405 | """Get the columns names of a SELECT query""" 406 | def process(self, stack, stream): 407 | mode = 0 408 | oldValue = "" 409 | parenthesis = 0 410 | 411 | for token_type, value in stream: 412 | # Ignore comments 413 | if token_type in Comment: 414 | continue 415 | 416 | # We have not detected a SELECT statement 417 | if mode == 0: 418 | if token_type in Keyword and value == 'SELECT': 419 | mode = 1 420 | 421 | # We have detected a SELECT statement 422 | elif mode == 1: 423 | if value == 'FROM': 424 | if oldValue: 425 | yield oldValue 426 | 427 | mode = 3 # Columns have been checked 428 | 429 | elif value == 'AS': 430 | oldValue = "" 431 | mode = 2 432 | 433 | elif (token_type == Punctuation 434 | and value == ',' and not parenthesis): 435 | if oldValue: 436 | yield oldValue 437 | oldValue = "" 438 | 439 | elif token_type not in Whitespace: 440 | if value == '(': 441 | parenthesis += 1 442 | elif value == ')': 443 | parenthesis -= 1 444 | 445 | oldValue += value 446 | 447 | # We are 
processing an AS keyword 448 | elif mode == 2: 449 | # We check also for Keywords because a bug in SQLParse 450 | if token_type == Name or token_type == Keyword: 451 | yield value 452 | mode = 1 453 | 454 | 455 | # --------------------------- 456 | # postprocess 457 | 458 | class SerializerUnicode(Filter): 459 | 460 | def process(self, stack, stmt): 461 | raw = stmt.to_unicode() 462 | add_nl = raw.endswith('\n') 463 | res = '\n'.join(line.rstrip() for line in raw.splitlines()) 464 | if add_nl: 465 | res += '\n' 466 | return res 467 | 468 | def Tokens2Unicode(stream): 469 | result = "" 470 | 471 | for _, value in stream: 472 | result += str(value) 473 | 474 | return result 475 | 476 | 477 | class OutputPythonFilter(Filter): 478 | 479 | def __init__(self, varname='sql'): 480 | self.varname = varname 481 | self.cnt = 0 482 | 483 | def _process(self, stream, varname, count, has_nl): 484 | if count > 1: 485 | yield sql.Token(T.Whitespace, '\n') 486 | yield sql.Token(T.Name, varname) 487 | yield sql.Token(T.Whitespace, ' ') 488 | yield sql.Token(T.Operator, '=') 489 | yield sql.Token(T.Whitespace, ' ') 490 | if has_nl: 491 | yield sql.Token(T.Operator, '(') 492 | yield sql.Token(T.Text, "'") 493 | cnt = 0 494 | for token in stream: 495 | cnt += 1 496 | if token.is_whitespace() and '\n' in token.value: 497 | if cnt == 1: 498 | continue 499 | after_lb = token.value.split('\n', 1)[1] 500 | yield sql.Token(T.Text, " '") 501 | yield sql.Token(T.Whitespace, '\n') 502 | for i in range(len(varname) + 4): 503 | yield sql.Token(T.Whitespace, ' ') 504 | yield sql.Token(T.Text, "'") 505 | if after_lb: # it's the indendation 506 | yield sql.Token(T.Whitespace, after_lb) 507 | continue 508 | elif token.value and "'" in token.value: 509 | token.value = token.value.replace("'", "\\'") 510 | yield sql.Token(T.Text, token.value or '') 511 | yield sql.Token(T.Text, "'") 512 | if has_nl: 513 | yield sql.Token(T.Operator, ')') 514 | 515 | def process(self, stack, stmt): 516 | self.cnt += 1 517 | if self.cnt > 1: 518 | varname = '%s%d' % (self.varname, self.cnt) 519 | else: 520 | varname = self.varname 521 | has_nl = len(stmt.to_unicode().strip().splitlines()) > 1 522 | stmt.tokens = self._process(stmt.tokens, varname, self.cnt, has_nl) 523 | return stmt 524 | 525 | 526 | class OutputPHPFilter(Filter): 527 | 528 | def __init__(self, varname='sql'): 529 | self.varname = '$%s' % varname 530 | self.count = 0 531 | 532 | def _process(self, stream, varname): 533 | if self.count > 1: 534 | yield sql.Token(T.Whitespace, '\n') 535 | yield sql.Token(T.Name, varname) 536 | yield sql.Token(T.Whitespace, ' ') 537 | yield sql.Token(T.Operator, '=') 538 | yield sql.Token(T.Whitespace, ' ') 539 | yield sql.Token(T.Text, '"') 540 | for token in stream: 541 | if token.is_whitespace() and '\n' in token.value: 542 | after_lb = token.value.split('\n', 1)[1] 543 | yield sql.Token(T.Text, ' "') 544 | yield sql.Token(T.Operator, ';') 545 | yield sql.Token(T.Whitespace, '\n') 546 | yield sql.Token(T.Name, varname) 547 | yield sql.Token(T.Whitespace, ' ') 548 | yield sql.Token(T.Punctuation, '.') 549 | yield sql.Token(T.Operator, '=') 550 | yield sql.Token(T.Whitespace, ' ') 551 | yield sql.Token(T.Text, '"') 552 | if after_lb: 553 | yield sql.Token(T.Text, after_lb) 554 | continue 555 | elif '"' in token.value: 556 | token.value = token.value.replace('"', '\\"') 557 | yield sql.Token(T.Text, token.value) 558 | yield sql.Token(T.Text, '"') 559 | yield sql.Token(T.Punctuation, ';') 560 | 561 | def process(self, stack, stmt): 562 | self.count 
+= 1 563 | if self.count > 1: 564 | varname = '%s%d' % (self.varname, self.count) 565 | else: 566 | varname = self.varname 567 | stmt.tokens = tuple(self._process(stmt.tokens, varname)) 568 | return stmt 569 | 570 | 571 | class Limit(Filter): 572 | """Get the LIMIT of a query. 573 | 574 | If not defined, return -1 (SQL specification for no LIMIT query) 575 | """ 576 | def process(self, stack, stream): 577 | index = 7 578 | stream = list(stream) 579 | stream.reverse() 580 | 581 | # Run over all tokens in the stream from the end 582 | for token_type, value in stream: 583 | index -= 1 584 | 585 | # if index and token_type in Keyword: 586 | if index and token_type in Keyword and value == 'LIMIT': 587 | return stream[4 - index][1] 588 | 589 | return -1 -------------------------------------------------------------------------------- /sqlparse/formatter.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | """SQL formatter""" 7 | from __future__ import absolute_import 8 | from . import SQLParseError 9 | from . import filters 10 | 11 | 12 | def validate_options(options): 13 | """Validates options.""" 14 | kwcase = options.get('keyword_case', None) 15 | if kwcase not in [None, 'upper', 'lower', 'capitalize']: 16 | raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) 17 | 18 | idcase = options.get('identifier_case', None) 19 | if idcase not in [None, 'upper', 'lower', 'capitalize']: 20 | raise SQLParseError('Invalid value for identifier_case: %r' % idcase) 21 | 22 | ofrmt = options.get('output_format', None) 23 | if ofrmt not in [None, 'sql', 'python', 'php']: 24 | raise SQLParseError('Unknown output format: %r' % ofrmt) 25 | 26 | strip_comments = options.get('strip_comments', False) 27 | if strip_comments not in [True, False]: 28 | raise SQLParseError('Invalid value for strip_comments: %r' 29 | % strip_comments) 30 | 31 | strip_ws = options.get('strip_whitespace', False) 32 | if strip_ws not in [True, False]: 33 | raise SQLParseError('Invalid value for strip_whitespace: %r' 34 | % strip_ws) 35 | 36 | reindent = options.get('reindent', False) 37 | if reindent not in [True, False]: 38 | raise SQLParseError('Invalid value for reindent: %r' 39 | % reindent) 40 | elif reindent: 41 | options['strip_whitespace'] = True 42 | indent_tabs = options.get('indent_tabs', False) 43 | if indent_tabs not in [True, False]: 44 | raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) 45 | elif indent_tabs: 46 | options['indent_char'] = '\t' 47 | else: 48 | options['indent_char'] = ' ' 49 | indent_width = options.get('indent_width', 2) 50 | try: 51 | indent_width = int(indent_width) 52 | except (TypeError, ValueError): 53 | raise SQLParseError('indent_width requires an integer') 54 | if indent_width < 1: 55 | raise SQLParseError('indent_width requires an positive integer') 56 | options['indent_width'] = indent_width 57 | 58 | right_margin = options.get('right_margin', None) 59 | if right_margin is not None: 60 | try: 61 | right_margin = int(right_margin) 62 | except (TypeError, ValueError): 63 | raise SQLParseError('right_margin requires an integer') 64 | if right_margin < 10: 65 | raise SQLParseError('right_margin requires an integer > 10') 66 | options['right_margin'] = right_margin 67 | 68 | return options 69 | 70 | 71 | def 
build_filter_stack(stack, options): 72 | """Setup and return a filter stack. 73 | 74 | Args: 75 | stack: :class:`~sqlparse.filters.FilterStack` instance 76 | options: Dictionary with options validated by validate_options. 77 | """ 78 | # Token filter 79 | if options.get('keyword_case', None): 80 | stack.preprocess.append( 81 | filters.KeywordCaseFilter(options['keyword_case'])) 82 | 83 | if options.get('identifier_case', None): 84 | stack.preprocess.append( 85 | filters.IdentifierCaseFilter(options['identifier_case'])) 86 | 87 | # After grouping 88 | if options.get('strip_comments', False): 89 | stack.enable_grouping() 90 | stack.stmtprocess.append(filters.StripCommentsFilter()) 91 | 92 | if (options.get('strip_whitespace', False) 93 | or options.get('reindent', False)): 94 | stack.enable_grouping() 95 | stack.stmtprocess.append(filters.StripWhitespaceFilter()) 96 | 97 | if options.get('reindent', False): 98 | stack.enable_grouping() 99 | stack.stmtprocess.append( 100 | filters.ReindentFilter(char=options['indent_char'], 101 | width=options['indent_width'])) 102 | 103 | if options.get('right_margin', False): 104 | stack.enable_grouping() 105 | stack.stmtprocess.append( 106 | filters.RightMarginFilter(width=options['right_margin'])) 107 | 108 | # Serializer 109 | if options.get('output_format'): 110 | frmt = options['output_format'] 111 | if frmt.lower() == 'php': 112 | fltr = filters.OutputPHPFilter() 113 | elif frmt.lower() == 'python': 114 | fltr = filters.OutputPythonFilter() 115 | else: 116 | fltr = None 117 | if fltr is not None: 118 | stack.postprocess.append(fltr) 119 | 120 | return stack 121 | -------------------------------------------------------------------------------- /sqlparse/keywords.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . 
import tokens 3 | 4 | KEYWORDS = { 5 | 'ABORT': tokens.Keyword, 6 | 'ABS': tokens.Keyword, 7 | 'ABSOLUTE': tokens.Keyword, 8 | 'ACCESS': tokens.Keyword, 9 | 'ADA': tokens.Keyword, 10 | 'ADD': tokens.Keyword, 11 | 'ADMIN': tokens.Keyword, 12 | 'AFTER': tokens.Keyword, 13 | 'AGGREGATE': tokens.Keyword, 14 | 'ALIAS': tokens.Keyword, 15 | 'ALL': tokens.Keyword, 16 | 'ALLOCATE': tokens.Keyword, 17 | 'ANALYSE': tokens.Keyword, 18 | 'ANALYZE': tokens.Keyword, 19 | 'ANY': tokens.Keyword, 20 | 'ARE': tokens.Keyword, 21 | 'ASC': tokens.Keyword, 22 | 'ASENSITIVE': tokens.Keyword, 23 | 'ASSERTION': tokens.Keyword, 24 | 'ASSIGNMENT': tokens.Keyword, 25 | 'ASYMMETRIC': tokens.Keyword, 26 | 'AT': tokens.Keyword, 27 | 'ATOMIC': tokens.Keyword, 28 | 'AUTHORIZATION': tokens.Keyword, 29 | 'AVG': tokens.Keyword, 30 | 31 | 'BACKWARD': tokens.Keyword, 32 | 'BEFORE': tokens.Keyword, 33 | 'BEGIN': tokens.Keyword, 34 | 'BETWEEN': tokens.Keyword, 35 | 'BITVAR': tokens.Keyword, 36 | 'BIT_LENGTH': tokens.Keyword, 37 | 'BOTH': tokens.Keyword, 38 | 'BREADTH': tokens.Keyword, 39 | 40 | # 'C': tokens.Keyword, # most likely this is an alias 41 | 'CACHE': tokens.Keyword, 42 | 'CALL': tokens.Keyword, 43 | 'CALLED': tokens.Keyword, 44 | 'CARDINALITY': tokens.Keyword, 45 | 'CASCADE': tokens.Keyword, 46 | 'CASCADED': tokens.Keyword, 47 | 'CAST': tokens.Keyword, 48 | 'CATALOG': tokens.Keyword, 49 | 'CATALOG_NAME': tokens.Keyword, 50 | 'CHAIN': tokens.Keyword, 51 | 'CHARACTERISTICS': tokens.Keyword, 52 | 'CHARACTER_LENGTH': tokens.Keyword, 53 | 'CHARACTER_SET_CATALOG': tokens.Keyword, 54 | 'CHARACTER_SET_NAME': tokens.Keyword, 55 | 'CHARACTER_SET_SCHEMA': tokens.Keyword, 56 | 'CHAR_LENGTH': tokens.Keyword, 57 | 'CHECK': tokens.Keyword, 58 | 'CHECKED': tokens.Keyword, 59 | 'CHECKPOINT': tokens.Keyword, 60 | 'CLASS': tokens.Keyword, 61 | 'CLASS_ORIGIN': tokens.Keyword, 62 | 'CLOB': tokens.Keyword, 63 | 'CLOSE': tokens.Keyword, 64 | 'CLUSTER': tokens.Keyword, 65 | 'COALSECE': tokens.Keyword, 66 | 'COBOL': tokens.Keyword, 67 | 'COLLATE': tokens.Keyword, 68 | 'COLLATION': tokens.Keyword, 69 | 'COLLATION_CATALOG': tokens.Keyword, 70 | 'COLLATION_NAME': tokens.Keyword, 71 | 'COLLATION_SCHEMA': tokens.Keyword, 72 | 'COLUMN': tokens.Keyword, 73 | 'COLUMN_NAME': tokens.Keyword, 74 | 'COMMAND_FUNCTION': tokens.Keyword, 75 | 'COMMAND_FUNCTION_CODE': tokens.Keyword, 76 | 'COMMENT': tokens.Keyword, 77 | 'COMMIT': tokens.Keyword, 78 | 'COMMITTED': tokens.Keyword, 79 | 'COMPLETION': tokens.Keyword, 80 | 'CONDITION_NUMBER': tokens.Keyword, 81 | 'CONNECT': tokens.Keyword, 82 | 'CONNECTION': tokens.Keyword, 83 | 'CONNECTION_NAME': tokens.Keyword, 84 | 'CONSTRAINT': tokens.Keyword, 85 | 'CONSTRAINTS': tokens.Keyword, 86 | 'CONSTRAINT_CATALOG': tokens.Keyword, 87 | 'CONSTRAINT_NAME': tokens.Keyword, 88 | 'CONSTRAINT_SCHEMA': tokens.Keyword, 89 | 'CONSTRUCTOR': tokens.Keyword, 90 | 'CONTAINS': tokens.Keyword, 91 | 'CONTINUE': tokens.Keyword, 92 | 'CONVERSION': tokens.Keyword, 93 | 'CONVERT': tokens.Keyword, 94 | 'COPY': tokens.Keyword, 95 | 'CORRESPONTING': tokens.Keyword, 96 | 'COUNT': tokens.Keyword, 97 | 'CREATEDB': tokens.Keyword, 98 | 'CREATEUSER': tokens.Keyword, 99 | 'CROSS': tokens.Keyword, 100 | 'CUBE': tokens.Keyword, 101 | 'CURRENT': tokens.Keyword, 102 | 'CURRENT_DATE': tokens.Keyword, 103 | 'CURRENT_PATH': tokens.Keyword, 104 | 'CURRENT_ROLE': tokens.Keyword, 105 | 'CURRENT_TIME': tokens.Keyword, 106 | 'CURRENT_TIMESTAMP': tokens.Keyword, 107 | 'CURRENT_USER': tokens.Keyword, 108 | 'CURSOR': tokens.Keyword, 109 | 'CURSOR_NAME': 
tokens.Keyword, 110 | 'CYCLE': tokens.Keyword, 111 | 112 | 'DATA': tokens.Keyword, 113 | 'DATABASE': tokens.Keyword, 114 | 'DATETIME_INTERVAL_CODE': tokens.Keyword, 115 | 'DATETIME_INTERVAL_PRECISION': tokens.Keyword, 116 | 'DAY': tokens.Keyword, 117 | 'DEALLOCATE': tokens.Keyword, 118 | 'DECLARE': tokens.Keyword, 119 | 'DEFAULT': tokens.Keyword, 120 | 'DEFAULTS': tokens.Keyword, 121 | 'DEFERRABLE': tokens.Keyword, 122 | 'DEFERRED': tokens.Keyword, 123 | 'DEFINED': tokens.Keyword, 124 | 'DEFINER': tokens.Keyword, 125 | 'DELIMITER': tokens.Keyword, 126 | 'DELIMITERS': tokens.Keyword, 127 | 'DEREF': tokens.Keyword, 128 | 'DESC': tokens.Keyword, 129 | 'DESCRIBE': tokens.Keyword, 130 | 'DESCRIPTOR': tokens.Keyword, 131 | 'DESTROY': tokens.Keyword, 132 | 'DESTRUCTOR': tokens.Keyword, 133 | 'DETERMINISTIC': tokens.Keyword, 134 | 'DIAGNOSTICS': tokens.Keyword, 135 | 'DICTIONARY': tokens.Keyword, 136 | 'DISCONNECT': tokens.Keyword, 137 | 'DISPATCH': tokens.Keyword, 138 | 'DO': tokens.Keyword, 139 | 'DOMAIN': tokens.Keyword, 140 | 'DYNAMIC': tokens.Keyword, 141 | 'DYNAMIC_FUNCTION': tokens.Keyword, 142 | 'DYNAMIC_FUNCTION_CODE': tokens.Keyword, 143 | 144 | 'EACH': tokens.Keyword, 145 | 'ENCODING': tokens.Keyword, 146 | 'ENCRYPTED': tokens.Keyword, 147 | 'END-EXEC': tokens.Keyword, 148 | 'EQUALS': tokens.Keyword, 149 | 'ESCAPE': tokens.Keyword, 150 | 'EVERY': tokens.Keyword, 151 | 'EXCEPT': tokens.Keyword, 152 | 'ESCEPTION': tokens.Keyword, 153 | 'EXCLUDING': tokens.Keyword, 154 | 'EXCLUSIVE': tokens.Keyword, 155 | 'EXEC': tokens.Keyword, 156 | 'EXECUTE': tokens.Keyword, 157 | 'EXISTING': tokens.Keyword, 158 | 'EXISTS': tokens.Keyword, 159 | 'EXTERNAL': tokens.Keyword, 160 | 'EXTRACT': tokens.Keyword, 161 | 162 | 'FALSE': tokens.Keyword, 163 | 'FETCH': tokens.Keyword, 164 | 'FINAL': tokens.Keyword, 165 | 'FIRST': tokens.Keyword, 166 | 'FORCE': tokens.Keyword, 167 | 'FOREIGN': tokens.Keyword, 168 | 'FORTRAN': tokens.Keyword, 169 | 'FORWARD': tokens.Keyword, 170 | 'FOUND': tokens.Keyword, 171 | 'FREE': tokens.Keyword, 172 | 'FREEZE': tokens.Keyword, 173 | 'FULL': tokens.Keyword, 174 | 'FUNCTION': tokens.Keyword, 175 | 176 | # 'G': tokens.Keyword, 177 | 'GENERAL': tokens.Keyword, 178 | 'GENERATED': tokens.Keyword, 179 | 'GET': tokens.Keyword, 180 | 'GLOBAL': tokens.Keyword, 181 | 'GO': tokens.Keyword, 182 | 'GOTO': tokens.Keyword, 183 | 'GRANT': tokens.Keyword, 184 | 'GRANTED': tokens.Keyword, 185 | 'GROUPING': tokens.Keyword, 186 | 187 | 'HANDLER': tokens.Keyword, 188 | 'HAVING': tokens.Keyword, 189 | 'HIERARCHY': tokens.Keyword, 190 | 'HOLD': tokens.Keyword, 191 | 'HOST': tokens.Keyword, 192 | 193 | 'IDENTITY': tokens.Keyword, 194 | 'IGNORE': tokens.Keyword, 195 | 'ILIKE': tokens.Keyword, 196 | 'IMMEDIATE': tokens.Keyword, 197 | 'IMMUTABLE': tokens.Keyword, 198 | 199 | 'IMPLEMENTATION': tokens.Keyword, 200 | 'IMPLICIT': tokens.Keyword, 201 | 'INCLUDING': tokens.Keyword, 202 | 'INCREMENT': tokens.Keyword, 203 | 'INDEX': tokens.Keyword, 204 | 205 | 'INDITCATOR': tokens.Keyword, 206 | 'INFIX': tokens.Keyword, 207 | 'INHERITS': tokens.Keyword, 208 | 'INITIALIZE': tokens.Keyword, 209 | 'INITIALLY': tokens.Keyword, 210 | 'INOUT': tokens.Keyword, 211 | 'INPUT': tokens.Keyword, 212 | 'INSENSITIVE': tokens.Keyword, 213 | 'INSTANTIABLE': tokens.Keyword, 214 | 'INSTEAD': tokens.Keyword, 215 | 'INTERSECT': tokens.Keyword, 216 | 'INTO': tokens.Keyword, 217 | 'INVOKER': tokens.Keyword, 218 | 'IS': tokens.Keyword, 219 | 'ISNULL': tokens.Keyword, 220 | 'ISOLATION': tokens.Keyword, 221 | 'ITERATE': tokens.Keyword, 
222 | 223 | # 'K': tokens.Keyword, 224 | 'KEY': tokens.Keyword, 225 | 'KEY_MEMBER': tokens.Keyword, 226 | 'KEY_TYPE': tokens.Keyword, 227 | 228 | 'LANCOMPILER': tokens.Keyword, 229 | 'LANGUAGE': tokens.Keyword, 230 | 'LARGE': tokens.Keyword, 231 | 'LAST': tokens.Keyword, 232 | 'LATERAL': tokens.Keyword, 233 | 'LEADING': tokens.Keyword, 234 | 'LENGTH': tokens.Keyword, 235 | 'LESS': tokens.Keyword, 236 | 'LEVEL': tokens.Keyword, 237 | 'LIMIT': tokens.Keyword, 238 | 'LISTEN': tokens.Keyword, 239 | 'LOAD': tokens.Keyword, 240 | 'LOCAL': tokens.Keyword, 241 | 'LOCALTIME': tokens.Keyword, 242 | 'LOCALTIMESTAMP': tokens.Keyword, 243 | 'LOCATION': tokens.Keyword, 244 | 'LOCATOR': tokens.Keyword, 245 | 'LOCK': tokens.Keyword, 246 | 'LOWER': tokens.Keyword, 247 | 248 | # 'M': tokens.Keyword, 249 | 'MAP': tokens.Keyword, 250 | 'MATCH': tokens.Keyword, 251 | 'MAXVALUE': tokens.Keyword, 252 | 'MESSAGE_LENGTH': tokens.Keyword, 253 | 'MESSAGE_OCTET_LENGTH': tokens.Keyword, 254 | 'MESSAGE_TEXT': tokens.Keyword, 255 | 'METHOD': tokens.Keyword, 256 | 'MINUTE': tokens.Keyword, 257 | 'MINVALUE': tokens.Keyword, 258 | 'MOD': tokens.Keyword, 259 | 'MODE': tokens.Keyword, 260 | 'MODIFIES': tokens.Keyword, 261 | 'MODIFY': tokens.Keyword, 262 | 'MONTH': tokens.Keyword, 263 | 'MORE': tokens.Keyword, 264 | 'MOVE': tokens.Keyword, 265 | 'MUMPS': tokens.Keyword, 266 | 267 | 'NAMES': tokens.Keyword, 268 | 'NATIONAL': tokens.Keyword, 269 | 'NATURAL': tokens.Keyword, 270 | 'NCHAR': tokens.Keyword, 271 | 'NCLOB': tokens.Keyword, 272 | 'NEW': tokens.Keyword, 273 | 'NEXT': tokens.Keyword, 274 | 'NO': tokens.Keyword, 275 | 'NOCREATEDB': tokens.Keyword, 276 | 'NOCREATEUSER': tokens.Keyword, 277 | 'NONE': tokens.Keyword, 278 | 'NOT': tokens.Keyword, 279 | 'NOTHING': tokens.Keyword, 280 | 'NOTIFY': tokens.Keyword, 281 | 'NOTNULL': tokens.Keyword, 282 | 'NULL': tokens.Keyword, 283 | 'NULLABLE': tokens.Keyword, 284 | 'NULLIF': tokens.Keyword, 285 | 286 | 'OBJECT': tokens.Keyword, 287 | 'OCTET_LENGTH': tokens.Keyword, 288 | 'OF': tokens.Keyword, 289 | 'OFF': tokens.Keyword, 290 | 'OFFSET': tokens.Keyword, 291 | 'OIDS': tokens.Keyword, 292 | 'OLD': tokens.Keyword, 293 | 'ONLY': tokens.Keyword, 294 | 'OPEN': tokens.Keyword, 295 | 'OPERATION': tokens.Keyword, 296 | 'OPERATOR': tokens.Keyword, 297 | 'OPTION': tokens.Keyword, 298 | 'OPTIONS': tokens.Keyword, 299 | 'ORDINALITY': tokens.Keyword, 300 | 'OUT': tokens.Keyword, 301 | 'OUTPUT': tokens.Keyword, 302 | 'OVERLAPS': tokens.Keyword, 303 | 'OVERLAY': tokens.Keyword, 304 | 'OVERRIDING': tokens.Keyword, 305 | 'OWNER': tokens.Keyword, 306 | 307 | 'PAD': tokens.Keyword, 308 | 'PARAMETER': tokens.Keyword, 309 | 'PARAMETERS': tokens.Keyword, 310 | 'PARAMETER_MODE': tokens.Keyword, 311 | 'PARAMATER_NAME': tokens.Keyword, 312 | 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, 313 | 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, 314 | 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, 315 | 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, 316 | 'PARTIAL': tokens.Keyword, 317 | 'PASCAL': tokens.Keyword, 318 | 'PENDANT': tokens.Keyword, 319 | 'PLACING': tokens.Keyword, 320 | 'PLI': tokens.Keyword, 321 | 'POSITION': tokens.Keyword, 322 | 'POSTFIX': tokens.Keyword, 323 | 'PRECISION': tokens.Keyword, 324 | 'PREFIX': tokens.Keyword, 325 | 'PREORDER': tokens.Keyword, 326 | 'PREPARE': tokens.Keyword, 327 | 'PRESERVE': tokens.Keyword, 328 | 'PRIMARY': tokens.Keyword, 329 | 'PRIOR': tokens.Keyword, 330 | 'PRIVILEGES': tokens.Keyword, 331 | 'PROCEDURAL': tokens.Keyword, 332 | 'PROCEDURE': tokens.Keyword, 333 | 
'PUBLIC': tokens.Keyword, 334 | 335 | 'RAISE': tokens.Keyword, 336 | 'READ': tokens.Keyword, 337 | 'READS': tokens.Keyword, 338 | 'RECHECK': tokens.Keyword, 339 | 'RECURSIVE': tokens.Keyword, 340 | 'REF': tokens.Keyword, 341 | 'REFERENCES': tokens.Keyword, 342 | 'REFERENCING': tokens.Keyword, 343 | 'REINDEX': tokens.Keyword, 344 | 'RELATIVE': tokens.Keyword, 345 | 'RENAME': tokens.Keyword, 346 | 'REPEATABLE': tokens.Keyword, 347 | 'RESET': tokens.Keyword, 348 | 'RESTART': tokens.Keyword, 349 | 'RESTRICT': tokens.Keyword, 350 | 'RESULT': tokens.Keyword, 351 | 'RETURN': tokens.Keyword, 352 | 'RETURNED_LENGTH': tokens.Keyword, 353 | 'RETURNED_OCTET_LENGTH': tokens.Keyword, 354 | 'RETURNED_SQLSTATE': tokens.Keyword, 355 | 'RETURNS': tokens.Keyword, 356 | 'REVOKE': tokens.Keyword, 357 | 'RIGHT': tokens.Keyword, 358 | 'ROLE': tokens.Keyword, 359 | 'ROLLBACK': tokens.Keyword, 360 | 'ROLLUP': tokens.Keyword, 361 | 'ROUTINE': tokens.Keyword, 362 | 'ROUTINE_CATALOG': tokens.Keyword, 363 | 'ROUTINE_NAME': tokens.Keyword, 364 | 'ROUTINE_SCHEMA': tokens.Keyword, 365 | 'ROW': tokens.Keyword, 366 | 'ROWS': tokens.Keyword, 367 | 'ROW_COUNT': tokens.Keyword, 368 | 'RULE': tokens.Keyword, 369 | 370 | 'SAVE_POINT': tokens.Keyword, 371 | 'SCALE': tokens.Keyword, 372 | 'SCHEMA': tokens.Keyword, 373 | 'SCHEMA_NAME': tokens.Keyword, 374 | 'SCOPE': tokens.Keyword, 375 | 'SCROLL': tokens.Keyword, 376 | 'SEARCH': tokens.Keyword, 377 | 'SECOND': tokens.Keyword, 378 | 'SECURITY': tokens.Keyword, 379 | 'SELF': tokens.Keyword, 380 | 'SENSITIVE': tokens.Keyword, 381 | 'SERIALIZABLE': tokens.Keyword, 382 | 'SERVER_NAME': tokens.Keyword, 383 | 'SESSION': tokens.Keyword, 384 | 'SESSION_USER': tokens.Keyword, 385 | 'SETOF': tokens.Keyword, 386 | 'SETS': tokens.Keyword, 387 | 'SHARE': tokens.Keyword, 388 | 'SHOW': tokens.Keyword, 389 | 'SIMILAR': tokens.Keyword, 390 | 'SIMPLE': tokens.Keyword, 391 | 'SIZE': tokens.Keyword, 392 | 'SOME': tokens.Keyword, 393 | 'SOURCE': tokens.Keyword, 394 | 'SPACE': tokens.Keyword, 395 | 'SPECIFIC': tokens.Keyword, 396 | 'SPECIFICTYPE': tokens.Keyword, 397 | 'SPECIFIC_NAME': tokens.Keyword, 398 | 'SQL': tokens.Keyword, 399 | 'SQLCODE': tokens.Keyword, 400 | 'SQLERROR': tokens.Keyword, 401 | 'SQLEXCEPTION': tokens.Keyword, 402 | 'SQLSTATE': tokens.Keyword, 403 | 'SQLWARNING': tokens.Keyword, 404 | 'STABLE': tokens.Keyword, 405 | 'START': tokens.Keyword, 406 | 'STATE': tokens.Keyword, 407 | 'STATEMENT': tokens.Keyword, 408 | 'STATIC': tokens.Keyword, 409 | 'STATISTICS': tokens.Keyword, 410 | 'STDIN': tokens.Keyword, 411 | 'STDOUT': tokens.Keyword, 412 | 'STORAGE': tokens.Keyword, 413 | 'STRICT': tokens.Keyword, 414 | 'STRUCTURE': tokens.Keyword, 415 | 'STYPE': tokens.Keyword, 416 | 'SUBCLASS_ORIGIN': tokens.Keyword, 417 | 'SUBLIST': tokens.Keyword, 418 | 'SUBSTRING': tokens.Keyword, 419 | 'SUM': tokens.Keyword, 420 | 'SYMMETRIC': tokens.Keyword, 421 | 'SYSID': tokens.Keyword, 422 | 'SYSTEM': tokens.Keyword, 423 | 'SYSTEM_USER': tokens.Keyword, 424 | 425 | 'TABLE': tokens.Keyword, 426 | 'TABLE_NAME': tokens.Keyword, 427 | ' TEMP': tokens.Keyword, 428 | 'TEMPLATE': tokens.Keyword, 429 | 'TEMPORARY': tokens.Keyword, 430 | 'TERMINATE': tokens.Keyword, 431 | 'THAN': tokens.Keyword, 432 | 'TIMESTAMP': tokens.Keyword, 433 | 'TIMEZONE_HOUR': tokens.Keyword, 434 | 'TIMEZONE_MINUTE': tokens.Keyword, 435 | 'TO': tokens.Keyword, 436 | 'TOAST': tokens.Keyword, 437 | 'TRAILING': tokens.Keyword, 438 | 'TRANSATION': tokens.Keyword, 439 | 'TRANSACTIONS_COMMITTED': tokens.Keyword, 440 | 
'TRANSACTIONS_ROLLED_BACK': tokens.Keyword, 441 | 'TRANSATION_ACTIVE': tokens.Keyword, 442 | 'TRANSFORM': tokens.Keyword, 443 | 'TRANSFORMS': tokens.Keyword, 444 | 'TRANSLATE': tokens.Keyword, 445 | 'TRANSLATION': tokens.Keyword, 446 | 'TREAT': tokens.Keyword, 447 | 'TRIGGER': tokens.Keyword, 448 | 'TRIGGER_CATALOG': tokens.Keyword, 449 | 'TRIGGER_NAME': tokens.Keyword, 450 | 'TRIGGER_SCHEMA': tokens.Keyword, 451 | 'TRIM': tokens.Keyword, 452 | 'TRUE': tokens.Keyword, 453 | 'TRUNCATE': tokens.Keyword, 454 | 'TRUSTED': tokens.Keyword, 455 | 'TYPE': tokens.Keyword, 456 | 457 | 'UNCOMMITTED': tokens.Keyword, 458 | 'UNDER': tokens.Keyword, 459 | 'UNENCRYPTED': tokens.Keyword, 460 | 'UNION': tokens.Keyword, 461 | 'UNIQUE': tokens.Keyword, 462 | 'UNKNOWN': tokens.Keyword, 463 | 'UNLISTEN': tokens.Keyword, 464 | 'UNNAMED': tokens.Keyword, 465 | 'UNNEST': tokens.Keyword, 466 | 'UNTIL': tokens.Keyword, 467 | 'UPPER': tokens.Keyword, 468 | 'USAGE': tokens.Keyword, 469 | 'USER': tokens.Keyword, 470 | 'USER_DEFINED_TYPE_CATALOG': tokens.Keyword, 471 | 'USER_DEFINED_TYPE_NAME': tokens.Keyword, 472 | 'USER_DEFINED_TYPE_SCHEMA': tokens.Keyword, 473 | 'USING': tokens.Keyword, 474 | 475 | 'VACUUM': tokens.Keyword, 476 | 'VALID': tokens.Keyword, 477 | 'VALIDATOR': tokens.Keyword, 478 | 'VALUES': tokens.Keyword, 479 | 'VARIABLE': tokens.Keyword, 480 | 'VERBOSE': tokens.Keyword, 481 | 'VERSION': tokens.Keyword, 482 | 'VIEW': tokens.Keyword, 483 | 'VOLATILE': tokens.Keyword, 484 | 485 | 'WHENEVER': tokens.Keyword, 486 | 'WITH': tokens.Keyword, 487 | 'WITHOUT': tokens.Keyword, 488 | 'WORK': tokens.Keyword, 489 | 'WRITE': tokens.Keyword, 490 | 491 | 'YEAR': tokens.Keyword, 492 | 493 | 'ZONE': tokens.Keyword, 494 | 495 | 496 | 'ARRAY': tokens.Name.Builtin, 497 | 'BIGINT': tokens.Name.Builtin, 498 | 'BINARY': tokens.Name.Builtin, 499 | 'BIT': tokens.Name.Builtin, 500 | 'BLOB': tokens.Name.Builtin, 501 | 'BOOLEAN': tokens.Name.Builtin, 502 | 'CHAR': tokens.Name.Builtin, 503 | 'CHARACTER': tokens.Name.Builtin, 504 | 'DATE': tokens.Name.Builtin, 505 | 'DEC': tokens.Name.Builtin, 506 | 'DECIMAL': tokens.Name.Builtin, 507 | 'FLOAT': tokens.Name.Builtin, 508 | 'INT': tokens.Name.Builtin, 509 | 'INTEGER': tokens.Name.Builtin, 510 | 'INTERVAL': tokens.Name.Builtin, 511 | 'LONG': tokens.Name.Builtin, 512 | 'NUMBER': tokens.Name.Builtin, 513 | 'NUMERIC': tokens.Name.Builtin, 514 | 'REAL': tokens.Name.Builtin, 515 | 'SERIAL': tokens.Name.Builtin, 516 | 'SMALLINT': tokens.Name.Builtin, 517 | 'VARCHAR': tokens.Name.Builtin, 518 | 'VARCHAR2': tokens.Name.Builtin, 519 | 'VARYING': tokens.Name.Builtin, 520 | 'INT8': tokens.Name.Builtin, 521 | 'SERIAL8': tokens.Name.Builtin, 522 | 'TEXT': tokens.Name.Builtin, 523 | } 524 | 525 | 526 | KEYWORDS_COMMON = { 527 | 'SELECT': tokens.Keyword.DML, 528 | 'INSERT': tokens.Keyword.DML, 529 | 'DELETE': tokens.Keyword.DML, 530 | 'UPDATE': tokens.Keyword.DML, 531 | 'REPLACE': tokens.Keyword.DML, 532 | 'DROP': tokens.Keyword.DDL, 533 | 'CREATE': tokens.Keyword.DDL, 534 | 'ALTER': tokens.Keyword.DDL, 535 | 536 | 'WHERE': tokens.Keyword, 537 | 'FROM': tokens.Keyword, 538 | 'INNER': tokens.Keyword, 539 | 'JOIN': tokens.Keyword, 540 | 'AND': tokens.Keyword, 541 | 'OR': tokens.Keyword, 542 | 'LIKE': tokens.Keyword, 543 | 'ON': tokens.Keyword, 544 | 'IN': tokens.Keyword, 545 | 'SET': tokens.Keyword, 546 | 547 | 'BY': tokens.Keyword, 548 | 'GROUP': tokens.Keyword, 549 | 'ORDER': tokens.Keyword, 550 | 'LEFT': tokens.Keyword, 551 | 'OUTER': tokens.Keyword, 552 | 553 | 'IF': tokens.Keyword, 554 | 'END': 
tokens.Keyword, 555 | 'THEN': tokens.Keyword, 556 | 'LOOP': tokens.Keyword, 557 | 'AS': tokens.Keyword, 558 | 'ELSE': tokens.Keyword, 559 | 'FOR': tokens.Keyword, 560 | 561 | 'CASE': tokens.Keyword, 562 | 'WHEN': tokens.Keyword, 563 | 'MIN': tokens.Keyword, 564 | 'MAX': tokens.Keyword, 565 | 'DISTINCT': tokens.Keyword, 566 | } 567 | -------------------------------------------------------------------------------- /sqlparse/lexer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 4 | # 5 | # This module is part of python-sqlparse and is released under 6 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 7 | 8 | """SQL Lexer""" 9 | 10 | # This code is based on the SqlLexer in pygments. 11 | # http://pygments.org/ 12 | # It's separated from the rest of pygments to increase performance 13 | # and to allow some customizations. 14 | from __future__ import absolute_import 15 | from __future__ import unicode_literals 16 | import re 17 | 18 | from . import tokens 19 | from .keywords import KEYWORDS, KEYWORDS_COMMON 20 | 21 | 22 | class include(str): 23 | pass 24 | 25 | 26 | class combined(tuple): 27 | """Indicates a state combined from multiple states.""" 28 | 29 | def __new__(cls, *args): 30 | return tuple.__new__(cls, args) 31 | 32 | def __init__(self, *args): 33 | # tuple.__init__ doesn't do anything 34 | pass 35 | 36 | 37 | def is_keyword(value): 38 | test = value.upper() 39 | return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value 40 | 41 | 42 | def apply_filters(stream, filters, lexer=None): 43 | """ 44 | Use this method to apply an iterable of filters to 45 | a stream. If lexer is given it's forwarded to the 46 | filter, otherwise the filter receives `None`. 47 | """ 48 | 49 | def _apply(filter_, stream): 50 | for token in filter_.filter(lexer, stream): 51 | yield token 52 | 53 | for filter_ in filters: 54 | stream = _apply(filter_, stream) 55 | return stream 56 | 57 | 58 | class LexerMeta(type): 59 | """ 60 | Metaclass for Lexer, creates the self._tokens attribute from 61 | self.tokens on the first instantiation. 
62 | """ 63 | 64 | def _process_state(cls, unprocessed, processed, state): 65 | assert type(state) is str, "wrong state name %r" % state 66 | assert state[0] != '#', "invalid state name %r" % state 67 | if state in processed: 68 | return processed[state] 69 | tokenlist = processed[state] = [] 70 | rflags = cls.flags 71 | for tdef in unprocessed[state]: 72 | if isinstance(tdef, include): 73 | # it's a state reference 74 | assert tdef != state, "circular state reference %r" % state 75 | tokenlist.extend(cls._process_state( 76 | unprocessed, processed, str(tdef))) 77 | continue 78 | 79 | assert type(tdef) is tuple, "wrong rule def %r" % tdef 80 | 81 | try: 82 | rex = re.compile(tdef[0], rflags).match 83 | except Exception as err: 84 | raise ValueError(("uncompilable regex %r in state" 85 | " %r of %r: %s" 86 | % (tdef[0], state, cls, err))) 87 | 88 | assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \ 89 | ('token type must be simple type or callable, not %r' 90 | % (tdef[1],)) 91 | 92 | if len(tdef) == 2: 93 | new_state = None 94 | else: 95 | tdef2 = tdef[2] 96 | if isinstance(tdef2, str): 97 | # an existing state 98 | if tdef2 == '#pop': 99 | new_state = -1 100 | elif tdef2 in unprocessed: 101 | new_state = (tdef2,) 102 | elif tdef2 == '#push': 103 | new_state = tdef2 104 | elif tdef2[:5] == '#pop:': 105 | new_state = -int(tdef2[5:]) 106 | else: 107 | assert False, 'unknown new state %r' % tdef2 108 | elif isinstance(tdef2, combined): 109 | # combine a new state from existing ones 110 | new_state = '_tmp_%d' % cls._tmpname 111 | cls._tmpname += 1 112 | itokens = [] 113 | for istate in tdef2: 114 | assert istate != state, \ 115 | 'circular state ref %r' % istate 116 | itokens.extend(cls._process_state(unprocessed, 117 | processed, istate)) 118 | processed[new_state] = itokens 119 | new_state = (new_state,) 120 | elif isinstance(tdef2, tuple): 121 | # push more than one state 122 | for state in tdef2: 123 | assert (state in unprocessed or 124 | state in ('#pop', '#push')), \ 125 | 'unknown new state ' + state 126 | new_state = tdef2 127 | else: 128 | assert False, 'unknown new state def %r' % tdef2 129 | tokenlist.append((rex, tdef[1], new_state)) 130 | return tokenlist 131 | 132 | def process_tokendef(cls): 133 | cls._all_tokens = {} 134 | cls._tmpname = 0 135 | processed = cls._all_tokens[cls.__name__] = {} 136 | #tokendefs = tokendefs or cls.tokens[name] 137 | for state in cls.tokens.keys(): 138 | cls._process_state(cls.tokens, processed, state) 139 | return processed 140 | 141 | def __call__(cls, *args, **kwds): 142 | if not hasattr(cls, '_tokens'): 143 | cls._all_tokens = {} 144 | cls._tmpname = 0 145 | if hasattr(cls, 'token_variants') and cls.token_variants: 146 | # don't process yet 147 | pass 148 | else: 149 | cls._tokens = cls.process_tokendef() 150 | 151 | return type.__call__(cls, *args, **kwds) 152 | 153 | 154 | class Lexer(object, metaclass=LexerMeta): 155 | 156 | encoding = 'utf-8' 157 | stripall = False 158 | stripnl = False 159 | tabsize = 0 160 | flags = re.IGNORECASE 161 | 162 | tokens = { 163 | 'root': [ 164 | (r'--.*?(\r\n|\r|\n)', tokens.Comment.Single), 165 | # $ matches *before* newline, therefore we have two patterns 166 | # to match Comment.Single 167 | (r'--.*?$', tokens.Comment.Single), 168 | (r'(\r|\n|\r\n)', tokens.Newline), 169 | (r'\s+', tokens.Whitespace), 170 | (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), 171 | (r':=', tokens.Assignment), 172 | (r'::', tokens.Punctuation), 173 | (r'[*]', tokens.Wildcard), 174 | (r'CASE\b', 
tokens.Keyword), # extended CASE(foo) 175 | (r"`(``|[^`])*`", tokens.Name), 176 | (r"´(´´|[^´])*´", tokens.Name), 177 | (r'\$([a-zA-Z_][a-zA-Z0-9_]*)?\$', tokens.Name.Builtin), 178 | (r'\?{1}', tokens.Name.Placeholder), 179 | (r'[$:?%][a-zA-Z0-9_]+[^$:?%]?', tokens.Name.Placeholder), 180 | (r'@[a-zA-Z_][a-zA-Z0-9_]+', tokens.Name), 181 | (r'[a-zA-Z_][a-zA-Z0-9_]*(?=[.(])', tokens.Name), # see issue39 182 | (r'[<>=~!]+', tokens.Operator.Comparison), 183 | (r'[+/@#%^&|`?^-]+', tokens.Operator), 184 | (r'0x[0-9a-fA-F]+', tokens.Number.Hexadecimal), 185 | (r'[0-9]*\.[0-9]+', tokens.Number.Float), 186 | (r'[0-9]+', tokens.Number.Integer), 187 | # TODO: Backslash escapes? 188 | (r"(''|'.*?[^\\]')", tokens.String.Single), 189 | # not a real string literal in ANSI SQL: 190 | (r'(""|".*?[^\\]")', tokens.String.Symbol), 191 | (r'(\[.*[^\]]\])', tokens.Name), 192 | (r'(LEFT |RIGHT )?(INNER |OUTER )?JOIN\b', tokens.Keyword), 193 | (r'END( IF| LOOP)?\b', tokens.Keyword), 194 | (r'NOT NULL\b', tokens.Keyword), 195 | (r'CREATE( OR REPLACE)?\b', tokens.Keyword.DDL), 196 | (r'(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*', tokens.Name), 197 | (r'[a-zA-Z_][a-zA-Z0-9_]*', is_keyword), 198 | (r'[;:()\[\],\.]', tokens.Punctuation), 199 | ], 200 | 'multiline-comments': [ 201 | (r'/\*', tokens.Comment.Multiline, 'multiline-comments'), 202 | (r'\*/', tokens.Comment.Multiline, '#pop'), 203 | (r'[^/\*]+', tokens.Comment.Multiline), 204 | (r'[/*]', tokens.Comment.Multiline) 205 | ]} 206 | 207 | def __init__(self): 208 | self.filters = [] 209 | 210 | def add_filter(self, filter_, **options): 211 | from .filters import Filter 212 | if not isinstance(filter_, Filter): 213 | filter_ = filter_(**options) 214 | self.filters.append(filter_) 215 | 216 | def get_tokens(self, text, unfiltered=False): 217 | """ 218 | Return an iterable of (tokentype, value) pairs generated from 219 | `text`. If `unfiltered` is set to `True`, the filtering mechanism 220 | is bypassed even if filters are defined. 221 | 222 | Also preprocess the text, i.e. expand tabs and strip it if 223 | wanted and applies registered filters. 224 | """ 225 | if not isinstance(text, str): 226 | if self.encoding == 'guess': 227 | try: 228 | text = text.decode('utf-8') 229 | if text.startswith(u'\ufeff'): 230 | text = text[len(u'\ufeff'):] 231 | except UnicodeDecodeError: 232 | text = text.decode('latin1') 233 | else: 234 | text = text.decode(self.encoding) 235 | if self.stripall: 236 | text = text.strip() 237 | elif self.stripnl: 238 | text = text.strip('\n') 239 | if self.tabsize > 0: 240 | text = text.expandtabs(self.tabsize) 241 | # if not text.endswith('\n'): 242 | # text += '\n' 243 | 244 | def streamer(): 245 | for i, t, v in self.get_tokens_unprocessed(text): 246 | yield t, v 247 | stream = streamer() 248 | if not unfiltered: 249 | stream = apply_filters(stream, self.filters, self) 250 | return stream 251 | 252 | def get_tokens_unprocessed(self, text, stack=('root',)): 253 | """ 254 | Split ``text`` into (tokentype, text) pairs. 
255 | 256 | ``stack`` is the inital stack (default: ``['root']``) 257 | """ 258 | pos = 0 259 | tokendefs = self._tokens # see __call__, pylint:disable=E1101 260 | statestack = list(stack) 261 | statetokens = tokendefs[statestack[-1]] 262 | known_names = {} 263 | while 1: 264 | for rexmatch, action, new_state in statetokens: 265 | m = rexmatch(text, pos) 266 | if m: 267 | # print rex.pattern 268 | value = m.group() 269 | if value in known_names: 270 | yield pos, known_names[value], value 271 | elif type(action) is tokens._TokenType: 272 | yield pos, action, value 273 | elif hasattr(action, '__call__'): 274 | ttype, value = action(value) 275 | known_names[value] = ttype 276 | yield pos, ttype, value 277 | else: 278 | for item in action(self, m): 279 | yield item 280 | pos = m.end() 281 | if new_state is not None: 282 | # state transition 283 | if isinstance(new_state, tuple): 284 | for state in new_state: 285 | if state == '#pop': 286 | statestack.pop() 287 | elif state == '#push': 288 | statestack.append(statestack[-1]) 289 | else: 290 | statestack.append(state) 291 | elif isinstance(new_state, int): 292 | # pop 293 | del statestack[new_state:] 294 | elif new_state == '#push': 295 | statestack.append(statestack[-1]) 296 | else: 297 | assert False, "wrong state def: %r" % new_state 298 | statetokens = tokendefs[statestack[-1]] 299 | break 300 | else: 301 | try: 302 | if text[pos] == '\n': 303 | # at EOL, reset state to "root" 304 | pos += 1 305 | statestack = ['root'] 306 | statetokens = tokendefs['root'] 307 | yield pos, tokens.Text, u'\n' 308 | continue 309 | yield pos, tokens.Error, text[pos] 310 | pos += 1 311 | except IndexError: 312 | break 313 | 314 | 315 | def tokenize(sql): 316 | """Tokenize sql. 317 | 318 | Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream 319 | of ``(token type, value)`` items. 320 | """ 321 | lexer = Lexer() 322 | return lexer.get_tokens(sql) 323 | -------------------------------------------------------------------------------- /sqlparse/pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 
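# Editor's note, not part of the upstream file: a minimal usage sketch for the
# Pipeline class defined below.  It assumes the sibling lexer module and a
# KeywordCaseFilter in filters.py are importable as shown; both names come from
# the bundled library, not from this file.
#
#     from sqlparse import lexer
#     from sqlparse.filters import KeywordCaseFilter
#     from sqlparse.pipeline import Pipeline
#
#     pipe = Pipeline()
#     pipe.append(lexer.tokenize)               # plain callables are invoked directly
#     pipe.append(KeywordCaseFilter('upper'))   # filters are run via their process() method
#     tokens = pipe('select 1')                 # -> list of (token type, value) pairs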
5 | from __future__ import absolute_import 6 | from types import GeneratorType 7 | 8 | 9 | class Pipeline(list): 10 | """Pipeline to process filters sequentially""" 11 | 12 | def __call__(self, stream): 13 | """Run the pipeline 14 | 15 | Return a static (non generator) version of the result 16 | """ 17 | 18 | # Run the stream over all the filters on the pipeline 19 | for filter in self: 20 | # Functions and callable objects (objects with '__call__' method) 21 | if callable(filter): 22 | stream = filter(stream) 23 | 24 | # Normal filters (objects with 'process' method) 25 | else: 26 | stream = filter.process(None, stream) 27 | 28 | # If last filter return a generator, staticalize it inside a list 29 | if isinstance(stream, GeneratorType): 30 | return list(stream) 31 | return stream 32 | -------------------------------------------------------------------------------- /sqlparse/sql.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """This module contains classes representing syntactical elements of SQL.""" 4 | from __future__ import absolute_import 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | import re 8 | import sys 9 | 10 | from . import tokens as T 11 | 12 | 13 | class Token(object): 14 | """Base class for all other classes in this module. 15 | 16 | It represents a single token and has two instance attributes: 17 | ``value`` is the unchange value of the token and ``ttype`` is 18 | the type of the token. 19 | """ 20 | 21 | __slots__ = ('value', 'ttype', 'parent') 22 | 23 | def __init__(self, ttype, value): 24 | self.value = value 25 | self.ttype = ttype 26 | self.parent = None 27 | 28 | def __str__(self): 29 | if sys.version_info > (3, 0): 30 | return self.__unicode__() 31 | else: 32 | return unicode(self).encode('utf-8') 33 | 34 | def __repr__(self): 35 | short = self._get_repr_value() 36 | return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(), 37 | short, id(self)) 38 | 39 | def __unicode__(self): 40 | return self.value or '' 41 | 42 | def to_unicode(self): 43 | """Returns a unicode representation of this object.""" 44 | return str(self) 45 | 46 | def _get_repr_name(self): 47 | return str(self.ttype).split('.')[-1] 48 | 49 | def _get_repr_value(self): 50 | raw = str(self) 51 | if len(raw) > 7: 52 | short = raw[:6] + u'...' 53 | else: 54 | short = raw 55 | return re.sub('\s+', ' ', short) 56 | 57 | def flatten(self): 58 | """Resolve subgroups.""" 59 | yield self 60 | 61 | def match(self, ttype, values, regex=False): 62 | """Checks whether the token matches the given arguments. 63 | 64 | *ttype* is a token type. If this token doesn't match the given token 65 | type. 66 | *values* is a list of possible values for this token. The values 67 | are OR'ed together so if only one of the values matches ``True`` 68 | is returned. Except for keyword tokens the comparison is 69 | case-sensitive. For convenience it's ok to pass in a single string. 70 | If *regex* is ``True`` (default is ``False``) the given values are 71 | treated as regular expressions. 
72 | """ 73 | type_matched = self.ttype is ttype 74 | if not type_matched or values is None: 75 | return type_matched 76 | if isinstance(values, str): 77 | values = set([values]) 78 | if regex: 79 | if self.ttype is T.Keyword: 80 | values = set([re.compile(v, re.IGNORECASE) for v in values]) 81 | else: 82 | values = set([re.compile(v) for v in values]) 83 | for pattern in values: 84 | if pattern.search(self.value): 85 | return True 86 | return False 87 | else: 88 | if self.ttype in T.Keyword: 89 | values = set([v.upper() for v in values]) 90 | return self.value.upper() in values 91 | else: 92 | return self.value in values 93 | 94 | def is_group(self): 95 | """Returns ``True`` if this object has children.""" 96 | return False 97 | 98 | def is_whitespace(self): 99 | """Return ``True`` if this token is a whitespace token.""" 100 | return self.ttype and self.ttype in T.Whitespace 101 | 102 | def within(self, group_cls): 103 | """Returns ``True`` if this token is within *group_cls*. 104 | 105 | Use this method for example to check if an identifier is within 106 | a function: ``t.within(sql.Function)``. 107 | """ 108 | parent = self.parent 109 | while parent: 110 | if isinstance(parent, group_cls): 111 | return True 112 | parent = parent.parent 113 | return False 114 | 115 | def is_child_of(self, other): 116 | """Returns ``True`` if this token is a direct child of *other*.""" 117 | return self.parent == other 118 | 119 | def has_ancestor(self, other): 120 | """Returns ``True`` if *other* is in this tokens ancestry.""" 121 | parent = self.parent 122 | while parent: 123 | if parent == other: 124 | return True 125 | parent = parent.parent 126 | return False 127 | 128 | 129 | class TokenList(Token): 130 | """A group of tokens. 131 | 132 | It has an additional instance attribute ``tokens`` which holds a 133 | list of child-tokens. 134 | """ 135 | 136 | __slots__ = ('value', 'ttype', 'tokens') 137 | 138 | def __init__(self, tokens=None): 139 | if tokens is None: 140 | tokens = [] 141 | self.tokens = tokens 142 | Token.__init__(self, None, None) 143 | 144 | def __unicode__(self): 145 | return ''.join(str(x) for x in self.flatten()) 146 | 147 | def __str__(self): 148 | if sys.version_info > (3, 0): 149 | return self.__unicode__() 150 | else: 151 | return unicode(self).encode('utf-8') 152 | 153 | def _get_repr_name(self): 154 | return self.__class__.__name__ 155 | 156 | def _pprint_tree(self, max_depth=None, depth=0): 157 | """Pretty-print the object tree.""" 158 | indent = ' ' * (depth * 2) 159 | for idx, token in enumerate(self.tokens): 160 | if token.is_group(): 161 | pre = ' +-' 162 | else: 163 | pre = ' | ' 164 | print("%s%s%d %s '%s'" % ( 165 | indent, 166 | pre, 167 | idx, 168 | token._get_repr_name(), 169 | token._get_repr_value() 170 | )) 171 | if (token.is_group() and (max_depth is None or depth < max_depth)): 172 | token._pprint_tree(max_depth, depth + 1) 173 | 174 | def flatten(self): 175 | """Generator yielding ungrouped tokens. 176 | 177 | This method is recursively called for all child tokens. 178 | """ 179 | for token in self.tokens: 180 | if isinstance(token, TokenList): 181 | for item in token.flatten(): 182 | yield item 183 | else: 184 | yield token 185 | 186 | def is_group(self): 187 | return True 188 | 189 | def get_sublists(self): 190 | return [x for x in self.tokens if isinstance(x, TokenList)] 191 | 192 | @property 193 | def _groupable_tokens(self): 194 | return self.tokens 195 | 196 | def token_first(self, ignore_whitespace=True): 197 | """Returns the first child token. 
198 | 199 | If *ignore_whitespace* is ``True`` (the default), whitespace 200 | tokens are ignored. 201 | """ 202 | for token in self.tokens: 203 | if ignore_whitespace and token.is_whitespace(): 204 | continue 205 | return token 206 | return None 207 | 208 | def token_next_by_instance(self, idx, clss): 209 | """Returns the next token matching a class. 210 | 211 | *idx* is where to start searching in the list of child tokens. 212 | *clss* is a list of classes the token should be an instance of. 213 | 214 | If no matching token can be found ``None`` is returned. 215 | """ 216 | if isinstance(clss, (list, tuple)): 217 | clss = (clss,) 218 | if isinstance(clss, tuple): 219 | clss = tuple(clss) 220 | for token in self.tokens[idx:]: 221 | if isinstance(token, clss): 222 | return token 223 | return None 224 | 225 | def token_next_by_type(self, idx, ttypes): 226 | """Returns next matching token by it's token type.""" 227 | if not isinstance(ttypes, (list, tuple)): 228 | ttypes = [ttypes] 229 | for token in self.tokens[idx:]: 230 | if token.ttype in ttypes: 231 | return token 232 | return None 233 | 234 | def token_next_match(self, idx, ttype, value, regex=False): 235 | """Returns next token where it's ``match`` method returns ``True``.""" 236 | if not isinstance(idx, int): 237 | idx = self.token_index(idx) 238 | for token in self.tokens[idx:]: 239 | if token.match(ttype, value, regex): 240 | return token 241 | return None 242 | 243 | def token_not_matching(self, idx, funcs): 244 | for token in self.tokens[idx:]: 245 | passed = False 246 | for func in funcs: 247 | if func(token): 248 | passed = True 249 | break 250 | if not passed: 251 | return token 252 | return None 253 | 254 | def token_matching(self, idx, funcs): 255 | for token in self.tokens[idx:]: 256 | for i, func in enumerate(funcs): 257 | if func(token): 258 | return token 259 | return None 260 | 261 | def token_prev(self, idx, skip_ws=True): 262 | """Returns the previous token relative to *idx*. 263 | 264 | If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. 265 | ``None`` is returned if there's no previous token. 266 | """ 267 | if idx is None: 268 | return None 269 | if not isinstance(idx, int): 270 | idx = self.token_index(idx) 271 | while idx != 0: 272 | idx -= 1 273 | if self.tokens[idx].is_whitespace() and skip_ws: 274 | continue 275 | return self.tokens[idx] 276 | 277 | def token_next(self, idx, skip_ws=True): 278 | """Returns the next token relative to *idx*. 279 | 280 | If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. 281 | ``None`` is returned if there's no next token. 282 | """ 283 | if idx is None: 284 | return None 285 | if not isinstance(idx, int): 286 | idx = self.token_index(idx) 287 | while idx < len(self.tokens) - 1: 288 | idx += 1 289 | if self.tokens[idx].is_whitespace() and skip_ws: 290 | continue 291 | return self.tokens[idx] 292 | 293 | def token_index(self, token): 294 | """Return list index of token.""" 295 | return self.tokens.index(token) 296 | 297 | def tokens_between(self, start, end, exclude_end=False): 298 | """Return all tokens between (and including) start and end. 299 | 300 | If *exclude_end* is ``True`` (default is ``False``) the end token 301 | is included too. 
302 | """ 303 | # FIXME(andi): rename exclude_end to inlcude_end 304 | if exclude_end: 305 | offset = 0 306 | else: 307 | offset = 1 308 | end_idx = self.token_index(end) + offset 309 | start_idx = self.token_index(start) 310 | return self.tokens[start_idx:end_idx] 311 | 312 | def group_tokens(self, grp_cls, tokens, ignore_ws=False): 313 | """Replace tokens by an instance of *grp_cls*.""" 314 | idx = self.token_index(tokens[0]) 315 | if ignore_ws: 316 | while tokens and tokens[-1].is_whitespace(): 317 | tokens = tokens[:-1] 318 | for t in tokens: 319 | self.tokens.remove(t) 320 | grp = grp_cls(tokens) 321 | for token in tokens: 322 | token.parent = grp 323 | grp.parent = self 324 | self.tokens.insert(idx, grp) 325 | return grp 326 | 327 | def insert_before(self, where, token): 328 | """Inserts *token* before *where*.""" 329 | self.tokens.insert(self.token_index(where), token) 330 | 331 | def has_alias(self): 332 | """Returns ``True`` if an alias is present.""" 333 | return self.get_alias() is not None 334 | 335 | def get_alias(self): 336 | """Returns the alias for this identifier or ``None``.""" 337 | kw = self.token_next_match(0, T.Keyword, 'AS') 338 | if kw is not None: 339 | alias = self.token_next(self.token_index(kw)) 340 | if alias is None: 341 | return None 342 | else: 343 | next_ = self.token_next_by_instance(0, Identifier) 344 | if next_ is None: 345 | return None 346 | alias = next_ 347 | if isinstance(alias, Identifier): 348 | return alias.get_name() 349 | else: 350 | return alias.to_unicode() 351 | 352 | def get_name(self): 353 | """Returns the name of this identifier. 354 | 355 | This is either it's alias or it's real name. The returned valued can 356 | be considered as the name under which the object corresponding to 357 | this identifier is known within the current statement. 358 | """ 359 | alias = self.get_alias() 360 | if alias is not None: 361 | return alias 362 | return self.get_real_name() 363 | 364 | def get_real_name(self): 365 | """Returns the real name (object name) of this identifier.""" 366 | # a.b 367 | dot = self.token_next_match(0, T.Punctuation, '.') 368 | if dot is None: 369 | return self.token_next_by_type(0, T.Name).value 370 | else: 371 | next_ = self.token_next_by_type(self.token_index(dot), 372 | (T.Name, T.Wildcard)) 373 | if next_ is None: # invalid identifier, e.g. "a." 374 | return None 375 | return next_.value 376 | 377 | 378 | 379 | class Statement(TokenList): 380 | """Represents a SQL statement.""" 381 | 382 | __slots__ = ('value', 'ttype', 'tokens') 383 | 384 | def get_type(self): 385 | """Returns the type of a statement. 386 | 387 | The returned value is a string holding an upper-cased reprint of 388 | the first DML or DDL keyword. If the first token in this group 389 | isn't a DML or DDL keyword "UNKNOWN" is returned. 390 | """ 391 | first_token = self.token_first() 392 | if first_token is None: 393 | # An "empty" statement that either has not tokens at all 394 | # or only whitespace tokens. 395 | return 'UNKNOWN' 396 | elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): 397 | return first_token.value.upper() 398 | else: 399 | return 'UNKNOWN' 400 | 401 | 402 | class Identifier(TokenList): 403 | """Represents an identifier. 404 | 405 | Identifiers may have aliases or typecasts. 406 | """ 407 | 408 | __slots__ = ('value', 'ttype', 'tokens') 409 | 410 | def get_parent_name(self): 411 | """Return name of the parent object if any. 412 | 413 | A parent object is identified by the first occuring dot. 
414 | """ 415 | dot = self.token_next_match(0, T.Punctuation, '.') 416 | if dot is None: 417 | return None 418 | prev_ = self.token_prev(self.token_index(dot)) 419 | if prev_ is None: # something must be verry wrong here.. 420 | return None 421 | return prev_.value 422 | 423 | def is_wildcard(self): 424 | """Return ``True`` if this identifier contains a wildcard.""" 425 | token = self.token_next_by_type(0, T.Wildcard) 426 | return token is not None 427 | 428 | def get_typecast(self): 429 | """Returns the typecast or ``None`` of this object as a string.""" 430 | marker = self.token_next_match(0, T.Punctuation, '::') 431 | if marker is None: 432 | return None 433 | next_ = self.token_next(self.token_index(marker), False) 434 | if next_ is None: 435 | return None 436 | return next_.to_unicode() 437 | 438 | 439 | class IdentifierList(TokenList): 440 | """A list of :class:`~sqlparse.sql.Identifier`\'s.""" 441 | 442 | __slots__ = ('value', 'ttype', 'tokens') 443 | 444 | def get_identifiers(self): 445 | """Returns the identifiers. 446 | 447 | Whitespaces and punctuations are not included in this list. 448 | """ 449 | return [x for x in self.tokens 450 | if not x.is_whitespace() and not x.match(T.Punctuation, ',')] 451 | 452 | 453 | class Parenthesis(TokenList): 454 | """Tokens between parenthesis.""" 455 | __slots__ = ('value', 'ttype', 'tokens') 456 | 457 | @property 458 | def _groupable_tokens(self): 459 | return self.tokens[1:-1] 460 | 461 | 462 | class Assignment(TokenList): 463 | """An assignment like 'var := val;'""" 464 | __slots__ = ('value', 'ttype', 'tokens') 465 | 466 | 467 | class If(TokenList): 468 | """An 'if' clause with possible 'else if' or 'else' parts.""" 469 | __slots__ = ('value', 'ttype', 'tokens') 470 | 471 | 472 | class For(TokenList): 473 | """A 'FOR' loop.""" 474 | __slots__ = ('value', 'ttype', 'tokens') 475 | 476 | 477 | class Comparison(TokenList): 478 | """A comparison used for example in WHERE clauses.""" 479 | __slots__ = ('value', 'ttype', 'tokens') 480 | 481 | 482 | class Comment(TokenList): 483 | """A comment.""" 484 | __slots__ = ('value', 'ttype', 'tokens') 485 | 486 | 487 | class Where(TokenList): 488 | """A WHERE clause.""" 489 | __slots__ = ('value', 'ttype', 'tokens') 490 | 491 | 492 | class Case(TokenList): 493 | """A CASE statement with one or more WHEN and possibly an ELSE part.""" 494 | 495 | __slots__ = ('value', 'ttype', 'tokens') 496 | 497 | def get_cases(self): 498 | """Returns a list of 2-tuples (condition, value). 499 | 500 | If an ELSE exists condition is None. 
501 | """ 502 | ret = [] 503 | in_value = False 504 | in_condition = True 505 | for token in self.tokens: 506 | if token.match(T.Keyword, 'CASE'): 507 | continue 508 | elif token.match(T.Keyword, 'WHEN'): 509 | ret.append(([], [])) 510 | in_condition = True 511 | in_value = False 512 | elif token.match(T.Keyword, 'ELSE'): 513 | ret.append((None, [])) 514 | in_condition = False 515 | in_value = True 516 | elif token.match(T.Keyword, 'THEN'): 517 | in_condition = False 518 | in_value = True 519 | elif token.match(T.Keyword, 'END'): 520 | in_condition = False 521 | in_value = False 522 | if (in_condition or in_value) and not ret: 523 | # First condition withou preceding WHEN 524 | ret.append(([], [])) 525 | if in_condition: 526 | ret[-1][0].append(token) 527 | elif in_value: 528 | ret[-1][1].append(token) 529 | return ret 530 | 531 | 532 | class Function(TokenList): 533 | """A function or procedure call.""" 534 | 535 | __slots__ = ('value', 'ttype', 'tokens') 536 | 537 | def get_parameters(self): 538 | """Return a list of parameters.""" 539 | parenthesis = self.tokens[-1] 540 | for t in parenthesis.tokens: 541 | if isinstance(t, IdentifierList): 542 | return t.get_identifiers() 543 | return [] 544 | -------------------------------------------------------------------------------- /sqlparse/tokens.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com 2 | # 3 | # This module is part of python-sqlparse and is released under 4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php. 5 | 6 | # The Token implementation is based on pygment's token system written 7 | # by Georg Brandl. 8 | # http://pygments.org/ 9 | 10 | """Tokens""" 11 | from __future__ import absolute_import 12 | 13 | 14 | class _TokenType(tuple): 15 | parent = None 16 | 17 | def split(self): 18 | buf = [] 19 | node = self 20 | while node is not None: 21 | buf.append(node) 22 | node = node.parent 23 | buf.reverse() 24 | return buf 25 | 26 | def __contains__(self, val): 27 | return val is not None and (self is val or val[:len(self)] == self) 28 | 29 | def __getattr__(self, val): 30 | if not val or not val[0].isupper(): 31 | return tuple.__getattribute__(self, val) 32 | new = _TokenType(self + (val,)) 33 | setattr(self, val, new) 34 | new.parent = self 35 | return new 36 | 37 | def __hash__(self): 38 | return hash(tuple(self)) 39 | 40 | def __repr__(self): 41 | return 'Token' + (self and '.' or '') + '.'.join(self) 42 | 43 | 44 | Token = _TokenType() 45 | 46 | # Special token types 47 | Text = Token.Text 48 | Whitespace = Text.Whitespace 49 | Newline = Whitespace.Newline 50 | Error = Token.Error 51 | # Text that doesn't belong to this lexer (e.g. HTML in PHP) 52 | Other = Token.Other 53 | 54 | # Common token types for source code 55 | Keyword = Token.Keyword 56 | Name = Token.Name 57 | Literal = Token.Literal 58 | String = Literal.String 59 | Number = Literal.Number 60 | Punctuation = Token.Punctuation 61 | Operator = Token.Operator 62 | Comparison = Operator.Comparison 63 | Wildcard = Token.Wildcard 64 | Comment = Token.Comment 65 | Assignment = Token.Assignement 66 | 67 | # Generic types for non-source code 68 | Generic = Token.Generic 69 | 70 | # String and some others are not direct childs of Token. 
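# Editor's note, not part of the upstream file: attribute access on a token type
# creates (and caches) a child type, and ``x in y`` is true when ``y`` is ``x``
# or one of its ancestors (see __contains__ above).  This is what membership
# tests such as ``self.ttype in T.Keyword`` in sql.py rely on:
#
#     >>> from sqlparse.tokens import Keyword, Name
#     >>> Keyword.DML in Keyword
#     True
#     >>> Keyword in Keyword.DML
#     False
#     >>> Name.Builtin in Name
#     True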
71 | # alias them: 72 | Token.Token = Token 73 | Token.String = String 74 | Token.Number = Number 75 | 76 | # SQL specific tokens 77 | DML = Keyword.DML 78 | DDL = Keyword.DDL 79 | Command = Keyword.Command 80 | 81 | Group = Token.Group 82 | Group.Parenthesis = Token.Group.Parenthesis 83 | Group.Comment = Token.Group.Comment 84 | Group.Where = Token.Group.Where 85 | --------------------------------------------------------------------------------
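Editor's note: everything above is the plugin's bundled copy of the
python-sqlparse library, so its public entry points can also be used directly.
A rough, untested sketch of the parsing side, assuming the package imports
cleanly on the Python version Sublime Text ships:

    >>> import sqlparse
    >>> stmt = sqlparse.parse('select a, b from foo')[0]
    >>> stmt.get_type()
    'SELECT'
    >>> stmt.token_first().value
    'select'

The Statement objects returned by parse() are TokenList subclasses (see
sqlparse/sql.py above), so helpers such as token_first(), flatten() and
get_type() are available on them.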