├── tests ├── __init__.py ├── test_cases │ ├── abbreviated_keywords.py │ ├── linespace.py │ ├── invalid_name.py │ ├── fstring.py │ ├── whitespace.py │ ├── semicolon_newline.py │ ├── uppercase_keywords.py │ ├── subquery.py │ └── alignment.py └── test_cases.py ├── .gitignore ├── flake8_sql ├── __init__.py ├── keywords.py ├── parser.py └── linter.py ├── MANIFEST.in ├── setup.cfg ├── .travis.yml ├── tox.ini ├── CHANGELOG.rst ├── LICENSE ├── setup.py └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.cache/ 2 | /build/ 3 | /venv/ 4 | __pycache__/ 5 | *~ 6 | -------------------------------------------------------------------------------- /flake8_sql/__init__.py: -------------------------------------------------------------------------------- 1 | from .linter import Linter 2 | 3 | 4 | __all__ = (Linter) 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.rst 2 | include LICENSE 3 | include README.md 4 | recursive-include flask8_sql *.py 5 | recursive-include tests *.py 6 | -------------------------------------------------------------------------------- /tests/test_cases/abbreviated_keywords.py: -------------------------------------------------------------------------------- 1 | query = "SELECT CHAR_LENGTH(clmn) FROM tbl" # Q442 2 | query = "SELECT CHARACTER_LENGTH(clmn) FROM tbl" 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | 4 | [check-manifest] 5 | ignore = tox.ini 6 | 7 | [flake8] 8 | application-import-names = flake8_sql, tests 9 | exclude = .tox, tests/test_cases/ 10 | max_line_length = 100 11 | -------------------------------------------------------------------------------- /tests/test_cases/linespace.py: -------------------------------------------------------------------------------- 1 | query = "SELECT abc FROM xyz" 2 | query = """SELECT abc FROM xyz 3 | WHERE def = 'def'""" # Q445 4 | query = """UPDATE abd SET xx = 'xx' 5 | WHERE def = 'def'""" # Q445 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | before_install: 8 | - pip install --upgrade pip 9 | install: 10 | - pip install tox-travis 11 | script: 12 | - tox 13 | sudo: false 14 | -------------------------------------------------------------------------------- /tests/test_cases/invalid_name.py: -------------------------------------------------------------------------------- 1 | query = "SELECT aColumn FROM tbl" # Q441 2 | query = "SELECT a_column FROM tbl" 3 | query = "INSERT INTO tbl VALUES SOMETHING" # Q441 4 | query = "INSERT INTO tbl VALUES something" 5 | query = "SELECT invalid_ FROM tbl" # Q441 6 | -------------------------------------------------------------------------------- /tests/test_cases/fstring.py: -------------------------------------------------------------------------------- 1 | tbl = "tbl" 2 | query = f"SELECT ca,cb FROM {tbl}" # Q443 3 | query = f"""SELECT abc 4 | FROM {tbl} 5 | WHERE def = 'def'""" # Q447 6 | query = f"""SELECT abc 7 | FROM {tbl} 8 | WHERE def = 'def'""" 9 | -------------------------------------------------------------------------------- /tests/test_cases/whitespace.py: -------------------------------------------------------------------------------- 1 | query = "SELECT ca,cb FROM tbl" # Q443 2 | query = "SELECT ca ,cb FROM tbl" # Q443 3 | query = "SELECT ca, cb FROM tbl" 4 | query = "SELECT ca FROM tbl WHERE ca= 'b'" # Q444 5 | query = "SELECT ca FROM tbl WHERE ca ='b'" # Q444 6 | query = "SELECT ca FROM tbl WHERE ca = 'b'" 7 | query = "SELECT ca FROM tbl WHERE ca != 'b'" 8 | -------------------------------------------------------------------------------- /tests/test_cases/semicolon_newline.py: -------------------------------------------------------------------------------- 1 | query = """INSERT INTO tbl (clm1, clm2) 2 | VALUES (val1, val2), 3 | (val3, val4); SELECT clm 4 | FROM tbl;""" # Q446 5 | query = """INSERT INTO tbl (clm1, clm2) 6 | VALUES (val1, val2), 7 | (val3, val4); 8 | SELECT clm 9 | FROM tbl;""" 10 | -------------------------------------------------------------------------------- /tests/test_cases/uppercase_keywords.py: -------------------------------------------------------------------------------- 1 | query = """select clmn 2 | FROM tbl""" # Q440 3 | query = """SELECT clmn 4 | FROM tbl""" 5 | query = "insert INTO tbl values vl" # Q440 6 | query = "INSERT INTO tbl VALUES vl" 7 | query = "delete from tbl" # Q440 8 | query = "DELETE FROM tbl" 9 | query = "update tbl set clmn = x" # Q440 10 | query = "UPDATE tbl SET clmn = x" 11 | qyart = "UPDATE tbl SET EXTRACT(abc from xyz)" # Q440 12 | -------------------------------------------------------------------------------- /flake8_sql/keywords.py: -------------------------------------------------------------------------------- 1 | ABBREVIATED_KEYWORDS = { 2 | 'CHAR', 3 | 'CHAR_LENGTH', 4 | 'DEC', 5 | 'PROC', 6 | } 7 | 8 | ROOT_KEYWORDS = { 9 | 'AND', 10 | 'BY', 11 | 'FROM', 12 | 'INTO', 13 | 'JOIN', 14 | 'LIMIT', 15 | 'OFFSET', 16 | 'OR', 17 | 'RETURNING', 18 | 'SELECT', 19 | 'SET', 20 | 'UNION', 21 | 'UPDATE', 22 | 'USING', 23 | 'VALUES', 24 | 'WHERE', 25 | } 26 | 27 | ROOT_KEYWORD_DESCRIPTORS = { 28 | 'FOR', 29 | 'GROUP', 30 | 'INNER', 31 | 'INSERT', 32 | 'LEFT', 33 | 'ORDER', 34 | 'OUTER', 35 | 'RIGHT', 36 | } 37 | -------------------------------------------------------------------------------- /tests/test_cases/subquery.py: -------------------------------------------------------------------------------- 1 | query = """SELECT abc 2 | FROM xyz 3 | WHERE def IN 4 | (SELECT hij 5 | FROM ijk)""" 6 | query = """SELECT abc 7 | FROM xyz 8 | WHERE def IN 9 | (SELECT hij 10 | FROM ijk)""" # Q448 Q449 11 | query = """SELECT abc 12 | FROM xyz 13 | WHERE def = 'def'; 14 | SELECT hij 15 | FROM ijk""" 16 | query = """UPDATE xyz 17 | SET abc = 18 | (SELECT def 19 | FROM ijk 20 | WHERE feg = 'feg') 21 | WHERE fgh = 'fgh'""" 22 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py35,py36,py37,py38,pep8,setuppy,manifest 3 | 4 | [testenv] 5 | deps = pytest 6 | commands = py.test tests/ 7 | 8 | [testenv:pep8] 9 | basepython = python3.8 10 | deps = 11 | flake8 12 | pep8-naming 13 | flake8-import-order 14 | flake8-strict 15 | sqlparse 16 | commands = flake8 flake8_sql/ tests/ 17 | 18 | [testenv:setuppy] 19 | basepython = python3.7 20 | deps = 21 | docutils 22 | Pygments 23 | commands = 24 | python setup.py check \ 25 | --metadata \ 26 | --restructuredtext \ 27 | --strict 28 | 29 | [testenv:manifest] 30 | basepython = python3.8 31 | deps = check-manifest 32 | commands = check-manifest 33 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 0.4.1 2020-06-27 2 | ---------------- 3 | 4 | * Fix wrong lineno used on Python 3.8. 5 | 6 | 0.4.0 2020-01-12 7 | ---------------- 8 | 9 | * Support Python 3.7 & 3.8, drop Python 3.3 & 3.4. 10 | * Do not consider FROM keywords within functions as ROOT keywords. 11 | 12 | 0.3.0 2018-08-12 13 | ---------------- 14 | 15 | * Support f-string formatted SQL statements. 16 | * Install typing for Python versions < 3.5 17 | 18 | 0.2.0 2017-02-06 19 | ---------------- 20 | 21 | * Add additional root keywords and descriptors. 22 | * Align on ``INTO`` rather than ``INSERT``, i.e. the former is now the 23 | root keyword and the latter is a descriptor. 24 | 25 | 0.1.0 2017-02-06 26 | ---------------- 27 | 28 | * First release. 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright P G Jones 2016, 2017. 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /tests/test_cases/alignment.py: -------------------------------------------------------------------------------- 1 | query = """SELECT abc 2 | FROM xyz 3 | WHERE def = 'def'""" # Q447 4 | query = """SELECT abc 5 | FROM xyz 6 | WHERE def = 'def' 7 | ORDER BY abc""" 8 | query = """SELECT abc 9 | FROM xyz 10 | WHERE def = 'def' AND feg = 'feg'""" # Q447 11 | query = """SELECT abc 12 | FROM xyz 13 | LEFT OUTER JOIN ijk ON abc.id = ijk.id""" 14 | query = """SELECT abc 15 | FROM xyz 16 | WHERE def = 'def' 17 | AND feg = 'feg' 18 | OR ijk = 'ijk'""" 19 | query = """INSERT INTO xyz (clm1, clm2) 20 | VALUES (abc, def)""" # Q447 21 | query = """INSERT INTO xyz (clm1, clm2) 22 | VALUES (abc, def) 23 | RETURNING id""" # Q447 24 | query = """INSERT INTO xyz (clm1, clm2) 25 | VALUES (abc, def)""" 26 | query = """INSERT INTO xyz (clm1, clm2) 27 | SELECT abc 28 | FROM def""" # Q447 29 | query = """SELECT abc 30 | FROM xyz 31 | JOIN ghj ON jkl = def 32 | WHERE def = 'def'""" # Q447 33 | query = """SELECT abc, 34 | def 35 | FROM xyz""" # Q449 36 | query = """SELECT abc, 37 | def 38 | FROM xyz""" 39 | query = """SELECT abc 40 | FROM xyz 41 | WHERE abc >= EXTRACT(abc FROM xyz.def)""" 42 | -------------------------------------------------------------------------------- /tests/test_cases.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import glob 3 | import os 4 | import re 5 | 6 | import pytest 7 | 8 | from flake8_sql import Linter 9 | 10 | 11 | ERROR_RX = re.compile("# ((Q[0-9]{3} ?)+) ?.*$") 12 | 13 | 14 | def _extract_expected_errors(lines): 15 | expected = set() 16 | for lineno, line in enumerate(lines): 17 | match = ERROR_RX.search(line) 18 | if match is not None: 19 | for error_code in match.group(1).split(): 20 | expected.add((lineno + 1, error_code)) 21 | return expected 22 | 23 | 24 | def _load_test_cases(): 25 | base_path = os.path.dirname(__file__) 26 | test_cases = [] 27 | test_case_path = os.path.join(base_path, 'test_cases') 28 | wildcard_path = os.path.join(test_case_path, '*.py') 29 | 30 | for filename in glob.glob(wildcard_path): 31 | print(filename) 32 | if filename.endswith('fstring.py') and not hasattr(ast, 'JoinedStr'): 33 | continue 34 | fullpath = os.path.join(test_case_path, filename) 35 | with open(fullpath) as file_: 36 | data = file_.read() 37 | lines = data.splitlines() 38 | codes = _extract_expected_errors(lines) 39 | tree = ast.parse(data) 40 | test_cases.append((tree, lines, codes)) 41 | 42 | return test_cases 43 | 44 | 45 | @pytest.mark.parametrize( 46 | 'tree, lines, expected', 47 | _load_test_cases(), 48 | ) 49 | def test_styles(tree, lines, expected): 50 | checker = Linter(tree, lines) 51 | codes = set() 52 | for lineno, _, message, _ in checker.run(): 53 | code, _ = message.split(' ', 1) 54 | codes.add((lineno, code)) 55 | assert codes == expected 56 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup, find_packages 4 | 5 | PROJECT_ROOT = os.path.dirname(__file__) 6 | 7 | with open(os.path.join(PROJECT_ROOT, 'flake8_sql', 'linter.py')) as file_: 8 | version_line = [line for line in file_ if line.startswith('__version__')][0] 9 | 10 | __version__ = version_line.split('=')[1].strip().strip("'").strip('"') 11 | 12 | with open(os.path.join(PROJECT_ROOT, 'README.rst')) as file_: 13 | long_description = file_.read() 14 | 15 | setup( 16 | name='flake8-SQL', 17 | version=__version__, 18 | description='Flake8 plugin that checks SQL code against opinionated style rules', 19 | long_description=long_description, 20 | url='https://github.com/pgjones/flake8-sql', 21 | author='P G Jones', 22 | author_email='philip.graham.jones@googlemail.com', 23 | keywords=[ 24 | 'flake8', 25 | 'plugin', 26 | 'sql', 27 | ], 28 | license='MIT', 29 | classifiers=[ 30 | 'Development Status :: 3 - Alpha', 31 | 'Environment :: Console', 32 | 'Framework :: Flake8', 33 | 'Intended Audience :: Developers', 34 | 'License :: OSI Approved :: MIT License', 35 | 'Operating System :: OS Independent', 36 | 'Programming Language :: Python :: 3', 37 | "Programming Language :: Python :: 3.5", 38 | "Programming Language :: Python :: 3.6", 39 | "Programming Language :: Python :: 3.7", 40 | "Programming Language :: Python :: 3.8", 41 | 'Programming Language :: Python', 42 | 'Topic :: Software Development :: Libraries :: Python Modules', 43 | 'Topic :: Software Development :: Quality Assurance', 44 | ], 45 | packages=find_packages(exclude=["tests", "tests.*"]), 46 | py_modules=['flake8_sql'], 47 | install_requires=[ 48 | 'flake8', 49 | 'setuptools', 50 | 'sqlparse', 51 | ], 52 | entry_points={ 53 | 'flake8.extension': [ 54 | 'Q4 = flake8_sql:Linter', 55 | ], 56 | }, 57 | zip_safe=False, 58 | ) 59 | -------------------------------------------------------------------------------- /flake8_sql/parser.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Generator, List, Tuple 2 | 3 | import sqlparse 4 | 5 | from .keywords import ROOT_KEYWORDS 6 | 7 | 8 | class Token: 9 | 10 | def __init__(self, token: sqlparse.sql.Token, row: int, col: int, depth: int) -> None: 11 | self._token = token 12 | self.row = row 13 | self.col = col 14 | self.depth = depth 15 | 16 | @property 17 | def is_whitespace(self) -> bool: 18 | return self._token.is_whitespace 19 | 20 | @property 21 | def is_keyword(self) -> bool: 22 | return self._token.is_keyword 23 | 24 | @property 25 | def is_root_keyword(self) -> bool: 26 | if not self.is_keyword: 27 | return False 28 | value = self.value.split()[-1].upper() 29 | if value == "FROM" and isinstance(self._token.parent.parent, sqlparse.sql.Function): 30 | return False 31 | return value in ROOT_KEYWORDS 32 | 33 | @property 34 | def is_function_name(self) -> bool: 35 | # Note the only name-token who's grandparent is a function is 36 | # the function identifier. 37 | return ( 38 | self._token.ttype == sqlparse.tokens.Name and 39 | self._token.within(sqlparse.sql.Function) and 40 | isinstance(self._token.parent.parent, sqlparse.sql.Function) and 41 | sqlparse.keywords.is_keyword(self._token.value)[0] == sqlparse.tokens.Token.Keyword 42 | ) 43 | 44 | @property 45 | def is_name(self) -> bool: 46 | return self._token.ttype == sqlparse.tokens.Name and not self.is_keyword 47 | 48 | @property 49 | def is_punctuation(self) -> bool: 50 | return self._token.ttype == sqlparse.tokens.Punctuation 51 | 52 | @property 53 | def is_comparison(self) -> bool: 54 | return self._token.ttype == sqlparse.tokens.Comparison 55 | 56 | @property 57 | def is_newline(self) -> bool: 58 | return self._token.ttype == sqlparse.tokens.Text.Whitespace.Newline 59 | 60 | @property 61 | def value(self) -> str: 62 | return self._token.value 63 | 64 | 65 | class Parser: 66 | 67 | def __init__(self, sql: str, initial_offset: int) -> None: 68 | self._initial_offset = initial_offset 69 | self._tokens = [] # type: Tuple[sqlparse.sql.Token, int] 70 | depth = 0 71 | for statement in sqlparse.parse(sql): 72 | for token in statement.tokens: 73 | if token.is_group: 74 | self._tokens.extend(_flatten_group(token, depth)) 75 | else: 76 | self._tokens.append((token, depth)) 77 | 78 | def __iter__(self) -> Generator[Token, Any, None]: 79 | row = 0 80 | col = self._initial_offset 81 | for sql_token, depth in self._tokens: 82 | token = Token(sql_token, row, col, depth) 83 | yield token 84 | if token.is_newline: 85 | row += 1 86 | col = 0 87 | else: 88 | col += len(token.value) 89 | 90 | 91 | def _flatten_group(token: sqlparse.sql.Token, depth: int = 0) -> List[sqlparse.sql.Token]: 92 | tokens = [] 93 | for item in token.tokens: 94 | if item.ttype == sqlparse.tokens.DML: 95 | depth += 1 96 | if item.is_group: 97 | tokens.extend(_flatten_group(item, depth)) 98 | else: 99 | tokens.append((item, depth)) 100 | return tokens 101 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Flake8-SQL 2 | ========== 3 | 4 | |Build Status| 5 | 6 | Flake8-SQL is a `flake8 `__ 7 | plugin that looks for SQL queries and checks then against an 8 | opinionated style. This style mostly follows `SQL Style Guide 9 | `__, but differ in the two following 10 | ways. Firstly alignement should be with the ``INTO`` rather than 11 | ``INSERT`` keyword, i.e. 12 | 13 | :: 14 | 15 | INSERT INTO table (columns) 16 | VALUES (values) 17 | 18 | Secondly ``JOIN`` should be aligned to the left of the river, i.e. 19 | 20 | :: 21 | 22 | SELECT * 23 | FROM table1 24 | JOIN table2 ON ... 25 | 26 | Warnings 27 | -------- 28 | 29 | Q440 Keyword is not uppercase 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | All the SQL reserved 33 | `keywords `__ 34 | should be uppercase. 35 | 36 | Q441 Name is not valid 37 | ~~~~~~~~~~~~~~~~~~~~~~ 38 | 39 | All the non SQL keywords should be snake\_case, start with a letter 40 | and not end with an `\_`. Due to a limitation snake\_case is checks 41 | ensure that the word is lowercase. 42 | 43 | Q442 Avoid abbreviated keywords 44 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | Avoid using `abbreviated 47 | keywords `__ 48 | instead use the full length version. 49 | 50 | Q443 Incorrect whitespace around comma 51 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 52 | 53 | Commas should be followed by whitespace, but not preceded. 54 | 55 | Q444 Incorrect whitespace around equals 56 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | Equals should be surrounded with whitespace. 59 | 60 | Q445 Missing linespace between root keywords 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | The root keywords ``SELECT``, ``FROM``, ``INSERT``, ``VALUES``, ``DELETE 64 | FROM``, ``WHERE``, ``UPDATE``, ``AND``, ``OR`` and ``SET`` should be 65 | on separate lines (unless the entire query is on one line). 66 | 67 | Q446 Missing newline after semicolon 68 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | Semicolons must be at the end of the line. 71 | 72 | Q447 Root keywords should be right aligned 73 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 74 | 75 | The root keywords ``SELECT``, ``FROM``, ``INSERT``, ``VALUES``, 76 | ``WHERE``, ``UPDATE``, ``AND``, ``OR``, ``JOIN`` and ``SET`` should be 77 | right aligned i.e. 78 | 79 | :: 80 | 81 | SELECT * 82 | FROM table 83 | 84 | Q448 subquery should be aligned to the right of the river 85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 86 | 87 | Any subquery should be aligned to the right of the river i.e. 88 | 89 | :: 90 | 91 | SELECT * 92 | FROM table 93 | WHERE column IN 94 | (SELECT column 95 | FROM table) 96 | 97 | Q449 tokens should be aligned to the right of the river 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | Any tokens should be aligned to the right of the river i.e 101 | 102 | :: 103 | 104 | SELECT column1, 105 | column2 106 | FROM table 107 | 108 | Configuration 109 | ------------- 110 | 111 | At times it is simpler to use a reserved keyword as an identifier than 112 | go to the effort to avoid it. To allow for this set the 113 | ``sql-excepted-names`` option to a comma separated list of these 114 | names. 115 | 116 | 117 | Limitations 118 | ----------- 119 | 120 | String constants are sought out in the code and considered SQL if they 121 | contain select from, insert into values, update set or delete from in 122 | order. This may and is likely to lead to false positives, in which case 123 | simply add ``# noqa`` to have this plugin ignore the string. 124 | 125 | F-Strings are formatted with the formatted values, ``{...}``, replaced 126 | with the constant ``formatted_value`` before being linted. This leads 127 | to the error message referring to ``formatted_value`` rather than what 128 | was actually written. 129 | 130 | 131 | .. |Build Status| image:: https://travis-ci.org/pgjones/flake8-sql.svg?branch=master 132 | :target: https://travis-ci.org/pgjones/flake8-sql 133 | -------------------------------------------------------------------------------- /flake8_sql/linter.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | from collections import deque 4 | from typing import Any, Generator, Iterable, List, Optional, Tuple, TypeVar 5 | 6 | from .keywords import ABBREVIATED_KEYWORDS, ROOT_KEYWORD_DESCRIPTORS 7 | from .parser import Parser 8 | 9 | 10 | __version__ = '0.4.1' 11 | 12 | SQL_RE = re.compile( 13 | r'(select\s.*from\s|' 14 | r'delete\s+from\s|' 15 | r'insert\s+into\s.*values\s|' 16 | r'update\s.*set\s)', 17 | re.IGNORECASE | re.DOTALL, 18 | ) 19 | 20 | 21 | class Linter: 22 | name = 'sql' 23 | version = __version__ 24 | excepted_names = [] 25 | 26 | def __init__(self, tree: Any, lines: List[str]) -> None: 27 | self.tree = tree 28 | self.lines = lines 29 | 30 | @classmethod 31 | def add_options(cls, parser): 32 | parser.add_option( 33 | '--sql-excepted-names', 34 | default='', 35 | action='store', 36 | type='string', 37 | help='Names not to consider keywords', 38 | parse_from_config=True, 39 | comma_separated_list=True, 40 | ) 41 | 42 | @classmethod 43 | def parse_options(cls, options): 44 | cls.excepted_names = [name.upper() for name in options.sql_excepted_names] 45 | 46 | def run(self) -> Generator[Tuple[int, int, str, type], Any, None]: 47 | for node in _ast_walk(self.tree): 48 | if isinstance(node, ast.Str) and SQL_RE.search(node.s) is not None: 49 | initial_offset = _get_initial_offset(node, self.lines) 50 | parser = Parser(node.s, initial_offset) 51 | yield from self._check_query_words(node, parser) 52 | yield from self._check_query_whitespace(node, parser) 53 | yield from self._check_query_alignment(node, parser) 54 | 55 | def _check_query_words( 56 | self, query: ast.Str, parser: Parser, 57 | ) -> Generator[Tuple[int, int, str, type], Any, None]: 58 | query_end_lineno = _get_query_end_lineno(query) 59 | 60 | for token in parser: 61 | word = token.value 62 | if token.is_keyword or token.is_function_name: 63 | if not word.isupper() and word.upper() not in self.excepted_names: 64 | yield( 65 | query_end_lineno, query.col_offset, 66 | "Q440 keyword {} is not uppercase".format(word), 67 | type(self), 68 | ) 69 | if word.upper() in ABBREVIATED_KEYWORDS: 70 | yield( 71 | query_end_lineno, query.col_offset, 72 | "Q442 avoid abbreviated keywords, {}".format(word), 73 | type(self), 74 | ) 75 | elif token.is_name and (not word.islower() or word.endswith('_')): 76 | yield( 77 | query_end_lineno, query.col_offset, 78 | "Q441 name {} is not valid, must be snake_case, and cannot " 79 | "end with `_`".format(word), 80 | type(self), 81 | ) 82 | 83 | def _check_query_whitespace( 84 | self, query: ast.Str, parser: Parser, 85 | ) -> Generator[Tuple[int, int, str, type], Any, None]: 86 | query_end_lineno = _get_query_end_lineno(query) 87 | 88 | for before, token, after in _pre_post_iter(parser): 89 | pre_whitespace = (before is not None and before.is_whitespace) 90 | post_whitespace = (after is not None and after.is_whitespace) 91 | post_newline = (after is None or after.is_newline) 92 | if token.is_punctuation: 93 | if token.value == ',' and not post_whitespace: 94 | yield( 95 | query_end_lineno, query.col_offset, 96 | 'Q443 incorrect whitespace around comma', 97 | type(self), 98 | ) 99 | elif token.value == ';' and not post_newline: 100 | yield( 101 | query_end_lineno, query.col_offset, 102 | 'Q446 missing newline after semicolon', 103 | type(self), 104 | ) 105 | elif ( 106 | token.is_comparison 107 | and (not pre_whitespace or not post_whitespace) 108 | ): 109 | yield( 110 | query_end_lineno, query.col_offset, 111 | 'Q444 incorrect whitespace around equals', 112 | type(self), 113 | ) 114 | 115 | def _check_query_alignment( 116 | self, query: ast.Str, parser: Parser, 117 | ) -> Generator[Tuple[int, int, str, type], Any, None]: 118 | if len(query.s.splitlines()) == 1: # Single line queries are exempt 119 | return 120 | 121 | query_end_lineno = _get_query_end_lineno(query) 122 | 123 | roots = [] 124 | for token in parser: 125 | if token.value == ';': 126 | roots = [] 127 | elif len(roots) < token.depth + 1: 128 | if token.is_root_keyword: 129 | roots.append(token) 130 | if len(roots) > 1: 131 | previous_root = roots[token.depth - 1] 132 | if token.col < previous_root.col + len(previous_root.value) + 1: 133 | yield ( 134 | query_end_lineno, query.col_offset, 135 | 'Q448 subquery should be aligned to the right of the river', 136 | type(self), 137 | ) 138 | elif token.is_root_keyword: 139 | previous_root = roots[token.depth] 140 | if previous_root.row == token.row: 141 | message = "Q445 missing linespace between root_keywords {} and {}".format( 142 | previous_root.value, token.value, 143 | ) 144 | yield (query_end_lineno, query.col_offset, message, type(self)) 145 | elif previous_root.col + len(previous_root.value) != token.col + len(token.value): 146 | message = "Q447 root_keywords {} and {} are not right aligned".format( 147 | previous_root.value, token.value, 148 | ) 149 | yield (query_end_lineno, query.col_offset, message, type(self)) 150 | elif not token.is_whitespace and token.value not in ROOT_KEYWORD_DESCRIPTORS: 151 | previous_root = roots[token.depth] 152 | if token.col < previous_root.col + len(previous_root.value) + 1: 153 | message = "Q449 token {} should be aligned to the right of the river".format( 154 | token.value, 155 | ) 156 | yield (query_end_lineno, query.col_offset, message, type(self)) 157 | 158 | 159 | T = TypeVar('T') 160 | 161 | 162 | def _pre_post_iter( 163 | iterable: Iterable[T], 164 | ) -> Generator[Tuple[Optional[T], T, Optional[T]], Any, None]: 165 | iterator = iter(iterable) 166 | before = None 167 | current = next(iterator) 168 | for after in iterator: 169 | yield (before, current, after) 170 | before = current 171 | current = after 172 | yield (before, current, None) 173 | 174 | 175 | def _get_initial_offset(query: ast.Str, physical_lines: List[str]) -> int: 176 | logical_lines = query.s.splitlines() 177 | query_end_lineno = _get_query_end_lineno(query) 178 | first_physical_line = physical_lines[query_end_lineno - len(logical_lines)] 179 | return first_physical_line.find(logical_lines[0]) 180 | 181 | 182 | def _get_query_end_lineno(query: ast.Str) -> int: 183 | """Get the lineno for the last line of the given query. 184 | 185 | In Python versions below 3.8, this could be obtained by `ast.expr.lineno`. 186 | However Python 3.8 changed this to be the first line, and for the last line 187 | you would instead have to use `ast.expr.end_lineno`. The real kicker here is 188 | that this field is NOT required to be set by the compiler, so we have no 189 | guarantee that it can be used. In practice, it is set for multi-line strings 190 | which is suitable for our purposes - so we just need to handle the case for a 191 | single-line string for which we can use the first lineno. 192 | """ 193 | try: 194 | end_lineno = query.end_lineno 195 | except AttributeError: 196 | # Should only happen for non multi-line strings or Python versions below 3.8. 197 | end_lineno = query.lineno 198 | 199 | return end_lineno 200 | 201 | 202 | def _ast_walk(node: ast.AST) -> Generator[ast.AST, None, None]: 203 | if not hasattr(ast, 'JoinedStr'): # No f-strings 204 | yield from ast.walk(node) 205 | else: # f-strings supported 206 | todo = deque([node]) 207 | while todo: 208 | node = todo.popleft() 209 | if isinstance(node, ast.JoinedStr): 210 | lineno = _get_query_end_lineno(node) 211 | merged_node = ast.Str(s='', lineno=lineno, col_offset=node.col_offset) 212 | for child in ast.iter_child_nodes(node): 213 | if isinstance(child, ast.Str): 214 | merged_node.s += child.s 215 | elif isinstance(child, ast.FormattedValue): 216 | merged_node.s += 'formatted_value' 217 | todo.append(merged_node) 218 | else: 219 | todo.extend(ast.iter_child_nodes(node)) 220 | yield node 221 | --------------------------------------------------------------------------------