├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── codeql ├── __init__.py ├── bqrs.py ├── common.py ├── database.py └── query.py ├── docs └── index.md ├── publish.bat ├── setup.py └── test.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig 2 | # http://editorconfig.org 3 | 4 | root = true 5 | 6 | [**.{py}] 7 | indent_style = space 8 | indent_size = 4 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | .idea 3 | 4 | # Python 5 | *.pyc 6 | *.pyo 7 | *.pyd 8 | 9 | # Package 10 | /*egg-info 11 | /build 12 | /dist 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Alexandro Sanchez Bach 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CodeQL for Python 2 | ================= 3 | 4 | 7 | 8 | Unofficial Python 3.x bindings for the CodeQL CLI application. 9 | 10 | Install the package via: 11 | 12 | ```bash 13 | pip install git+https://github.com/AlexAltea/codeql-python.git 14 | ``` 15 | 16 | ## Usage 17 | 18 | ```python 19 | import codeql 20 | 21 | # Open databases from files or folders 22 | db = codeql.Database('path/to/db.zip') 23 | 24 | # Queries return a CSV-like array of arrays (row 0 is the column header) 25 | results = db.query('select "Hello"') 26 | assert(results[1][0] == 'Hello') 27 | 28 | # Queries with external libraries are supported as well 29 | codeql.set_search_path('path/to/codeql') 30 | results = db.query(''' 31 | import cpp 32 | from BlockStmt block 33 | select block 34 | ''') 35 | 36 | # Create temporary databases from inlined sources 37 | db = codeql.Database.from_cpp(''' 38 | int main() { 39 | return 1337 + 1337 + 1337; 40 | } 41 | ''') 42 | results = db.query(''' 43 | import cpp 44 | from Literal literal where 45 | literal.getType() instanceof IntType and 46 | literal.getValue().toInt() = 1337 47 | select literal 48 | ''') 49 | assert(len(results[1:]) == 3) 50 | ``` 51 | -------------------------------------------------------------------------------- /codeql/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | CodeQL for Python. 
# -----------------------------------------------------------------------------
# /codeql/__init__.py (continued)
#
# The package initialiser re-exports the public names of its submodules:
#     from .bqrs import *
#     from .database import *
#     from .query import *
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# /codeql/bqrs.py
#
# Inside the package this module receives run() and temporary_file() through
# `from .common import *`; both are defined in the common.py section below.
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CodeQL for Python.
"""

import csv
import io
import os
import shutil
import tempfile


class BQRS(object):
    """Handle to a binary query-results (BQRS) file, driven via `codeql bqrs`."""

    def __init__(self, path):
        """
        Arguments:
        path -- Location of the query results file
        """
        self.path = path

    # Helpers
    def _command_line(self, command, options=None, post=None):
        """
        Build the argument list of a `codeql bqrs` subcommand.

        Arguments:
        command -- Name of the bqrs subcommand (e.g. 'decode')
        options -- Arguments placed before this file's path (default: none)
        post -- Arguments placed after this file's path (default: none)

        Returns a new list: ['bqrs', command, *options, self.path, *post].
        """
        return ['bqrs', command] + list(options or []) + [self.path] + list(post or [])

    def run_command(self, command, options=None, post=None):
        """
        Run a `codeql bqrs` subcommand on this file.

        `post` is forwarded after the file path; it used to be dropped, which
        made diff() compare the file against nothing.

        Returns whatever run() returns.
        """
        return run(self._command_line(command, options, post))

    def parse(self):
        """
        Decode the results to a temporary CSV file and return its rows.

        Returns a list of rows, each row being a list of strings as produced
        by csv.reader.
        """
        path = temporary_file(suffix='.csv')
        self.decode(format='csv', output=path)
        # newline='' is what the csv module requires to handle line breaks
        # embedded in quoted fields correctly.
        with open(path, 'r', newline='') as f:
            return list(csv.reader(f, delimiter=','))

    # Interface
    def info(self, format=None):
        """
        Display metadata for a BQRS file.

        This command displays an overview of the data contained in the compact
        binary BQRS file that is the result of executing a query. It shows the
        names and sizes of each result set (table) in the BQRS file, and the
        column types of each result set.

        It can also optionally precompute offsets for using the pagination
        options of codeql bqrs decode. This is mainly useful for IDE plugins.

        Arguments:
        format -- Optional output format, forwarded as --format=<format>
        """
        options = ['-v']
        if format:
            options += [f'--format={format}']
        return self.run_command('info', options)

    def decode(self, format=None, output=None):
        """
        Convert result data from BQRS into other forms.

        The decoded output will be written to standard output, unless the
        --output option is specified.

        Arguments:
        format -- Optional output format, forwarded as --format=<format>
        output -- Optional path of the file to write, forwarded as -o <output>
        """
        options = []
        if format:
            options += [f'--format={format}']
        if output:
            options += ['-o', output]
        return self.run_command('decode', options)

    def diff(self, other):
        """
        Compute the difference between two result sets.

        Arguments:
        other -- Another BQRS instance, or the path of another BQRS file
        """
        if isinstance(other, BQRS):
            other = other.path
        return self.run_command('diff', post=[other])


# -----------------------------------------------------------------------------
# /codeql/common.py
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CodeQL for Python.
"""

import os
import subprocess
import tempfile
import uuid

# Configuration
codeql_path = 'codeql'
search_path = None
library_path = None

# Temporaries
temp_path = None


def temporary_root():
    """
    Return the directory that holds every temporary created by this package.

    The directory is created on first use and kept in the module-level
    `temp_path` (a tempfile.TemporaryDirectory), so all later calls share it.
    """
    global temp_path
    if temp_path is None:
        temp_path = tempfile.TemporaryDirectory(prefix="codeql-python_")
    return temp_path.name


def temporary_path(prefix, suffix):
    """
    Return a fresh, unique path below temporary_root() without creating it.

    Arguments:
    prefix -- Optional text placed before the random part of the name
    suffix -- Optional text placed after the random part of the name
    """
    name = (prefix or '') + uuid.uuid4().hex + (suffix or '')
    return os.path.join(temporary_root(), name)


def temporary_dir(create=True, prefix=None, suffix=None):
    """Return a unique temporary directory path, creating it unless create is false."""
    path = temporary_path(prefix, suffix)
    if create:
        os.mkdir(path)
    return path


def temporary_file(create=True, prefix=None, suffix=None):
    """Return a unique temporary file path, creating an empty file unless create is false."""
    path = temporary_path(prefix, suffix)
    if create:
        open(path, 'a').close()
    return path


# Environment
def set_search_path(path):
    """
    Set the module-level `search_path`.

    Arguments:
    path -- A single path string, or a list/tuple of paths that is joined
            with the platform's path-list separator (os.pathsep)
    """
    global search_path
    if isinstance(path, (list, tuple)):
        path = os.pathsep.join(map(str, path))
    search_path = path


def run(args):
    """
    Invoke the CodeQL executable (`codeql_path`) with the given arguments.

    Every argument is converted with str(). Standard output of the child
    process is discarded. Returns the subprocess.CompletedProcess.
    """
    command = [codeql_path] + list(map(str, args))
    return subprocess.run(command, stdout=subprocess.DEVNULL)
# -----------------------------------------------------------------------------
# /codeql/database.py
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CodeQL for Python.
"""

import os
import shutil
import tempfile


def _common():
    """
    Return the `codeql.common` module, looked up at call time.

    Names copied with `from .common import *` are bound once at import, so a
    later set_search_path() call would never be visible through them; going
    through the module always reads the current configuration.
    """
    from codeql import common
    return common


# Constants
CODEQL_QLPACK = '''
name: codeql-python
version: 0.0.0
libraryPathDependencies: {}
'''


class Database(object):
    """A CodeQL database on disk, driven through `codeql database`."""

    def __init__(self, path, temp=False):
        """
        Arguments:
        path -- Path of the database
        temp -- Remove database path in destructor
        """
        self.path = path
        self.temp = temp

    def __del__(self):
        # Best effort only: __init__ may not have completed, and an error
        # raised from a finalizer cannot be handled by any caller.
        if getattr(self, 'temp', False):
            shutil.rmtree(self.path, ignore_errors=True)

    # Helpers
    def run_command(self, command, options=None, post=None):
        """
        Run a `codeql database` subcommand on this database.

        Arguments:
        command -- Name of the database subcommand (e.g. 'analyze')
        options -- Arguments placed before the database path (default: none)
        post -- Arguments placed after the database path (default: none)
        """
        args = ['database', command] + list(options or []) + [self.path] + list(post or [])
        return _common().run(args)

    @staticmethod
    def _join_command(command):
        """Join command parts into one string, double-quoting parts that contain a space."""
        return ' '.join(f'"{part}"' if ' ' in part else part for part in map(str, command))

    @staticmethod
    def from_cpp(code, command=None):
        """
        Create a database from an inlined C/C++ source snippet.

        Arguments:
        code -- Source text, written to `source.cpp` in a temporary directory
        command -- Optional build command as a list of arguments; the source
                   path is appended to a copy of it. Defaults to the first
                   compiler found in PATH, invoked with `-c`.

        Raises RuntimeError when no command is given and no compiler is found.
        """
        # Get default compiler
        if command is None:
            compilers = ['cxx', 'clang++', 'g++', 'cc', 'clang', 'gcc']
            compiler = next((c for c in compilers if shutil.which(c) is not None), None)
            if compiler is None:
                raise RuntimeError('No C/C++ compiler found in PATH; pass `command` explicitly')
            command = [compiler, '-c']
        # Create database
        directory = _common().temporary_dir()
        fpath = os.path.join(directory, 'source.cpp')
        with open(fpath, 'w') as f:
            f.write(code)
        # Build a new list so the caller's `command` is left untouched
        return Database.create('cpp', directory, list(command) + [fpath])

    def query(self, ql):
        """
        Syntactic sugar to execute a CodeQL snippet and parse the results.

        Arguments:
        ql -- Source text of the query

        Returns the parsed rows of the query result (see BQRS.parse).
        """
        # Imported here: codeql/__init__.py itself imports this module.
        import codeql

        # Prepare query directory (once per database object)
        if not hasattr(self, 'qldir'):
            self.qldir = _common().temporary_dir()
            qlpack_path = os.path.join(self.qldir, 'qlpack.yml')
            with open(qlpack_path, mode='w') as f:
                f.write(CODEQL_QLPACK.format('codeql-cpp'))
        # Perform query
        query_path = os.path.join(self.qldir, 'query.ql')
        with open(query_path, mode='w') as f:
            f.write(ql)
        bqrs = codeql.Query(query_path).run(database=self)
        return bqrs.parse()

    # Interface
    @staticmethod
    def create(language, source, command=None, location=None):
        """
        Create a CodeQL database instance for a source tree that can be analyzed
        using one of the CodeQL products.

        Arguments:
        language -- The language that the new database will be used to analyze.
        source -- The root source code directory.
            In many cases, this will be the checkout root. Files within it are
            considered to be the primary source files for this database.
            In some output formats, files will be referred to by their relative path
            from this directory.
        command -- For compiled languages, build commands that will cause the
            compiler to be invoked on the source code to analyze. These commands
            will be executed under an instrumentation environment that allows
            analysis of generated code and (in some cases) standard libraries.
            May be a single string or a list/tuple of arguments.
        location -- Path of the database to generate. Defaults to a new
            temporary directory.
        """
        common = _common()
        # Syntactic sugar: Default location to temporary directory
        if location is None:
            location = common.temporary_dir()

        # Create and submit command
        args = ['database', 'create', '-l', language, '-s', source]
        if command is not None:
            if isinstance(command, (list, tuple)):
                command = Database._join_command(command)
            args += ['-c', command]
        args.append(location)
        common.run(args)

        # Return database instance
        return Database(location)

    def analyze(self, queries, format, output):
        """
        Analyze a database, producing meaningful results in the context of the
        source code.

        Run a query suite (or some individual queries) against a CodeQL
        database, producing results, styled as alerts or paths, in SARIF or
        another interpreted format.

        This command combines the effect of the codeql database run-queries
        and codeql database interpret-results commands. If you want to run
        queries whose results don't meet the requirements for being interpreted
        as source-code alerts, use codeql database run-queries or codeql query
        run instead, and then codeql bqrs decode to convert the raw results to a
        readable notation.

        Arguments:
        queries -- A single query or a list/tuple of queries
        format -- Output format, forwarded as --format=<format>
        output -- Output path, forwarded as -o <output>
        """
        # Support single query or list of queries
        if not isinstance(queries, (list, tuple)):
            queries = [queries]
        # Prepare options
        options = [f'--format={format}', '-o', output]
        search_path = _common().search_path
        if search_path is not None:
            options += ['--search-path', search_path]
        # Dispatch command
        return self.run_command('analyze', options, post=list(queries))

    def upgrade(self):
        """
        Upgrade a database so it is usable by the current tools.

        This rewrites a CodeQL database to be compatible with the QL libraries
        that are found on the QL pack search path, if necessary.

        If an upgrade is necessary, it is irreversible. The database will
        subsequently be unusable with the libraries that were current when it
        was created.
        """
        return self.run_command('upgrade')

    def cleanup(self):
        """
        Compact a CodeQL database on disk.

        Delete temporary data, and generally make a database as small as
        possible on disk without degrading its future usefulness.
        """
        return self.run_command('cleanup')

    def bundle(self, output):
        """
        Create a relocatable archive of a CodeQL database.

        A command that zips up the useful parts of the database. This will only
        include the mandatory components, unless the user specifically requests
        that results, logs, TRAP, or similar should be included.

        Arguments:
        output -- Path of the archive to write, forwarded as -o <output>
        """
        return self.run_command('bundle', ['-o', output])


# -----------------------------------------------------------------------------
# /codeql/query.py
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CodeQL for Python.
"""

import os
import shutil
import tempfile


def _common():
    """
    Return the `codeql.common` module, looked up at call time.

    Reading the configuration through the module (instead of star-imported
    copies) keeps set_search_path() changes visible.
    """
    from codeql import common
    return common


class Query(object):
    """A CodeQL query file, driven through `codeql query`."""

    def __init__(self, path):
        """
        Arguments:
        path -- Location of the query file
        """
        self.path = path

    # Helpers
    def run_command(self, command, options=None, post=None):
        """
        Run a `codeql query` subcommand on this query file.

        Arguments:
        command -- Name of the query subcommand (e.g. 'run')
        options -- Arguments placed before the query path (default: none)
        post -- Arguments placed after the query path (default: none)
        """
        args = ['query', command] + list(options or []) + [self.path] + list(post or [])
        return _common().run(args)

    @staticmethod
    def from_source(code):
        """Write `code` to a temporary .ql file and return a Query for it."""
        path = _common().temporary_file(suffix='.ql')
        with open(path, mode='w') as f:
            f.write(code)
        return Query(path)

    @staticmethod
    def from_file(path):
        """Return a Query for an existing query file."""
        return Query(path)

    # Interface
    def compile(self):
        """
        Compile or check QL code.

        Compile one or more queries. Usually the main outcome of this command
        is that the compiled version of the query is written to a compilation
        cache where it will be found when the query is later executed. Other
        output options are mostly for debugging.
        """
        # Must dispatch the `compile` subcommand; calling self.run() here
        # would execute the query with 'compile' as its database.
        return self.run_command('compile')

    def run(self, database, output=None):
        """
        Run a single query.

        This command runs single query against a CodeQL database or raw QL dataset.

        By default the result of the query will be displayed on the terminal in
        a human-friendly rendering. If you want to do further processing of the
        results, we strongly recommend using the --output option to write the
        results to a file in an intermediate binary format, which can then be
        unpacked into various more machine-friendly representations by codeql
        bqrs decode.

        If your query produces results in a form that can be interpreted as
        source-code alerts, you may find codeql database analyze a more
        convenient way to run it. In particular, codeql database analyze can
        produce output in the SARIF format, which can be used with an variety
        of alert viewers.

        Arguments:
        database -- A Database instance or the path of a database
        output -- Path of the BQRS file to write. Defaults to a new
                  temporary file.

        Returns a BQRS handle for the written results file.
        """
        # Imported here: codeql/__init__.py itself imports this module.
        import codeql

        common = _common()
        # Return temporary results if no output is specified
        if output is None:
            output = common.temporary_file(suffix='.bqrs')
        # Obtain actual path to database
        if isinstance(database, codeql.Database):
            database = database.path
        # Perform query and return results
        options = ['-o', output, '-d', database]
        if common.search_path is not None:
            options += ['--search-path', common.search_path]
        self.run_command('run', options)
        return codeql.BQRS(output)


# -----------------------------------------------------------------------------
# /docs/index.md
#
#   CodeQL for Python documentation
#   ===============================
#
#   Disclaimer: This is a rather informal description.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# /docs/index.md (continued)
#
#   ## Database
#
#   TODO
#
#   ## Queries
#
#   TODO
# -----------------------------------------------------------------------------
# /publish.bat
#
#   del dist\*
#   python setup.py bdist_wheel --universal
#   gpg --detach-sign -u FA31DF0C -a dist/*
#   twine upload dist/*
#   pause
# -----------------------------------------------------------------------------
# /setup.py
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import codecs
import setuptools

CODEQL_VERSION = '2.4.5'
CODEQL_REPOSITORY_URL = 'https://github.com/AlexAltea/codeql-python'
CODEQL_DOWNLOAD_URL = 'https://github.com/AlexAltea/codeql-python/tarball/' + CODEQL_VERSION

# Description
CODEQL_DESCRIPTION = """CodeQL for Python
=================

Unofficial Python 3.x bindings for the CodeQL CLI application.

More information at: https://github.com/AlexAltea/codeql-python
"""

setuptools.setup(
    name='codeql-python',
    version=CODEQL_VERSION,
    description='Unofficial Python bindings for CodeQL CLI',
    long_description=CODEQL_DESCRIPTION,
    license='MIT',
    author='Alexandro Sanchez Bach',
    author_email='alexandro@phi.nz',
    url=CODEQL_REPOSITORY_URL,
    download_url=CODEQL_DOWNLOAD_URL,
    # `packages` lists importable package directories; the distribution is
    # named codeql-python but the package directory in this tree is `codeql`.
    packages=['codeql'],
    # The package sources use f-strings, which need Python 3.6 or newer.
    python_requires='>=3.6',
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Natural Language :: English',
    ],
)

# -----------------------------------------------------------------------------
# /test.py
# -----------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import codeql


def test_basic():
    """Build a throwaway C++ database and check a constant query round-trips."""
    db = codeql.Database.from_cpp('int main() { return 0; }')
    res = db.query('''
        select "Test"
    ''')
    # Row 0 is skipped here; the selected value is expected in row 1.
    assert res[1][0] == "Test"


def test():
    """Run every test in this file."""
    test_basic()


if __name__ == '__main__':
    test()