├── .gitattributes ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── analysis └── about_this_folder ├── binaryninja ├── README.md ├── update_to_latest.py └── version_switcher.py ├── img ├── grakn-start.png ├── grakn_crash.png └── grakn_crash_2.png ├── paper_machete.py ├── pmanalyze.py ├── queries ├── cwe_120_v1.py ├── cwe_121_v1.py ├── cwe_129_v1.py ├── cwe_134_v1.py └── cwe_788_v1.py ├── requirements.txt └── templates ├── binja_mlil_ssa.gql ├── binja_mlil_ssa_1.tpl ├── binja_mlil_ssa_2.tpl ├── binja_mlil_ssa_3.tpl ├── binja_mlil_ssa_4.tpl ├── binja_mlil_ssa_5.tpl ├── binja_mlil_ssa_6.tpl └── binja_mlil_ssa_7.tpl /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | binaryninja/* 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 4 | RUN apt update && DEBIAN_FRONTEND=noninteractive apt upgrade -y 5 | RUN DEBIAN_FRONTEND=noninteractive apt install -y --fix-missing \ 6 | curl \ 7 | openjdk-8-jre-headless \ 8 | python-pip \ 9 | python3-pip \ 10 | unzip 11 | 12 | # Binary Ninja 13 | COPY binaryninja/BinaryNinja.zip /tmp/BinaryNinja.zip 14 | COPY binaryninja/update_to_latest.py /tmp/update_to_latest.py 15 | COPY binaryninja/version_switcher.py /tmp/version_switcher.py 16 | RUN unzip /tmp/BinaryNinja.zip -d /opt/ && rm /tmp/BinaryNinja.zip && \ 17 | mkdir -p /root/.local/lib/python2.7/site-packages/ && \ 18 | echo "/opt/binaryninja/python" > /root/.local/lib/python2.7/site-packages/binaryninja.pth && \ 19 | mkdir -p /root/.binaryninja/ 20 | 
COPY binaryninja/license.txt /root/.binaryninja/license.dat 21 | RUN pip install pexpect && python /tmp/update_to_latest.py && rm /tmp/version_switcher.py && rm /tmp/update_to_latest.py 22 | 23 | # Grakn 24 | COPY requirements.txt /tmp/requirements.txt 25 | RUN BROWSER_DOWNLOAD_URL=$(curl --silent https://api.github.com/repos/graknlabs/grakn/releases/latest | python -c "import sys; from json import loads as l; x = l(sys.stdin.read()); print(''.join(s['browser_download_url'] for s in x['assets']))"); \ 26 | curl -fL $BROWSER_DOWNLOAD_URL -o /tmp/grakn.zip && \ 27 | unzip /tmp/grakn.zip -d /opt/ && rm /tmp/grakn.zip && \ 28 | ln -s /opt/grakn*/grakn /usr/local/bin/ && ln -s /opt/grakn*/graql /usr/local/bin/ && \ 29 | pip3 install -r /tmp/requirements.txt && rm /tmp/requirements.txt 30 | 31 | # Useful stuff 32 | RUN DEBIAN_FRONTEND=noninteractive apt install -y --fix-missing \ 33 | tmux \ 34 | vim 35 | #ENTRYPOINT ["/bin/bash"] 36 | 37 | ENTRYPOINT ["sh", "-c", "grakn server start && cd /opt/papermachete && python2.7 paper_machete.py"] 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Battelle Memorial Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ____ __ __ _ _ 2 | | _ \ __ _ _ __ ___ _ __ | \/ | __ _ ___| |__ ___| |_ ___ ________ 3 | | |_) / _` | '_ \/ _ \ '__| | |\/| |/ _` |/ __| '_ \ / _ \ __/ _ \ /_______/ 4 | | __/ (_| | |_)| __/ | | | | | (_| | (__| | | | __/ || __/ \_______\ 5 | |_| \__,_| .__/\___|_| |_| |_|\__,_|\___|_| |_|\___|\__\___| /_______/ 6 | |_| @==|;;;;;;> 7 | 8 | ## About 9 | Paper Machete (PM) orchestrates [Binary Ninja](https://binary.ninja) and [Grakn.ai](https://grakn.ai) to aid static binary analysis for the purpose of finding bugs in software. PM leverages the Binary Ninja MLIL SSA to extract semantic meaning about individual instructions, operations, register/variable state, and overall control flow. 10 | 11 | PM migrates this data into Grakn - a knowledge graph that gives us the ability to define domain-specific ontologies for data and write powerful inference rules to form relationships between data we don't want to (or can't) explicitly store. [Heeh, how neat is that](https://www.youtube.com/watch?v=Hm3JodBR-vs)? 12 | 13 | This project was released in conjunction with a DerbyCon 2017 talk titled "Aiding Static Analysis: Discovering Vulnerabilities in Binary Targets through Knowledge Graph Inferences." 
You can watch that talk [here](http://www.irongeek.com/i.php?page=videos/derbycon7/t116-aiding-static-analysis-discovering-vulnerabilities-in-binary-targets-through-knowledge-graph-inferences-john-toterhi). 14 | 15 | Paper Machete's initial prototype and public codebase were developed by security researchers at the [Battelle Memorial Institute](https://www.battelle.org/government-offerings/national-security/cyber/mission-focused-tools). As this project matures, we hope that you will find it useful in your own research and consider contributing to the project. 16 | 17 | ## Why BNIL? 18 | The BNIL suite of ILs is easy to work with, pleasantly verbose, and human-readable. At any point we can decide to leverage other levels and forms of the IL with little development effort on our part. When you add to that the ability to [lift multiple architectures](https://binary.ninja/faq/) and [write custom lifters](https://github.com/joshwatson/binaryninja-msp430), we have little reason not to use BNIL. 19 | 20 | ## Why Grakn? 21 | Grakn's query language (Graql) is easy to learn and intuitive, which is extremely important in the early stages of this research while we're still hand-writing queries to model the patterns vulnerability researchers look for when performing static analysis. 22 | 23 | The ability to write our own domain-specific ontologies lets us quickly experiment with new query ideas and ways of making our queries less complex. When we run into a case where we think "gee, if I just had access to the relationship between..." we can modify our ontology and inference rules to get that information. 24 | 25 | While the end game for PM is to eliminate the need for human-written queries, the fact is we're starting from square one, which means hand-jamming a lot of queries to model the patterns human vulnerability researchers look for when bug hunting. 
26 | 27 | ## Dependencies 28 | Paper Machete requires [BinaryNinja v1.1](https://binary.ninja), [Grakn v1.4.2](https://github.com/graknlabs/grakn/releases/tag/v1.4.2), the [Grakn Python Driver](http://github.com/graknlabs/grakn-python), and the [Java JRE](http://www.oracle.com/technetwork/java/javase/downloads/index.html). 29 | 30 | 31 | ## Query Scripts 32 | We've included some basic queries to get you started if you want to play around with PM. As you can imagine, there is no "silver bullet" query that will find all manifestations of a specific vulnerability class. Because of this, we've included versions for each CWE query. As we add new methods of finding the same CWE, we'll add scripts with incremented version numbers to differentiate them. 33 | 34 | `cwe_120_v1.py` - Tests for use of unsafe 'gets()' function ([CWE-120](https://cwe.mitre.org/data/definitions/120.html)) 35 | 36 | `cwe_121_v1.py` - Tests for buffer overflows ([CWE-121](https://cwe.mitre.org/data/definitions/121.html)) 37 | 38 | `cwe_129_v1.py` - Tests for missing bounds checks ([CWE-129](https://cwe.mitre.org/data/definitions/129.html)) 39 | 40 | `cwe_134_v1.py` - Tests for format string vulnerabilities ([CWE-134](https://cwe.mitre.org/data/definitions/134.html)) 41 | 42 | `cwe_788_v1.py` - Tests for missing bounds check on array indexes ([CWE-788](https://cwe.mitre.org/data/definitions/788.html)) 43 | 44 | ## How Do I Use It? 45 | 46 | For basic use, run the `paper_machete.py` script and follow the prompts. For more advanced use, please [read the wiki](https://github.com/cetfor/PaperMachete/wiki). 47 | 48 | Typically you'll start with option `[1]` and work your way down to option `[3]`. If you run into any issues with Grakn, use option `[4]` to reset Grakn to a clean state and try again. 49 | ``` 50 | ... banner ... 
51 | [1] Analyze a binary file 52 | [2] Migrate a JSON file into Grakn 53 | [3] Run all CWE queries 54 | [4] Clean and restart Grakn 55 | [5] Quit 56 | ``` 57 | 58 | Option `[1]` lists all executable files in the `/analysis` directory. So place any executables you want to analyze in `/analysis`. This option will run `pmanalyze.py` and generate a JSON file in the `/analysis` directory. 59 | 60 | Once you've analyzed files with `[1]` and produced resulting JSON files, they will appear as a choice in option `[2]`. Selecting a JSON file in option `[2]` will migrate the data into Grakn. 61 | 62 | Now that you have data in Grakn, you can use option `[3]`. This will kick off all scripts in `/queries` against the keyspace of your choice. If you write your own query patterns, just throw them in `/queries` and option `[3]` will run them too. 63 | -------------------------------------------------------------------------------- /analysis/about_this_folder: -------------------------------------------------------------------------------- 1 | This folder serves two purposes: 2 | 1. It's where you put the binaries or Binary Ninja databases you want to analyze (PE, ELF, Mach-O, .bndb) 3 | 2. It's where analysis files (JSON) are stored after being processed by Paper Machete. 4 | 5 | The Paper Machete CLI `paper_machete.py` enumerates this folder when presenting you with analysis/migration options. 6 | 7 | FAQ: 8 | Q: What if my target isn't a PE/ELF/Mach-O executable? It's a binary blob! 9 | A: Analyze it with Binary Ninja and save your analysis as a .bndb file in this folder. 10 | -------------------------------------------------------------------------------- /binaryninja/README.md: -------------------------------------------------------------------------------- 1 | # binaryninja 2 | 3 | Supply your own `BinaryNinja.zip` (Linux sources) and commercial `license.txt` for use with Docker. 4 | Files in this directory are `.gitignore`'ed. 
5 | -------------------------------------------------------------------------------- /binaryninja/update_to_latest.py: -------------------------------------------------------------------------------- 1 | import pexpect 2 | import subprocess 3 | import sys 4 | 5 | child = pexpect.spawn('python /tmp/version_switcher.py') 6 | child.logfile = sys.stdout 7 | child.expect('Choice:') 8 | child.sendline('1') 9 | child.expect('Choice:') 10 | child.sendline('1') 11 | child.timeout=600 12 | child.expect(['Choice:', 'UpdateSuccess']) 13 | child.terminate() 14 | -------------------------------------------------------------------------------- /binaryninja/version_switcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2015-2017 Vector 35 LLC 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to 6 | # deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | # sell copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | # IN THE SOFTWARE. 
21 | 22 | import sys 23 | 24 | from binaryninja.update import UpdateChannel, are_auto_updates_enabled, set_auto_updates_enabled, is_update_installation_pending, install_pending_update 25 | from binaryninja import core_version 26 | import datetime 27 | 28 | chandefault = UpdateChannel.list[0].name 29 | channel = None 30 | versions = [] 31 | 32 | 33 | def load_channel(newchannel): 34 | global channel 35 | global versions 36 | if (channel is not None and newchannel == channel.name): 37 | print("Same channel, not updating.") 38 | else: 39 | try: 40 | print("Loading channel %s" % newchannel) 41 | channel = UpdateChannel[newchannel] 42 | print("Loading versions...") 43 | versions = channel.versions 44 | except Exception: 45 | print("%s is not a valid channel name. Defaulting to " % chandefault) 46 | channel = UpdateChannel[chandefault] 47 | 48 | 49 | def select(version): 50 | done = False 51 | date = datetime.datetime.fromtimestamp(version.time).strftime('%c') 52 | while not done: 53 | print("Version:\t%s" % version.version) 54 | print("Updated:\t%s" % date) 55 | print("Notes:\n\n-----\n%s" % version.notes) 56 | print("-----") 57 | print("\t1)\tSwitch to version") 58 | print("\t2)\tMain Menu") 59 | selection = raw_input('Choice: ') 60 | if selection.isdigit(): 61 | selection = int(selection) 62 | else: 63 | selection = 0 64 | if (selection == 2): 65 | done = True 66 | elif (selection == 1): 67 | if (version.version == channel.latest_version.version): 68 | print("Requesting update to latest version.") 69 | else: 70 | print("Requesting update to prior version.") 71 | if are_auto_updates_enabled(): 72 | print("Disabling automatic updates.") 73 | set_auto_updates_enabled(False) 74 | if (version.version == core_version): 75 | print("Already running %s" % version.version) 76 | else: 77 | print("version.version %s" % version.version) 78 | print("core_version %s" % core_version) 79 | print("Downloading...") 80 | print(version.update()) 81 | print("Installing...") 82 | if 
is_update_installation_pending: 83 | #note that the GUI will be launched after update but should still do the upgrade headless 84 | install_pending_update() 85 | # forward updating won't work without reloading 86 | sys.exit() 87 | else: 88 | print("Invalid selection") 89 | 90 | 91 | def list_channels(): 92 | done = False 93 | print("\tSelect channel:\n") 94 | while not done: 95 | channel_list = UpdateChannel.list 96 | for index, item in enumerate(channel_list): 97 | print("\t%d)\t%s" % (index + 1, item.name)) 98 | print("\t%d)\t%s" % (len(channel_list) + 1, "Main Menu")) 99 | selection = raw_input('Choice: ') 100 | if selection.isdigit(): 101 | selection = int(selection) 102 | else: 103 | selection = 0 104 | if (selection <= 0 or selection > len(channel_list) + 1): 105 | print("%s is an invalid choice." % selection) 106 | else: 107 | done = True 108 | if (selection != len(channel_list) + 1): 109 | load_channel(channel_list[selection - 1].name) 110 | 111 | 112 | def toggle_updates(): 113 | set_auto_updates_enabled(not are_auto_updates_enabled()) 114 | 115 | 116 | def main(): 117 | global channel 118 | done = False 119 | load_channel(chandefault) 120 | while not done: 121 | print("\n\tBinary Ninja Version Switcher") 122 | print("\t\tCurrent Channel:\t%s" % channel.name) 123 | print("\t\tCurrent Version:\t%s" % core_version) 124 | print("\t\tAuto-Updates On:\t%s\n" % are_auto_updates_enabled()) 125 | for index, version in enumerate(versions): 126 | date = datetime.datetime.fromtimestamp(version.time).strftime('%c') 127 | print("\t%d)\t%s (%s)" % (index + 1, version.version, date)) 128 | print("\t%d)\t%s" % (len(versions) + 1, "Switch Channel")) 129 | print("\t%d)\t%s" % (len(versions) + 2, "Toggle Auto Updates")) 130 | print("\t%d)\t%s" % (len(versions) + 3, "Exit")) 131 | selection = raw_input('Choice: ') 132 | if selection.isdigit(): 133 | selection = int(selection) 134 | else: 135 | selection = 0 136 | if (selection <= 0 or selection > len(versions) + 3): 137 | 
print("%d is an invalid choice.\n\n" % selection) 138 | else: 139 | if (selection == len(versions) + 3): 140 | done = True 141 | elif (selection == len(versions) + 2): 142 | toggle_updates() 143 | elif (selection == len(versions) + 1): 144 | list_channels() 145 | else: 146 | select(versions[selection - 1]) 147 | 148 | 149 | if __name__ == "__main__": 150 | main() 151 | -------------------------------------------------------------------------------- /img/grakn-start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cetfor/PaperMachete/cdceeed57bdae2b5d8138ae0c197098acd764835/img/grakn-start.png -------------------------------------------------------------------------------- /img/grakn_crash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cetfor/PaperMachete/cdceeed57bdae2b5d8138ae0c197098acd764835/img/grakn_crash.png -------------------------------------------------------------------------------- /img/grakn_crash_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cetfor/PaperMachete/cdceeed57bdae2b5d8138ae0c197098acd764835/img/grakn_crash_2.png -------------------------------------------------------------------------------- /paper_machete.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import subprocess 3 | import os 4 | from os.path import abspath, isdir, isfile, join, splitext 5 | from mimetypes import guess_type 6 | from urllib2 import urlopen 7 | from ast import literal_eval 8 | import pmanalyze 9 | 10 | ENTER = '\nPress ENTER to continue' 11 | MACHETE = abspath('.') 12 | query_path = join(MACHETE, "queries") 13 | ANALYSIS = join(MACHETE, "analysis") 14 | 15 | MAX_ACTIVE = 25 # migration knob: max number of migration workers running at once 16 | MAX_BATCHES = 1000000000 # migration knob: max number 
of rows to execute in one transation 17 | 18 | MENU1 = "[1] Analyze a binary file" 19 | MENU2 = "[2] Migrate a JSON file into Grakn" 20 | MENU3 = "[3] Run all CWE queries" 21 | MENU4 = "[4] Clean and restart Grakn" 22 | MENU5 = "[5] Quit" 23 | 24 | TEMPLATE_DESC = [ 25 | '', # n/a 26 | 'Migrating functions.', # template 1 27 | 'Migrating basic-blocks.', # template 2 28 | 'Linking basic-blocks to their functions.', # template 3 29 | 'Migrating instructions.', # template 4 30 | 'Linking instructions to their basic-blocks.', # template 5 31 | 'Migrating all AST nodes.', # template 6 32 | 'Linking AST nodes.' # template 7 33 | ] 34 | 35 | def print_banner(title=""): 36 | subprocess.call("clear") 37 | print(""" 38 | ____ __ __ _ _ 39 | | _ \ __ _ _ __ ___ _ __ | \/ | __ _ ___| |__ ___| |_ ___ ________ 40 | | |_) / _` | '_ \/ _ \ '__| | |\/| |/ _` |/ __| '_ \ / _ \ __/ _ \ /_______/ 41 | | __/ (_| | |_)| __/ | | | | | (_| | (__| | | | __/ || __/ \_______\\ 42 | |_| \__,_| .__/\___|_| |_| |_|\__,_|\___|_| |_|\___|\__\___| /_______/ 43 | |_| @==|;;;;;;> 44 | """) 45 | total_len = 80 46 | if title: 47 | padding = total_len - len(title) - 4 48 | print("== {} {}\n".format(title, "=" * padding)) 49 | else: 50 | print("{}\n".format("=" * total_len)) 51 | 52 | def run_script(query_path, query, keyspace): 53 | try: 54 | subprocess.call(["python3.6", join(query_path, query), keyspace]) 55 | except OSError: 56 | print("It looks like you don't have Python3.6 installed. 
" \ 57 | "The Grakn Python driver requires it.") 58 | return -1 59 | return 0 60 | 61 | def run_queries(query, keyspace): 62 | if query == 'all_queries': 63 | print("Running all CWE queries against the '{}' keyspace...".format(keyspace)) 64 | queries = [f for f in os.listdir(query_path) if isfile(join(query_path, f))] 65 | for query in queries: 66 | if ".py" not in query: continue 67 | if run_script(query_path, query, keyspace): return 68 | print("Script " + query + " complete.") 69 | print("All queries complete.") 70 | else: 71 | if isfile(join(query_path, query)): 72 | if run_script(query_path, query, keyspace): return 73 | else: 74 | print("Could not find the python script " + query) 75 | print("Please make sure it is located in " + query_path) 76 | return 77 | 78 | 79 | def get_file_selection(types): 80 | file_list = os.listdir(ANALYSIS) 81 | filtered = [] 82 | for file in file_list: 83 | if types == "json" and guess_type(join(ANALYSIS, file))[0] == "application/json": 84 | filtered.append(file) 85 | elif types == "bin": 86 | filecmd = (subprocess.check_output(["file", join(ANALYSIS, file)])).lower() 87 | filecmd = filecmd.split(": ")[1] # remove file path returned by 'file' utility 88 | if "elf" in filecmd or "mach-o" in filecmd or "pe" in filecmd or ".bndb" in file.lower(): 89 | filtered.append(file) 90 | else: 91 | pass # not json or executable binary 92 | 93 | # print file choices 94 | if len(filtered) == 0: 95 | if types == "json": 96 | print("No json files were found in {}".format(ANALYSIS)) 97 | elif types == "bin": 98 | print("No executable files were found in {}".format(ANALYSIS)) 99 | raw_input(ENTER) 100 | return "quit" 101 | else: 102 | for i, file in enumerate(filtered): 103 | print "[{}] {}".format(i, file) 104 | 105 | index = raw_input("\nSelect a file number to analyze ([q]uit): ").lower() 106 | if index == "q" or index == "quit": 107 | return "quit" 108 | 109 | try: 110 | index = int(index) 111 | if index in range(0, len(filtered)): 112 | 
return filtered[int(index)] 113 | except ValueError: 114 | pass 115 | 116 | if index != "": 117 | print("\nThat is not a valid file selection. Try again.") 118 | raw_input(ENTER) 119 | if types == "bin": 120 | print_banner(MENU1) 121 | elif types == "json": 122 | print_banner(MENU2) 123 | else: 124 | print_banner() 125 | 126 | return False 127 | 128 | 129 | def main(): 130 | menu = True 131 | while menu: 132 | print_banner() 133 | 134 | # check directories 135 | try: 136 | subprocess.call(['grakn', 'version'], stdout=open(os.devnull, 'wb')) 137 | subprocess.call(['graql', 'version'], stdout=open(os.devnull, 'wb')) 138 | except OSError: 139 | print("Please ensure grakn and graql are in your PATH") 140 | sys.exit() 141 | 142 | if not isdir(MACHETE): 143 | print("Paper Machete directory not found") 144 | print("Please ensure Paper Machete is located in {}".format(MACHETE)) 145 | sys.exit() 146 | 147 | if not isdir(ANALYSIS): 148 | print("Creating directory '{}'".format(ANALYSIS)) 149 | subprocess.call(["mkdir", "analysis"]) 150 | 151 | menu_option = raw_input("{}\n{}\n{}\n{}\n{}\n\n>> ".format(MENU1,MENU2,MENU3,MENU4,MENU5)) 152 | 153 | try: 154 | menu_option = int(menu_option) 155 | except ValueError: 156 | if menu_option != "": 157 | print("'{}' is not a valid option.".format(menu_option)) 158 | raw_input(ENTER) 159 | continue 160 | 161 | # analyze a binary file 162 | if menu_option == 1: 163 | 164 | # display supported binary files in ./analysis 165 | binary = False 166 | while binary == False: 167 | print_banner(MENU1) 168 | binary = get_file_selection("bin") 169 | if binary == "quit": 170 | break 171 | if binary == "quit": 172 | continue 173 | 174 | # check to see if the file exists, if it does, process it 175 | if not isfile(join(ANALYSIS, binary)): 176 | print("File '{}' not found.".format(binary)) 177 | else: 178 | functions = str(raw_input('Specify a list of functions examine seperated by spaces (ENTER for all): ')).split() 179 | if len(functions) == 0: 180 | 
pmanalyze.main(join(ANALYSIS, binary)) 181 | else: 182 | print functions 183 | pmanalyze.main(join(ANALYSIS, binary), functions) 184 | raw_input(ENTER) 185 | 186 | # migrate a json file into Grakn 187 | elif menu_option == 2: 188 | 189 | # display supported binary files in ./analysis 190 | json = False 191 | while json == False: 192 | print_banner(MENU2) 193 | json = get_file_selection("json") 194 | if json == "quit": 195 | break 196 | if json == "quit": 197 | continue 198 | 199 | # check to see if the keyspace already exists for this file 200 | try: 201 | keyspace = json.lower().replace('.json', '') 202 | keyspaces = literal_eval(urlopen('http://127.0.0.1:4567/kb').read()) 203 | 204 | inc = 1 205 | finding_name = True 206 | while finding_name: 207 | inc += 1 208 | if keyspace not in keyspaces: 209 | finding_name = False # keyspace name is not in use 210 | else: 211 | keyspace = "{}_{}".format(keyspace, inc) # add a _# suffix and try again 212 | except: 213 | print("Unable to query keyspace names. Is Grakn running?\nContinuing assuming keyspace '{}' is OK to use.".format(keyspace)) 214 | 215 | try: 216 | # insert the ontology 217 | print("Inserting ontology into the '{}' keyspace...".format(keyspace)) 218 | subprocess.call(["graql","console", "-f", join(MACHETE, "templates", "binja_mlil_ssa.gql"), "-k", keyspace]) 219 | 220 | 221 | # migrate data into Grakn 222 | print("\nMigrating data from '{}' into the '{}' keyspace...".format(json, keyspace)) 223 | 224 | # loop over all 7 templates 225 | for num in range(1,8): 226 | print(">> Migration step {} of 7: {}".format(num, TEMPLATE_DESC[num])) 227 | subprocess.call(["graql", "migrate", "json", "--template", join(MACHETE, "templates", "binja_mlil_ssa_{}.tpl".format(num)), "--input", join(ANALYSIS, json), "--keyspace", keyspace]) 228 | 229 | print("Data successfully migrated into Grakn. 
You can now run CWE query scripts against '{}' to check for vulnerabilities".format(keyspace)) 230 | raw_input(ENTER) 231 | except: 232 | print("Upload failed... please try agin.") 233 | raw_input(ENTER) 234 | 235 | # run CWE queries 236 | elif menu_option == 3: 237 | keyspace = None 238 | keyspaces = literal_eval(urlopen('http://127.0.0.1:4567/kb').read())['keyspaces'] 239 | 240 | print_banner(MENU3) 241 | 242 | for i, ks in enumerate(keyspaces): 243 | print("[{}] {}".format(i, ks['name'])) 244 | 245 | index = raw_input("\nSelect a keyspace to run all queries against ([q]uit): ").lower() 246 | if index == "q" or index == "quit": 247 | continue 248 | 249 | try: 250 | index = int(index) 251 | if index in range(0, len(keyspaces)): 252 | keyspace = keyspaces[int(index)]['name'] 253 | except ValueError: 254 | continue 255 | 256 | run_queries('all_queries', keyspace) 257 | raw_input(ENTER) 258 | 259 | # clean and restart Grakn 260 | elif menu_option == 4: 261 | print("Restarting Grakn. Press \"Y\" when prompted.\nWait until you see the Grakn banner before continuing!") 262 | raw_input(ENTER) 263 | 264 | subprocess.call(["grakn", "server", "stop"]) 265 | subprocess.call(["grakn", "server", "clean"]) 266 | subprocess.call(["grakn", "server", "start"]) 267 | 268 | # quit 269 | elif menu_option == 5: 270 | menu = False 271 | 272 | else: 273 | print("Invalid option!\n") 274 | raw_input(ENTER) 275 | 276 | if __name__ == "__main__": 277 | main() 278 | -------------------------------------------------------------------------------- /pmanalyze.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | from struct import pack, unpack 4 | from os.path import basename, join, isfile 5 | from operator import attrgetter 6 | from collections import defaultdict 7 | import binaryninja as binja 8 | 9 | PM = None 10 | vars_and_sizes = {} 11 | 12 | class PaperMachete(): 13 | def __init__(self): 14 | self.functions = [] 15 | 16 | 
class PMFunction(object):
    """JSON record for one analyzed function, its basic blocks and its edges."""

    def __init__(self, func_name, asm_addr):
        self.func_name = func_name
        self.asm_addr = asm_addr
        self.basic_blocks = []
        self.bb_edges = []


class PMBasicBlock(object):
    """JSON record for one basic block and the instructions placed in it."""

    def __init__(self, bb_name, bb_start, bb_end):
        self.bb_name = bb_name
        self.bb_start = bb_start
        self.bb_end = bb_end - 1  # set end as last il index (not +1 like binja gives us)
        self.instructions = []


class PMInstruction(object):
    """JSON record for one IL instruction; `nodes` holds its AST nodes."""

    def __init__(self, name, il_index, asm_address, operation_type, in_bb):
        self.name = name
        self.il_index = il_index
        self.asm_address = asm_address
        self.operation_type = operation_type
        self.in_bb = in_bb
        self.nodes = []


class _PMNode(object):
    """Fields shared by every AST node record attached to an instruction."""

    def __init__(self, name, depth, node_type, edge_label, parent_hash):
        self.name = name
        self.depth = depth
        self.node_type = node_type
        self.edge_label = edge_label
        self.parent_hash = parent_hash


class PMOperation(_PMNode):
    """AST node for an IL operation nested below an instruction."""


class PMNodeList(_PMNode):
    """AST node for a list operand; `list_size` is the number of elements."""

    def __init__(self, name, depth, node_type, edge_label, parent_hash, list_size):
        _PMNode.__init__(self, name, depth, node_type, edge_label, parent_hash)
        self.list_size = list_size


class PMEndNodeConstant(_PMNode):
    """Terminal AST node holding an integer constant rendered as a string."""

    def __init__(self, name, depth, node_type, edge_label, parent_hash, constant_value):
        _PMNode.__init__(self, name, depth, node_type, edge_label, parent_hash)
        self.constant_value = constant_value


class PMEndNodeVarSSA(_PMNode):
    """Terminal AST node for an SSA variable (variable name plus SSA version)."""

    def __init__(self, name, depth, node_type, edge_label, parent_hash, var, version, var_type, var_size, var_func):
        _PMNode.__init__(self, name, depth, node_type, edge_label, parent_hash)
        self.var = var
        self.version = version
        self.var_type = var_type
        self.var_size = var_size
        self.var_func = var_func


class PMEndNodeVariable(_PMNode):
    """Terminal AST node for a non-SSA variable."""

    def __init__(self, name, depth, node_type, edge_label, parent_hash, var, var_type, var_size, var_func):
        _PMNode.__init__(self, name, depth, node_type, edge_label, parent_hash)
        self.var = var
        self.var_type = var_type
        self.var_size = var_size
        self.var_func = var_func


class PMBBEdge(object):
    """Directed edge between two basic blocks, identified by their bb names."""

    def __init__(self, source, target):
        self.source = source
        self.target = target


# Integer types accepted as constant operands. The check used to be against
# `long` only; `int` is accepted too so plain ints are not rejected, and so the
# name lookup does not fail on interpreters that have no `long`.
try:
    _INTEGER_TYPES = (int, long)
except NameError:
    _INTEGER_TYPES = (int,)


def _pm_func_name(func):
    """Return the function name with '.' replaced by '_', as used in all records."""
    return func.name.replace('.', '_')


def _bb_name(block, func_name):
    """Return the basic-block name: 'bb_<start>_<last il index>_<function name>'."""
    return "bb_{}_{}_{}".format(block.start, block.end-1, func_name)


def _append_node(inst_hash, node):
    """Attach `node` to every recorded instruction whose name equals `inst_hash`."""
    for pm_func in PM.functions:
        for bb in pm_func.basic_blocks:
            for inst in bb.instructions:
                if inst.name == inst_hash:
                    inst.nodes.append(node)


def process_function(func):
    """
    Reset the per-function globals and register `func` in PM.functions.

    `insn_list` (addresses / hashes already processed) is cleared, and
    `vars_and_sizes` is rebuilt from the string form of the function's
    stack layout.
    """
    global insn_list
    global vars_and_sizes

    insn_list = []
    vars_and_sizes = {}

    stack = str(binja.function.Function.stack_layout.__get__(func))
    vars_and_sizes = get_variable_sizes(stack)

    func_name = _pm_func_name(func)
    asm_addr = hex(func.start).strip('L')

    PM.functions.append(PMFunction(func_name, asm_addr))


def process_basic_block(func, block):
    """Record `block` under every registered function named like `func`."""
    func_name = _pm_func_name(func)
    bb_name = _bb_name(block, func_name)

    for pm_func in PM.functions:
        if pm_func.func_name == func_name:
            pm_func.basic_blocks.append(PMBasicBlock(bb_name, block.start, block.end))


def process_instruction(func, block, insn):
    """
    Parse `insn` into AST records unless its address was already processed.

    A single ISA instruction can map to many IL instructions, which can cause
    the same instruction to be processed many times. To avoid this, addresses
    are tracked in the global `insn_list` (cleared in process_function()) and
    only processed once.

    MLIL_GOTO operations always seem to have address 0x0, so 0x0 addresses are
    processed every time; ast_build_json() de-duplicates those by hash.
    """
    global insn_list

    if (insn.address not in insn_list) or (insn.address == 0x0):
        ast_parse([func, block, insn])
        insn_list.append(insn.address)

        # Sort the 'nodes' list in each instruction by 'depth'.
        # This is extremely important for Grakn's migration template
        # since nodes at depth 1 need to exist before nodes at depth
        # 2 can be linked to them (and so on).
        # Only needed after a parse: a skipped instruction adds no nodes,
        # so the lists are still in the order left by the previous sort.
        for pm_func in PM.functions:
            for bb in pm_func.basic_blocks:
                for inst in bb.instructions:
                    inst.nodes.sort(key=attrgetter('depth'))


def ast_build_json(args, name, il, level=0, edge=""):
    """
    Recursively record the AST rooted at `il` into the PM structures.

    args  -- [func, block, insn] as passed to ast_parse()
    name  -- hash name of this node ("N_<hash>_<address>" plus one "_<i>" per level)
    il    -- an MLIL instruction, a list of operands, or a terminal operand
    level -- recursion depth; 0 means `il` is the instruction itself
    edge  -- label of the edge leading from the parent to this node

    Raises ValueError when a terminal operand is of an unexpected type.
    """
    global insn_list
    global vars_and_sizes

    func = args[0]
    block = args[1]

    func_name = _pm_func_name(func)

    # slice off the last "_#" and rejoin to get the parent reference hash
    parent = "_".join(name.split('_')[:-1])

    # Hashes of instruction nodes in the AST look like: "N_8735918103813_4195908"
    # One element down from an instruction will look like: "N_8735918103813_4195908_0"
    # So if there are two "_" in the hash, the node is an instruction. List nodes
    # have the letter 'L' in place of the 'N'.
    depth = name.count("_") - 2
    if 'L' in parent:
        # child of a list: reset node status (the parent hash keeps its 'L')
        name = name.replace('L', 'N')

    # get the instruction hash this node belongs in
    inst_hash = "_".join(name.split('_')[:3])

    # get the basic-block this node belongs in
    inbb = _bb_name(block, func_name)

    if isinstance(il, binja.MediumLevelILInstruction):

        # instruction
        if level == 0:
            il_index = il.instr_index
            asm_address = hex(il.address).strip('L')
            operation_type = str(il.operation).split('.')[1]

            for pm_func in PM.functions:
                for bb in pm_func.basic_blocks:
                    if bb.bb_name != inbb:
                        continue
                    # The index range check avoids MLIL_GOTO nodes being placed
                    # in the wrong basic blocks. All MLIL_GOTO nodes have an
                    # asm_address of 0x0, so their addresses never block
                    # re-processing; that also means the same instruction can
                    # be seen twice, so GOTOs are de-duplicated by hash.
                    if not (bb.bb_start <= il_index <= bb.bb_end):
                        continue
                    if operation_type == "MLIL_GOTO":
                        if inst_hash in insn_list:
                            continue  # don't add this again!
                        insn_list.append(inst_hash)
                    bb.instructions.append(PMInstruction(inst_hash, il_index, asm_address, operation_type, inbb))

        # operation
        else:
            node_type = str(il.operation).split('.')[1]
            _append_node(inst_hash, PMOperation(name, depth, node_type, str(edge), parent))

        # edge
        for i, operand in enumerate(il.operands):
            try:
                edge_label = str(il.ILOperations[il.operation][i][0])
            except IndexError:
                # Addresses issue in binja v1.1 stable with MLIL_SET_VAR_ALIASED
                # operations in the Python bindings.
                # See: https://github.com/Vector35/binaryninja-api/issues/787
                edge_label = "unimplemented"
            child_name = "{}_{}".format(name, i)
            ast_build_json(args, child_name, operand, level+1, edge_label)

    # list of operands / nodes
    elif isinstance(il, list):
        # list hashes have an 'L' prefix to distinguish from nodes ('N').
        name = name.replace('N', 'L')
        _append_node(inst_hash, PMNodeList(name, depth, "list", str(edge), parent, len(il)))

        # add the elements of the list; the edge label is the element index
        for i, item in enumerate(il):
            item_name = "{}_{}".format(name, i)
            ast_build_json(args, item_name, item, level+1, str(i))

    # end node
    else:
        parent_hash = parent
        edge_label = str(edge)

        # constant
        if isinstance(il, _INTEGER_TYPES):
            _append_node(inst_hash, PMEndNodeConstant(name, depth, "constant", edge_label, parent_hash, str(il)))

        # SSAVariable (not using type information)
        elif isinstance(il, binja.mediumlevelil.SSAVariable):
            var = str(il.var)
            var_type = str(il.var.type)
            # variables missing from the stack layout default to 4 bytes
            var_size = vars_and_sizes.get(str(il.var), 4)
            _append_node(inst_hash, PMEndNodeVarSSA(name, depth, "variable-ssa", edge_label, parent_hash, var, il.version, var_type, var_size, func_name))

        # Variable (contains more information than we currently use)
        elif isinstance(il, binja.function.Variable):
            var = str(il)
            var_type = str(il.type)
            var_size = vars_and_sizes.get(str(il), 4)
            _append_node(inst_hash, PMEndNodeVariable(name, depth, "variable", edge_label, parent_hash, var, var_type, var_size, func_name))

        # Unknown terminating node (this should not be reached)
        else:
            print("A terminating node was encountered that was not expected: '{}'".format(type(il)))
            raise ValueError


def ast_name_element(args, il_type, il):
    """Name the root node "N_<hash(il)>_<il.address>" and record its AST."""
    name = "N_{}_{}".format(hash(il), il.address)
    ast_build_json(args, name, il)


def ast_parse(args):
    """
    Record every MLIL SSA instruction of the function that shares insn's address.

    args -- [func, block, insn]
    """
    func = args[0]
    insn = args[2]

    print(" function: {} (asm-addr: {})".format(func.name, hex(insn.address).strip('L')))

    # collect, in block order, the SSA instructions located at this address
    ils = [mil
           for ssa_block in func.medium_level_il.ssa_form
           for mil in ssa_block
           if mil.address == insn.address]

    for il in sorted(ils):
        ast_name_element(args, 'MediumLevelILSSA', il)


def process_edges(func):
    """Record one PMBBEdge per outgoing edge of every MLIL SSA basic block."""
    func_name = _pm_func_name(func)

    for block in func.medium_level_il.ssa_form:
        for edge in block.outgoing_edges:
            source = _bb_name(edge.source, func_name)
            target = _bb_name(edge.target, func_name)
            for pm_func in PM.functions:
                if pm_func.func_name == func_name:
                    pm_func.bb_edges.append(PMBBEdge(source, target))


def get_offset_from_var(var):
    """
    Helper for get_variable_sizes().
    Use this to calculate a variable's offset from its name.
    e.g. var_90, __saved_edi --> 144, -1

    `var` is one entry of the stack string; the name is taken as the text
    after the last space with the final character dropped.

    Returns (offset, instance): offset is the hex suffix of a "var_<hex>"
    name as an int, or -1 for any other name; instance is True when the
    name carries an extra "_<n>" suffix (e.g. var_14_1).
    """
    instance = False

    # Parse string
    start = var.rfind(' ')+1
    tmp = var[start:-1]

    # Only "var_<hex>" names carry an offset. startswith() also copes with
    # an empty name, which indexing the first character did not.
    if not tmp.startswith('var_'):
        return -1, instance

    tmp = tmp[4:]
    j = tmp.find('_')

    # Handles SSA var instances (var_14_1) and converts c, 58, 88 --> 12, 88, 136
    if j != -1:
        tmp = tmp[:j]
        instance = True

    try:
        offset = int(tmp, 16)
    except ValueError:
        offset = -1

    return offset, instance


def get_variable_sizes(stack):
    """
    Called from process_function. This function accepts a string
    of stack variables and returns a dict of var names and sizes.

    An empty stack string ("[]") yields an empty dict.
    """
    # NOTE(review): the nesting of the `if` statements below was reconstructed
    # from a flattened copy of this file; they are treated as sequential
    # (not nested) -- confirm against the original layout.
    prev_offset = 0
    offset = 0
    var_dict = {}

    # Loop through each item on stack backwards
    for item in reversed(stack[1:-1].split(', ')):
        if not item:
            # an empty stack layout splits into a single empty entry
            continue

        size = 0
        instance = False

        # Handle args and return addr
        if ('arg' in item) or ('return' in item):
            size = 4

        elif 'int32' in item:
            size = 4
            tmp, instance = get_offset_from_var(item)
            if tmp != -1:
                offset = tmp
            if not instance:
                offset = prev_offset+4

        elif 'int64' in item:
            size = 8
            tmp, instance = get_offset_from_var(item)
            if not instance:
                offset = prev_offset+8
            if tmp != -1:
                offset = tmp

        else:
            offset, instance = get_offset_from_var(item)
            if instance:
                offset = offset-4

        # unknown types: size is the distance to the previous offset
        if size == 0:
            size = offset-prev_offset
        if not instance:
            prev_offset = offset

        # Parse string
        key = item[item.rfind(' ')+1:-1]

        var_dict[key] = size

    return var_dict
| 440 | def analyze(bv, func_list=[]): 441 | 442 | list_len = len(func_list) 443 | 444 | ## process functions 445 | for func in bv.functions: 446 | if list_len > 0 and func.name not in func_list: continue 447 | process_function(func) 448 | 449 | ## process basic blocks 450 | for block in func.medium_level_il.ssa_form: 451 | process_basic_block(func, block) 452 | 453 | ## process instructions 454 | for insn in block: 455 | process_instruction(func, block, insn) 456 | 457 | ## process basic block edges 458 | # all edges need to exist in Grakn before we can do this 459 | # because edges stemming from loops wont have an associated 460 | # basic block inserted to create a relationship for. 461 | process_edges(func) 462 | 463 | 464 | def main(target, func_list=[]): 465 | global PM 466 | 467 | PM = PaperMachete() 468 | 469 | if not isfile(target): 470 | print "The specified target '{}' is not a file. Try again.".format(target) 471 | return 472 | 473 | print "Invoking Binary Ninja and analyzing file: {}".format(target) 474 | bv = binja.BinaryViewType.get_view_of_file(target) 475 | bv.add_analysis_option('linearsweep') 476 | print "Performing linear sweep..." 477 | bv.update_analysis_and_wait() 478 | print "Linear sweep complete. Collecting BNIL data..." 479 | analyze(bv, func_list) 480 | 481 | # pretty printed json (pretty printed files are much larger than compact files!) 
482 | target_json = json.dumps(PM, default=lambda o: o.__dict__, indent=4, sort_keys=True) 483 | 484 | # compact / minified json 485 | #target_json = json.dumps(PM, default=lambda o: o.__dict__) 486 | 487 | try: 488 | jf = None 489 | if __name__ == "__main__": 490 | jf = open("{}.json".format(basename(target)), "w") 491 | else: 492 | jf = open(join("analysis", "{}.json".format(basename(target))), "w") 493 | jf.write(target_json) 494 | jf.close() 495 | except IOError: 496 | print "ERROR: Unable to open/write to {}.json.".format(basename(target)) 497 | return 498 | 499 | if __name__ == "__main__": 500 | if len(sys.argv) > 1: 501 | target = sys.argv[1] 502 | func_list = sys.argv[2:] 503 | else: 504 | print "Usage: %s [function1 function2 ...]" % sys.argv[0] 505 | main(target, func_list) 506 | -------------------------------------------------------------------------------- /queries/cwe_120_v1.py: -------------------------------------------------------------------------------- 1 | #============================================================================================================ 2 | # CWE-120: Buffer Copy without Checking Size of Input 3 | # 4 | # Vuln Info: A trivial way to cause this vulnerability is using the gets() function which is not secure. 5 | # Ex: 6 | # bytes_received = gets(input); <--Bad 7 | # bytes_received = receive_until(input, sizeof(input), '\n'); <--Good 8 | # 9 | # Methodology: 10 | # 1. Find gets instruction 11 | # 2. 
There's a vulnerability 12 | # 13 | # Try it on: REMATCH_1--Hat_Trick--Morris_Worm 14 | # 15 | #============================================================================================================ 16 | 17 | import sys 18 | import grakn 19 | 20 | def main(keyspace): 21 | client = grakn.Grakn(uri='localhost:48555') 22 | with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph: 23 | # Check for gets() function 24 | # Get address of function to use for next query 25 | func_names = ['gets', 'cgc_gets'] 26 | func_addrs = [] 27 | for function_name in func_names: 28 | query1 = 'match $func isa function, has func-name "{}", has asm-address $a; get $a;'.format(function_name) 29 | func_addrs += [int(result.value(), 16) for result in graph.query(query1).collect_concepts()] 30 | 31 | # If the function is found continue query 32 | for func_addr in func_addrs: 33 | # Get all instructions that have function name 34 | query2 = 'match $x has operation-type "MLIL_CALL_SSA" has asm-address $a; $y isa"MLIL_CONST_PTR"; ($x,$y); $z isa constant, has constant-value {}; ($y,$z); get $a;'.format(func_addr) 35 | result2 = graph.query(query2).collect_concepts() 36 | 37 | # If there are instructions that use the function check the instructions 38 | for instr in result2: 39 | ins_addr = instr.value() 40 | print("CWE-120: Buffer Copy Without Checking Size of Input at {}".format(ins_addr)) 41 | 42 | if __name__ == "__main__": 43 | if len(sys.argv) > 1: 44 | keyspace = sys.argv[1] 45 | else: 46 | keyspace = "grakn" 47 | main(keyspace) 48 | -------------------------------------------------------------------------------- /queries/cwe_121_v1.py: -------------------------------------------------------------------------------- 1 | #============================================================================================================ 2 | # CWE-121: Stack-based Buffer Overflow 3 | # 4 | # Vuln Info: This vulnerability comes from allocating too much space for a string. 
5 | # Ex: char string[64] 6 | # (cgc_receive_delim(0, string, 128, '\n') != 0) <--Bad 7 | # (cgc_receive_delim(0, string, sizeof(string), '\n') != 0) <--Good 8 | # 9 | # Methodo#logy: 10 | # 1. Find all instructions that call a specific function specified with function_name 11 | # 2. Check these instructions' parameters, string, and bytes allocated (sizeof(string)) 12 | # 3. Find where the string was initialized to get amount of bytes allocated 13 | # 4. If the amount of bytes allocated != size of string alert possible vulerability 14 | # 15 | # Try it on: Palindrome2, ShoutCTF 16 | # 17 | # Includes functions: 18 | # fgets(name, sizeof(name), stdin) 19 | # receive_delim(0, 0, string, sizeof(string), '\n') 20 | # strncpy(targetBuffer, srcBuffer, sizeof(targetBuffer)); 21 | # receive_until(buff, '\n', 25); 22 | # memcpy(str1, str2, n); 23 | # freaduntil(buf, sizeof(buf), '\n', stdin) 24 | # read(int fd, void *buf, size_t count); 25 | #============================================================================================================ 26 | 27 | import sys 28 | import grakn 29 | 30 | def main(keyspace): 31 | client = grakn.Grakn(uri='localhost:48555') 32 | with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph: 33 | 34 | # Functions with indexes for (dest, sizeof(dest)) stored in dict 35 | functions = {"receive_delim": (2,3), "fgets": (0,1), "strncpy": (0,2), "receive_until": (0,2), "memcpy": (0,2), "freaduntil": (1,2), "read":(1,2)} 36 | 37 | # Check for potential vuln in each function 38 | for function_name in functions: 39 | # Get address of function to use for next query 40 | query1 = 'match $func isa function, has func-name contains "{}", has asm-address $a; get $a;'.format(function_name) 41 | result1 = [result.map() for result in graph.query(query1)] 42 | 43 | # If the function is found continue query 44 | if result1: 45 | # Get all instructions that have function name 46 | func_addr = int(result1[0]['a'].value(), 16) 47 | query2 
= 'match $x has operation-type "MLIL_CALL_SSA"; $y isa"MLIL_CONST_PTR"; ($x,$y); $z isa constant, has constant-value {}; ($y,$z); get $x;'.format(func_addr) 48 | result2 = [result.map() for result in graph.query(query2)] 49 | 50 | # If there are instructions that use the function check the instructions 51 | if result2: 52 | 53 | buff_index = functions[function_name][0] 54 | size_index = functions[function_name][1] 55 | for instr in result2: 56 | Id = instr['x'].id 57 | query3 = 'match $x id "' + Id + '"; $l isa list; ($x,$l); (from-node: $l, $q); $q has edge-label $e; (from-node: $q, $v); {$v has var $s;} or {$v has constant-value $s;}; get $e, $s;' 58 | result3 = [result.map() for result in graph.query(query3)] 59 | 60 | # This section grabs instrution params and insert into an array 61 | param_array = [0, 0, 0, 0, 0, 0, 0, 0] 62 | 63 | for ele in result3: 64 | index = int(ele['e'].value()) 65 | val = ele['s'].value() 66 | param_array[index] = val 67 | # Get var name - This is done to determine how many bytes the variable is 68 | var_name = param_array[buff_index] 69 | var_name = var_name.split('#',1)[0].lstrip() 70 | 71 | # NOTE Enhancement Make finding buff_size the same as string_size 72 | # This assumes that buffer_size is a number, breaks when its a var or register 73 | # Get buffer size 74 | try: 75 | buff_size = int(param_array[size_index]) 76 | except ValueError as err: 77 | continue 78 | # Get size of string in by finding initialization Ex. 
var_88 = &var_58 79 | # Find where string is initialzed 80 | query4 = 'match $x id "{}"; $y isa basic-block; ($x,$y); $z isa instruction, has operation-type "MLIL_SET_VAR_SSA"; ($y,$z); {{$v1 isa variable, has var "{}";}} or {{$v1 isa variable-ssa, has var "{}";}}; ($z, $v1); $w isa MLIL_ADDRESS_OF; ($w, $z); $v isa variable, has var-size $s; ($w, $v); get $s, $x;'.format(Id, var_name, var_name) 81 | result4 = [result.map() for result in graph.query(query4)] 82 | 83 | if (result4): 84 | string_size = result4[0]['s'].value() 85 | # Finally Determine if buffer size == sizeof(str) 86 | if string_size != buff_size: 87 | instruction_ID = result4[0]['x'].id 88 | query5 = 'match $i id {}, has asm-address $a; get $a;'.format(instruction_ID) 89 | result5 = [result.map() for result in graph.query(query5)] 90 | instr_addr = result5[0]['a'].value() 91 | 92 | print("CWE-121: Stack-based Overflow possible at {}".format(instr_addr)) 93 | 94 | if __name__ == "__main__": 95 | if len(sys.argv) > 1: 96 | keyspace = sys.argv[1] 97 | else: 98 | keyspace = "grakn" 99 | main(keyspace) 100 | -------------------------------------------------------------------------------- /queries/cwe_129_v1.py: -------------------------------------------------------------------------------- 1 | #============================================================================================================ 2 | # CWE-129:Imporper validation of array index 3 | # 4 | # Vuln Info: This vulnerability comes from using untrusted (unchecked) input when using an array index. 5 | # 6 | # Methodology: Find all signed comparisons of a varaible and constant and follow the variable to see if its 7 | # other bound is checked. 8 | # 9 | # TODO: Currently the script searches out all comparisons to see if the other bound is checked by looking 10 | # for the same variable in other comparisons. The search can be improved by instead searching for where 11 | # the user can modify an array index then checking for bounds on that. 
12 | # 13 | # Limitations: This implementation only find instances where one bound was checked, but not the other. 14 | # Also this implementation does not specifically search for array indexs, but comparisons in general. 15 | # 16 | # try it on: recipe_and_pantry_manager 17 | #============================================================================================================ 18 | 19 | import sys 20 | import grakn 21 | 22 | #Exits the script 23 | def fail(): 24 | sys.exit() 25 | 26 | #Finds comparisons that are acting as a lower boudns check 27 | def lowerCheck(): 28 | query = 'match {$comp isa MLIL_CMP_SGE;} or {$comp isa MLIL_CMP_SGT;};$node isa MLIL_VAR_SSA;$cons isa MLIL_CONST;($comp, $node);($comp, $cons);$varssa isa variable-ssa has var $var;($node, $varssa);get $comp, $var;' 29 | return [result.map() for result in graph.query(query)] 30 | 31 | #Finds comparisons that are acting as an upper bounds check 32 | def upperCheck(): 33 | query = 'match {$comp isa MLIL_CMP_SLE;} or {$comp isa MLIL_CMP_SLT;};$node isa MLIL_VAR_SSA;$cons isa MLIL_CONST;($comp, $node);($comp, $cons);$varssa isa variable-ssa has var $var;($node, $varssa);get $comp, $var;' 34 | return [result.map() for result in graph.query(query)] 35 | 36 | #Returns the addresss of a comparison instruction 37 | def get_addr(comp): 38 | query = 'match $comp id "' + comp + '";$inst isa instruction, has asm-address $addr;($comp, $inst);get $addr;' 39 | return [result.map() for result in graph.query(query)] 40 | 41 | def main(keyspace): 42 | client = grakn.Grakn(uri='localhost:48555') 43 | global graph 44 | with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph: 45 | 46 | #Find a variable being compared 47 | query1 = 'match {$comp isa MLIL_CMP_SGE;} or {$comp isa MLIL_CMP_SLE;} or {$comp isa MLIL_CMP_SLT;} or {$comp isa MLIL_CMP_SGT;};$node isa MLIL_VAR_SSA;$cons isa MLIL_CONST;($comp, $node);($comp, $cons);$varssa isa variable-ssa has var $var;($node, $varssa);get $comp, 
$var;' 48 | result1 = [result.map() for result in graph.query(query1)] 49 | 50 | #Parse the output of result1 into the compare statements and varaible names 51 | comp, var = [], [] 52 | if result1: 53 | for entry in result1: 54 | comp.append(entry['comp'].id) 55 | var.append(entry['var'].value()) 56 | else: 57 | fail() 58 | for entry in comp: 59 | #Do upper bound check 60 | if ('SGE' or 'SGT') in entry: 61 | lower = lowerCheck() 62 | if lower: 63 | for item in lower: 64 | if item['var'].value() not in var: 65 | #failed to find upper bound check 66 | addr = get_addr(entry) 67 | print('CWE-129: Missing upper bound check at ' + str(addr[0]['addr'].value())) 68 | else: 69 | adddr = get_addr(entry) 70 | else: 71 | addr = get_addr(entry) 72 | print('CWE-129: Missing upper bound check at ' + str(addr[0]['addr'].value())) 73 | #Do lower bound check 74 | else: 75 | upper = upperCheck() 76 | if upper: 77 | for item in upper: 78 | if item['var'].value() not in var: 79 | #failed to find lower bound check 80 | addr = get_addr(entry) 81 | print('CWE-129: Missing lower bound check at ' + str(addr[0]['addr'].value())) 82 | else: 83 | addr = get_addr(entry) 84 | else: 85 | addr = get_addr(entry) 86 | print('CWE-129: Missing lower bound check at ' + str(addr[0]['addr'].value())) 87 | 88 | if __name__ == "__main__": 89 | if len(sys.argv) > 1: 90 | keyspace = sys.argv[1] 91 | else: 92 | keyspace = "grakn" 93 | main(keyspace) 94 | -------------------------------------------------------------------------------- /queries/cwe_134_v1.py: -------------------------------------------------------------------------------- 1 | #============================================================================================================ 2 | # CWE-134 Uncontrolled Format String 3 | # 4 | # Vuln Info: This vulnerability comes from using printf without a modifier 5 | # Ex: cgc_printf(message); <--Bad 6 | # cgc_printf("%s", message); <--Good 7 | # 8 | # Methodology: 9 | # 1. 
Check if file has a printf function 10 | # 2. Check if any instructions use printf 11 | # 3. Check if params in printf are data type(correct) or var_type(incorrect, no modifier i.e. %s used) 12 | # 13 | # Try it on: Barcoder, Checkmate, Kaprica_Go 14 | #============================================================================================================ 15 | 16 | import sys 17 | import grakn 18 | 19 | def main(keyspace): 20 | client = grakn.Grakn(uri='localhost:48555') 21 | with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph: 22 | 23 | # Get address of printf to use for next query 24 | query1 ='match $func isa function, has func-name contains "printf", has asm-address $a; offset 0; limit 100; get $a;' 25 | result1 = [result.map() for result in graph.query(query1)] 26 | if len(result1) > 0: 27 | print("Found potential calls at the following addresses:") 28 | for addr in result1: 29 | print(addr['a'].value()) 30 | 31 | # If printf is found continue query 32 | for printf_func in result1: 33 | # Pull any instructions that use printf and don't use a modifier (have var type and not data type) 34 | func_addr = int(printf_func['a'].value(), 16) 35 | print("Scanning address {}".format(hex(func_addr))) 36 | query2 = 'match $x isa instruction, has operation-type "MLIL_CALL_SSA", has asm-address $a; $y isa "MLIL_CONST_PTR"; ($x,$y); $z isa constant, has constant-value {}; ($y,$z); $l isa list, has list-size 1; ($x,$l); $s isa "MLIL_VAR_SSA"; ($l,$s); offset 0; limit 500; get $x, $a;'.format(func_addr) 37 | result2 = [result.map() for result in graph.query(query2)] 38 | 39 | # If there is an instruction that uses printf without modifier, output instruction 40 | if result2: 41 | for instr in result2: 42 | asm_addr = instr['a'].value() 43 | print("CWE-134: Uncontrolled Format String possible at {} ".format(asm_addr)) 44 | 45 | if __name__ == "__main__": 46 | if len(sys.argv) > 1: 47 | keyspace = sys.argv[1] 48 | main(keyspace) 49 | else: 50 | 
print("Please specify a keyspace to search.\nUsage: python3.6 {} ".format(sys.argv[0])) 51 | 52 | -------------------------------------------------------------------------------- /queries/cwe_788_v1.py: -------------------------------------------------------------------------------- 1 | #======================================================================================= 2 | # CWE-788: Access of Memory Location After End of Buffer 3 | # 4 | # Vuln Info: The software reads or writes to a buffer using an index or pointer that 5 | # references a memory location after the end of the buffer. 6 | # 7 | # Methodology: 8 | # 1.Find any arrays 9 | # 2.Find indexing variables for said arrays 10 | # 3.Look to see if those variables are used in a comparison (bounds check) 11 | #======================================================================================= 12 | 13 | import sys 14 | import grakn 15 | 16 | #Exits script 17 | def fail(): 18 | return 0 19 | sys.exit() 20 | 21 | #Searches for potential array declarations 22 | def query1(): 23 | query = 'match $set isa instruction, has operation-type "MLIL_SET_VAR_SSA";$ptr isa MLIL_CONST_PTR;($set, $ptr);$reg isa variable-ssa, has var $index;($set, $reg); get $index;' 24 | return [result.map() for result in graph.query(query)] 25 | 26 | #Finds potential loops 27 | def query2(): 28 | query = 'match $block isa basic-block;($block, $inst);$inst isa instruction;$reg isa variable-ssa, has var $index, has edge-label "dest";($inst, $reg);get $index, $block;' 29 | return [result.map() for result in graph.query(query)] 30 | 31 | #Checks query2 for if statements 32 | def query3(item): 33 | query = 'match $block isa basic-block, id "' + item + '";($block, $inst);$inst isa instruction, has operation-type "MLIL_IF";offset 0; get $inst;' 34 | return [result.map() for result in graph.query(query)] 35 | 36 | #Finds and returns various information about the loops, including the counting variable 37 | def query4(entry): 38 | query = 
#Checks if the bounds on the counting variable (array index) are ever checked
def query5():
    """Return the SSA variables that take part in a relational MLIL_IF test.

    The query matches every MLIL_IF instruction whose condition is one of the
    signed/unsigned <, <=, >, >= comparisons (MLIL_CMP_S*/MLIL_CMP_U*) with an
    MLIL_VAR_SSA operand, and yields one answer map per match holding the
    keys ``var`` and ``version``.

    Reads the open transaction from the module-level ``graph``.
    """
    query = 'match $block isa basic-block;$inst isa instruction, has operation-type "MLIL_IF";($block, $inst);{$comp isa MLIL_CMP_SGE;} or {$comp isa MLIL_CMP_SLE;} or {$comp isa MLIL_CMP_SLT;} or {$comp isa MLIL_CMP_SGT;} or {$comp isa MLIL_CMP_UGE;} or {$comp isa MLIL_CMP_ULE;} or {$comp isa MLIL_CMP_ULT;} or {$comp isa MLIL_CMP_UGT;};($inst, $comp);$reg isa MLIL_VAR_SSA;($comp, $reg);$index isa variable-ssa, has var $var, has version $version;($reg, $index);get $var, $version;'
    return [result.map() for result in graph.query(query)]


#Returns asm-address of vulnerability
def query6(reg_type, reg):
    """Return the asm-address of each instruction related to one graph node.

    reg_type -- type label of the node, spliced into the query as ``isa <type>``
    reg      -- Grakn concept id of the node

    Returns a list of answer maps, each holding the key ``adr``. Both
    arguments are concatenated into the query text unescaped; main() only
    passes values that came back from earlier queries.

    Reads the open transaction from the module-level ``graph``.
    """
    query = 'match $inst isa instruction, has asm-address $adr;$var isa '+ reg_type + ', id "' + reg + '";($inst, $var);get $adr;'
    return [result.map() for result in graph.query(query)]


def main(keyspace):
    """Print a CWE-788 finding for every loop counter that is never compared.

    keyspace -- name of the Grakn keyspace to open for reading

    Pipeline: collect candidate array ids (query1), keep the loop blocks that
    involve them (query2) and that yield results from query3, collect the
    counters assigned in those blocks (query4), then report each counter whose
    variable name never shows up in a relational MLIL_IF test (query5). The
    match against query5 is by variable name only; the SSA version is used for
    the report text, not for the comparison.
    """
    client = grakn.Grakn(uri='localhost:48555')
    # The query helpers read the transaction through this module-level name.
    global graph
    # Enter the session as a context manager as well, so that it is closed on
    # exit instead of only the transaction being closed.
    with client.session(keyspace=keyspace) as session:
        with session.transaction(grakn.TxType.READ) as graph:

            # Find possible arrays
            q1 = query1()
            if q1:
                array = {row['index'].id for row in q1}
            else:
                array = set()
                fail()

            # Find loops involving the array
            q2 = query2()
            if q2:
                block = [row['block'].id for row in q2 if row['index'].id in array]
            else:
                block = []
                fail()

            # Do the 'loop' blocks contain if statements?
            block = [entry for entry in block if query3(entry)]

            # Find the loop counters. Each counter is kept as one tuple so the
            # fields cannot drift out of step the way parallel lists can.
            counters = []
            for entry in block:
                for row in query4(entry):
                    counters.append((
                        row['reg'].id,
                        row['reg'].type().label(),
                        row['index'].value(),
                        row['version'].value(),
                        row['index'].id,
                    ))

            # Find if the bounds of the loop counter are checked
            checked = {row['var'].value() for row in query5()}

            # Any counter whose variable is never compared is a potential vulnerability
            for reg, reg_type, var, version, var_id in counters:
                if var in checked:
                    continue
                q6 = query6(reg_type, reg)
                # A node with no owning instruction used to raise IndexError and
                # abort the whole report; still report the finding without it.
                adr = q6[0]['adr'].value() if q6 else '<unknown>'
                print('CWE-788: Array index missing bounds check at ' + adr + ' associated with ' + var + '#' + str(version) + ' id = ' + var_id + ' sub of ' + reg_type + ' id = ' + reg)


if __name__ == "__main__":
    # Optional first CLI argument selects the keyspace; default is "grakn".
    keyspace = sys.argv[1] if len(sys.argv) > 1 else "grakn"
    main(keyspace)
has stack; 9 | 10 | basic-block sub entity 11 | plays from-basic-block 12 | plays to-basic-block 13 | plays in-basic-block 14 | plays contains-basic-block 15 | has bb-name 16 | has bb-start 17 | has bb-end; 18 | 19 | instruction sub entity 20 | plays from-node 21 | plays to-node 22 | plays in-instruction 23 | plays contains-instruction 24 | has name 25 | has il-index 26 | has asm-address 27 | has ins-text 28 | has operation-type 29 | has in-bb; 30 | 31 | operation sub entity 32 | plays from-node 33 | plays to-node 34 | plays in-operation 35 | plays contains-operation 36 | has name 37 | has parent-hash 38 | has edge-label; 39 | 40 | constant sub entity 41 | plays from-node 42 | plays to-node 43 | has name 44 | has parent-hash 45 | has constant-value 46 | has edge-label; 47 | 48 | variable sub entity 49 | plays from-node 50 | plays to-node 51 | has name 52 | has parent-hash 53 | has var 54 | has edge-label 55 | has var-type 56 | has var-size 57 | has var-func; 58 | 59 | variable-ssa sub entity 60 | plays from-node 61 | plays to-node 62 | plays trace 63 | has name 64 | has parent-hash 65 | has var 66 | has version 67 | has edge-label 68 | has var-type 69 | has var-size 70 | has var-func; 71 | 72 | list sub entity 73 | plays from-node 74 | plays to-node 75 | has name 76 | has parent-hash 77 | has list-size 78 | has edge-label; 79 | 80 | 81 | ## SUB ENTITIES ################################# 82 | #### OPERATIONS ################################# 83 | MLIL_NOP sub operation; 84 | MLIL_SET_VAR sub operation; 85 | MLIL_SET_VAR_FIELD sub operation; 86 | MLIL_SET_VAR_SPLIT sub operation; 87 | MLIL_LOAD sub operation; 88 | MLIL_STORE sub operation; 89 | MLIL_VAR sub operation; 90 | MLIL_VAR_FIELD sub operation; 91 | MLIL_ADDRESS_OF sub operation; 92 | MLIL_ADDRESS_OF_FIELD sub operation; 93 | MLIL_CONST sub operation; 94 | MLIL_CONST_PTR sub operation; 95 | MLIL_ADD sub operation; 96 | MLIL_ADC sub operation; 97 | MLIL_SUB sub operation; 98 | MLIL_SBB sub operation; 99 | 
MLIL_AND sub operation; 100 | MLIL_OR sub operation; 101 | MLIL_XOR sub operation; 102 | MLIL_LSL sub operation; 103 | MLIL_LSR sub operation; 104 | MLIL_ASR sub operation; 105 | MLIL_ROL sub operation; 106 | MLIL_RLC sub operation; 107 | MLIL_ROR sub operation; 108 | MLIL_RRC sub operation; 109 | MLIL_MUL sub operation; 110 | MLIL_MULU_DP sub operation; 111 | MLIL_MULS_DP sub operation; 112 | MLIL_DIVU sub operation; 113 | MLIL_DIVU_DP sub operation; 114 | MLIL_DIVS sub operation; 115 | MLIL_DIVS_DP sub operation; 116 | MLIL_MODU sub operation; 117 | MLIL_MODU_DP sub operation; 118 | MLIL_MODS sub operation; 119 | MLIL_MODS_DP sub operation; 120 | MLIL_NEG sub operation; 121 | MLIL_NOT sub operation; 122 | MLIL_SX sub operation; 123 | MLIL_ZX sub operation; 124 | MLIL_LOW_PART sub operation; 125 | MLIL_JUMP sub operation; 126 | MLIL_JUMP_TO sub operation; 127 | MLIL_CALL sub operation; 128 | MLIL_CALL_UNTYPED sub operation; 129 | MLIL_CALL_OUTPUT sub operation; 130 | MLIL_CALL_PARAM sub operation; 131 | MLIL_RET sub operation; 132 | MLIL_NORET sub operation; 133 | MLIL_IF sub operation; 134 | MLIL_GOTO sub operation; 135 | MLIL_CMP_E sub operation; 136 | MLIL_CMP_NE sub operation; 137 | MLIL_CMP_SLT sub operation; 138 | MLIL_CMP_ULT sub operation; 139 | MLIL_CMP_SLE sub operation; 140 | MLIL_CMP_ULE sub operation; 141 | MLIL_CMP_SGE sub operation; 142 | MLIL_CMP_UGE sub operation; 143 | MLIL_CMP_SGT sub operation; 144 | MLIL_CMP_UGT sub operation; 145 | MLIL_TEST_BIT sub operation; 146 | MLIL_BOOL_TO_INT sub operation; 147 | MLIL_ADD_OVERFLOW sub operation; 148 | MLIL_SYSCALL sub operation; 149 | MLIL_SYSCALL_UNTYPED sub operation; 150 | MLIL_BP sub operation; 151 | MLIL_TRAP sub operation; 152 | MLIL_UNDEF sub operation; 153 | MLIL_UNIMPL sub operation; 154 | MLIL_UNIMPL_MEM sub operation; 155 | MLIL_IMPORT sub operation; 156 | MLIL_SET_VAR_SSA sub operation; 157 | MLIL_SET_VAR_SSA_FIELD sub operation; 158 | MLIL_SET_VAR_SPLIT_SSA sub operation; 159 | 
MLIL_SET_VAR_ALIASED sub operation; 160 | MLIL_SET_VAR_ALIASED_FIELD sub operation; 161 | MLIL_VAR_SSA sub operation; 162 | MLIL_VAR_SSA_FIELD sub operation; 163 | MLIL_VAR_ALIASED sub operation; 164 | MLIL_VAR_ALIASED_FIELD sub operation; 165 | MLIL_CALL_SSA sub operation; 166 | MLIL_CALL_UNTYPED_SSA sub operation; 167 | MLIL_SYSCALL_SSA sub operation; 168 | MLIL_SYSCALL_UNTYPED_SSA sub operation; 169 | MLIL_CALL_OUTPUT_SSA sub operation; 170 | MLIL_CALL_PARAM_SSA sub operation; 171 | MLIL_LOAD_SSA sub operation; 172 | MLIL_STORE_SSA sub operation; 173 | MLIL_VAR_PHI sub operation; 174 | MLIL_MEM_PHI sub operation; 175 | 176 | 177 | ## Attribute (has) ############################## 178 | stack sub attribute datatype string; 179 | operation-type sub attribute datatype string; 180 | ins-text sub attribute datatype string; 181 | func-name sub attribute datatype string; 182 | bb-name sub attribute datatype string; 183 | name sub attribute datatype string; 184 | in-bb sub attribute datatype string; 185 | asm-address sub attribute datatype string; 186 | edge-label sub attribute datatype string; 187 | constant-value sub attribute datatype string; 188 | parent-hash sub attribute datatype string; 189 | var sub attribute datatype string; 190 | var-type sub attribute datatype string; 191 | var-func sub attribute datatype string; 192 | var-size sub attribute datatype long; 193 | bb-start sub attribute datatype long; 194 | bb-end sub attribute datatype long; 195 | il-index sub attribute datatype long; 196 | list-size sub attribute datatype long; 197 | int sub attribute datatype long; 198 | version sub attribute datatype long; 199 | size sub attribute datatype long; 200 | if-true sub attribute datatype long; 201 | if-false sub attribute datatype long; 202 | 203 | 204 | ## ROLES (plays) ################################ 205 | in-function sub role; 206 | from-basic-block sub role; 207 | to-basic-block sub role; 208 | in-basic-block sub role; 209 | contains-basic-block sub role; 
210 | in-instruction sub role; 211 | contains-instruction sub role; 212 | in-operation sub role; 213 | contains-operation sub role; 214 | from-node sub role; 215 | to-node sub role; 216 | trace sub role; 217 | 218 | ## RELATIONSHIP #################################### 219 | has-basic-block sub relationship 220 | relates in-function 221 | relates contains-basic-block; 222 | 223 | basic-block-edge sub relationship 224 | relates from-basic-block 225 | relates to-basic-block; 226 | 227 | has-instruction sub relationship 228 | relates contains-instruction 229 | relates in-basic-block; 230 | 231 | instruction-has-operation sub relationship 232 | relates contains-operation 233 | relates in-instruction; 234 | 235 | operation-has-operation sub relationship 236 | relates contains-operation 237 | relates in-operation; 238 | 239 | node-link sub relationship 240 | relates from-node 241 | relates to-node; 242 | 243 | trace-link sub relationship 244 | relates trace; 245 | 246 | trace-instruction sub relationship 247 | relates trace; 248 | 249 | trace-full sub relationship 250 | relates trace; 251 | 252 | ## INFERENCE RULES ############################# 253 | share-var sub rule 254 | when { 255 | $v1 isa variable-ssa, has var $var; 256 | $v2 isa variable-ssa, has var $var; 257 | $v1 != $v2; 258 | }, 259 | then { 260 | (trace:$v1, trace:$v2) isa trace-link; 261 | }; 262 | 263 | share-instruction sub rule 264 | when { 265 | $v1 isa variable-ssa; 266 | $v2 isa MLIL_VAR_SSA; 267 | (to-node:$v1, from-node:$v2); 268 | $v4 isa variable-ssa; 269 | $v3 isa MLIL_VAR_SSA; 270 | (to-node:$v4, from-node:$v3); 271 | (from-node:$inst, to-node:$v2); 272 | (from-node:$inst, to-node:$v3); 273 | $inst isa instruction; 274 | $v1 != $v2;$v2 != $v3;$v3 != $v4;$v1 != $v4;$v2 != $v4;$v1 != $v3; 275 | }, 276 | then { 277 | (trace:$v1, trace:$v4) isa trace-instruction; 278 | }; 279 | 280 | trace-goal sub rule 281 | when { 282 | (trace:$v1,trace:$v2) isa trace-link; 283 | (trace:$v2,trace:$v3) isa 
trace-instruction; 284 | $v1 != $v2;$v2 != $v3;$v1 != $v3; 285 | }, 286 | then { 287 | (trace:$v1, trace:$v3) isa trace-full; 288 | }; 289 | 290 | -------------------------------------------------------------------------------- /templates/binja_mlil_ssa_1.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts functions 2 | 3 | ## Loop over all functions in the binary 4 | for() do { 5 | insert 6 | $f isa function 7 | has func-name 8 | has asm-address ; 9 | } 10 | -------------------------------------------------------------------------------- /templates/binja_mlil_ssa_2.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts basic-blocks 2 | 3 | ## Loop over all functions in the binary 4 | for() do { 5 | match 6 | $f isa function 7 | has func-name 8 | has asm-address ; 9 | 10 | ## Loop over all basic-blocks in this function and link basic-blocks to the function they are in 11 | insert 12 | for() do { 13 | $ isa basic-block 14 | has bb-name 15 | has bb-start 16 | has bb-end ; 17 | (contains-basic-block: $, in-function: $f) isa has-basic-block; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /templates/binja_mlil_ssa_3.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : links basic-blocks 2 | 3 | ## Loop over all functions in the binary 4 | for() do { 5 | 6 | ## Now loop over bb-edges and link the source and target basic-blocks in this function 7 | for() do { 8 | match 9 | $ isa basic-block 10 | has bb-name ; 11 | $ isa basic-block 12 | has bb-name ; 13 | 14 | insert 15 | (from-basic-block: $, to-basic-block: $) isa basic-block-edge; 16 | } 17 | } 18 | 
-------------------------------------------------------------------------------- /templates/binja_mlil_ssa_4.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts instructions 2 | 3 | ## Loop over all functions in the binary 4 | for() do { 5 | 6 | ## Loop over all basic-blocks in this function and link basic-blocks to the function they are in 7 | for() do { 8 | 9 | ## Loop over all instructions in this basic-block, add them, and link them to their basic-block 10 | for() do { 11 | insert 12 | $ins isa instruction 13 | has name 14 | has il-index 15 | has asm-address 16 | has operation-type ; 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /templates/binja_mlil_ssa_5.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : link instructions to their basic-blocks 2 | 3 | ## Loop over all functions in the binary 4 | for() do { 5 | 6 | ## Loop over all basic-blocks in this function and link basic-blocks to the function they are in 7 | for() do { 8 | 9 | ## Loop over all instructions in this basic-block, add them, and link them to their basic-block 10 | ## in_bb is a resource of 'instruction' that helps locate a basic-block by its hash name 11 | for() do { 12 | match 13 | 14 | $bb isa basic-block 15 | has bb-name ; 16 | 17 | $ins isa instruction 18 | has name ; 19 | 20 | insert 21 | (contains-instruction: $ins, in-basic-block: $bb) isa has-instruction; 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /templates/binja_mlil_ssa_6.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts instruction nodes (AST nodes) 2 | 3 | ## Loop over all functions in the binary 4 | 
for() do { 5 | 6 | ## Loop over all basic-blocks in this function and link basic-blocks to the function they are in 7 | for() do { 8 | 9 | ## Loop over all instructions in this basic-block, add them, and link them to their basic-block 10 | for() do { 11 | 12 | ## Loop over all nodes in this instruction and add them 13 | for() do { 14 | insert 15 | ## list nodes 16 | if (@equals(, "list")) do { 17 | $ isa 18 | has name 19 | has parent-hash 20 | has edge-label 21 | has list-size ; 22 | } 23 | 24 | ## constant nodes 25 | elseif (@equals(, "constant")) do { 26 | $ isa 27 | has name 28 | has parent-hash 29 | has edge-label 30 | has constant-value ; 31 | } 32 | 33 | ## variable-ssa nodes 34 | elseif (@equals(, "variable-ssa")) do { 35 | $ isa 36 | has name 37 | has parent-hash 38 | has edge-label 39 | has var 40 | has version 41 | has var-type 42 | has var-size 43 | has var-func ; 44 | } 45 | 46 | ## variable nodes 47 | elseif (@equals(, "variable")) do { 48 | $ isa 49 | has name 50 | has parent-hash 51 | has edge-label 52 | has var 53 | has var-type 54 | has var-size 55 | has var-func ; 56 | } 57 | 58 | ## all other nodes (operations) 59 | else { 60 | $ isa 61 | has name 62 | has parent-hash 63 | has edge-label ; 64 | } 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /templates/binja_mlil_ssa_7.tpl: -------------------------------------------------------------------------------- 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : links instruction nodes (AST nodes) 2 | 3 | ## Loop over all functions in the binary 4 | for() do { 5 | 6 | ## Loop over all basic-blocks in this function and link basic-blocks to the function they are in 7 | for() do { 8 | 9 | ## Loop over all instructions in this basic-block, add them, and link them to their basic-block 10 | for() do { 11 | 12 | ## Loop over all nodes in this instruction and add them 13 | for() do { 14 | match 15 | $ isa entity 16 | has name ; 17 | $ 
isa entity 18 | has name ; 19 | 20 | insert 21 | (from-node: $, to-node: $) isa node-link; 22 | } 23 | } 24 | } 25 | } 26 | --------------------------------------------------------------------------------