├── .gitattributes
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── analysis
    └── about_this_folder
├── binaryninja
    ├── README.md
    ├── update_to_latest.py
    └── version_switcher.py
├── img
    ├── grakn-start.png
    ├── grakn_crash.png
    └── grakn_crash_2.png
├── paper_machete.py
├── pmanalyze.py
├── queries
    ├── cwe_120_v1.py
    ├── cwe_121_v1.py
    ├── cwe_129_v1.py
    ├── cwe_134_v1.py
    └── cwe_788_v1.py
├── requirements.txt
└── templates
    ├── binja_mlil_ssa.gql
    ├── binja_mlil_ssa_1.tpl
    ├── binja_mlil_ssa_2.tpl
    ├── binja_mlil_ssa_3.tpl
    ├── binja_mlil_ssa_4.tpl
    ├── binja_mlil_ssa_5.tpl
    ├── binja_mlil_ssa_6.tpl
    └── binja_mlil_ssa_7.tpl


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .DS_Store
3 | binaryninja/*
4 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Image for running Paper Machete: Binary Ninja (Python 2.7 API), Grakn and
# the Python 3 packages from requirements.txt on top of Ubuntu 18.04.
FROM ubuntu:18.04

# JAVA_HOME for the OpenJDK 8 JRE that the apt step below installs.
ENV  JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
RUN  apt update && DEBIAN_FRONTEND=noninteractive apt upgrade -y
# curl/unzip are used further down to download and unpack Grakn; both the
# Python 2 and Python 3 pip are installed because this file calls `pip` and `pip3`.
RUN  DEBIAN_FRONTEND=noninteractive apt install -y --fix-missing \
        curl \
        openjdk-8-jre-headless \
        python-pip \
        python3-pip \
        unzip

# Binary Ninja
# BinaryNinja.zip and license.txt are not part of the repository (see
# binaryninja/README.md); they must be placed in binaryninja/ before building.
COPY binaryninja/BinaryNinja.zip /tmp/BinaryNinja.zip
COPY binaryninja/update_to_latest.py /tmp/update_to_latest.py
COPY binaryninja/version_switcher.py /tmp/version_switcher.py
# Unpack Binary Ninja under /opt and expose its Python API to root's Python 2.7
# through a .pth file in the user site-packages directory.
RUN  unzip /tmp/BinaryNinja.zip -d /opt/ && rm /tmp/BinaryNinja.zip && \
     mkdir -p /root/.local/lib/python2.7/site-packages/ && \
     echo "/opt/binaryninja/python" > /root/.local/lib/python2.7/site-packages/binaryninja.pth && \
     mkdir -p /root/.binaryninja/
# The supplied license.txt is installed under the name license.dat.
COPY binaryninja/license.txt /root/.binaryninja/license.dat
# update_to_latest.py drives version_switcher.py through pexpect; both helper
# scripts are removed once the update step has run.
RUN  pip install pexpect && python /tmp/update_to_latest.py && rm /tmp/version_switcher.py && rm /tmp/update_to_latest.py

# Grakn
COPY requirements.txt /tmp/requirements.txt
# Ask the GitHub API for the latest Grakn release and download its asset.
# NOTE(review): the inline Python joins the URLs of *all* release assets into
# one string, so this presumably relies on the release having exactly one asset.
# NOTE(review): this fetches the latest release while the README names Grakn
# v1.4.2 as the dependency - confirm which one is intended.
RUN  BROWSER_DOWNLOAD_URL=$(curl --silent https://api.github.com/repos/graknlabs/grakn/releases/latest | python -c "import sys; from json import loads as l; x = l(sys.stdin.read()); print(''.join(s['browser_download_url'] for s in x['assets']))"); \
     curl -fL $BROWSER_DOWNLOAD_URL -o /tmp/grakn.zip && \
     unzip /tmp/grakn.zip -d /opt/ && rm /tmp/grakn.zip && \
     ln -s /opt/grakn*/grakn /usr/local/bin/ && ln -s /opt/grakn*/graql /usr/local/bin/ && \
     pip3 install -r /tmp/requirements.txt && rm /tmp/requirements.txt

# Useful stuff
RUN  DEBIAN_FRONTEND=noninteractive apt install -y --fix-missing \
        tmux \
        vim
#ENTRYPOINT ["/bin/bash"]

# Start the Grakn server, then launch the Paper Machete CLI with Python 2.7.
# NOTE(review): nothing in this Dockerfile populates /opt/papermachete, so the
# project is presumably mounted there at `docker run` time - confirm.
ENTRYPOINT ["sh", "-c",  "grakn server start && cd /opt/papermachete && python2.7 paper_machete.py"]
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Battelle Memorial Institute
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 |      ____                        __  __            _          _           
 2 |     |  _ \ __ _ _ __  ___ _ __  |  \/  | __ _  ___| |__   ___| |_ ___     ________
 3 |     | |_) / _` | '_ \/ _ \ '__| | |\/| |/ _` |/ __| '_ \ / _ \ __/ _ \   /_______/
 4 |     |  __/ (_| | |_)|  __/ |    | |  | | (_| | (__| | | |  __/ ||  __/   \_______\
 5 |     |_|   \__,_| .__/\___|_|    |_|  |_|\__,_|\___|_| |_|\___|\__\___|   /_______/
 6 |                |_|                                                      @==|;;;;;;>
 7 | 
 8 | ## About
 9 | Paper Machete (PM) orchestrates [Binary Ninja](https://binary.ninja) and [Grakn.ai](https://grakn.ai) to aid static binary analysis for the purpose of finding bugs in software. PM leverages the Binary Ninja MLIL SSA to extract semantic meaning about individual instructions, operations, register/variable state, and overall control flow.
10 | 
11 | PM migrates this data into Grakn - a knowledge graph that gives us the ability to define domain-specific ontologies for data and write powerful inference rules to form relationships between data we don't want to (or can't) explicitly store. [Heeh, how neat is that](https://www.youtube.com/watch?v=Hm3JodBR-vs)?
12 | 
13 | This project was released in conjunction with a DerbyCon 2017 talk titled "Aiding Static Analysis: Discovering Vulnerabilities in Binary Targets through Knowledge Graph Inferences." You can watch that talk [here](http://www.irongeek.com/i.php?page=videos/derbycon7/t116-aiding-static-analysis-discovering-vulnerabilities-in-binary-targets-through-knowledge-graph-inferences-john-toterhi). 
14 | 
15 | Paper Machete's initial prototype and public codebase were developed by security researchers at the [Battelle Memorial Institute](https://www.battelle.org/government-offerings/national-security/cyber/mission-focused-tools). As this project matures, we hope that you will find it useful in your own research and consider contributing to the project.
16 | 
17 | ## Why BNIL?
18 | The BNIL suite of ILs is easy to work with, pleasantly verbose, and human-readable. At any point we can decide to leverage other levels and forms of the IL with little development effort on our part. When you add to that the ability to [lift multiple architectures](https://binary.ninja/faq/) and [write custom lifters](https://github.com/joshwatson/binaryninja-msp430), we have little reason not to use BNIL.
19 | 
20 | ## Why Grakn?
21 | Grakn's query language (Graql) is easy to learn and intuitive, which is extremely important in the early stages of this research while we're still hand-writing queries to model the patterns vulnerability researchers look for when performing static analysis. 
22 | 
23 | The ability to write our own domain-specific ontologies lets us quickly experiment with new query ideas and ways of making our queries less complex. When we run into a case where we think "gee, if I just had access to the relationship between..." we can modify our ontology and inference rules to get that information.
24 | 
 25 | While the end game for PM is to eliminate the need for human-written queries, the fact is we're starting from square one, which means hand-jamming a lot of queries to model the patterns human vulnerability researchers look for when bug hunting.
26 | 
27 | ## Dependencies
28 | Paper Machete requires [BinaryNinja v1.1](https://binary.ninja), [Grakn v1.4.2](https://github.com/graknlabs/grakn/releases/tag/v1.4.2), the [Grakn Python Driver](http://github.com/graknlabs/grakn-python), and the [Java JRE](http://www.oracle.com/technetwork/java/javase/downloads/index.html)
29 | 
30 | 
31 | ## Query Scripts
 32 | We've included some basic queries to get you started if you want to play around with PM. As you can imagine, there is no "silver bullet" query that will find all manifestations of a specific vulnerability class. Because of this, we've included versions for each CWE query. As we add new methods of finding the same CWE, we'll add scripts with incremented version numbers to differentiate them.
33 | 
34 | `cwe_120_v1.py` - Tests for use of unsafe 'gets()' function ([CWE-120](https://cwe.mitre.org/data/definitions/120.html))
35 | 
36 | `cwe_121_v1.py` - Tests for buffer overflows ([CWE-121](https://cwe.mitre.org/data/definitions/121.html))
37 | 
38 | `cwe_129_v1.py` - Tests for missing bounds checks ([CWE-129](https://cwe.mitre.org/data/definitions/129.html))
39 | 
40 | `cwe_134_v1.py` - Tests for format string vulnerabilities ([CWE-134](https://cwe.mitre.org/data/definitions/134.html))
41 | 
42 | `cwe_788_v1.py` - Tests for missing bounds check on array indexes ([CWE-788](https://cwe.mitre.org/data/definitions/788.html))
43 | 
44 | ## How Do I Use It?
45 | 
46 | For basic use, run the `paper_machete.py` script and follow the prompts. For more advanced use, please [read the wiki](https://github.com/cetfor/PaperMachete/wiki).
47 | 
48 | Typically you'll start with option `[1]` and work your way down to option `[3]`. If you run into any issues with Grakn use option `[4]` to reset Grakn to a clean state and try again.
49 | ```
50 | ... banner ...
51 | [1] Analyze a binary file
52 | [2] Migrate a JSON file into Grakn
53 | [3] Run all CWE queries
54 | [4] Clean and restart Grakn
55 | [5] Quit
56 | ```
57 | 
58 | Option `[1]` lists all executable files in the `/analysis` directory. So place any executables you want to analyze in `/analysis`. This option will run `pmanalyze.py` and generate a JSON file in the `/analysis` directory.
59 | 
60 | Once you've analyzed files with `[1]` and produced resulting JSON files, they will appear as a choice in option `[2]`. Selecting a JSON file in option `[2]` will migrate the data into Grakn.
61 | 
62 | Now that you have data in Grakn, you can use option `[3]`. This will kick off all scripts in `/queries` against the keyspace of your choice. If you write your own query patterns, just throw them in `/queries` and option `[3]` will run them too.
63 | 


--------------------------------------------------------------------------------
/analysis/about_this_folder:
--------------------------------------------------------------------------------
 1 | This folder serves two purposes:
 2 | 	1. It's where you put the binaries or Binary Ninja databases you want to analyze (PE, ELF, Mach-O, .bndb)
 3 | 	2. It's where analysis files (JSON) are stored after being processed by Paper Machete.
 4 | 
 5 | The Paper Machete CLI `paper_machete.py` enumerates this folder when presenting you with analysis/migration options.
 6 | 
 7 | FAQ:
 8 | Q: What if my target isn't a PE/ELF/Mach-O executable? It's a binary blob!
 9 | A: Analyze it with Binary Ninja and save your analysis as a .bndb file in this folder.
10 | 


--------------------------------------------------------------------------------
/binaryninja/README.md:
--------------------------------------------------------------------------------
1 | # binaryninja
2 | 
3 | Supply your own `BinaryNinja.zip` (Linux sources) and commercial `license.txt` for use with Docker.
4 | Files in this directory are `.gitignore`'ed.
5 | 


--------------------------------------------------------------------------------
/binaryninja/update_to_latest.py:
--------------------------------------------------------------------------------
 1 | import pexpect
 2 | import subprocess
 3 | import sys
 4 | 
 5 | child = pexpect.spawn('python /tmp/version_switcher.py')
 6 | child.logfile = sys.stdout
 7 | child.expect('Choice:')
 8 | child.sendline('1')
 9 | child.expect('Choice:')
10 | child.sendline('1')
11 | child.timeout=600
12 | child.expect(['Choice:', 'UpdateSuccess'])
13 | child.terminate()
14 | 


--------------------------------------------------------------------------------
/binaryninja/version_switcher.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Copyright (c) 2015-2017 Vector 35 LLC
  3 | #
  4 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | # of this software and associated documentation files (the "Software"), to
  6 | # deal in the Software without restriction, including without limitation the
  7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 | # sell copies of the Software, and to permit persons to whom the Software is
  9 | # furnished to do so, subject to the following conditions:
 10 | #
 11 | # The above copyright notice and this permission notice shall be included in
 12 | # all copies or substantial portions of the Software.
 13 | #
 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 | # IN THE SOFTWARE.
 21 | 
 22 | import sys
 23 | 
 24 | from binaryninja.update import UpdateChannel, are_auto_updates_enabled, set_auto_updates_enabled, is_update_installation_pending, install_pending_update
 25 | from binaryninja import core_version
 26 | import datetime
 27 | 
# Name of the first channel in UpdateChannel.list; load_channel() falls back to it.
chandefault = UpdateChannel.list[0].name
# Currently selected UpdateChannel; stays None until load_channel() assigns it.
channel = None
# Versions offered by the current channel; filled in by load_channel().
versions = []
 31 | 
 32 | 
 33 | def load_channel(newchannel):
 34 | 	global channel
 35 | 	global versions
 36 | 	if (channel is not None and newchannel == channel.name):
 37 | 		print("Same channel, not updating.")
 38 | 	else:
 39 | 		try:
 40 | 			print("Loading channel %s" % newchannel)
 41 | 			channel = UpdateChannel[newchannel]
 42 | 			print("Loading versions...")
 43 | 			versions = channel.versions
 44 | 		except Exception:
 45 | 			print("%s is not a valid channel name. Defaulting to " % chandefault)
 46 | 			channel = UpdateChannel[chandefault]
 47 | 
 48 | 
 49 | def select(version):
 50 | 	done = False
 51 | 	date = datetime.datetime.fromtimestamp(version.time).strftime('%c')
 52 | 	while not done:
 53 | 		print("Version:\t%s" % version.version)
 54 | 		print("Updated:\t%s" % date)
 55 | 		print("Notes:\n\n-----\n%s" % version.notes)
 56 | 		print("-----")
 57 | 		print("\t1)\tSwitch to version")
 58 | 		print("\t2)\tMain Menu")
 59 | 		selection = raw_input('Choice: ')
 60 | 		if selection.isdigit():
 61 | 			selection = int(selection)
 62 | 		else:
 63 | 			selection = 0
 64 | 		if (selection == 2):
 65 | 			done = True
 66 | 		elif (selection == 1):
 67 | 			if (version.version == channel.latest_version.version):
 68 | 				print("Requesting update to latest version.")
 69 | 			else:
 70 | 				print("Requesting update to prior version.")
 71 | 				if are_auto_updates_enabled():
 72 | 					print("Disabling automatic updates.")
 73 | 					set_auto_updates_enabled(False)
 74 | 			if (version.version == core_version):
 75 | 				print("Already running %s" % version.version)
 76 | 			else:
 77 | 				print("version.version %s" % version.version)
 78 | 				print("core_version %s" % core_version)
 79 | 				print("Downloading...")
 80 | 				print(version.update())
 81 | 				print("Installing...")
 82 | 				if is_update_installation_pending:
 83 | 					#note that the GUI will be launched after update but should still do the upgrade headless
 84 | 					install_pending_update()
 85 | 				# forward updating won't work without reloading
 86 | 				sys.exit()
 87 | 		else:
 88 | 			print("Invalid selection")
 89 | 
 90 | 
 91 | def list_channels():
 92 | 	done = False
 93 | 	print("\tSelect channel:\n")
 94 | 	while not done:
 95 | 		channel_list = UpdateChannel.list
 96 | 		for index, item in enumerate(channel_list):
 97 | 			print("\t%d)\t%s" % (index + 1, item.name))
 98 | 		print("\t%d)\t%s" % (len(channel_list) + 1, "Main Menu"))
 99 | 		selection = raw_input('Choice: ')
100 | 		if selection.isdigit():
101 | 			selection = int(selection)
102 | 		else:
103 | 			selection = 0
104 | 		if (selection <= 0 or selection > len(channel_list) + 1):
105 | 			print("%s is an invalid choice." % selection)
106 | 		else:
107 | 			done = True
108 | 			if (selection != len(channel_list) + 1):
109 | 				load_channel(channel_list[selection - 1].name)
110 | 
111 | 
def toggle_updates():
	"""Flip the automatic-update setting to the opposite of its current value."""
	currently_enabled = are_auto_updates_enabled()
	set_auto_updates_enabled(not currently_enabled)
114 | 
115 | 
def main():
	"""Run the version switcher's main menu until the user picks "Exit".

	Loads the default channel, then repeatedly prints the current channel,
	running version and auto-update state, lists the channel's versions and
	three extra entries (switch channel, toggle auto updates, exit), and
	dispatches on the number entered.
	"""
	global channel
	load_channel(chandefault)
	running = True
	while running:
		print("\n\tBinary Ninja Version Switcher")
		print("\t\tCurrent Channel:\t%s" % channel.name)
		print("\t\tCurrent Version:\t%s" % core_version)
		print("\t\tAuto-Updates On:\t%s\n" % are_auto_updates_enabled())
		for position, entry in enumerate(versions, 1):
			stamp = datetime.datetime.fromtimestamp(entry.time).strftime('%c')
			print("\t%d)\t%s (%s)" % (position, entry.version, stamp))
		# The three fixed entries are numbered right after the versions
		count = len(versions)
		switch_choice, toggle_choice, exit_choice = count + 1, count + 2, count + 3
		print("\t%d)\t%s" % (switch_choice, "Switch Channel"))
		print("\t%d)\t%s" % (toggle_choice, "Toggle Auto Updates"))
		print("\t%d)\t%s" % (exit_choice, "Exit"))
		answer = raw_input('Choice: ')
		# Non-numeric input becomes 0, which is never a valid choice
		picked = int(answer) if answer.isdigit() else 0
		if not (1 <= picked <= exit_choice):
			print("%d is an invalid choice.\n\n" % picked)
		elif picked == exit_choice:
			running = False
		elif picked == toggle_choice:
			toggle_updates()
		elif picked == switch_choice:
			list_channels()
		else:
			select(versions[picked - 1])


if __name__ == "__main__":
	main()
151 | 


--------------------------------------------------------------------------------
/img/grakn-start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cetfor/PaperMachete/cdceeed57bdae2b5d8138ae0c197098acd764835/img/grakn-start.png


--------------------------------------------------------------------------------
/img/grakn_crash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cetfor/PaperMachete/cdceeed57bdae2b5d8138ae0c197098acd764835/img/grakn_crash.png


--------------------------------------------------------------------------------
/img/grakn_crash_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cetfor/PaperMachete/cdceeed57bdae2b5d8138ae0c197098acd764835/img/grakn_crash_2.png


--------------------------------------------------------------------------------
/paper_machete.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import subprocess
  3 | import os
  4 | from os.path import abspath, isdir, isfile, join, splitext
  5 | from mimetypes import guess_type
  6 | from urllib2 import urlopen
  7 | from ast import literal_eval
  8 | import pmanalyze
  9 | 
# Prompt text shown whenever the CLI pauses for the user.
ENTER = '\nPress ENTER to continue'
# Project root: the directory the script is started from.
MACHETE = abspath('.')
# Directory holding the CWE query scripts.
query_path = join(MACHETE, "queries")
# Directory holding the binaries to analyze and the JSON analysis results.
ANALYSIS = join(MACHETE, "analysis")

# NOTE(review): neither migration knob is referenced anywhere else in this script.
MAX_ACTIVE = 25     # migration knob: max number of migration workers running at once
MAX_BATCHES = 1000000000   # migration knob: max number of rows to execute in one transaction

# Main-menu entries; also passed to print_banner() as section titles.
MENU1 = "[1] Analyze a binary file"
MENU2 = "[2] Migrate a JSON file into Grakn"
MENU3 = "[3] Run all CWE queries"
MENU4 = "[4] Clean and restart Grakn"
MENU5 = "[5] Quit"

# Progress message per migration template, indexed by template number
# (index 0 is a placeholder so that template N lives at index N).
TEMPLATE_DESC = [
    '', # n/a
    'Migrating functions.',                         # template 1
    'Migrating basic-blocks.',                      # template 2
    'Linking basic-blocks to their functions.',     # template 3
    'Migrating instructions.',                      # template 4
    'Linking instructions to their basic-blocks.',  # template 5
    'Migrating all AST nodes.',                     # template 6
    'Linking AST nodes.'                            # template 7
]
 34 | 
 35 | def print_banner(title=""):
 36 |     subprocess.call("clear")
 37 |     print("""
 38 |  ____                        __  __            _          _
 39 | |  _ \ __ _ _ __  ___ _ __  |  \/  | __ _  ___| |__   ___| |_ ___    ________
 40 | | |_) / _` | '_ \/ _ \ '__| | |\/| |/ _` |/ __| '_ \ / _ \ __/ _ \  /_______/
 41 | |  __/ (_| | |_)|  __/ |    | |  | | (_| | (__| | | |  __/ ||  __/  \_______\\
 42 | |_|   \__,_| .__/\___|_|    |_|  |_|\__,_|\___|_| |_|\___|\__\___|  /_______/
 43 |            |_|                                                     @==|;;;;;;>
 44 | """)
 45 |     total_len = 80
 46 |     if title:
 47 |         padding = total_len - len(title) - 4
 48 |         print("== {} {}\n".format(title, "=" * padding))
 49 |     else:
 50 |         print("{}\n".format("=" * total_len))
 51 | 
 52 | def run_script(query_path, query, keyspace):
 53 |     try:
 54 |         subprocess.call(["python3.6", join(query_path, query), keyspace])
 55 |     except OSError:
 56 |         print("It looks like you don't have Python3.6 installed. " \
 57 |             "The Grakn Python driver requires it.")
 58 |         return -1
 59 |     return 0
 60 | 
 61 | def run_queries(query, keyspace):
 62 |     if query == 'all_queries':
 63 |         print("Running all CWE queries against the '{}' keyspace...".format(keyspace))
 64 |         queries = [f for f in os.listdir(query_path) if isfile(join(query_path, f))]
 65 |         for query in queries:
 66 |             if ".py" not in query: continue
 67 |             if run_script(query_path, query, keyspace): return
 68 |             print("Script " + query + " complete.")
 69 |         print("All queries complete.")
 70 |     else:
 71 |         if isfile(join(query_path, query)):
 72 |             if run_script(query_path, query, keyspace): return
 73 |         else:
 74 |             print("Could not find the python script " + query)
 75 |             print("Please make sure it is located in " + query_path)
 76 |         return
 77 | 
 78 | 
 79 | def get_file_selection(types):
 80 |     file_list = os.listdir(ANALYSIS)
 81 |     filtered = []
 82 |     for file in file_list:
 83 |         if types == "json" and guess_type(join(ANALYSIS, file))[0] == "application/json":
 84 |             filtered.append(file)
 85 |         elif types == "bin":
 86 |             filecmd = (subprocess.check_output(["file", join(ANALYSIS, file)])).lower()
 87 |             filecmd = filecmd.split(": ")[1] # remove file path returned by 'file' utility
 88 |             if "elf" in filecmd or "mach-o" in filecmd or "pe" in filecmd or ".bndb" in file.lower():
 89 |                 filtered.append(file)
 90 |         else:
 91 |             pass # not json or executable binary
 92 | 
 93 |     # print file choices
 94 |     if len(filtered) == 0:
 95 |         if types == "json":
 96 |             print("No json files were found in {}".format(ANALYSIS))
 97 |         elif types == "bin":
 98 |             print("No executable files were found in {}".format(ANALYSIS))
 99 |         raw_input(ENTER)
100 |         return "quit"
101 |     else:
102 |         for i, file in enumerate(filtered):
103 |             print "[{}] {}".format(i, file)
104 | 
105 |     index = raw_input("\nSelect a file number to analyze ([q]uit): ").lower()
106 |     if index == "q" or index == "quit":
107 |         return "quit"
108 | 
109 |     try:
110 |         index = int(index)
111 |         if index in range(0, len(filtered)):
112 |             return filtered[int(index)]
113 |     except ValueError:
114 |         pass
115 | 
116 |     if index != "":
117 |         print("\nThat is not a valid file selection. Try again.")
118 |         raw_input(ENTER)
119 |     if types == "bin":
120 |         print_banner(MENU1)
121 |     elif types == "json":
122 |         print_banner(MENU2)
123 |     else:
124 |         print_banner()
125 | 
126 |     return False
127 | 
128 | 
def main():
    """Run the interactive Paper Machete menu until the user chooses to quit.

    Every pass redraws the banner, checks that the ``grakn`` and ``graql``
    executables can be launched, makes sure the analysis directory exists and
    then dispatches on the selected menu entry:

    1. analyze a binary from the analysis directory with ``pmanalyze``
    2. migrate an analysis JSON file into a Grakn keyspace
    3. run every query script against a chosen keyspace
    4. stop, clean and restart the Grakn server
    5. quit
    """
    menu = True
    while menu:
        print_banner()

        # check that the Grakn tools can be launched; the null sink is closed
        # again right away so the loop does not leak a file handle per pass
        try:
            with open(os.devnull, 'wb') as devnull:
                subprocess.call(['grakn', 'version'], stdout=devnull)
                subprocess.call(['graql', 'version'], stdout=devnull)
        except OSError:
            print("Please ensure grakn and graql are in your PATH")
            sys.exit()

        # check directories
        if not isdir(MACHETE):
            print("Paper Machete directory not found")
            print("Please ensure Paper Machete is located in {}".format(MACHETE))
            sys.exit()

        if not isdir(ANALYSIS):
            print("Creating directory '{}'".format(ANALYSIS))
            subprocess.call(["mkdir", "analysis"])

        menu_option = raw_input("{}\n{}\n{}\n{}\n{}\n\n>> ".format(MENU1,MENU2,MENU3,MENU4,MENU5))

        try:
            menu_option = int(menu_option)
        except ValueError:
            # an empty answer silently redraws the menu
            if menu_option != "":
                print("'{}' is not a valid option.".format(menu_option))
                raw_input(ENTER)
            continue

        # analyze a binary file
        if menu_option == 1:

            # display supported binary files in ./analysis
            binary = False
            while binary == False:
                print_banner(MENU1)
                binary = get_file_selection("bin")
                if binary == "quit":
                    break
            if binary == "quit":
                continue

            # check to see if the file exists, if it does, process it
            if not isfile(join(ANALYSIS, binary)):
                print("File '{}' not found.".format(binary))
            else:
                functions = str(raw_input('Specify a list of functions examine seperated by spaces (ENTER for all): ')).split()
                if len(functions) == 0:
                    pmanalyze.main(join(ANALYSIS, binary))
                else:
                    print(functions)
                    pmanalyze.main(join(ANALYSIS, binary), functions)
            raw_input(ENTER)

        # migrate a json file into Grakn
        elif menu_option == 2:

            # display JSON analysis files in ./analysis
            json = False
            while json == False:
                print_banner(MENU2)
                json = get_file_selection("json")
                if json == "quit":
                    break
            if json == "quit":
                continue

            # check to see if the keyspace already exists for this file
            keyspace = json.lower().replace('.json', '')
            try:
                response = literal_eval(urlopen('http://127.0.0.1:4567/kb').read())
                # Compare against the keyspace *names* (same response layout as
                # menu option 3 reads), not against the top-level dict keys.
                in_use = set(ks['name'] for ks in response['keyspaces'])

                # add a _# suffix to the base name until the name is free
                base_name = keyspace
                inc = 1
                while keyspace in in_use:
                    inc += 1
                    keyspace = "{}_{}".format(base_name, inc)
            except Exception:
                print("Unable to query keyspace names. Is Grakn running?\nContinuing assuming keyspace '{}' is OK to use.".format(keyspace))

            try:
                # insert the ontology
                print("Inserting ontology into the '{}' keyspace...".format(keyspace))
                subprocess.call(["graql","console", "-f", join(MACHETE, "templates", "binja_mlil_ssa.gql"), "-k", keyspace])


                # migrate data into Grakn
                print("\nMigrating data from '{}' into the '{}' keyspace...".format(json, keyspace))

                # loop over all 7 templates
                for num in range(1,8):
                    print(">> Migration step {} of 7: {}".format(num, TEMPLATE_DESC[num]))
                    subprocess.call(["graql", "migrate", "json", "--template", join(MACHETE, "templates", "binja_mlil_ssa_{}.tpl".format(num)), "--input", join(ANALYSIS, json), "--keyspace", keyspace])

                print("Data successfully migrated into Grakn. You can now run CWE query scripts against '{}' to check for vulnerabilities".format(keyspace))
                raw_input(ENTER)
            except:
                print("Upload failed... please try agin.")
                raw_input(ENTER)

        # run CWE queries
        elif menu_option == 3:
            # Without a reachable Grakn there is nothing to choose from, so
            # report it and return to the menu instead of crashing
            try:
                keyspaces = literal_eval(urlopen('http://127.0.0.1:4567/kb').read())['keyspaces']
            except Exception:
                print("Unable to query keyspace names. Is Grakn running?")
                raw_input(ENTER)
                continue

            print_banner(MENU3)

            for i, ks in enumerate(keyspaces):
                print("[{}] {}".format(i, ks['name']))

            index = raw_input("\nSelect a keyspace to run all queries against ([q]uit): ").lower()
            if index == "q" or index == "quit":
                continue

            try:
                index = int(index)
            except ValueError:
                continue

            # An out-of-range number used to fall through with keyspace=None
            if index not in range(0, len(keyspaces)):
                print("\nThat is not a valid keyspace selection. Try again.")
                raw_input(ENTER)
                continue

            run_queries('all_queries', keyspaces[index]['name'])
            raw_input(ENTER)

        # clean and restart Grakn
        elif menu_option == 4:
            print("Restarting Grakn. Press \"Y\" when prompted.\nWait until you see the Grakn banner before continuing!")
            raw_input(ENTER)

            subprocess.call(["grakn", "server", "stop"])
            subprocess.call(["grakn", "server", "clean"])
            subprocess.call(["grakn", "server", "start"])

        # quit
        elif menu_option == 5:
            menu = False

        else:
            print("Invalid option!\n")
            raw_input(ENTER)

if __name__ == "__main__":
    main()
278 | 


--------------------------------------------------------------------------------
/pmanalyze.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import json
  3 | from struct import pack, unpack
  4 | from os.path import basename, join, isfile
  5 | from operator import attrgetter
  6 | from collections import defaultdict
  7 | import binaryninja as binja
  8 | 
# Root PaperMachete container for the current run; main() replaces this
# placeholder with a fresh PaperMachete() before any analysis starts.
PM = None
# Variable name -> size for the function currently being processed;
# rebuilt by process_function() and read by ast_build_json().
vars_and_sizes = {}
 11 | 
 12 | class PaperMachete():
 13 |     def __init__(self):
 14 |         self.functions = []
 15 | 
 16 | class PMFunction(): 
 17 |     def __init__(self, func_name, asm_addr):
 18 |         self.func_name = func_name
 19 |         self.asm_addr = asm_addr
 20 |         self.basic_blocks = []
 21 |         self.bb_edges = []
 22 | 
 23 | class PMBasicBlock():
 24 |     def __init__(self, bb_name, bb_start, bb_end):
 25 |         self.bb_name = bb_name
 26 |         self.bb_start = bb_start
 27 |         self.bb_end = bb_end - 1 # set end as last il index (not +1 like binja gives us)
 28 |         self.instructions = []
 29 | 
 30 | class PMInstruction():
 31 |     def __init__(self, name, il_index, asm_address, operation_type, in_bb):
 32 |         self.name = name
 33 |         self.il_index = il_index
 34 |         self.asm_address = asm_address
 35 |         self.operation_type = operation_type
 36 |         self.in_bb = in_bb
 37 |         self.nodes = []
 38 | 
 39 | class PMOperation():
 40 |     def __init__(self, name, depth, node_type, edge_label, parent_hash):
 41 |         self.name = name
 42 |         self.depth = depth
 43 |         self.node_type = node_type
 44 |         self.edge_label = edge_label
 45 |         self.parent_hash = parent_hash
 46 | 
 47 | class PMNodeList():
 48 |     def __init__(self, name, depth, node_type, edge_label, parent_hash, list_size):
 49 |         self.name = name
 50 |         self.depth = depth
 51 |         self.node_type = node_type
 52 |         self.edge_label = edge_label
 53 |         self.parent_hash = parent_hash
 54 |         self.list_size = list_size
 55 | 
 56 | class PMEndNodeConstant():
 57 |     def __init__(self, name, depth, node_type, edge_label, parent_hash, constant_value):
 58 |         self.name = name
 59 |         self.depth = depth
 60 |         self.node_type = node_type
 61 |         self.edge_label = edge_label
 62 |         self.parent_hash = parent_hash
 63 |         self.constant_value = constant_value
 64 | 
 65 | class PMEndNodeVarSSA():
 66 |     def __init__(self, name, depth, node_type, edge_label, parent_hash, var, version, var_type, var_size, var_func):
 67 |         self.name = name
 68 |         self.depth = depth
 69 |         self.node_type = node_type
 70 |         self.edge_label = edge_label
 71 |         self.parent_hash = parent_hash
 72 |         self.var = var
 73 |         self.version = version
 74 |         self.var_type = var_type
 75 |         self.var_size = var_size
 76 |         self.var_func = var_func
 77 | 
 78 | class PMEndNodeVariable():
 79 |     def __init__(self, name, depth, node_type, edge_label, parent_hash, var, var_type, var_size, var_func):
 80 |         self.name = name
 81 |         self.depth = depth
 82 |         self.node_type = node_type
 83 |         self.edge_label = edge_label
 84 |         self.parent_hash = parent_hash
 85 |         self.var = var
 86 |         self.var_type = var_type
 87 |         self.var_size = var_size
 88 |         self.var_func = var_func
 89 | 
 90 | class PMBBEdge():
 91 |     def __init__(self, source, target):
 92 |         self.source = source
 93 |         self.target = target
 94 | 
 95 | 
 96 | def process_function(func):
 97 |     global insn_list
 98 |     global vars_and_sizes
 99 | 
100 |     insn_list = []
101 |     vars_and_sizes = {}
102 | 
103 |     stack = str(binja.function.Function.stack_layout.__get__(func))
104 |     vars_and_sizes = get_variable_sizes(stack)
105 | 
106 |     func_name = func.name.replace('.', '_')
107 |     asm_addr = hex(func.start).strip('L')
108 | 
109 |     PM.functions.append(PMFunction(func_name, asm_addr))
110 | 
111 | 
def process_basic_block(func, block):
    """
    Record `block` as a PMBasicBlock on every PM function whose name matches
    `func` (with '.' replaced by '_').

    The block name has the form "bb_<start>_<end-1>_<function name>".
    """
    owner_name = func.name.replace('.', '_')
    first_index, last_index = block.start, block.end - 1
    bb_name = "bb_{}_{}_{}".format(first_index, last_index, owner_name)

    for pm_func in PM.functions:
        if pm_func.func_name != owner_name:
            continue
        # PMBasicBlock itself converts the exclusive end to the last index
        pm_func.basic_blocks.append(PMBasicBlock(bb_name, block.start, block.end))
119 | 
120 | 
def process_instruction(func, block, insn):
    """
    Parse `insn` into the PM model unless its address was already handled.

    A single ISA instruction can map to many IL instructions, which would
    cause the same instruction to be processed many times. To avoid this,
    addresses are tracked in the global insn_list (cleared in
    process_function()) and each address is processed only once.

    MLIL_GOTO operations always seem to have address 0x0, so address 0x0 is
    deliberately processed every time it is seen.
    """
    global insn_list

    already_seen = insn.address in insn_list
    if already_seen and insn.address != 0x0:
        # Nothing is parsed, so no node list changed and there is nothing to
        # re-sort. The original re-sorted the whole model here as well.
        return

    ast_parse([func, block, insn])
    insn_list.append(insn.address)

    # Sort the 'nodes' list of each instruction by 'depth'.
    # This is extremely important for Grakn's migration template since nodes
    # at depth 1 need to exist before nodes at depth 2 can be linked to them
    # (and so on). Nodes are only ever added through ast_parse(), so sorting
    # right after it keeps every list ordered at all times.
    for pm_func in PM.functions:
        for bb in pm_func.basic_blocks:
            for inst in bb.instructions:
                inst.nodes.sort(key=attrgetter('depth'))
148 | 
149 | 
try:
    _PM_INT_TYPES = (int, long)  # Python 2: accept both integer types
except NameError:
    _PM_INT_TYPES = (int,)       # Python 3: 'long' no longer exists


def _pm_instructions_named(inst_hash):
    """Yield every PMInstruction recorded in PM whose name equals `inst_hash`."""
    for pm_func in PM.functions:
        for bb in pm_func.basic_blocks:
            for inst in bb.instructions:
                if inst.name == inst_hash:
                    yield inst


def ast_build_json(args, name, il, level=0, edge=""):
    """
    Recursively record the AST rooted at `il` in the global PM model.

    args  -- [func, block, insn] as passed to ast_parse()
    name  -- hash-style node name, see below
    il    -- an MLIL instruction, a list of operands, or a terminating value
             (integer constant, SSAVariable or Variable)
    level -- recursion depth; 0 means `il` is the instruction itself
    edge  -- label of the edge that leads from the parent to this node

    Instruction nodes are named like "N_8735918103813_4195908". One element
    down from an instruction looks like "N_8735918103813_4195908_0", so each
    extra "_<i>" suffix is one level deeper. List nodes carry an 'L' prefix
    instead of 'N'.

    Raises ValueError when a terminating node of an unexpected type is met.
    """
    global insn_list
    global vars_and_sizes

    func = args[0]
    block = args[1]

    func_name = func.name.replace('.', '_')

    # slice off the last "_#" and rejoin to get the parent reference hash
    parent = "_".join(name.split('_')[:-1])

    # two "_" in the name means instruction level, so depth starts at 0 there
    depth = name.count("_") - 2
    if 'L' in parent:
        # the parent is a list node: reset this node's own prefix back to 'N'
        name = name.replace('L', 'N')

    # get the instruction hash this node belongs in
    inst_hash = "_".join(name.split('_')[:3])

    # get the basic-block this node belongs in
    inbb = "bb_{}_{}_{}".format(block.start, block.end-1, func_name)

    if isinstance(il, binja.MediumLevelILInstruction):

        # instruction
        if level == 0:
            il_index = il.instr_index
            asm_address = hex(il.address).strip('L')
            operation_type = str(il.operation).split('.')[1]

            for pm_func in PM.functions:
                for bb in pm_func.basic_blocks:
                    if bb.bb_name != inbb:
                        continue
                    # Only place the instruction in the block whose IL index
                    # range contains it. This avoids MLIL_GOTO nodes (which
                    # all have an asm_address of 0x0 and are therefore not
                    # de-duplicated by address) landing in the wrong block.
                    if not (il_index >= bb.bb_start and il_index <= bb.bb_end):
                        continue
                    if operation_type == "MLIL_GOTO":
                        # The same GOTO can be visited twice; track it by
                        # hash and don't add it again.
                        if inst_hash in insn_list:
                            continue
                        insn_list.append(inst_hash)
                    bb.instructions.append(PMInstruction(inst_hash, il_index, asm_address, operation_type, inbb))

        # operation
        else:
            node_type = str(il.operation).split('.')[1]
            edge_label = str(edge)

            for inst in _pm_instructions_named(inst_hash):
                inst.nodes.append(PMOperation(name, depth, node_type, edge_label, parent))

        # edge
        for i, o in enumerate(il.operands):
            try:
                edge_label = str(il.ILOperations[il.operation][i][0])
            except IndexError:
                # Addresses issue in binja v1.1 stable with MLIL_SET_VAR_ALIASED
                # operations in the Python bindings.
                # See: https://github.com/Vector35/binaryninja-api/issues/787
                edge_label = "unimplemented"
            child_name = "{}_{}".format(name, i)
            ast_build_json(args, child_name, o, level+1, edge_label)

    # list of operands / nodes
    elif isinstance(il, list):
        edge_label = str(edge)
        # list hashes have an 'L' prefix to distinguish from nodes ('N').
        name = name.replace('N', 'L')
        list_size = len(il)

        for inst in _pm_instructions_named(inst_hash):
            inst.nodes.append(PMNodeList(name, depth, "list", edge_label, parent, list_size))

        # add the elements of the list; the edge label is the element index
        for i, item in enumerate(il):
            item_name = "{}_{}".format(name, i)
            ast_build_json(args, item_name, item, level+1, str(i))

    # end node
    else:
        edge_label = str(edge)

        # constant
        if isinstance(il, _PM_INT_TYPES):
            constant_value = str(il)

            for inst in _pm_instructions_named(inst_hash):
                inst.nodes.append(PMEndNodeConstant(name, depth, "constant", edge_label, parent, constant_value))

        # SSAVariable (not using type information)
        elif isinstance(il, binja.mediumlevelil.SSAVariable):
            var = str(il.var)
            version = il.version
            var_type = str(il.var.type)
            # variables missing from the size table default to a size of 4
            var_size = vars_and_sizes.get(str(il.var), 4)

            for inst in _pm_instructions_named(inst_hash):
                inst.nodes.append(PMEndNodeVarSSA(name, depth, "variable-ssa", edge_label, parent, var, version, var_type, var_size, func_name))

        # Variable (contains more information than we currently use)
        elif isinstance(il, binja.function.Variable):
            var = str(il)
            var_type = str(il.type)
            # variables missing from the size table default to a size of 4
            var_size = vars_and_sizes.get(str(il), 4)

            for inst in _pm_instructions_named(inst_hash):
                inst.nodes.append(PMEndNodeVariable(name, depth, "variable", edge_label, parent, var, var_type, var_size, func_name))

        # Unknown terminating node (this should not be reached)
        else:
            message = "A terminating node was encountered that was not expected: '{}'".format(type(il))
            print(message)
            raise ValueError(message)
306 | 
307 | 
def ast_name_element(args, il_type, il):
    """
    Build the root node name "N_<hash(il)>_<il.address>" for `il` and start
    the AST walk from it. `il_type` is accepted but not used here.
    """
    root_name = "N_{}_{}".format(hash(il), il.address)
    ast_build_json(args, root_name, il)
312 | 
313 | 
def ast_parse(args):
    """
    Record every MLIL SSA instruction of the function that shares the
    address of the given instruction.

    args -- [func, block, insn]; passed through unchanged to
            ast_name_element() / ast_build_json().
    """
    func = args[0]
    insn = args[2]

    print("  function: {} (asm-addr: {})".format(func.name, hex(insn.address).strip('L')))

    # Collect only the IL instructions at this address, instead of indexing
    # every address of the function and then reading a single key.
    matching = [mil
                for ssa_block in func.medium_level_il.ssa_form
                for mil in ssa_block
                if mil.address == insn.address]

    for il in sorted(matching):
        ast_name_element(args, 'MediumLevelILSSA', il)
330 | 
331 | 
def process_edges(func):
    """
    Record every outgoing basic-block edge of `func` as a PMBBEdge on each
    PM function whose name matches. Blocks are referenced by their
    "bb_<start>_<end-1>_<function name>" names.
    """
    func_name = func.name.replace('.', '_')
    bb_label = "bb_{}_{}_{}".format

    # every PM function record that carries this (sanitised) name
    owners = [pm_func for pm_func in PM.functions if pm_func.func_name == func_name]

    for block in func.medium_level_il.ssa_form:
        for edge in block.outgoing_edges:
            src, dst = edge.source, edge.target
            source = bb_label(src.start, src.end - 1, func_name)
            target = bb_label(dst.start, dst.end - 1, func_name)
            for owner in owners:
                owner.bb_edges.append(PMBBEdge(source, target))
343 | 
344 | 
345 | def get_offset_from_var(var):
346 |     """
347 |     Helper for get_variable_sizes)_
348 |     Use this to calculate var offset. 
349 |         e.g. var_90, __saved_edi --> 144, -1
350 |     """
351 |     instance = False
352 |     i=0
353 | 
354 |     # Parse string
355 |     i = var.rfind(' ')+1
356 |     tmp = var[i:-1]
357 | 
358 |     # Parse var
359 |     if tmp[0] == 'v':
360 |         tmp = tmp[4:]
361 |         j = tmp.find('_')
362 | 
363 |         # Handles SSA var instances (var_14_1) and converts c, 58, 88 --> 12, 88, 136
364 |         if (j != -1):
365 |             tmp = tmp[:j]
366 |             instance = True
367 |         else:
368 |             instance = False
369 | 
370 |     try:    
371 |         tmp = int(tmp, 16)
372 |     except:
373 |         tmp = -1
374 | 
375 |     # -1 for non vars
376 |     else:
377 |         tmp = -1
378 |     
379 |     return tmp, instance 
380 | 
381 | 
def get_variable_sizes(stack):
    """
    Called from process_function. Accepts the string form of the stack
    variables and returns a dict mapping variable name -> size.

    The outer brackets of `stack` are dropped, the remainder is split on
    ', ' and the entries are walked in reverse order.
    """
    prev_offset = 0
    offset = 0
    var_dict = {}

    # Loop through each item on stack backwards
    for item in reversed(stack[1:-1].split(', ')):
        size = 0
        instance = False

        # Handle args and return addr
        if 'arg' in item or 'return' in item:
            size = 4

        elif 'int32' in item:
            size = 4
            parsed, instance = get_offset_from_var(item)
            if parsed != -1:
                offset = parsed
            if not instance:
                offset = prev_offset + 4

        elif 'int64' in item:
            size = 8
            parsed, instance = get_offset_from_var(item)
            if not instance:
                offset = prev_offset + 8
            if parsed != -1:
                offset = parsed

        else:
            offset, instance = get_offset_from_var(item)
            if instance:
                offset -= 4

        # untyped entries: size is the distance to the previous offset
        if size == 0:
            size = offset - prev_offset
        if not instance:
            prev_offset = offset

        # the name is the last token of the entry, minus its final character
        key = item[item.rfind(' ') + 1:-1]
        var_dict[key] = size

    return var_dict
438 | 
439 | 
def analyze(bv, func_list=[]):
    """
    Walk the functions of `bv` and record functions, basic blocks,
    instructions and block edges in the PM model.

    func_list -- optional list of function names; when non-empty, functions
                 whose name is not listed are skipped.
    """
    restrict_to_list = len(func_list) > 0

    ## process functions
    for func in bv.functions:
        if restrict_to_list and func.name not in func_list:
            continue
        process_function(func)

        ## process basic blocks, and the instructions inside each one
        for block in func.medium_level_il.ssa_form:
            process_basic_block(func, block)
            for insn in block:
                process_instruction(func, block, insn)

        ## process basic block edges
        # Done last, once every block of the function has been recorded:
        # edges stemming from loops would otherwise refer to a basic block
        # that has not been inserted yet.
        process_edges(func)
462 | 
463 | 
def main(target, func_list=[]):
    """
    Analyse the binary at `target` with Binary Ninja and write the collected
    MLIL SSA data as "<basename>.json".

    target    -- path of the binary to analyse
    func_list -- optional list of function names to restrict the analysis to

    The JSON file is written to the current directory when this file runs as
    a script, and into the "analysis" directory when it is imported.
    Problems are reported with a printed message and an early return.
    """
    global PM

    PM = PaperMachete()

    if not isfile(target):
        print("The specified target '{}' is not a file. Try again.".format(target))
        return

    print("Invoking Binary Ninja and analyzing file: {}".format(target))
    bv = binja.BinaryViewType.get_view_of_file(target)
    # NOTE(review): get_view_of_file is expected to return None when no view
    # can be created for the file; bail out instead of failing on the
    # attribute access below.
    if bv is None:
        print("ERROR: Binary Ninja was unable to open '{}'.".format(target))
        return
    bv.add_analysis_option('linearsweep')
    print("Performing linear sweep...")
    bv.update_analysis_and_wait()
    print("Linear sweep complete. Collecting BNIL data...")
    analyze(bv, func_list)

    # pretty printed json (pretty printed files are much larger than compact files!)
    target_json = json.dumps(PM, default=lambda o: o.__dict__, indent=4, sort_keys=True)

    # compact / minified json
    #target_json = json.dumps(PM, default=lambda o: o.__dict__)

    json_name = "{}.json".format(basename(target))
    if __name__ == "__main__":
        json_path = json_name
    else:
        json_path = join("analysis", json_name)

    try:
        # 'with' closes the file even when the write itself fails
        with open(json_path, "w") as jf:
            jf.write(target_json)
    except IOError:
        print("ERROR: Unable to open/write to {}.json.".format(basename(target)))
        return
498 | 
# Command-line entry point: <binary> [function1 function2 ...]
if __name__ == "__main__":
    if len(sys.argv) > 1:
        target = sys.argv[1]
        func_list = sys.argv[2:]
    else:
        print("Usage: %s <binary> [function1 function2 ...]" % sys.argv[0])
        # Stop here: 'target' and 'func_list' are undefined without
        # arguments, so falling through to main() raised a NameError.
        sys.exit(1)
    main(target, func_list)
506 | 


--------------------------------------------------------------------------------
/queries/cwe_120_v1.py:
--------------------------------------------------------------------------------
 1 | #============================================================================================================
 2 | # CWE-120: Buffer Copy without Checking Size of Input
 3 | #
 4 | # Vuln Info: A trivial way to cause this vulnerability is using the gets() function which is not secure.
 5 | # Ex:
 6 | #     bytes_received = gets(input);                                 <--Bad
 7 | #     bytes_received = receive_until(input, sizeof(input), '\n');   <--Good
 8 | #
 9 | # Methodology:
10 | # 1. Find gets instruction
11 | # 2. There's a vulnerability
12 | #
13 | # Try it on: REMATCH_1--Hat_Trick--Morris_Worm
14 | #
15 | #============================================================================================================
16 | 
17 | import sys
18 | import grakn
19 | 
def main(keyspace):
    """
    Report every MLIL_CALL_SSA instruction in `keyspace` that calls gets()
    or cgc_gets(), printing one CWE-120 line per call site.
    """
    client = grakn.Grakn(uri='localhost:48555')
    with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph:
        # Step 1: collect the address of every function named like gets()
        func_addrs = []
        for function_name in ['gets', 'cgc_gets']:
            query1 = 'match $func isa function, has func-name "{}", has asm-address $a; get $a;'.format(function_name)
            for result in graph.query(query1).collect_concepts():
                # the stored address is a hex string
                func_addrs.append(int(result.value(), 16))

        # Step 2: every call whose target constant is one of those addresses
        for func_addr in func_addrs:
            query2 = 'match $x has operation-type "MLIL_CALL_SSA" has asm-address $a; $y isa"MLIL_CONST_PTR"; ($x,$y); $z isa constant, has constant-value {}; ($y,$z); get $a;'.format(func_addr)
            for instr in graph.query(query2).collect_concepts():
                print("CWE-120: Buffer Copy Without Checking Size of Input at {}".format(instr.value()))
41 | 
if __name__ == "__main__":
    # the first command-line argument names the keyspace; default is "grakn"
    keyspace = sys.argv[1] if len(sys.argv) > 1 else "grakn"
    main(keyspace)
48 | 


--------------------------------------------------------------------------------
/queries/cwe_121_v1.py:
--------------------------------------------------------------------------------
  1 | #============================================================================================================
  2 | # CWE-121: Stack-based Buffer Overflow
  3 | #
  4 | # Vuln Info: This vulnerability comes from allocating too much space for a string.
  5 | # Ex: char string[64]
  6 | #     (cgc_receive_delim(0, string, 128, '\n') != 0)                <--Bad
  7 | #     (cgc_receive_delim(0, string, sizeof(string), '\n') != 0)     <--Good
  8 | #
  9 | # Methodology:
 10 | # 1. Find all instructions that call a specific function specified with function_name
 11 | # 2. Check these instructions' parameters, string, and bytes allocated (sizeof(string))
 12 | # 3. Find where the string was initialized to get amount of bytes allocated
 13 | # 4. If the amount of bytes allocated != size of string, alert a possible vulnerability
 14 | #
 15 | # Try it on: Palindrome2, ShoutCTF
 16 | #
 17 | # Includes functions:
 18 | # fgets(name, sizeof(name), stdin)
 19 | # receive_delim(0, 0, string, sizeof(string), '\n')
 20 | # strncpy(targetBuffer, srcBuffer, sizeof(targetBuffer));
 21 | # receive_until(buff, '\n', 25);
 22 | # memcpy(str1, str2, n);
 23 | # freaduntil(buf, sizeof(buf), '\n', stdin)
 24 | # read(int fd, void *buf, size_t count);
 25 | #============================================================================================================
 26 | 
 27 | import sys
 28 | import grakn
 29 | 
 30 | def main(keyspace):
 31 |     client = grakn.Grakn(uri='localhost:48555')
 32 |     with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph:
 33 | 
 34 |         # Functions with indexes for (dest, sizeof(dest)) stored in dict
 35 |         functions = {"receive_delim": (2,3), "fgets": (0,1), "strncpy": (0,2), "receive_until": (0,2), "memcpy": (0,2), "freaduntil": (1,2), "read":(1,2)}
 36 | 
 37 |         # Check for potential vuln in each function
 38 |         for function_name in functions:
 39 |         # Get address of function to use for next query
 40 |             query1 = 'match $func isa function, has func-name contains "{}", has asm-address $a; get $a;'.format(function_name)
 41 |             result1 = [result.map() for result in graph.query(query1)]
 42 | 
 43 |             # If the function is found continue query
 44 |             if result1:
 45 |                 # Get all instructions that have function name
 46 |                 func_addr = int(result1[0]['a'].value(), 16)
 47 |                 query2 = 'match $x has operation-type "MLIL_CALL_SSA"; $y isa"MLIL_CONST_PTR"; ($x,$y); $z isa constant, has constant-value {}; ($y,$z); get $x;'.format(func_addr)
 48 |                 result2 = [result.map() for result in graph.query(query2)]
 49 | 
 50 |                 # If there are instructions that use the function check the instructions
 51 |                 if result2:
 52 | 
 53 |                     buff_index = functions[function_name][0]
 54 |                     size_index = functions[function_name][1]
 55 |                     for instr in result2:
 56 |                         Id = instr['x'].id
 57 |                         query3 = 'match $x id "' + Id + '"; $l isa list; ($x,$l); (from-node: $l, $q); $q has edge-label $e; (from-node: $q, $v); {$v has var $s;} or {$v has constant-value $s;}; get $e, $s;'
 58 |                         result3 = [result.map() for result in graph.query(query3)]
 59 | 
 60 |                         # This section grabs instrution params and insert into an array
 61 |                         param_array = [0, 0, 0, 0, 0, 0, 0, 0]
 62 | 
 63 |                         for ele in result3:
 64 |                             index = int(ele['e'].value())
 65 |                             val = ele['s'].value()
 66 |                             param_array[index] = val
 67 |                         # Get var name - This is done to determine how many bytes the variable is
 68 |                         var_name = param_array[buff_index]
 69 |                         var_name = var_name.split('#',1)[0].lstrip()
 70 | 
 71 |                         # NOTE Enhancement Make finding buff_size the same as string_size
 72 |                         # This assumes that buffer_size is a number, breaks when its a var or register
 73 |                         # Get buffer size
 74 |                         try:
 75 |                             buff_size = int(param_array[size_index])
 76 |                         except ValueError as err:
 77 |                             continue
 78 |                         # Get size of string in by finding initialization Ex. var_88 = &var_58
 79 |                         # Find where string is initialzed
 80 |                         query4 = 'match $x id "{}"; $y isa basic-block; ($x,$y); $z isa instruction, has operation-type "MLIL_SET_VAR_SSA"; ($y,$z); {{$v1 isa variable, has var "{}";}} or {{$v1 isa variable-ssa, has var "{}";}}; ($z, $v1); $w isa MLIL_ADDRESS_OF; ($w, $z); $v isa variable, has var-size $s; ($w, $v); get $s, $x;'.format(Id, var_name, var_name)
 81 |                         result4 = [result.map() for result in graph.query(query4)]
 82 | 
 83 |                         if (result4):
 84 |                             string_size = result4[0]['s'].value()
 85 |                             # Finally Determine if buffer size == sizeof(str)
 86 |                             if string_size != buff_size:
 87 |                                 instruction_ID = result4[0]['x'].id
 88 |                                 query5 = 'match $i id {}, has asm-address $a; get $a;'.format(instruction_ID)
 89 |                                 result5 = [result.map() for result in graph.query(query5)]
 90 |                                 instr_addr = result5[0]['a'].value()
 91 | 
 92 |                                 print("CWE-121: Stack-based Overflow possible at {}".format(instr_addr))
 93 | 
 94 | if __name__ == "__main__":
 95 |     if len(sys.argv) > 1:
 96 |         keyspace = sys.argv[1]
 97 |     else:
 98 |         keyspace = "grakn"
 99 |     main(keyspace)
100 | 


--------------------------------------------------------------------------------
/queries/cwe_129_v1.py:
--------------------------------------------------------------------------------
 1 | #============================================================================================================
  2 | # CWE-129: Improper Validation of Array Index
 3 | #
 4 | # Vuln Info: This vulnerability comes from using untrusted (unchecked) input when using an array index.
 5 | #
  6 | # Methodology: Find all signed comparisons of a variable and constant and follow the variable to see if its
 7 | #               other bound is checked.
 8 | #
 9 | # TODO: Currently the script searches out all comparisons to see if the other bound is checked by looking
10 | #       for the same variable in other comparisons. The search can be improved by instead searching for where
11 | #       the user can modify an array index then checking for bounds on that.
12 | #
 13 | # Limitations: This implementation only finds instances where one bound was checked, but not the other.
 14 | #               Also, this implementation does not specifically search for array indexes, but comparisons in general.
15 | #
16 | # try it on: recipe_and_pantry_manager
17 | #============================================================================================================
18 | 
19 | import sys
20 | import grakn
21 | 
#Terminates the script
def fail():
    """Stop the interpreter by raising SystemExit with no exit code."""
    raise SystemExit()
25 | 
#Finds comparisons that are acting as a lower bounds check
def lowerCheck():
    """
    Query the global `graph` for MLIL_CMP_SGE / MLIL_CMP_SGT comparisons
    between an SSA variable and a constant; returns one answer map per match
    with the keys 'comp' and 'var'.
    """
    query = 'match {$comp isa MLIL_CMP_SGE;} or {$comp isa MLIL_CMP_SGT;};$node isa MLIL_VAR_SSA;$cons isa MLIL_CONST;($comp, $node);($comp, $cons);$varssa isa variable-ssa has var $var;($node, $varssa);get $comp, $var;'
    answers = []
    for result in graph.query(query):
        answers.append(result.map())
    return answers
30 | 
#Finds comparisons that are acting as an upper bounds check
def upperCheck():
    """
    Query the global `graph` for MLIL_CMP_SLE / MLIL_CMP_SLT comparisons
    between an SSA variable and a constant; returns one answer map per match
    with the keys 'comp' and 'var'.
    """
    query = 'match {$comp isa MLIL_CMP_SLE;} or {$comp isa MLIL_CMP_SLT;};$node isa MLIL_VAR_SSA;$cons isa MLIL_CONST;($comp, $node);($comp, $cons);$varssa isa variable-ssa has var $var;($node, $varssa);get $comp, $var;'
    answers = []
    for result in graph.query(query):
        answers.append(result.map())
    return answers
35 | 
#Returns the address of a comparison instruction
def get_addr(comp):
    """
    Look up, on the global `graph`, the asm-address of the instruction
    related to the comparison with concept id `comp`; returns one answer map
    per match with the key 'addr'.
    """
    query_head = 'match $comp id "'
    query_tail = '";$inst isa instruction, has asm-address $addr;($comp, $inst);get $addr;'
    query = query_head + comp + query_tail

    answers = []
    for result in graph.query(query):
        answers.append(result.map())
    return answers
40 | 
def _report_unpaired(checks, other_side_vars, missing):
    """Print a CWE-129 line for each check whose variable lacks the opposite bound."""
    for item in checks:
        if item['var'].value() in other_side_vars:
            # the other bound of this variable is checked somewhere
            continue
        addr = get_addr(item['comp'].id)
        if addr:
            print('CWE-129: Missing ' + missing + ' bound check at ' + str(addr[0]['addr'].value()))


def main(keyspace):
    """
    Report variables that are compared against a constant on one side only.

    A variable with a lower-bound comparison (SGE/SGT) but no upper-bound
    comparison (SLE/SLT) is reported as missing its upper bound check, and
    the reverse case as missing its lower bound check. Exits through fail()
    when the keyspace contains no such comparisons at all.

    Variables are matched by name only, so equally named variables in
    different functions are treated as the same variable.
    """
    client = grakn.Grakn(uri='localhost:48555')
    global graph
    with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph:

        # The two helper queries together cover every signed comparison of
        # a variable against a constant.
        lower = lowerCheck()
        upper = upperCheck()
        if not lower and not upper:
            fail()

        # Decide per variable. The original tested the concept *id* for the
        # substring 'SGE' and compared against the list of all compared
        # variables, so it could never tell which bound was missing.
        lower_vars = set(item['var'].value() for item in lower)
        upper_vars = set(item['var'].value() for item in upper)

        _report_unpaired(lower, upper_vars, 'upper')
        _report_unpaired(upper, lower_vars, 'lower')
87 | 
88 | if __name__ == "__main__":
89 |     if len(sys.argv) > 1:
90 |         keyspace = sys.argv[1]
91 |     else:
92 |         keyspace = "grakn"
93 |     main(keyspace)
94 | 


--------------------------------------------------------------------------------
/queries/cwe_134_v1.py:
--------------------------------------------------------------------------------
 1 | #============================================================================================================
 2 | # CWE-134 Uncontrolled Format String
 3 | #
 4 | # Vuln Info: This vulnerability comes from calling printf without a format specifier
 5 | # Ex: cgc_printf(message);          <--Bad
 6 | #     cgc_printf("%s", message);    <--Good
 7 | #
 8 | # Methodology:
 9 | # 1. Check if file has a printf function
10 | # 2. Check if any instructions use printf
11 | # 3. Check if params in printf are data type (correct) or var_type (incorrect: no format specifier such as %s is used)
12 | #
13 | # Try it on: Barcoder, Checkmate, Kaprica_Go
14 | #============================================================================================================
15 | 
16 | import sys
17 | import grakn
18 | 
19 | def main(keyspace):
20 |     client = grakn.Grakn(uri='localhost:48555')
21 |     with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph:
22 | 
23 |         # Get address of printf to use for next query
24 |         query1 ='match $func isa function, has func-name contains "printf", has asm-address $a; offset 0; limit 100; get $a;'
25 |         result1 = [result.map() for result in graph.query(query1)]
26 |         if len(result1) > 0:
27 |             print("Found potential calls at the following addresses:")
28 |             for addr in result1:
29 |                 print(addr['a'].value())
30 | 
31 |         # If printf is found continue query
32 |         for printf_func in result1:
33 |             # Pull any instructions that use printf and don't use a modifier (have var type and not data type)
34 |             func_addr = int(printf_func['a'].value(), 16)
35 |             print("Scanning address {}".format(hex(func_addr)))
36 |             query2 = 'match $x isa instruction, has operation-type "MLIL_CALL_SSA", has asm-address $a; $y isa "MLIL_CONST_PTR"; ($x,$y); $z isa constant, has constant-value {}; ($y,$z); $l isa list, has list-size 1; ($x,$l); $s isa "MLIL_VAR_SSA"; ($l,$s); offset 0; limit 500; get $x, $a;'.format(func_addr)
37 |             result2 = [result.map() for result in graph.query(query2)]
38 | 
39 |             # If there is an instruction that uses printf without modifier, output instruction
40 |             if result2:
41 |                 for instr in result2:
42 |                     asm_addr = instr['a'].value()
43 |                     print("CWE-134: Uncontrolled Format String possible at {} ".format(asm_addr))
44 | 
45 | if __name__ == "__main__":
46 |     if len(sys.argv) > 1:
47 |         keyspace = sys.argv[1]
48 |         main(keyspace)
49 |     else:
50 |         print("Please specify a keyspace to search.\nUsage: python3.6 {} <keyspace>".format(sys.argv[0]))
51 | 
52 | 


--------------------------------------------------------------------------------
/queries/cwe_788_v1.py:
--------------------------------------------------------------------------------
  1 | #=======================================================================================
  2 | # CWE-788: Access of Memory Location After End of Buffer
  3 | #
  4 | # Vuln Info: The software reads or writes to a buffer using an index or pointer that
  5 | #            references a memory location after the end of the buffer.
  6 | #
  7 | # Methodology:
  8 | # 1.Find any arrays
  9 | # 2.Find indexing variables for said arrays
 10 | # 3.Look to see if those variables are used in a comparison (bounds check)
 11 | #=======================================================================================
 12 | 
 13 | import sys
 14 | import grakn
 15 | 
 16 | #Returns 0; the sys.exit() below is unreachable, so the script is NOT exited. NOTE(review): confirm whether exiting was intended
 17 | def fail():
 18 |     return 0
 19 |     sys.exit()
 20 | 
 21 | #Searches for potential array declarations
 22 | def query1():
 23 |     query = 'match $set isa instruction, has operation-type "MLIL_SET_VAR_SSA";$ptr isa MLIL_CONST_PTR;($set, $ptr);$reg isa variable-ssa, has var $index;($set, $reg); get $index;'
 24 |     return [result.map() for result in graph.query(query)]
 25 | 
 26 | #Finds potential loops
 27 | def query2():
 28 |     query = 'match $block isa basic-block;($block, $inst);$inst isa instruction;$reg isa variable-ssa, has var $index, has edge-label "dest";($inst, $reg);get $index, $block;'
 29 |     return [result.map() for result in graph.query(query)]
 30 | 
 31 | #Checks query2 for if statements
 32 | def query3(item):
 33 |     query = 'match $block isa basic-block, id "' + item  + '";($block, $inst);$inst isa instruction, has operation-type "MLIL_IF";offset 0; get $inst;'
 34 |     return [result.map() for result in graph.query(query)]
 35 | 
 36 | #Finds and returns various information about the loops, including the counting variable
 37 | def query4(entry):
 38 |     query = 'match $block isa basic-block, id "' + entry  + '";($block, contains-instruction:$inst);$inst isa instruction, has operation-type "MLIL_SET_VAR_SSA";($inst, to-node:$add);$add isa MLIL_ADD;$var isa MLIL_VAR_SSA;($add, $var);$const isa MLIL_CONST;($add, $const);$one isa constant has constant-value 1;($const, $one);$reg isa variable-ssa, has var $index, has version $version, has edge-label "dest";($inst, $reg);get $index, $reg, $version;'
 39 |     return [result.map() for result in graph.query(query)]
 40 | 
 41 | #Checks if the bounds on the counting varaible (array index) are ever checked
 42 | def query5():
 43 |     query = 'match $block isa basic-block;$inst isa instruction, has operation-type "MLIL_IF";($block, $inst);{$comp isa MLIL_CMP_SGE;} or {$comp isa MLIL_CMP_SLE;} or {$comp isa MLIL_CMP_SLT;} or {$comp isa MLIL_CMP_SGT;} or {$comp isa MLIL_CMP_UGE;} or {$comp isa MLIL_CMP_ULE;} or {$comp isa MLIL_CMP_ULT;} or {$comp isa MLIL_CMP_UGT;};($inst, $comp);$reg isa MLIL_VAR_SSA;($comp, $reg);$index isa variable-ssa, has var $var, has version $version;($reg, $index);get $var, $version;'
 44 |     return [result.map() for result in graph.query(query)]
 45 | 
 46 | #Returns asm-address of vulnerability
 47 | def query6(reg_type, reg):
 48 |     query = 'match $inst isa instruction, has asm-address $adr;$var isa '+ reg_type + ', id "' + reg + '";($inst, $var);get $adr;'
 49 |     return [result.map() for result in graph.query(query)]
 50 | 
 51 | def main(keyspace):
 52 |     client = grakn.Grakn(uri='localhost:48555')
 53 |     global graph
 54 |     with client.session(keyspace=keyspace).transaction(grakn.TxType.READ) as graph:
 55 | 
 56 |         # Find possible arrays
 57 |         array = []
 58 |         q1 = query1()
 59 |         if q1:
 60 |             i = 0
 61 |             for item in q1:
 62 |                 array.append(q1[i]['index'].id)
 63 |                 i += 1
 64 |         else:
 65 |             fail()
 66 | 
 67 |         # Find loops involving the array
 68 |         block = []
 69 |         q2 = query2()
 70 |         if q2:
 71 |             i = 0
 72 |             for item in q2:
 73 |                 if q2[i]['index'].id in array:
 74 |                     block.append(q2[i]['block'].id)
 75 |                 i += 1
 76 |         else:
 77 |             fail()
 78 | 
 79 |         # Do the 'loop' blocks contain if statements?
 80 |         if_id = []
 81 |         block2 = block.copy()
 82 |         for item in block2:
 83 |             q3 = query3(item)
 84 |             if not q3:
 85 |                 block.remove(item)
 86 | 
 87 |         # Find the loop counters
 88 |         var, version, var_id, reg, reg_type, block2 = [], [], [], [], [], block.copy()
 89 |         for entry in block2:
 90 |             q4 = query4(entry)
 91 |             if q4:
 92 |                 i = 0
 93 |                 for item in q4:
 94 |                     reg.append(item['reg'].id)
 95 |                     reg_type.append(item['reg'].type().label())
 96 |                     var.append(item['index'].value())
 97 |                     version.append(item['version'].value())
 98 |                     var_id.append(item['index'].id)
 99 |                     i += 1
100 |             else:
101 |                 block.remove(entry)
102 |         i = len(var) - 1
103 | 
104 |         # Find is the bounds of the loop counter are checked
105 |         var2 = []
106 |         q5 = query5()
107 |         i = 0
108 |         for entry in q5:
109 |             var2.append(q5[i]['var'].value())
110 |             i += 1
111 | 
112 |         # Any variables in var[] but not var2[] are potential vulnerabilities
113 |         i = 0
114 |         for entry in var:
115 |             if entry not in var2:
116 |                 q6 = query6(reg_type[i], reg[i])
117 |                 print('CWE-788: Array index missing bounds check at ' + q6[0]['adr'].value() + ' associated with '+ var[i] + '#' + str(version[i]) + ' id = ' + var_id[i] + ' sub of ' + reg_type[i] + ' id = ' + reg[i])
118 |             i += 1
119 | 
120 | if __name__ == "__main__":
121 |     if len(sys.argv) > 1:
122 |         keyspace = sys.argv[1]
123 |     else:
124 |         keyspace = "grakn"
125 |     main(keyspace)
126 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | grakn
2 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa.gql:
--------------------------------------------------------------------------------
  1 | define 
  2 | 
  3 | ## ENTITIES #####################################
  4 | function sub entity
  5 | 	plays in-function
  6 | 	has func-name
  7 | 	has asm-address
  8 | 	has stack;
  9 | 
 10 | basic-block sub entity
 11 | 	plays from-basic-block 
 12 | 	plays to-basic-block
 13 | 	plays in-basic-block 
 14 | 	plays contains-basic-block
 15 | 	has bb-name
 16 | 	has bb-start
 17 | 	has bb-end;
 18 | 
 19 | instruction sub entity
 20 | 	plays from-node
 21 | 	plays to-node
 22 | 	plays in-instruction
 23 | 	plays contains-instruction
 24 | 	has name
 25 | 	has il-index
 26 | 	has asm-address
 27 | 	has ins-text
 28 | 	has operation-type
 29 | 	has in-bb;
 30 | 
 31 | operation sub entity
 32 | 	plays from-node
 33 | 	plays to-node
 34 | 	plays in-operation
 35 | 	plays contains-operation
 36 | 	has name
 37 | 	has parent-hash
 38 | 	has edge-label;
 39 | 
 40 | constant sub entity
 41 | 	plays from-node
 42 | 	plays to-node
 43 | 	has name
 44 | 	has parent-hash
 45 | 	has constant-value
 46 | 	has edge-label;
 47 | 
 48 | variable sub entity
 49 | 	plays from-node
 50 | 	plays to-node
 51 | 	has name
 52 | 	has parent-hash
 53 | 	has var
 54 | 	has edge-label
 55 | 	has var-type
 56 | 	has var-size
 57 | 	has var-func;
 58 | 
 59 | variable-ssa sub entity
 60 | 	plays from-node
 61 | 	plays to-node
 62 | 	plays trace
 63 | 	has name
 64 | 	has parent-hash
 65 | 	has var
 66 | 	has version
 67 | 	has edge-label
 68 | 	has var-type
 69 | 	has var-size
 70 | 	has var-func;
 71 | 
 72 | list sub entity
 73 | 	plays from-node
 74 | 	plays to-node
 75 | 	has name
 76 | 	has parent-hash
 77 | 	has list-size
 78 | 	has edge-label;
 79 | 
 80 | 
 81 | ## SUB ENTITIES #################################
 82 | #### OPERATIONS #################################
 83 | MLIL_NOP sub operation;
 84 | MLIL_SET_VAR sub operation;
 85 | MLIL_SET_VAR_FIELD sub operation;
 86 | MLIL_SET_VAR_SPLIT sub operation;
 87 | MLIL_LOAD sub operation;
 88 | MLIL_STORE sub operation;
 89 | MLIL_VAR sub operation;
 90 | MLIL_VAR_FIELD sub operation;
 91 | MLIL_ADDRESS_OF sub operation;
 92 | MLIL_ADDRESS_OF_FIELD sub operation;
 93 | MLIL_CONST sub operation;
 94 | MLIL_CONST_PTR sub operation;
 95 | MLIL_ADD sub operation;
 96 | MLIL_ADC sub operation;
 97 | MLIL_SUB sub operation;
 98 | MLIL_SBB sub operation;
 99 | MLIL_AND sub operation;
100 | MLIL_OR sub operation;
101 | MLIL_XOR sub operation;
102 | MLIL_LSL sub operation;
103 | MLIL_LSR sub operation;
104 | MLIL_ASR sub operation;
105 | MLIL_ROL sub operation;
106 | MLIL_RLC sub operation;
107 | MLIL_ROR sub operation;
108 | MLIL_RRC sub operation;
109 | MLIL_MUL sub operation;
110 | MLIL_MULU_DP sub operation;
111 | MLIL_MULS_DP sub operation;
112 | MLIL_DIVU sub operation;
113 | MLIL_DIVU_DP sub operation;
114 | MLIL_DIVS sub operation;
115 | MLIL_DIVS_DP sub operation;
116 | MLIL_MODU sub operation;
117 | MLIL_MODU_DP sub operation;
118 | MLIL_MODS sub operation;
119 | MLIL_MODS_DP sub operation;
120 | MLIL_NEG sub operation;
121 | MLIL_NOT sub operation;
122 | MLIL_SX sub operation;
123 | MLIL_ZX sub operation;
124 | MLIL_LOW_PART sub operation;
125 | MLIL_JUMP sub operation;
126 | MLIL_JUMP_TO sub operation;
127 | MLIL_CALL sub operation;
128 | MLIL_CALL_UNTYPED sub operation;
129 | MLIL_CALL_OUTPUT sub operation;
130 | MLIL_CALL_PARAM sub operation;
131 | MLIL_RET sub operation;
132 | MLIL_NORET sub operation;
133 | MLIL_IF sub operation;
134 | MLIL_GOTO sub operation;
135 | MLIL_CMP_E sub operation;
136 | MLIL_CMP_NE sub operation;
137 | MLIL_CMP_SLT sub operation;
138 | MLIL_CMP_ULT sub operation;
139 | MLIL_CMP_SLE sub operation;
140 | MLIL_CMP_ULE sub operation;
141 | MLIL_CMP_SGE sub operation;
142 | MLIL_CMP_UGE sub operation;
143 | MLIL_CMP_SGT sub operation;
144 | MLIL_CMP_UGT sub operation;
145 | MLIL_TEST_BIT sub operation;
146 | MLIL_BOOL_TO_INT sub operation;
147 | MLIL_ADD_OVERFLOW sub operation;
148 | MLIL_SYSCALL sub operation;
149 | MLIL_SYSCALL_UNTYPED sub operation;
150 | MLIL_BP sub operation;
151 | MLIL_TRAP sub operation;
152 | MLIL_UNDEF sub operation;
153 | MLIL_UNIMPL sub operation;
154 | MLIL_UNIMPL_MEM sub operation;
155 | MLIL_IMPORT sub operation;
156 | MLIL_SET_VAR_SSA sub operation;
157 | MLIL_SET_VAR_SSA_FIELD sub operation;
158 | MLIL_SET_VAR_SPLIT_SSA sub operation;
159 | MLIL_SET_VAR_ALIASED sub operation;
160 | MLIL_SET_VAR_ALIASED_FIELD sub operation;
161 | MLIL_VAR_SSA sub operation;
162 | MLIL_VAR_SSA_FIELD sub operation;
163 | MLIL_VAR_ALIASED sub operation;
164 | MLIL_VAR_ALIASED_FIELD sub operation;
165 | MLIL_CALL_SSA sub operation;
166 | MLIL_CALL_UNTYPED_SSA sub operation;
167 | MLIL_SYSCALL_SSA sub operation;
168 | MLIL_SYSCALL_UNTYPED_SSA sub operation;
169 | MLIL_CALL_OUTPUT_SSA sub operation;
170 | MLIL_CALL_PARAM_SSA sub operation;
171 | MLIL_LOAD_SSA sub operation;
172 | MLIL_STORE_SSA sub operation;
173 | MLIL_VAR_PHI sub operation;
174 | MLIL_MEM_PHI sub operation;
175 | 
176 | 
177 | ## Attribute (has) ##############################
178 | stack			sub attribute datatype string;
179 | operation-type 	sub attribute datatype string;
180 | ins-text		sub attribute datatype string;
181 | func-name		sub attribute datatype string;
182 | bb-name			sub attribute datatype string;
183 | name			sub attribute datatype string;
184 | in-bb			sub attribute datatype string;
185 | asm-address 	sub attribute datatype string;
186 | edge-label 		sub attribute datatype string;
187 | constant-value 	sub attribute datatype string;
188 | parent-hash 	sub attribute datatype string;
189 | var 			sub attribute datatype string;
190 | var-type 		sub attribute datatype string;
191 | var-func 		sub attribute datatype string;
192 | var-size		sub attribute datatype long;
193 | bb-start		sub attribute datatype long;
194 | bb-end			sub attribute datatype long;
195 | il-index 		sub attribute datatype long;
196 | list-size 		sub attribute datatype long;
197 | int 			sub attribute datatype long;
198 | version 		sub attribute datatype long;
199 | size			sub attribute datatype long;
200 | if-true 		sub attribute datatype long;
201 | if-false 		sub attribute datatype long;
202 | 
203 | 
204 | ## ROLES (plays) ################################
205 | in-function 			sub role;
206 | from-basic-block 		sub role;
207 | to-basic-block			sub role;
208 | in-basic-block 			sub role;
209 | contains-basic-block 	sub role;
210 | in-instruction 			sub role;
211 | contains-instruction 	sub role;
212 | in-operation 			sub role;
213 | contains-operation 		sub role;
214 | from-node 				sub role;
215 | to-node					sub role;
216 | trace					sub role;
217 | 
218 | ## RELATIONSHIP ####################################
219 | has-basic-block sub relationship
220 | 	relates in-function
221 | 	relates contains-basic-block;
222 | 
223 | basic-block-edge sub relationship
224 | 	relates from-basic-block
225 | 	relates to-basic-block;
226 | 
227 | has-instruction sub relationship
228 |     relates contains-instruction
229 |     relates in-basic-block;
230 | 
231 | instruction-has-operation sub relationship
232 | 	relates contains-operation
233 | 	relates in-instruction;
234 | 
235 | operation-has-operation sub relationship
236 | 	relates contains-operation
237 | 	relates in-operation;
238 | 
239 | node-link sub relationship
240 | 	relates from-node
241 | 	relates to-node;
242 | 
243 | trace-link sub relationship
244 | 	relates trace;
245 | 
246 | trace-instruction sub relationship
247 | 	relates trace;
248 | 
249 | trace-full sub relationship
250 | 	relates trace;
251 | 
252 | ## INFERENCE RULES ##############################
253 | share-var sub rule
254 | when {
255 | 	$v1 isa variable-ssa, has var $var;
256 | 	$v2 isa variable-ssa, has var $var;
257 | 	$v1 != $v2;
258 | },
259 | then {
260 | 	(trace:$v1, trace:$v2) isa trace-link;
261 | };
262 | 
263 | share-instruction sub rule
264 | when {
265 | 	$v1 isa variable-ssa;
266 | 	$v2 isa MLIL_VAR_SSA;
267 | 	(to-node:$v1, from-node:$v2);
268 | 	$v4 isa variable-ssa;
269 | 	$v3 isa MLIL_VAR_SSA;
270 | 	(to-node:$v4, from-node:$v3);
271 | 	(from-node:$inst, to-node:$v2);
272 | 	(from-node:$inst, to-node:$v3);
273 | 	$inst isa instruction;
274 | 	$v1 != $v2;$v2 != $v3;$v3 != $v4;$v1 != $v4;$v2 != $v4;$v1 != $v3;
275 | },
276 | then {
277 | 	(trace:$v1, trace:$v4) isa trace-instruction;
278 | };
279 | 
280 | trace-goal sub rule
281 | when {
282 | 	(trace:$v1,trace:$v2) isa trace-link;
283 | 	(trace:$v2,trace:$v3) isa trace-instruction;
284 | 	$v1 != $v2;$v2 != $v3;$v1 != $v3;
285 | },
286 | then {
287 | 	(trace:$v1, trace:$v3) isa trace-full;
288 | };   
289 |  
290 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_1.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts functions
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 | 	insert
 6 |     $f isa function
 7 |         has func-name <func_name>
 8 |         has asm-address <asm_addr>;
 9 | }
10 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_2.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts basic-blocks
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 |     match
 6 |     $f isa function
 7 |         has func-name <func_name>
 8 |         has asm-address <asm_addr>;
 9 |         
10 |     ## Loop over all basic-blocks in this function and link basic-blocks to the function they are in
11 |     insert
12 |     for(<basic_blocks>) do {
13 |         $<bb_name> isa basic-block
14 |             has bb-name <bb_name>
15 |             has bb-start <bb_start>
16 |             has bb-end <bb_end>;
17 |         (contains-basic-block: $<bb_name>, in-function: $f) isa has-basic-block;
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_3.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : links basic-blocks
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 |             
 6 |     ## Now loop over bb-edges and link the source and target basic-blocks in this function
 7 |     for(<bb_edges>) do {
 8 |         match 
 9 |         $<source> isa basic-block
10 |             has bb-name <source>;
11 |         $<target> isa basic-block
12 |             has bb-name <target>;   
13 |         
14 |         insert
15 |         (from-basic-block: $<source>, to-basic-block: $<target>) isa basic-block-edge;
16 |     }
17 | }
18 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_4.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts instructions
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 | 
 6 |     ## Loop over all basic-blocks in this function
 7 |     for(<basic_blocks>) do {
 8 |         
 9 |         ## Loop over all instructions in this basic-block and insert them (this template creates no link to the basic-block)
10 |         for(<instructions>) do {
11 |             insert
12 |             $ins isa instruction
13 |                 has name <name>
14 |                 has il-index <il_index>
15 |                 has asm-address <asm_address>
16 |                 has operation-type <operation_type>;
17 |         }
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_5.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : link instructions to their basic-blocks
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 | 
 6 |     ## Loop over all basic-blocks in this function
 7 |     for(<basic_blocks>) do {
 8 | 
 9 |         ## Loop over all instructions in this basic-block and link them to their basic-block
10 |         ## in_bb is a resource of 'instruction' that helps locate a basic-block by its hash name
11 |         for(<instructions>) do {
12 |             match
13 | 
14 |             $bb isa basic-block
15 |                 has bb-name <in_bb>;
16 | 
17 |             $ins isa instruction
18 |                 has name <name>;
19 | 
20 |             insert
21 |             (contains-instruction: $ins, in-basic-block: $bb) isa has-instruction;
22 |         }
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_6.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : inserts instruction nodes (AST nodes)
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 | 
 6 |     ## Loop over all basic-blocks in this function
 7 |     for(<basic_blocks>) do {
 8 | 
 9 |         ## Loop over all instructions in this basic-block
10 |         for(<instructions>) do {
11 |             
12 |             ## Loop over all nodes in this instruction and add them
13 |             for(<nodes>) do {
14 |                 insert
15 |                 ## list nodes
16 |                 if (@equals(<node_type>, "list")) do {
17 |                     $<name> isa <node_type>
18 |                         has name <name>
19 |                         has parent-hash <parent_hash>
20 |                         has edge-label <edge_label>
21 |                         has list-size <list_size>;
22 |                 }
23 | 
24 |                 ## constant nodes
25 |                 elseif (@equals(<node_type>, "constant")) do {
26 |                     $<name> isa <node_type>
27 |                         has name <name>
28 |                         has parent-hash <parent_hash>
29 |                         has edge-label <edge_label>
30 |                         has constant-value <constant_value>;
31 |                 }
32 | 
33 |                 ## variable-ssa nodes
34 |                 elseif (@equals(<node_type>, "variable-ssa")) do {
35 |                     $<name> isa <node_type>
36 |                         has name <name>
37 |                         has parent-hash <parent_hash>
38 |                         has edge-label <edge_label>
39 |                         has var <var>
40 |                         has version <version>
41 |                         has var-type <var_type>
42 |                         has var-size <var_size>
43 |                         has var-func <var_func>;
44 |                 }
45 | 
46 |                 ## variable nodes
47 |                 elseif (@equals(<node_type>, "variable")) do {
48 |                     $<name> isa <node_type>
49 |                         has name <name>
50 |                         has parent-hash <parent_hash>
51 |                         has edge-label <edge_label>
52 |                         has var <var>
53 |                         has var-type <var_type>
54 |                         has var-size <var_size>
55 |                         has var-func <var_func>;
56 |                 }
57 | 
58 |                 ## all other nodes (operations)
59 |                 else {
60 |                     $<name> isa <node_type>
61 |                         has name <name>
62 |                         has parent-hash <parent_hash>
63 |                         has edge-label <edge_label>;
64 |                 }
65 |             }
66 |         }
67 |     }
68 | }
69 | 


--------------------------------------------------------------------------------
/templates/binja_mlil_ssa_7.tpl:
--------------------------------------------------------------------------------
 1 | ## Grakn JSON migration template for binja_mlil_ssa.gql : links instruction nodes (AST nodes)
 2 | 
 3 | ## Loop over all functions in the binary
 4 | for(<functions>) do {
 5 | 
 6 |     ## Loop over all basic-blocks in this function
 7 |     for(<basic_blocks>) do {
 8 | 
 9 |         ## Loop over all instructions in this basic-block
10 |         for(<instructions>) do {
11 | 
12 |             ## Loop over all nodes in this instruction and link each one to its parent (the entity named by parent_hash)
13 |             for(<nodes>) do {
14 |                 match
15 |                 $<parent_hash> isa entity
16 |                     has name <parent_hash>;
17 |                 $<name> isa entity
18 |                     has name <name>;
19 |                 
20 |                 insert
21 |                 (from-node: $<parent_hash>, to-node: $<name>) isa node-link;
22 |             }
23 |         }
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------