├── .gitignore ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── ThreatDragonModels └── Macke threats │ └── Macke threats.json ├── config.ini ├── examples ├── chain.c ├── divisible.c ├── doomcircle.c ├── factorial.c ├── justmain.c ├── main.c ├── not42.c ├── sanatized.c ├── simple.assert.err ├── small.c └── split.c ├── leagacy ├── README.md ├── analyze_klee_test.py ├── batch_generate_unit.py ├── batch_klee.py ├── batch_separate_units.py ├── branch_analyzer.py ├── c_keywords.py ├── callee.py ├── cindex-dump.py ├── compose_units.py ├── copy_main_unit.py ├── coreutils_coverage_temp.py ├── coreutils_temp.py ├── create_call_graph.py ├── generate_separate_unit.py ├── generate_unit.py ├── html_artifacts.py ├── instrument_branches.py ├── log_branch.py ├── read_istats.py ├── read_ktest.py ├── remove_global.py ├── second_klee_round.py ├── source_coverage.py └── source_coverage_main.py ├── macke ├── Asan.py ├── CallGraph.py ├── Error.py ├── ErrorChain.py ├── ErrorRegistry.py ├── Fuzzer.py ├── Klee.py ├── Logger.py ├── Macke.py ├── StackTrace.py ├── __init__.py ├── __main__.py ├── analyse │ ├── __init__.py │ ├── aflabort.py │ ├── chains.py │ ├── everything.py │ ├── functions.py │ ├── helper.py │ ├── kleecrash.py │ ├── linecoverage.py │ ├── partial.py │ ├── plot-coverage.py │ ├── runtime.py │ └── vulninsts.py ├── callgrind.py ├── cgroups.py ├── config.py ├── constants.py ├── llvm_wrapper.py ├── read_klee_testcases.py ├── run_istats.py └── threads.py ├── reporting_tool ├── css │ └── style.css ├── html │ ├── dfaexec.html │ └── dfaexec_test000001.html ├── js │ └── graph.js ├── json │ ├── flex.json │ ├── flex1.json │ ├── flex2.json │ ├── grep.json │ ├── grep1.json │ └── grep2.json ├── vulnerabilityGraphTool.html └── xepOnline.jqPlugin.js ├── requirements.txt ├── setup.py ├── shamrock ├── __init__.py ├── __main__.py └── shamrock.py └── tests ├── __init__.py ├── test_CallGraph.py ├── test_Config.py ├── test_ErrorChain.py ├── test_ErrorRegistry.py ├── test_Macke.py ├── 
test_Macke_PhaseOne.py ├── test_Macke_PhaseTwo.py ├── test_Macke_WithMain.py ├── test_dummy.py └── test_llvm_wrapper.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | env/ 9 | build/ 10 | develop-eggs/ 11 | dist/ 12 | downloads/ 13 | eggs/ 14 | .eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Virtual environment 25 | .venv 26 | 27 | # IDE project files 28 | .idea/ 29 | 30 | # LLVM stuff 31 | *.bc 32 | *.ll 33 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CLANG ?= $$HOME/build/llvm/Release/bin/clang 2 | 3 | .PHONY: all 4 | all: help 5 | 6 | .PHONY: help 7 | help: 8 | @ echo "Please give a target. Choices are:" 9 | @ echo " test -> run all unittests" 10 | @ echo " dev -> installs this project in local virtual environment" 11 | @ echo " venv -> update (and if needed initialize) the virual environment" 12 | 13 | .PHONY: dev 14 | dev: venv 15 | .venv/bin/python3 -m pip install --editable . 16 | @ echo "Everything is set up for development." 
17 | @ echo "Please switch with 'source .venv/bin/activate'" 18 | 19 | .PHONY: test 20 | test: venv examples/chain.bc examples/divisible.bc examples/doomcircle.bc \ 21 | examples/factorial.bc examples/justmain.bc examples/main.bc \ 22 | examples/not42.bc examples/sanatized.bc examples/small.bc \ 23 | examples/split.bc 24 | .venv/bin/python -m unittest 25 | 26 | # Initialize the virtual environment, if needed 27 | .venv: 28 | python3 -m venv .venv --without-pip --system-site-packages 29 | .venv/bin/python3 -m pip install --upgrade pip 30 | 31 | # Install and keep in sync with the requirements 32 | .venv/bin/activate: requirements.txt .venv 33 | .venv/bin/python3 -m pip install -Ur requirements.txt 34 | touch .venv/bin/activate 35 | 36 | .PHONY: venv 37 | venv: .venv/bin/activate 38 | 39 | .PHONY: clean 40 | clean: 41 | @ rm -rf .venv 42 | @ rm -rf macke.egg-info 43 | @ rm -rf macke/__pycache__ 44 | @ rm -rf tests/__pycache__ 45 | @ rm -f examples/*.bc 46 | 47 | examples/%.bc: examples/%.c 48 | $(CLANG) -c -emit-llvm -O0 -g $^ -o $@ 49 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Saahil Ognawala 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MACKE - `M`odular `a`nd `C`ompositional Analysis with `K`LEE (and AFL) `E`ngine 2 | 3 | MACKE is a wrapper around [AFL](http://lcamtuf.coredump.cx/afl/) and [KLEE](https://klee.github.io/), that decomposes the analyzed programs into several smaller units, analyze these seperately and finally merge all found errors to one interactive report. Please read the [MACKE-paper](https://www.researchgate.net/publication/305641321_MACKE_-_Compositional_Analysis_of_Low-Level_Vulnerabilities_with_Symbolic_Execution) for more details. 4 | 5 | ## Installation guide 6 | 7 | ### Requirements 8 | * Python 3.4+ 9 | * A system able to run LLVM. See [official requirements for LLVM](http://www.llvm.org/docs/GettingStarted.html#requirements) 10 | * AFL See AFL's [Quickstart guide](http://lcamtuf.coredump.cx/afl/QuickStartGuide.txt) 11 | 12 | ### Step 1: LLVM and KLEE with targeted search 13 | Building KLEE can be a complicated task and there are multiple strategies for it. We suggest the setup described in our [Step-by-Step manual](https://github.com/hutoTUM/install-klee). But whatever way you choose, MACKE needs a special search mode, that is not part of the official KLEE code, yet. We aim to merge it into KLEE someday, but till then, you need to use [our fork of KLEE](https://github.com/tum-i22/klee22). 14 | 15 | For our step-by-step manual, this means, that you have to adapt one command. Instead of: 16 | ``` 17 | git clone --depth 1 --branch v1.3.0 https://github.com/klee/klee.git 18 | ``` 19 | in [Step 6](https://github.com/hutoTUM/install-klee#step-6-klee), you must use: 20 | ``` 21 | git clone --depth 1 https://github.com/tum-i22/klee22.git 22 | ``` 23 | 24 | In addition to the above, you also need to install LLVM 6.0 if you want the ability to fuzz in phase 1 of Macke. 
25 | 26 | For our step-by-step manual, this means that you must **repeat** [Step 1](https://github.com/tum-i22/klee-install#step-1-llvm) for LLVM 6.0, i.e. replace ``RELEASE_342`` with ``RELEASE_600`` in all links. 27 | 28 | ### Step 2: Building the macke-llvm-opt passes 29 | MACKE performs several modifications on LLVM bitcode level. Doing this inside python requires a lot more effort, than directly writing the operations in C++ - especially if you are forced to use the same, old version of LLVM as KLEE uses. Therefore, we decide to seperate all low level operations into two other repositories - [one for LLVM 3.4 for KLEE-related stuff](https://github.com/hutoTUM/macke-opt-llvm) and [another one for LLVM 6.0 for AFL-related stuff](https://github.com/tum-i22/macke-fuzzer-opt-llvm). 30 | 31 | If you choose a different directory structure than suggested in our Step-by-Step manual, please adapt the pathes to match your needs. 32 | 33 | ``` 34 | git clone --depth 1 https://github.com/tum-i22/macke-opt-llvm 35 | cd macke-opt-llvm 36 | make LLVM_SRC_PATH=~/build/llvm3.4/ KLEE_BUILDDIR=~/build/klee/Release+Asserts KLEE_INCLUDES=~/build/klee/include/ 37 | ``` 38 | 39 | Now repeat the above step for macke-fuzzer-opt-llvm 40 | ``` 41 | git clone --depth 1 https://github.com/tum-i22/macke-fuzzer-opt-llvm 42 | cd macke-fuzzer-opt-llvm 43 | make LLVM_SRC_PATH=~/build/llvm6.0/ KLEE_BUILDDIR=~/build/klee/Release+Asserts KLEE_INCLUDES=~/build/klee/include/ 44 | ``` 45 | 46 | ### Step 3: Building MACKE 47 | We are done with the dependencies - now to the main project. 48 | ``` 49 | # You might have to change the branch in repository below, depending on the version you want to build 50 | git clone --depth 1 https://github.com/tum-i22/macke 51 | cd macke 52 | make dev 53 | ``` 54 | 55 | ### Step 4: Running MACKE 56 | Before you can actually start using MACKE, you have to modify the `./config.ini` with your favorite text editor. 
Please adapt the pathes there to the directories, you have created earlier in this guide. 57 | 58 | First switch your virtual environment to Macke 59 | 60 | ``` 61 | source .venv/bin/activate # Note: just needed once per open shell 62 | ``` 63 | 64 | If you want to analyze the isolated functions with symbolic execution then run the following: 65 | ``` 66 | macke 2beAnalyzed.bc 67 | ``` 68 | 69 | Otherwise if you want to analyze the isolated functions with fuzzing (AFL) then run the following: 70 | ``` 71 | macke --use-fuzzer=1 --fuzz-bc=2beAnalyzedCompiledWithClang6.0.bc 2beAnalyzed.bc 72 | ``` 73 | 74 | Finally, if you want to analyze the isolated functions with flipper (switch between KLEE and AFL, whenever one of them saturates) then run the following: 75 | ``` 76 | macke --flipper=1 --fuzz-bc=2beAnalyzedCompiledWithClang6.0.bc 2beAnalyzed.bc 77 | ``` 78 | 79 | We wish you happy testing! If you have problems converting existing source code bases to bitcode files, you should have a look at this [tool for improving make](https://github.com/tum-i22/MakeAdditions). 80 | 81 | ## Troubleshooting 82 | 83 | ### Getting around the issue of cgroups 84 | Linux control groups, or *Cgroups* in short, are a kernel feature that allows user space processes to have limited (and exclusive) access to certain system resources, such as CPU. We leverage cgroups to isolate parallel fuzzing processes so that they don't interfere. 85 | 86 | Therefore, before using Macke, you need to create these cgroups partitions. There are two alternative ways to do this. 87 | 88 | __Alternative 1: Using Macke__ 89 | 90 | 1. Change as root 91 | ``` 92 | sudo -s 93 | ``` 94 | 95 | 2. 
Load macke (with source command) virtual environment and create cgroups 96 | ``` 97 | macke --initialize-cgroups --cgroup-name=: 98 | ``` 99 | 100 | If the warning about limiting swap memory shows up, then run the following 101 | ``` 102 | macke --initialize-cgroups --cgroup-name=: --ignore-swap 103 | ``` 104 | and in all subsequent macke commands, also add --ignore-swap 105 | 106 | __Alternative 2: Manually on command-line__: 107 | 108 | 1. Run cgroups command. 109 | If your CPU has *x* cores, then run the following command *x* times replacing the index *i* with the CPU number, i.e. *mackefuzzer_1*, *mackefuzzer_2*... *mackefuzzer_x*. 110 | 111 | ``` 112 | cgcreate -s 775 -d 775 -f 775 -a -t -g memory: mackefuzzer_ 113 | ``` 114 | 115 | You almost certainly might need to run this command as root or with sudo. 116 | 117 | Then, in all subsequent macke commands, also add --ignore-swap, if your operating system does not allow partitioning swap memory resource. 118 | 119 | ## Author's note 120 | For current issues, suggestions, datasets and gratitude please email [me](mailto:saahil.ognawala@tum.de). 121 | Big thanks to [HuTo](t.hutzelmann@tum.de) and [Fabian Kilger](fabian.kilger@tum.de) for much of the development effort. 
122 | 123 | > [Saahil Ognawala](https://www22.in.tum.de/en/ognawala/) 124 | -------------------------------------------------------------------------------- /ThreatDragonModels/Macke threats/Macke threats.json: -------------------------------------------------------------------------------- 1 | { 2 | "summary": { 3 | "title": "Macke threats", 4 | "owner": "saahil" 5 | }, 6 | "detail": { 7 | "contributors": [], 8 | "diagrams": [ 9 | { 10 | "title": "first-model", 11 | "thumbnail": "./public/content/images/thumbnail.jpg", 12 | "id": 0 13 | } 14 | ], 15 | "reviewer": "saahil" 16 | } 17 | } -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | ######################################## 2 | # !!! Important notice for all git-users !!! 3 | # Please do not commit your local changes of this file to the repo 4 | # There is a git command for handling config files like this: 5 | # 6 | # git update-index --skip-worktree config.ini 7 | # 8 | # P.S. This can be undone with: 9 | # git update-index --no-skip-worktree config.ini 10 | ######################################## 11 | 12 | [binaries] 13 | 14 | # The binary of the shared library with all llvm opt passes needed by macke 15 | # See dependencies section in README.md to get more information 16 | # e.g. "/home/user/git/macke-opt-llvm/bin/libMackeOpt.so" (without "") 17 | libmackeopt: /home/ognawala/macke-opt-llvm/bin/libMackeOpt.so 18 | 19 | # The binary of llvm opt, e.g. "/usr/bin/opt" (without "") 20 | llvmopt: /home/ognawala/build/llvm-3.4/Release/bin/opt 21 | 22 | # The binary of klee, e.g. "/usr/bin/klee" (without "") 23 | # Warning: the KLEE version must include targeted search, which is missing 24 | # in the vanilla version of KLEE. Again, see dependencies section in README.md 25 | klee: /home/ognawala/build/klee/Release+Asserts/bin/klee 26 | 27 | 28 | # The binary of llvm-config, e.g. 
"/usr/bin/llvm-config" (without "") 29 | llvm-config: /usr/bin/llvm-config 30 | 31 | # The binary of the shared library with all llvm opt passes needed by the macke fuzzer 32 | # See dependencies section in README.md to get more information 33 | # e.g. "/home/user/git/macke-fuzzer-opt-llvm/bin/libMackeFuzzerOpt.so" (without "") 34 | libmackefuzzopt: /home/kilger/thesis/opt/ 35 | 36 | # The path, where the afl files can be found 37 | afl-bin: /usr/local/bin/ 38 | afl-lib: /usr/local/lib/afl/ 39 | 40 | # The valgrind binary for coverage analysis 41 | valgrind: /usr/bin/valgrind 42 | 43 | [runtime] 44 | 45 | # Number of threads used for parallel executation. Without any information 46 | # the number of cpu threads of the current machine is used 47 | threadnum: 1 48 | -------------------------------------------------------------------------------- /examples/chain.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void c4(int i) { 5 | assert(i != 42); 6 | } 7 | 8 | void c3(int i) { 9 | c4(i * 7); 10 | } 11 | 12 | void c2(int i) { 13 | c3(i * 3); 14 | } 15 | 16 | void c1(int i) { 17 | c2(i * 2); 18 | } 19 | 20 | int main(int argc, char** argv) { 21 | if (argc != 2) { 22 | return -1; 23 | } 24 | int i = atoi(argv[1]); 25 | 26 | c1(i); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /examples/divisible.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | __attribute__((noinline)) int divby2(int i) { 4 | return (i & 1) == 0; 5 | } 6 | 7 | __attribute__((noinline)) int divby3(int i) { 8 | return i % 3 == 0; 9 | } 10 | 11 | __attribute__((noinline)) int divby5(int i) { 12 | return i % 5 == 0; 13 | } 14 | 15 | __attribute__((noinline)) int divby6(int i) { 16 | return divby2(i) && divby3(i); 17 | } 18 | 19 | __attribute__((noinline)) int divby10(int i) { 20 | return divby2(i) && divby5(i); 21 | } 22 | 
23 | __attribute__((noinline)) int divby30(int i) { 24 | return divby3(i) && divby10(i); 25 | } 26 | 27 | int main(int argc, char **argv) { 28 | int n = 42; 29 | 30 | if (argc == 2) { 31 | n = atoi(argv[1]); 32 | } 33 | 34 | return (n < 1000) ? divby6(n) : divby30(n); 35 | } 36 | -------------------------------------------------------------------------------- /examples/doomcircle.c: -------------------------------------------------------------------------------- 1 | __attribute__((noinline)) int a(int n); 2 | __attribute__((noinline)) int b(int n); 3 | __attribute__((noinline)) int c(int n); 4 | 5 | int a(int n) { 6 | return b(n + 1); 7 | } 8 | 9 | int b(int n) { 10 | if (n > 10) { 11 | return n; 12 | } else { 13 | return c(n + 1); 14 | } 15 | } 16 | 17 | int c(int n) { 18 | return a(n + 1); 19 | } 20 | 21 | int main(int argc, char** argv) { 22 | return a(argc); 23 | } 24 | -------------------------------------------------------------------------------- /examples/factorial.c: -------------------------------------------------------------------------------- 1 | 2 | unsigned int fac(unsigned int n) { 3 | return (n > 1) ? 
n * fac(n - 1) : 1; 4 | } 5 | -------------------------------------------------------------------------------- /examples/justmain.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char** argv) { 4 | 5 | if (argc == 2) { 6 | assert(argv[1][0] != 'a'); 7 | } 8 | 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /examples/main.c: -------------------------------------------------------------------------------- 1 | 2 | int main(int argc, char** argv) { 3 | return argv[0][0]; 4 | } 5 | -------------------------------------------------------------------------------- /examples/not42.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void not42(int i) { 4 | assert(i != 42); 5 | } 6 | -------------------------------------------------------------------------------- /examples/sanatized.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void c4(int i) { 5 | assert(i != 42); 6 | } 7 | 8 | void c3(int i) { 9 | c4(i * 7); 10 | } 11 | 12 | void c2(int i) { 13 | if (i != 2) { 14 | c3(i * 3); 15 | } 16 | } 17 | 18 | void c1(int i) { 19 | c2(i * 2); 20 | } 21 | 22 | int main(int argc, char** argv) { 23 | if (argc != 2) { 24 | return -1; 25 | } 26 | int i = atoi(argv[1]); 27 | 28 | c1(i); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /examples/simple.assert.err: -------------------------------------------------------------------------------- 1 | Error: ASSERTION FAIL: i != 42 2 | File: /some/path/file.c 3 | Line: 21 4 | assembly.ll line: 84 5 | Stack: 6 | #000000001 in func (i) at /some/path/file.c:5 7 | #100000001 in __user_main (=1, =0) 8 | -------------------------------------------------------------------------------- /examples/small.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | int f2(int i) { 4 | assert(i != 2); 5 | return i; 6 | } 7 | 8 | int f3(int i) { 9 | assert(i % 3 == 0); 10 | assert(i % 5 == 0); 11 | return i; 12 | } 13 | 14 | int f1(int i) { 15 | if (i % 2 == 0) { 16 | return f2(i); 17 | } else { 18 | return f3(i); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/split.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void bottom(int i) { 4 | assert(i < 42); 5 | } 6 | 7 | void left(int i) { 8 | bottom(i); 9 | } 10 | 11 | void right(int i) { 12 | bottom(i); 13 | } 14 | 15 | void top(int i) { 16 | if (i % 2 == 0) { 17 | left(i); 18 | } else { 19 | right(i); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /leagacy/README.md: -------------------------------------------------------------------------------- 1 | MACKE - Modular and Compositional Analysis with KLEE Engine 2 | === 3 | 4 | Author's note 5 | --- 6 | 7 | A short introduction and usage guide for MACKE is available as a [demo on YouTube](https://www.youtube.com/watch?v=icC3jc3mHEU). 8 | 9 | [Email me](mailto:ognawala@in.tum.de) if something is broken. Ditto if you would like to contribute! 10 | 11 | -- [Saahil Ognawala](https://www.i22.in.tum.de/index.php?id=31&L=1) 12 | 13 | Pre-requisites for installation 14 | --- 15 | Following are the *minimum* requirements before installing MACKE. 16 | 17 | - **Python 3.4+** 18 | - **KLEE** - We have prepared a [KLEE installation guide](https://github.com/hutoTUM/install-klee) that includes all the pre-requisites for KLEE, such as LLVM's Clang toolkit, uclibc and a SMT solver. 19 | - **Libclang** - MACKE requires Python wrappings for libclang's C/C++ parsing library to work correctly. 
A nice guide to parsing C++ in Python can be found at [Eli Bendersky's website](http://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang), that includes some installation details. 20 | 21 | Please note that we installed MACKE and performed all our experiments on Ubuntu 14.04. Some dependencies may differ based on your system 22 | 23 | Description of included scripts 24 | --- 25 | + `analyze_klee_test.py` - Analyze KLEE .ptr.err files to analyze infected functions, their files and line numbers. 26 | + `batch_generate_unit.py` - Defunct. Has no purpose at the moment, except for Ngircd experiments. IGNORE! 27 | + `batch_klee.py` - Runs KLEE on the seperate unit tests (generated by `batch_generate_units.py`) and stores results in respective output folders for isolated functions. Ideally, should be run right after generating separate unit test files. 28 | + `batch_separate_units.py` - Runs `generate_seperate_unit.py` script in batch mode for all C files in the source directory. Ideally, the starting point of the full MACKE framework. 29 | + `branch_analyzer.py` - Analyzes static elements (condition, true-block, false-block) of an if-else block. 30 | + `c_keywords.py` - For playing around with libclang. Lists out keywords and lexical structures for C language sources. IGNORE! 31 | + `callee.py` - Analyzes caller-callee relationship between all functions (defined within the project source) and lists them. 32 | + `cindex-dump.py` - Dump whole abstract syntax tree of the project under test. Can be ignored, serves no practical purpose in MACKE. 33 | + `compose_units.py` - Performs the first step of compositional analysis. Compares test output files (.ptr.err and others) to get chain of affected functions. Gets a list of outlier functions and writes everything to composition.test. Usually doesn't need to be run separately. Essential functions called by `batch_klee.py`. 34 | + `copy_main_unit.py` - Copy main function into it's own "unit test". 
Usually doesn't need to be run separately. 35 | + `create_call_graph.py` - Generate [pydot]([https://pypi.python.org/pypi/pydot]) nodes and edges for interactive program call graph in the HTML output. 36 | + `generate_separate_unit.py` - Generates seperate unit test file for isolated functons. Outputs *.c.units in the parent source file's respective directory. 37 | + `generate_unit.py` - Old (most likely, broken) version of `generate_separate_unit.py`. IGNORE! 38 | + `html_artifacts.py`- Creates static HTML pages for the interactive report, list of bugs and pointer error details. 39 | + `instrument_branches.py`- For future work - recording branch coverage in source - for hybrid blackbox+whitebox fuzzing. Ignore for now! 40 | + `log_branch.py`- Future work - log traversed branches in source. Ignore for now! 41 | + `read_istats.py`- Parse run.istats files for all individual KLEE runs. 42 | + `read_ktest.py`- Parse ktest output (output from `ktest-tool`) from individual KLEE runs. 43 | + `remove_global.py` - Treats some particularities in files where functions are declared GLOBAL. 44 | + `second_klee_round.py` - For the second step of compositional analysis. Runs KLEE with the modified code (with `klee_assert` statements) for partial PC matching. 45 | + `source_coverage.py` - Calculate approximate source code coverage, from the information parsed from run.istats. 46 | + `source_coverage_main.py` - IGNORE! 
def _read_stack_lines(err_file):
    """Return the stack-trace lines of an open KLEE .ptr.err file.

    The trace is the run of lines between the 'Stack:' and 'Info:' markers.
    Returns None if either marker is missing (malformed error file).
    """
    for line in err_file:
        if line.startswith('Stack:'):
            break
    else:
        # EOF reached without seeing 'Stack:'.
        return None
    stack_lines = []
    for line in err_file:
        if line.startswith('Info:'):
            return stack_lines
        stack_lines.append(line)
    # EOF reached without seeing 'Info:'.
    return None


def find_matching_error(ptr_err_directory, culp_func, culp_line):
    """Return True if any *.ptr.err file in ptr_err_directory names
    culp_func in its stack trace, False otherwise.

    culp_line is currently unused (kept for interface compatibility);
    the line-number comparison was deliberately disabled, see below.
    """
    for ptr_err_filename in glob.glob(
            os.path.join(ptr_err_directory, '*.ptr.err')):
        # 'with' guarantees the handle is closed (the original leaked it).
        with open(ptr_err_filename, 'r') as ptr_err:
            stack_lines = _read_stack_lines(ptr_err)
        if stack_lines is None:
            print('Something went wrong while reading the ptr.err file.')
            return False
        for l in stack_lines:
            # Disabled original condition:
            #   culp_func in l and (':'+str(culp_line)) in l
            if culp_func in l:
                return True
    return False


def get_culp_line(ptr_err_filename, culp_func):
    """Return the source line number (int) at which culp_func appears in
    the stack trace of ptr_err_filename.

    Returns None if the file is malformed or the function is not in the
    trace (an error message is printed in both cases).
    """
    with open(ptr_err_filename, 'r') as ptr_err:
        stack_lines = _read_stack_lines(ptr_err)
    if stack_lines is None:
        print('Something went wrong while reading the ptr.err file.')
        return None
    for l in stack_lines:
        if culp_func in l:
            # Frame format: "#N in func (...) at /path/file.c:LINE"
            return int(l.split(':')[-1].strip())
    print('The given function not found in the ptr.err file. '
          'Something went wrong.')
    return None


def get_culp_func(err_filename):
    """Return the name of the function one stack frame above __user_main
    in err_filename's stack trace (i.e. the unit's entry point).

    Returns None on a malformed file or when __user_main is absent.
    """
    with open(err_filename, 'r') as err_file:
        stack_lines = _read_stack_lines(err_file)
    if stack_lines is None:
        print('Something went wrong while reading the ptr.err file.')
        return None
    func_line = None
    for i, l in enumerate(stack_lines):
        if '__user_main' in l:
            func_line = stack_lines[i - 1]
            break
    if func_line is None:
        # The original initialised func_line to the int 0 and crashed on
        # 0.strip() here; report the malformed trace instead.
        print('Something went wrong while reading the ptr.err file.')
        return None
    # Frame format: "#N in func (...)" -> third whitespace token.
    return func_line.strip().split()[2]


def find_ptr_errs(klee_out_dir):
    """Map each culprit function found in klee_out_dir's *.ptr.err files
    to a list of (ptr.err filename, culprit line number) tuples.

    Returns None as soon as any file cannot be parsed.
    """
    err_funcs = {}
    for f in glob.glob(os.path.join(klee_out_dir, '*.ptr.err')):
        culp_func = get_culp_func(f)
        if culp_func is None:
            print('Returning None')
            return None
        err_funcs.setdefault(culp_func, []).append(
            (f, get_culp_line(f, culp_func)))
    return err_funcs
if __name__ == '__main__':
    # Batch driver (ngircd experiments): for every generated *.c.units.mod
    # unit file, swap it in for the real source, rebuild, run KLEE on the
    # linked bitcode, and restore the original source afterwards.
    klee_command = 'klee --simplify-sym-indices --write-cvcs --write-cov --write-smt2 --output-module --max-memory=1000 --disable-inlining --optimize --use-forked-solver --use-cex-cache --libc=uclibc --posix-runtime --allow-external-sym-calls --only-output-states-covering-new --max-sym-array-size=4096 --max-instruction-time=60. --max-time=300. --watchdog --max-memory-inhibit=false --max-static-fork-pct=1 --max-static-solve-pct=1 --max-static-cpfork-pct=1 --switch-type=internal --randomize-fork --search=random-path --search=nurs:covnew --use-batching-search --batch-instructions=10000 '
    # Symbolic-argument spec appended to every KLEE run (currently none).
    klee_sym_args = ''  # ' --sym-args 0 1 10 --sym-args 0 2 2 --sym-files 1 8 '

    parser = OptionParser("usage: %prog -d {directory containing source files} -e {executable name}")
    parser.add_option('-d', '--dir', action='store', type='string', dest='dir', help='Source file directory path')
    parser.add_option('-e', '--exec', action='store', type='string', dest='executable', help='Name of executable generated by Makefile')

    (opts, args) = parser.parse_args()

    dir_name = opts.dir
    exec_name = opts.executable

    if not os.path.isdir(dir_name):
        print('Could not find the specified directory.\nExiting.')
        sys.exit(-1)

    if not dir_name.endswith('/'):
        dir_name = dir_name + '/'

    for f in glob.glob(dir_name + 'src/*.c'):
        # BUG FIX: os.path.exists() does not expand wildcards, so the
        # original check was always False and units were regenerated on
        # every run; use glob to test whether unit files already exist.
        if not glob.glob(dir_name + 'src/ngircd/*.c.units.mod'):
            os.system('python generate_unit.py -f ' + f + ' -a')

    for f in glob.glob(dir_name + 'src/ngircd/*.c.units.mod'):
        fl_name_noext = f[:-12]  # strip the '.c.units.mod' suffix
        # (Dead `if True:` wrapper from a commented-out raw_input prompt
        # removed; the body always executed.)
        # Back up the real source, substitute the unit file, rebuild.
        os.system('mv ' + fl_name_noext + '.c ' + fl_name_noext + '.c.bkp')
        os.system('cp ' + f + ' ' + fl_name_noext + '.c')
        os.system('make -C ' + dir_name + ' clean')
        os.system('make -C ' + dir_name)

        # Link all objects into one bitcode module and run KLEE on it.
        os.system('llvm-link-3.4 -o ' + dir_name + 'src/ngircd/' + exec_name + ' ' + dir_name + 'src/ngircd/*.o')
        os.system(klee_command + ' --output-dir=' + fl_name_noext + '/ ' + dir_name + 'src/ngircd/' + exec_name + klee_sym_args)
        # Mark the unit as done and restore the original source file.
        os.system('mv ' + fl_name_noext + '.c ' + fl_name_noext + '.c.units.finished')
        os.system('mv ' + fl_name_noext + '.c.bkp ' + fl_name_noext + '.c')
--watchdog --max-memory-inhibit=false --max-static-fork-pct=1 -max-static-solve-pct=1 --max-static-cpfork-pct=1 --switch-type=internal --randomize-fork --search=nurs:covnew --use-batching-search --batch-instructions=10000 '%(10, 120) 14 | klee_executable = ' ./bzip2 ' 15 | klee_sym_args = ' --sym-args 1 2 100 --sym-files 1 100' 16 | decl_vars = [] 17 | func_nodes = [] 18 | 19 | inj_code = [] 20 | func_names = [] 21 | 22 | sp_comps = [] 23 | 24 | parser = OptionParser("usage: %prog -d {directory containing source files} -e {executable name}") 25 | parser.add_option('-d', '--dir', action='store', type='string', dest='dir', help='Source file directory path') 26 | parser.add_option('-e', '--exec', action='store', type='string', dest='executable', help='Name of executable generated by Makefile') 27 | parser.add_option('-n', '--n-long', action='store', type='int', dest='n_long', help='Minimum length of error chain to be reported') 28 | parser.add_option('-x', '--no-klee', action='store_true', dest='no_klee', help='Do not run KLEE, but only run the compositional analysis on the ptr.err files') 29 | parser.add_option('-s', '--special-components', action='store', type = 'string', dest='special_components_filename', help='Name of the file containing list of special components, to be considered strictly for compositional analysis') 30 | parser.add_option('-a', '--generate-assertion-code', action='store_true', dest='assert_code_needed', help='Generate instrumentation code for assertion statements') 31 | (opts, args) = parser.parse_args() 32 | 33 | # pprint(('diags', map(get_diag_info, tu.diagnostics))) 34 | 35 | dir_name = opts.dir 36 | exec_name = opts.executable 37 | no_klee = opts.no_klee 38 | assert_code_needed = opts.assert_code_needed 39 | if opts.special_components_filename: 40 | special_components_filename = opts.special_components_filename 41 | else: 42 | special_components_filename = '' 43 | 44 | if opts.n_long: 45 | n_long = opts.n_long 46 | else: 47 | n_long = 2 48 
| 49 | if not special_components_filename=='': 50 | if not os.path.isfile(special_components_filename): 51 | print('The file containing special components does not exist.\nExiting') 52 | sys.exit(-1) 53 | sp_comps_file = open(special_components_filename, 'r') 54 | for line in sp_comps_file: 55 | sp_comps.append(line.strip()) 56 | 57 | if not os.path.isdir(dir_name): 58 | print('Could not find the specified directory.\nExiting.') 59 | sys.exit(-1) 60 | 61 | if not dir_name.endswith('/'): 62 | dir_name = dir_name+'/' 63 | 64 | uncompiled_files = open(dir_name + 'incomplete.units', 'w') 65 | if not no_klee: 66 | for ud in glob.glob(dir_name+'*_units/'): 67 | main_pattern = dir_name+'(.*)_units/' 68 | main_match = re.search(main_pattern, ud) 69 | main_name = main_match.group(1) 70 | for f in glob.glob(ud+'*_*.c.units'): 71 | re_pattern = ud+'(.*)_(.*)\.c\.units' 72 | re_match = re.search(re_pattern, f) 73 | #main_file = re_match.group(1) 74 | func_name = re_match.group(2) 75 | 76 | if not os.path.exists(dir_name+main_name+'.c.bkp'): 77 | os.system('cp '+dir_name+main_name+'.c ' + dir_name+main_name+'.c.bkp') 78 | os.system('cp '+f+' '+dir_name+main_name+'.c') 79 | 80 | os.system('make -C '+dir_name+' clean') 81 | make_ret = os.system('make -C '+dir_name+'../') 82 | 83 | if not make_ret==0: 84 | uncompiled_files.write(f + '\n') 85 | print('Make error\nContinuing with the next unit file...\n') 86 | time.sleep(3) 87 | #user_input = raw_input('Make error') 88 | continue 89 | 90 | # link_return = os.system('llvm-link-3.4 -o '+dir_name+exec_name+' '+dir_name+'*.bc') 91 | 92 | #if not link_return==0: 93 | # user_input = raw_input('Linking error') 94 | # continue 95 | 96 | 97 | os.system(klee_command + '--output-dir=' + ud + main_name + '_' + func_name + '/ ' + dir_name+main_name + ' ' + klee_sym_args) 98 | os.system('mv ' + dir_name + main_name + '.c.bkp ' + dir_name + main_name + '.c') 99 | 100 | uncompiled_files.close() 101 | 102 | tot_cov = 0 103 | tot_seen = 0 104 | for 
c_filename in glob.glob(dir_name + '*.c'): 105 | cov, seen = source_coverage(c_filename) 106 | tot_cov += len(cov) 107 | tot_seen += len(seen) 108 | 109 | coverage = float(tot_cov)/tot_seen 110 | src_cov_file = open(dir_name + 'src.cov', 'w+') 111 | src_cov_file.write(str(coverage)) 112 | src_cov_file.close() 113 | 114 | composition_file = open(dir_name+'composition.test', 'w+') 115 | all_funcs = [] 116 | affected_funcs = [] 117 | unaffected_funcs = [] 118 | for c_filename in glob.glob(dir_name + '*.c'): 119 | re_pattern = dir_name + '(.*).c' 120 | re_match = re.search(re_pattern, c_filename) 121 | main_name = re_match.group(1) 122 | 123 | for unit_test in glob.glob(dir_name+main_name+'_units/*.c.units'): 124 | unit_file_name = os.path.splitext(os.path.basename(unit_test))[0][:-2] 125 | func_name = unit_file_name.split('_', 1)[1] 126 | affected_parent_funcs, unaffected_parent_funcs = get_top_level_funcs(c_filename, func_name) 127 | if len(affected_parent_funcs)>=n_long or func_name in sp_comps: 128 | affected_funcs.extend(affected_parent_funcs) 129 | composition_file.write(func_name+'\n') 130 | composition_file.write(str(affected_parent_funcs)+'\n\n') 131 | if len(affected_parent_funcs)>0: 132 | all_funcs.extend(affected_parent_funcs) 133 | unaffected_funcs.extend(unaffected_parent_funcs) 134 | sorted_unaffected_funcs = [] 135 | for uf in unaffected_funcs: 136 | if uf not in sorted_unaffected_funcs: 137 | sorted_unaffected_funcs.append(uf) 138 | unaffected_funcs = sorted_unaffected_funcs 139 | 140 | outliers = get_outlier_funcs(all_funcs) 141 | for o in outliers: 142 | if o not in affected_funcs: 143 | composition_file.write(o[1]+'\n') 144 | composition_file.write(str(o)+'\n\n') 145 | 146 | # Generate target information to use for targeted path search in KLEE 147 | target_info_file = open(dir_name + 'target.info', 'w+') 148 | for uf in unaffected_funcs: 149 | caller_file = uf[1] 150 | callee_lines = get_lines_to_insert([(uf[1], uf[2])], uf[0]) 151 | if 
def invert_dict(orig):
    """Invert a mapping of key -> iterable-of-values into value -> list-of-keys.

    Every value that appears under several keys collects all of those keys,
    in the order the keys are iterated.

    Args:
        orig: dict mapping each key to an iterable of values.

    Returns:
        dict mapping each value back to the list of keys it appeared under.
    """
    inverted = {}
    # Iterate items directly; setdefault replaces the manual
    # membership-test-then-create dance of the original.
    for key, values in orig.items():
        for v in values:
            inverted.setdefault(v, []).append(key)
    return inverted
--max-instruction-time=60. --max-time=3600. --watchdog --max-memory-inhibit=false --max-static-fork-pct=1 --max-static-solve-pct=1 --max-static-cpfork-pct=1 --switch-type=internal --randomize-fork --search=random-path --search=nurs:covnew --use-batching-search --batch-instructions=10000 ' 19 | klee_executable = ' ./ngircd ' 20 | klee_sym_args = '' #' --sym-args 0 1 10 --sym-args 0 2 2 --sym-files 1 8 ' 21 | decl_vars = [] 22 | func_nodes = [] 23 | 24 | inj_code = [] 25 | func_names = [] 26 | 27 | parser = OptionParser("usage: %prog -d {directory containing source files} -e {executable name}") 28 | parser.add_option('-d', '--dir', action='store', type='string', dest='dir', help='Source file directory path') 29 | parser.add_option('-e', '--exec', action='store', type='string', dest='executable', help='Name of executable generated by Makefile') 30 | 31 | (opts, args) = parser.parse_args() 32 | 33 | # pprint(('diags', map(get_diag_info, tu.diagnostics))) 34 | 35 | dir_name = opts.dir 36 | exec_name = opts.executable 37 | 38 | if not os.path.isdir(dir_name): 39 | print('Could not find the specified directory.\nExiting.') 40 | sys.exit(-1) 41 | 42 | if not dir_name.endswith('/'): 43 | dir_name = dir_name+'/' 44 | 45 | caller_dict = {} 46 | 47 | for f in glob.glob(dir_name+'*.c'): 48 | if os.path.exists(f[:-2]+'_units'): 49 | print('Unit files for ' + f[:-2] + ' exists') 50 | continue 51 | print(f) 52 | os.system('python generate_separate_unit.py -f '+f+' -a') 53 | 54 | for f in glob.glob(dir_name+'*.c'): 55 | base_f = f[:-2] 56 | unit_name = os.path.splitext(os.path.basename(f))[0] 57 | if not os.path.isdir(base_f+'_units'): 58 | print('No unit test directory generated for ' + f) 59 | continue 60 | for callee in glob.glob(base_f+'_units/'+unit_name+'_*.c.callee'): 61 | callee_file = open(callee, 'r') 62 | 63 | # "If regular expressions is the solution to your problem, then you have two problems" - Some dude 64 | 65 | re_pattern = 
# All 32 ANSI C keywords, plus 'NULL', used to filter identifiers when
# generating unit tests.
# BUG FIX: the original list was missing the comma after 'int', so implicit
# string concatenation fused it with the next entry into the single bogus
# token 'intlong' -- neither 'int' nor 'long' was ever recognized.
C_keywords = ['auto',
              'break',
              'case',
              'char',
              'const',
              'continue',
              'default',
              'do',
              'double',
              'else',
              'enum',
              'extern',
              'float',
              'for',
              'goto',
              'if',
              'int',
              'long',
              'register',
              'return',
              'short',
              'signed',
              'sizeof',
              'static',
              'struct',
              'switch',
              'typedef',
              'union',
              'unsigned',
              'void',
              'volatile',
              'while',
              'NULL']
def get_callee_list_rec(node, callee_list):
    """Recursively gather the display names of all call expressions under *node*.

    Only calls whose source location is in a ``.c`` file are recorded.
    The names are appended to *callee_list*, which is also returned.
    """
    is_c_call = (node.kind == CursorKind.CALL_EXPR
                 and str(node.location.file).endswith('.c'))
    if is_c_call:
        callee_list.append(node.displayname)

    # Extending with an empty result is a no-op, so no length check is needed.
    for child in node.get_children():
        callee_list.extend(get_callee_list_rec(child, []))

    return callee_list
def main():
    """Parse a source file with clang and pretty-print its diagnostics and AST.

    Command-line entry point: expects a filename followed by optional
    clang arguments; see the OptionParser usage string.
    """
    from clang.cindex import Index
    from pprint import pprint

    from optparse import OptionParser, OptionGroup

    global opts

    parser = OptionParser("usage: %prog [options] (unknown) [clang-args*]")
    parser.add_option("", "--show-ids", dest="showIDs",
                      help="Compute cursor IDs (very slow)",
                      action="store_true", default=False)
    parser.add_option("", "--max-depth", dest="maxDepth",
                      help="Limit cursor expansion to depth N",
                      metavar="N", type=int, default=None)
    parser.disable_interspersed_args()
    (opts, args) = parser.parse_args()

    if len(args) == 0:
        parser.error('invalid number arguments')

    index = Index.create()
    tu = index.parse(None, args)
    if not tu:
        parser.error("unable to load input")

    # BUG FIX: pprint(object[, stream]) takes ONE object to print; the previous
    # calls pprint('diags', list(...)) passed the data as the *stream* argument,
    # crashing at runtime (a 2to3 artifact -- upstream cindex-dump.py wraps the
    # label and data in a single tuple).
    pprint(('diags', list(map(get_diag_info, tu.diagnostics))))
    pprint(('nodes', get_info(tu.cursor)))
def get_container_file(dirname, caller):
    """Return the name of the .c file whose unit directory holds *caller*.

    Scans every ``*_units/`` directory under *dirname* for a
    ``*_<caller>.c.units`` file; the owning source file name is derived
    from the directory name.  Returns ``None`` (after printing a notice)
    when no unit directory contains the function.
    """
    for unit_dir in glob.glob(dirname + '/*_units/'):
        matches = glob.glob(unit_dir + '*_' + caller + '.c.units')
        if matches:
            # '<base>_units/' -> strip trailing slash, take basename,
            # drop the '_units' suffix, and re-attach the '.c' extension.
            return os.path.basename(unit_dir[:-1])[:-6] + '.c'

    print('Could not find container file for that function: ' + caller)
    return None
os.path.dirname(c_filename)+'/'+os.path.splitext(os.path.basename(container))[0]+'_units/'+os.path.splitext(os.path.basename(container))[0]+'_'+caller.split('_')[-1]+'/' 100 | if not os.path.isdir(klee_out_dir): 101 | # print 'Could not find klee output directory for the function: %s in directory %s'%(func_name, tests_dir) 102 | return [], [] 103 | 104 | for ptr_err in ptr_errs[pe]: 105 | if find_matching_error(klee_out_dir, pe, ptr_err[1]): 106 | affected_funcs.append((os.path.dirname(c_filename)+'/'+container, caller)) 107 | else: 108 | unaffected_funcs.append((os.path.dirname(c_filename)+'/'+container, caller)) 109 | 110 | affected_funcs = set(affected_funcs) 111 | affected_funcs = [i for i in affected_funcs] 112 | 113 | unaffected_funcs = set(unaffected_funcs) 114 | unaffected_funcs = [i for i in unaffected_funcs] 115 | 116 | return affected_funcs, unaffected_funcs 117 | 118 | def get_top_level_funcs(c_filename, func_name): 119 | unaffected_parents_clean = [] 120 | affected_parents, unaffected_parents = check_parent_funcs(c_filename, func_name) 121 | affected = [(c_filename, func_name)] 122 | 123 | # This works 124 | while len(affected_parents)>0: 125 | affected_grandparents = [] 126 | for a in affected_parents: 127 | rec_result, rec_result_unaffected = check_parent_funcs(a[0], a[1]) 128 | if len(rec_result)>0: 129 | for r in rec_result: 130 | if r not in affected and r not in affected_parents: 131 | affected_grandparents.append(r) 132 | if len(rec_result_unaffected)>0: 133 | for ur in rec_result_unaffected: 134 | if (a[1], ur[0], ur[1]) not in unaffected_parents_clean: 135 | unaffected_parents_clean.append((a[1], ur[0], ur[1])) 136 | affected.extend(affected_parents) 137 | affected_parents = affected_grandparents 138 | 139 | # This, not so much 140 | ''' 141 | rec_affected_funcs = [(c_filename, func_name)] 142 | 143 | for i in affected_funcs: 144 | temp_rec_affected_funcs = check_parent_funcs(i[0], i[1]) 145 | if not temp_rec_affected_funcs == []: 146 | 
if __name__=='__main__':
    # Command-line driver: run the compositional analysis over every C file in
    # the given source directory and write the affected-function chains to
    # <dir>/composition.test.
    parser = OptionParser('Usage: %prog -d {name of the source directory}')
    parser.add_option('-d', '--dir', action='store', type='string', dest='dir', help='Source file directory path')
    parser.add_option('-n', '--n-long', action='store', type='int', dest='n_long', help='Minimum length of error chain to be reported')

    (opts, args) = parser.parse_args()

    src_dir = opts.dir
    if opts.n_long:
        n_long = opts.n_long
    else:
        n_long = 2

    if not src_dir.endswith('/'):
        src_dir = src_dir + '/'

    out_file = open(src_dir+'composition.test', 'w+')

    func_names = []
    affected_parent_funcs = []
    unaffected_parent_funcs = []
    for c_file in glob.glob(src_dir + '*.c'):
        re_pattern = src_dir + '(.*).c'
        re_match = re.search(re_pattern, c_file)
        main_name = re_match.group(1)

        for unit_test in glob.glob(src_dir+main_name+'_units/*.c.units'):
            # '<main>_<func>.c.units' -> strip extension twice, split off <func>
            unit_file_name = os.path.splitext(os.path.basename(unit_test))[0][:-2]
            func_name = unit_file_name.split('_', 1)[1]
            temp_affected_parent_funcs, temp_unaffected_parent_funcs = get_top_level_funcs(c_file, func_name)
            if temp_affected_parent_funcs:
                func_names.append(func_name)
                affected_parent_funcs.append(temp_affected_parent_funcs)
            if temp_unaffected_parent_funcs:
                # BUG FIX: previously extended 'unaffected_parents_funcs' (note
                # the stray 's'), a name that does not exist -> NameError the
                # first time any unaffected parents were found.
                unaffected_parent_funcs.extend(temp_unaffected_parent_funcs)

    print(unaffected_parent_funcs)
    for i, func_name in enumerate(func_names):
        out_file.write(func_name+'\n')
        out_file.write(str(affected_parent_funcs[i])+'\n\n')

    out_file.close()
cov_comp.extend(cov_temp) 18 | seen_comp.extend(seen_temp) 19 | 20 | print(len(cov_main), len(seen_main)) 21 | print(len(cov_comp), len(seen_comp)) 22 | -------------------------------------------------------------------------------- /leagacy/coreutils_temp.py: -------------------------------------------------------------------------------- 1 | import glob, os 2 | 3 | if __name__=='__main__': 4 | coreutils_path = '/mnt/ext-hdd/coreutils-6.10/src/' 5 | klee_command = 'klee --simplify-sym-indices --write-cov --write-smt2s --output-module --max-memory=1000 --disable-inlining --optimize --use-forked-solver --use-cex-cache --libc=uclibc --posix-runtime --allow-external-sym-calls --only-output-states-covering-new -max-sym-array-size=4096 -max-instruction-time=%d. --max-time=%d. --watchdog --max-memory-inhibit=false --max-static-fork-pct=1 -max-static-solve-pct=1 --max-static-cpfork-pct=1 --switch-type=internal --randomize-fork --search=nurs:covnew --use-batching-search --batch-instructions=10000 '%(10, 1800) 6 | klee_sym_args = '--sym-args 0 2 100 --sym-files 1 100' 7 | 8 | for f in glob.glob(coreutils_path + '*.o'): 9 | if os.path.exists(f[:-2]): 10 | os.system('chmod +x ' + f[:-2]) 11 | os.system(klee_command + '--output-dir=' + f[:-2] + '_main/ ' + f[:-2] + ' ' + klee_sym_args + ' &') 12 | 13 | -------------------------------------------------------------------------------- /leagacy/create_call_graph.py: -------------------------------------------------------------------------------- 1 | import glob, os, sys 2 | import pydot 3 | import re 4 | from optparse import OptionParser, OptionGroup 5 | from clang.cindex import Index, CursorKind 6 | from callee import get_callee_list 7 | 8 | COLOR_RANGE = ['#ffffff', '#ffe5e5', '#ff9999', '#ff4c4c', '#ff0000'] 9 | SEVERITY_RANGES = {(0, 1): 0, 10 | (1, 10): 1, 11 | (10, 20): 2, 12 | (20, 40): 3, 13 | (40, 10000): 4} 14 | 15 | LONG_CHAIN = 5 16 | KNOWN_INTERFACE = 4 17 | PROXIMITY_TO_MAIN = 3 18 | TOTAL_ERRORS = 3 19 | 
def temp_hack(dir_name, main_file, main_file_bkp):
    """Regenerate the callee list for the 'main' function of *main_file*.

    Temporarily swaps the backup copy into place, parses it with clang,
    writes ``<base>_units/<base>_<base>Main.c.callee``, then restores the
    original file layout.  Does nothing if a main callee file already exists.
    """
    base = main_file[:-2]
    if os.path.exists(dir_name + base + '_units/' + base + '_main.c.callee'):
        return

    src = dir_name + main_file
    bkp = dir_name + main_file_bkp

    # Stash the current file and promote the backup for parsing.
    os.system('mv ' + src + ' ' + src + '.new')
    os.system('mv ' + bkp + ' ' + src)

    tu = Index.create().parse(src)
    callees = get_callee_list(tu.cursor, 'main')

    out_path = dir_name + base + '_units/' + base + '_' + base + 'Main.c.callee'
    out_file = open(out_path, 'w+')
    for callee in callees:
        out_file.write(callee + '\n')
    out_file.close()

    # Restore the original layout.
    os.system('mv ' + src + ' ' + bkp)
    os.system('mv ' + src + '.new ' + src)
def create_nodes_and_edges():
    """Populate the module-level ``nodes`` list and ``edges`` mapping.

    Scans every ``*_units/*.c.callee`` file under ``dir_name``; each file is
    named ``<src>_units/<src>_<func>.c.callee`` and lists, one per line, the
    functions called by ``<func>``.
    """
    for callee_path in glob.glob(dir_name + '*_units/*.c.callee'):
        pattern = dir_name + '(.*)_units/(.*?)_(.*).c.callee'
        func_name = re.search(pattern, callee_path).group(3)
        nodes.append(func_name)

        # BUG FIX: the file handle was previously opened and never closed,
        # leaking one descriptor per callee file.
        with open(callee_path, 'r') as callee_file:
            edges[func_name] = [line.strip() for line in callee_file]
if __name__=='__main__':
    # Command-line driver: build a severity-colored call graph of the analyzed
    # sources and render it as SVG into the HTML output directory.
    nodes = []
    edges = {}
    pydot_nodes = {}
    pydot_edges = {}

    parser = OptionParser("usage: %prog -d {directory containing source files}")
    parser.add_option('-d', '--dir', action='store', type='string', dest='dir', help='Source file directory path')
    parser.add_option('-m', '--main', action='store', type='string', dest='main_file', help='Source file containing main function')
    parser.add_option('-b', '--main-backup', action='store', type='string', dest='main_file_bkp', help='Main file backup')

    (opts, args) = parser.parse_args()
    dir_name = opts.dir
    main_file = opts.main_file
    main_file_bkp = opts.main_file_bkp

    if not dir_name[-1]=='/':
        dir_name += '/'

    if not os.path.isdir(dir_name):
        print('There does not seem to be a directory with that name: ' + dir_name)
        sys.exit(-1)

    temp_hack(dir_name, main_file, main_file_bkp)

    read_composition_file()
    graph = pydot.Dot(graph_type='digraph', splines='ortho')

    # Making all nodes and edges data structures (not the pydot structures)
    create_nodes_and_edges()

    # Making pydot nodes
    create_pydot_nodes()
    for n in nodes:
        graph.add_node(pydot_nodes[n])

    # Making pydot edges
    create_pydot_edges()

    for pe in list(pydot_edges.keys()):
        graph.add_edge(pydot_edges[pe])

    if not os.path.isdir(dir_name + HTML_SRC):
        # BUG FIX: previously 'os.system(dir_name + HTML_SRC)', which tried to
        # *execute* the directory path instead of creating it (compare the
        # working "os.system('mkdir ' + ...)" in html_artifacts.py).
        os.makedirs(dir_name + HTML_SRC)

    graph.write_svg(dir_name + HTML_SRC + 'call_graph.svg')


'%(func_name_actual)) 32 | parent_test_case.close() 33 | 34 | for f in glob.glob(dir_name + '*_units/*/test*.ptr.err'): 35 | re_pattern = dir_name + '(.*)_units/(.*)_(.*)/(.*).ptr.err' 36 | re_match = re.search(re_pattern, f) 37 | func_name = re_match.group(3) 38 | test_case = re_match.group(4) 39 | 40 | test_case_pagename_relative = '%s_%s.html'%(func_name, test_case) 41 | test_case_page = open('%s%s/%s_%s.html'%(dir_name, HTML_SRC, func_name, test_case), 'w') 42 | 43 | test_case_page.write('\n') 44 | 45 | out = os.popen('cat ' + f).read() 46 | out = out.replace('\n', '\n

') 47 | 48 | ''' 49 | proc = subprocess.Popen(['cat', f], stdout=subprocess.PIPE, shell=True) 50 | (out, err) = proc.communicate() 51 | ''' 52 | 53 | test_case_page.write(out) 54 | test_case_page.write('


') 55 | 56 | ''' 57 | proc = subprocess.Popen(['ktest-tool', f[:-7]+'ktest'], stdout=subprocess.PIPE, shell=True) 58 | (out, err) = proc.communicate() 59 | ''' 60 | 61 | out = os.popen('ktest-tool ' + f[:-7]+'ktest').read() 62 | out = out.replace('\n', '\n

') 63 | 64 | test_case_page.write(out) 65 | test_case_page.write('') 66 | test_case_page.close() 67 | 68 | parent_test_case = open(dir_name + HTML_SRC + '/' + func_name + '.html', 'a') 69 | parent_test_case.write('%s

'%(test_case_pagename_relative, test_case)) 70 | parent_test_case.close() 71 | 72 | -------------------------------------------------------------------------------- /leagacy/instrument_branches.py: -------------------------------------------------------------------------------- 1 | from clang.cindex import Index, CursorKind 2 | from pprint import pprint 3 | import os, glob, inspect 4 | 5 | from optparse import OptionParser 6 | import clang.cindex 7 | import hashlib 8 | 9 | global conditionals 10 | global orig_file 11 | global mod_file 12 | global orig_lines 13 | global mod_lines 14 | 15 | def get_ifs_and_whiles(node): 16 | ch = [c for c in node.get_children()] 17 | if node.kind==CursorKind.IF_STMT or node.kind==CursorKind.WHILE_STMT or node.kind==CursorKind.DO_STMT or node.kind==CursorKind.FOR_STMT: 18 | conditionals.append(node) 19 | 20 | for c in ch: 21 | get_ifs_and_whiles(c) 22 | 23 | def get_children_list(node): 24 | ch_list = [] 25 | 26 | for c in node.get_children(): 27 | ch_list.append(c) 28 | 29 | return ch_list 30 | 31 | def get_compound_line(line_n): 32 | global orig_lines 33 | 34 | new_line = '{' + orig_lines[line_n-1].strip() + '}\n' 35 | 36 | return new_line 37 | 38 | def add_instrumentation(): 39 | global conditionals 40 | self_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 41 | if not self_path.endswith('/'): 42 | self_path += '/' 43 | 44 | for c in conditionals: 45 | if c.kind==CursorKind.DO_STMT: 46 | continue 47 | 48 | ch_list = get_children_list(c) 49 | 50 | for ch in ch_list[1:]: 51 | # Convert simple statements to compound statements 52 | if not ch.kind==CursorKind.COMPOUND_STMT and not c.kind==CursorKind.DO_STMT: 53 | mod_lines[ch.location.line - 1] = get_compound_line(ch.location.line) 54 | 55 | cur_line_n = ch.location.line-1 56 | # Add instrumentation near the opening brace 57 | while '{' not in mod_lines[cur_line_n]: 58 | cur_line_n += 1 59 | cur_line = mod_lines[cur_line_n] 60 | 61 | # Let Python script 
take care of logging 62 | instrumentation = 'system("python %s %s\\n");\n'%(self_path+'log_branch.py', hashlib.sha1(str(ch.location.file)+' '+str(ch.location.line)).hexdigest()) 63 | for i, char in enumerate(cur_line): 64 | if char=='{': 65 | break 66 | cur_line = cur_line[:i+1] + instrumentation + cur_line[i+1:] 67 | mod_lines[cur_line_n] = cur_line 68 | 69 | def run(c_src_filename): 70 | global conditionals, orig_file, mod_file 71 | global orig_lines, mod_lines 72 | index = Index.create() 73 | 74 | if not os.path.exists(c_src_filename): 75 | print('Problem opening file: ' + c_src_filename) 76 | return None 77 | 78 | orig_file = open(c_src_filename, 'r') 79 | mod_file = open(c_src_filename+'.instr', 'w+') 80 | 81 | orig_lines = orig_file.readlines() 82 | 83 | # Make the modified file the same as original file 84 | mod_lines = orig_lines 85 | 86 | tu = index.parse(c_src_filename) 87 | if not tu: 88 | print('Unable to load input') 89 | return None 90 | 91 | # Get list of all if, while and for statements 92 | get_ifs_and_whiles(tu.cursor) 93 | 94 | # Add instrumentation for all branching statements 95 | add_instrumentation() 96 | 97 | # Include stdio, just in case 98 | mod_lines = ['#include \n'] + mod_lines 99 | 100 | # Finally write it to the instrumented replacement file 101 | mod_file.writelines(mod_lines) 102 | 103 | print([c.location.line for c in conditionals]) 104 | 105 | if __name__=='__main__': 106 | global opts 107 | global conditionals 108 | 109 | conditionals = [] 110 | 111 | parser = OptionParser('usage: %prog [options] -c {source file name}') 112 | parser.add_option('-c', '--source', action='store', type='string', dest='c_src_filename', help='Source C filename') 113 | 114 | (opts, args) = parser.parse_args() 115 | c_src_filename = opts.c_src_filename 116 | 117 | run(c_src_filename) 118 | -------------------------------------------------------------------------------- /leagacy/log_branch.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-i4/macke/b096ee194b0861b6581fee140bdf16d8cc60b116/leagacy/log_branch.py -------------------------------------------------------------------------------- /leagacy/read_istats.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | 3 | def is_number(s): 4 | try: 5 | float(s) 6 | return True 7 | except ValueError: 8 | return False 9 | 10 | if __name__=='__main__': 11 | # folders = ['blocksort', 'compress', 'crctable', 'decompress', 'huffman', 'randtable'] 12 | folders = ['klee-last'] 13 | dir = '/home/ognawala/stonesoup/stonesoup-c-mc/bzip2/' 14 | funcs = {} 15 | 16 | for fold in folders: 17 | istats = open(dir+fold+'/run.istats', 'r') 18 | 19 | line = istats.readline() 20 | while line!='': # Read the entire file 21 | if line.startswith('fl=') and (dir in line): # Found a line with a file location 22 | line = istats.readline() 23 | while ('=' not in line) and line!='': 24 | line = istats.readline() 25 | if line=='': 26 | continue 27 | toks = line.split('=') 28 | while toks[0]=='fn': # Read all function in a file 29 | func_name = toks[1].strip() 30 | line = istats.readline() 31 | toks = line.split() 32 | while line!='' and is_number(toks[0]): # Read all the lines in a function 33 | if func_name not in list(funcs.keys()): 34 | funcs[func_name] = 0 35 | funcs[func_name] += int(toks[2]) # Incremention covered-instruction count for the function 36 | line = istats.readline() 37 | toks = line.split() 38 | if line!='': 39 | toks = line.split('=') 40 | else: 41 | toks = [''] 42 | continue # Next line already read. 
Don't read again 43 | line = istats.readline() 44 | 45 | # Funcs should be full by now 46 | 47 | print(funcs) 48 | 49 | -------------------------------------------------------------------------------- /leagacy/remove_global.py: -------------------------------------------------------------------------------- 1 | import glob, os, sys 2 | 3 | lower_types = ['char', 'void', 'boolean', 'int'] 4 | convert_to = ['char', 'void', 'bool', 'int'] 5 | 6 | def type_convert_lower(st): 7 | toks = st.split() 8 | st_cp = '' 9 | for i, t in enumerate(toks): 10 | if t.lower() in lower_types: 11 | toks[i] = t.lower() 12 | #ind = lower_types.index(t.lower()) 13 | #toks[i] = convert_to[ind] 14 | st_cp = st_cp + ' ' + toks[i] 15 | st_cp = st_cp + '\n' 16 | 17 | return st_cp 18 | 19 | def str_without_qual(st): 20 | toks = st.split() 21 | toks = toks[1:] 22 | st_cp = '' 23 | for t in toks: 24 | st_cp = st_cp + t + ' ' 25 | st_cp = st_cp + '\n' 26 | 27 | return st_cp 28 | 29 | if __name__=='__main__': 30 | dir_name = sys.argv[1] 31 | print('copying files in ' + dir_name) 32 | 33 | for src in glob.glob(dir_name+'/*.c'): 34 | fl = open(src, 'r') 35 | fl_cp = open(src+'.copy', 'w') 36 | 37 | # fl_cp.write('#include \n') 38 | for line in fl: 39 | toks = line.split() 40 | if len(toks)>1 and (toks[0]=='GLOBAL' or toks[0]=='LOCAL'): 41 | line_cp = str_without_qual(line) 42 | else: 43 | line_cp = line 44 | # line_cp = type_convert_lower(line_cp) 45 | 46 | fl_cp.write(line_cp) 47 | fl_cp.close() 48 | 49 | 50 | -------------------------------------------------------------------------------- /leagacy/second_klee_round.py: -------------------------------------------------------------------------------- 1 | import os, glob, sys 2 | import time 3 | from optparse import OptionParser, OptionGroup 4 | 5 | def get_target_info(dir_name): 6 | target_file = open(dir_name + 'target.info') 7 | 8 | content = target_file.readlines() 9 | 10 | target_info = [] 11 | i = 0 12 | while i=len(content): 16 | break 17 | 
target_line = content[i].strip() 18 | i += 1 19 | func_name = content[i].strip() 20 | i += 1 21 | 22 | unit_filename = target_file[:-2] + '_units/' + os.path.split(target_file)[1][:-2] + '_' + func_name + '.c.units.assert' 23 | print(unit_filename) # TODO: remove 24 | 25 | target_info.append((target_file.strip(), int(target_line.strip()), unit_filename)) 26 | if i>=len(content): 27 | break 28 | dum_empty_line = content[i] 29 | i += 1 30 | 31 | return target_info 32 | 33 | if __name__=='__main__': 34 | parser = OptionParser("usage: %prog -d {directory containing source files} -e {executable name}") 35 | parser.add_option('-d', '--dir', action='store', type='string', dest='dir', help='Source file directory path') 36 | parser.add_option('-e', '--exec', action='store', type='string', dest='executable', help='Name of executable generated by Makefile') 37 | (opts, args) = parser.parse_args() 38 | 39 | dir_name = opts.dir 40 | 41 | if not dir_name.endswith('/'): 42 | dir_name = dir_name + '/' 43 | 44 | klee_command = 'klee --simplify-sym-indices --write-cov --write-smt2s --output-module --max-memory=1000 --disable-inlining --optimize --use-forked-solver --use-cex-cache --libc=uclibc --posix-runtime --allow-external-sym-calls --only-output-states-covering-new --max-sym-array-size=4096 --max-instruction-time=%d. --max-time=%d. 
--watchdog --max-memory-inhibit=false --max-static-fork-pct=1 --max-static-solve-pct=1 --max-static-cpfork-pct=1 --switch-type=internal --randomize-fork --search=random-path --search=targeted-search --use-batching-search --batch-instructions=10000 '%(10, 120) 45 | klee_sym_args = ' --sym-args 0 2 100 --sym-files 1 100' 46 | 47 | targets_info_file = open(dir_name + 'target.info') 48 | 49 | targets_info = targets_info_file.readlines() 50 | 51 | exec_name = opts.executable 52 | 53 | i = 0 54 | while i 1: 31 | return splits[0], splits[1] 32 | return splits[0], "0" 33 | return "", "0" 34 | 35 | def has_stack_trace(self): 36 | """ Return true if stack trace data is found """ 37 | # Check for "#0" and "#1" 38 | return b"#0" in self.output and b"#1" in self.output 39 | 40 | def parse_asan_output(self): 41 | assert self.iserror 42 | 43 | lines = self.output.splitlines() 44 | for line in lines: 45 | # Get the first word after the "Sanitizer:" string on the line that contains "==ERROR:" 46 | if b"==ERROR:" in line: 47 | beginline = lines.index(line) 48 | description = line[line.find(b"Sanitizer:")+11:] 49 | description.strip() 50 | desc_parts = description.split(b' ') 51 | if (b'on' in description) and (b'on' in desc_parts): 52 | self.description = b' '.join(desc_parts[0:desc_parts.index(b'on')]).decode("utf-8") 53 | else: 54 | self.description = desc_parts[0].decode("utf-8", 'ignore').rstrip(':') 55 | 56 | self.stack = [] 57 | has_location = re.compile("^.*:[0-9]+:[0-9]+$") 58 | if self.has_stack_trace(): 59 | # line number and frame-number 60 | lno = beginline + 1 61 | fno = 0 62 | 63 | while b"#0" not in lines[lno]: 64 | lno += 1 65 | 66 | while lno < len(lines) and b"#%d" % fno in lines[lno]: 67 | words = lines[lno].strip().split(b' ') 68 | 69 | # function name is 4th word 70 | fname = words[3].decode("utf-8", 'ignore') 71 | 72 | # location is last word 73 | # careful because c++ names containing spaces (Relevant for asan functions) 74 | location = 
words[-1].decode("utf-8", 'ignore') 75 | # remove line offset for klee compatibility 76 | if has_location.match(location): 77 | location = location[:location.rfind(":")] 78 | 79 | # Exclude main for fuzzing to prevent false positives and handle bad stacktraces better (HACKY!) 80 | if fname != "main": 81 | self.stack.append((fname, location)) 82 | lno += 1 83 | fno += 1 84 | 85 | # Ignore errors where the argument was freed and gets freed (again) in our driver 86 | if (self.description.startswith("attempting double-free") and ((len(self.stack) > 2 and 87 | self.stack[1][0].startswith("__interceptor_") and self.stack[2][0].startswith("macke_fuzzer_driver")) or 88 | (len(self.stack) > 1 and self.stack[0][0].startswith("__interceptor_") and self.stack[1][0].startswith("macke_fuzzer_driver")))): 89 | self.iserror = False 90 | 91 | def convert_to_ktest(self, fuzzmanager, directory, testname, kleeargs = None): 92 | """ 93 | Creates a file .ktest and .ktest.err in directory 94 | Returns the name of the errfile 95 | """ 96 | ktestname = path.join(directory, testname + ".ktest") 97 | ktesterrorname = path.join(directory, testname + ".fuzz.err") 98 | 99 | if kleeargs is None: 100 | kleeargs = [] 101 | 102 | # Generate .ktest file 103 | fuzzmanager.run_ktest_converter(self.analyzedfunc, self.inputfile, ktestname, kleeargs) 104 | 105 | # Generate .ktest.err file 106 | errcontent = ("Error: " + self.description + "\n" 107 | + "File: " + self.file + "\n" 108 | + "line: " + self.line + "\n" 109 | # Dummy assembly.ll line 110 | + "assembly.ll line: 0\n" 111 | + "Stack:\n") 112 | # Now only add stack to errcontent 113 | for i in range(0, len(self.stack)): 114 | # The first number is the stack frame number + line number in assembly (here dummy 0) 115 | errcontent += "\t#" + str(i) + "0000000" 116 | # Function name + arguments - to be tested 117 | errcontent += " in " + self.stack[i][0] + " ()" 118 | # at location 119 | errcontent += " at " + self.stack[i][1] + "\n" 120 | 121 | f = 
open(ktesterrorname, "w") 122 | f.write(errcontent) 123 | f.close() 124 | 125 | return ktesterrorname 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /macke/CallGraph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class container for all call graph operations 3 | """ 4 | 5 | from os import path 6 | from pprint import pformat 7 | 8 | from . import llvm_wrapper 9 | 10 | 11 | class CallGraph: 12 | """ 13 | All information about the callgraph from a specific bitcode file 14 | """ 15 | 16 | def __init__(self, bitcodefile): 17 | assert path.isfile(bitcodefile) 18 | self.graph = llvm_wrapper.extract_callgraph(bitcodefile) 19 | self.topology = llvm_wrapper.list_all_funcs_topological(bitcodefile) 20 | 21 | def __contains__(self, item): 22 | return item in self.graph 23 | 24 | def __str__(self): 25 | return pformat(self.graph) 26 | 27 | def __getitem__(self, key): 28 | try: 29 | return self.graph[key] 30 | except KeyError: 31 | return None 32 | 33 | def is_symbolic_encapsulable(self, function): 34 | """ 35 | Checks, if a function can be encapsulated symbolically 36 | """ 37 | return (not self[function]['hasdoubleptrarg'] and 38 | not self[function]['hasfuncptrarg'] and 39 | not self[function]['isexternal']) 40 | 41 | def get_flattened_inverted_topology(self): 42 | """ 43 | Returns a sort of inverted topologically ordered list of all functions 44 | """ 45 | # Nested lists of circles and SCCs are simply flattened 46 | flattened = [] 47 | for topo in self.topology: 48 | if isinstance(topo, str): 49 | flattened.append(topo) 50 | else: 51 | flattened.extend(topo) 52 | return flattened 53 | 54 | def get_internal_functions(self): 55 | """ 56 | Returns a list of all internal functions in arbitrary order 57 | """ 58 | 59 | return [f for f, info in self.graph.items() if not info["isexternal"]] 60 | 61 | def list_symbolic_encapsulable(self, removemain=True): 62 | """ 63 | 
Returns a sort of inverted topologically ordered list of all function 64 | names, that can be symbolically encapsulated by MACKE 65 | """ 66 | flattened = self.get_flattened_inverted_topology() 67 | return [t for t in flattened if (self.is_symbolic_encapsulable(t) or ( 68 | not removemain and t == "main"))] 69 | 70 | def group_independent_calls(self, removemain=True): 71 | """ 72 | Returns a topologically ordered list of (caller, callee)-tuples 73 | nested in sublists, that can be analyzed in parallel processes 74 | """ 75 | 76 | # Probably the result of this method is not the optimal solution 77 | # considering the number parallel executable pairs. But I don't 78 | # know a better algorithm to generate them. Maybe later ... 79 | 80 | units = self.group_independent_callees() 81 | 82 | # Convert the unit list of functions to a list of callers 83 | result = [] 84 | for unit in units: 85 | pairs = [] 86 | for callee in unit: 87 | for caller in self[callee]['calledby']: 88 | if ((not removemain and caller == "main") or 89 | (self.is_symbolic_encapsulable(caller))): 90 | pairs.append((caller, callee)) 91 | if pairs: 92 | result.append(sorted(pairs)) 93 | 94 | # (partially) assert correctness of the result 95 | for res in result: 96 | assert res 97 | callers, callees = set(), set() 98 | for (caller, callee) in res: 99 | if caller != callee: 100 | callers.add(caller) 101 | callees.add(callee) 102 | assert callers.isdisjoint(callees) 103 | 104 | return result 105 | 106 | def group_independent_callees(self): 107 | """ 108 | Group the topological ordered function list in independent units 109 | """ 110 | units = [] 111 | independent = set() 112 | earlier_calls = set() 113 | 114 | for topo in self.topology: 115 | if isinstance(topo, str): 116 | if topo in earlier_calls: 117 | # Add all function, that are called earlier 118 | if independent: 119 | units.append(sorted(list(independent))) 120 | # And restart the search 121 | independent = set() 122 | earlier_calls = set() 123 | 
124 | # Mark this function as indepent 125 | independent.add(topo) 126 | # Mark all function called by now 127 | earlier_calls |= set(self[topo]['calledby']) 128 | 129 | else: 130 | # Add all previous independent functions 131 | if independent: 132 | units.append(sorted(list(independent))) 133 | independent = set() 134 | 135 | # Split each part of a scc in a separate run 136 | for arc in sorted(topo): 137 | units.append([arc]) 138 | 139 | # Add all remaining elements 140 | if independent: 141 | units.append(list(independent)) 142 | 143 | return units 144 | 145 | def get_functions_with_no_caller(self): 146 | """ 147 | Returns a set with all functions, that do not have any caller 148 | """ 149 | return {func for func in self.get_flattened_inverted_topology() 150 | if not self[func]["calledby"]} 151 | -------------------------------------------------------------------------------- /macke/Error.py: -------------------------------------------------------------------------------- 1 | """ 2 | Storage wrapper for Errors found by KLEE 3 | """ 4 | from collections import OrderedDict 5 | from functools import total_ordering 6 | from os import path 7 | 8 | from .StackTrace import StackTrace 9 | 10 | @total_ordering 11 | class Error: 12 | program_functions = [] 13 | 14 | def set_program_functions(program_functions): 15 | Error.program_functions = program_functions 16 | 17 | def get_function_name(function): 18 | if function not in Error.program_functions and '.' 
in function: 19 | # remove everything after the dot 20 | tmp = function[:function.index('.')] 21 | if tmp in Error.program_functions: 22 | return tmp 23 | return function 24 | 25 | """ 26 | Container class for all information about errors found by KLEE 27 | """ 28 | def __init__(self, errfile, entryfunction): 29 | entryfunction = Error.get_function_name(entryfunction) 30 | # Store the function, that was used as an entry point on the test case 31 | self.entryfunction = entryfunction 32 | 33 | # Store the path and name of the .err-file generate by KLEE 34 | self.errfile = errfile 35 | 36 | # Store the path and name of the corresponding .ktest-file 37 | self.ktestfile = get_corresponding_ktest(errfile) 38 | 39 | # Store the reason for the error 40 | self.reason = get_reason_for_error(errfile) 41 | 42 | # Store an identifier for the vulnerable instruction "file:line" 43 | self.vulnerable_instruction = get_vulnerable_instruction(errfile) 44 | 45 | # Store the stack trace for comparison 46 | self.stacktrace = get_stacktrace(errfile, entryfunction) 47 | 48 | def __eq__(self, other): 49 | return ((self.entryfunction, self.errfile, self.reason, 50 | self.vulnerable_instruction) == 51 | (other.entryfunction, other.errfile, other.reason, 52 | other.vulnerable_instruction)) 53 | 54 | def __lt__(self, other): 55 | return ((self.vulnerable_instruction, self.entryfunction, 56 | self.errfile) < (other.vulnerable_instruction, 57 | other.entryfunction, other.errfile)) 58 | 59 | def __str__(self): 60 | return "<%s, %s, %s, %s>" % ( 61 | self.entryfunction, self.errfile, self.reason, 62 | self.vulnerable_instruction) 63 | 64 | def __repr__(self): 65 | return "" % ( 66 | self.entryfunction, self.errfile, self.reason, 67 | self.vulnerable_instruction) 68 | 69 | def is_blacklisted(self): 70 | """ 71 | Exclude some error reasons, that are not helpful for further analysis 72 | """ 73 | # klee_get_obj_size can be removed, if KLEE fixes bug #458 74 | # See: 
https://github.com/klee/klee/issues/458 75 | return "klee_get_obj_size" in self.reason 76 | 77 | def as_ordered_dict(self): 78 | """ Get all informations about this error in an ordered dict """ 79 | return OrderedDict([ 80 | ("entryfunction", self.entryfunction), 81 | ("errfile", self.errfile), 82 | ("ktestfile", self.ktestfile), 83 | ("reason", self.reason), 84 | ("vulnerableInstruction", self.vulnerable_instruction), 85 | ]) 86 | 87 | 88 | def get_corresponding_kleedir(errfile): 89 | """ Get the path of the corresponding klee directory """ 90 | return path.dirname(errfile) 91 | 92 | 93 | def get_corresponding_kleedir_name(errfile): 94 | """ Get the name of the corresponding klee directory """ 95 | return path.basename(path.dirname(errfile)) 96 | 97 | 98 | def get_corresponding_ktest(errfile): 99 | """ Get the corresponding ktest file for a .err file """ 100 | assert errfile.endswith(".err") 101 | assert errfile.count(".") >= 2 102 | 103 | # some/path/test123456.type.err 104 | return errfile[:errfile[:-4].rfind(".")] + ".ktest" 105 | 106 | 107 | def get_reason_for_error(errfile): 108 | """ Extract the reason for an error from a .err file """ 109 | assert path.isfile(errfile) 110 | 111 | with open(errfile, "r", errors='ignore') as file: 112 | reason = file.readline() 113 | # The reason starts with "Error: " 114 | return reason[len("Error: "):].strip() 115 | return "" 116 | 117 | 118 | def get_vulnerable_instruction(errfile): 119 | """ Extract the vulnerable instruction "file:line" from a .err file """ 120 | assert path.isfile(errfile) 121 | 122 | with open(errfile, "r", errors='ignore') as file: 123 | # The first line contains the reason - irrelevant for vuln inst 124 | file.readline() 125 | 126 | # Check whether klee info is existent and use it in case there is no stack trace 127 | nextline = file.readline().strip() 128 | if nextline.startswith("File: "): 129 | klee_flinfo_exists = True 130 | filenameline = nextline[len("File: "):] 131 | linenumline = 
int(file.readline().strip()[len("line: "):]) 132 | else: 133 | klee_flinfo_exists = False 134 | 135 | for line in file: 136 | if line.startswith('Stack:'): 137 | break 138 | 139 | for line in file: 140 | if line.startswith('Info:'): 141 | break 142 | words = line.strip().split(' ') 143 | 144 | # function name is 3th word 145 | fname = words[2] 146 | 147 | # Don't use external functions as vulnerable instruction 148 | if fname not in Error.program_functions: 149 | continue 150 | 151 | # Don't put __macke_error as vulnerable instruction 152 | if fname.startswith("__macke_error_"): 153 | continue 154 | 155 | # location is the last word 156 | location = words[-1] 157 | 158 | # The location is already in the format filename:line, thus use it directly 159 | return location 160 | 161 | if klee_flinfo_exists: 162 | return "%s:%s" % (filenameline, linenumline) 163 | return "" 164 | 165 | 166 | def get_stacktrace(errfile, entryfunction): 167 | """ Extract the relevant parts of the stack trace from a .err file """ 168 | assert path.isfile(errfile) 169 | 170 | with open(errfile, 'r', errors='ignore') as err: 171 | for line in err: 172 | if line.startswith('Stack:'): 173 | break 174 | 175 | stack = [] 176 | for line in err: 177 | if line.startswith('Info:'): 178 | break 179 | words = line.strip().split(' ') 180 | 181 | # function name is 3th word 182 | fname = Error.get_function_name(words[2]) 183 | 184 | # Don't continue after driver to rule out possible main false positives due to weird asan errors 185 | if fname.startswith("macke_fuzzer_driver"): 186 | break 187 | 188 | # Don't put external functions in stack trace 189 | if fname not in Error.program_functions: 190 | continue 191 | 192 | # Don't put __macke_error helper functions in stack trace 193 | if fname.startswith("__macke_error_"): 194 | continue 195 | 196 | 197 | # location is last word 198 | location = words[-1] 199 | stack.append((fname, location)) 200 | return StackTrace(stack, entryfunction) 201 | 
-------------------------------------------------------------------------------- /macke/ErrorChain.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for error chain reconstruction 3 | """ 4 | 5 | 6 | class ErrorChain: 7 | def __init__(self, error): 8 | self.trace = error.stacktrace 9 | self.found_errors = [error] 10 | self.head_errors = [error] 11 | 12 | def get_support(self): 13 | return len(self.found_errors) 14 | 15 | def get_depth(self): 16 | return self.trace.get_depth() 17 | 18 | def get_num_user_funcs(self, user_funcs): 19 | return len(set(fun for fun, loc in self.trace.stack if fun in user_funcs)) 20 | 21 | def filtered_trace(self, user_funcs): 22 | return list(filter(lambda f : f[0] in user_funcs, self.trace.stack)) 23 | 24 | def get_head_errors(self): 25 | return self.head_errors 26 | 27 | def get_vulnerable_instruction(self): 28 | return self.head_errors[0].vulnerable_instruction 29 | 30 | def error_matches(self, error): 31 | return error.stacktrace.is_contained_in(self.trace) or self.trace.is_contained_in(error.stacktrace) 32 | 33 | def add_error(self, error): 34 | assert self.error_matches(error) 35 | edepth = error.stacktrace.get_depth() 36 | sdepth = self.get_depth() 37 | self.found_errors.append(error) 38 | if edepth == sdepth: 39 | self.head_errors.append(error) 40 | elif edepth > sdepth: 41 | self.head_errors.clear() 42 | self.head_errors = [error] 43 | self.trace = error.stacktrace 44 | return True 45 | return False 46 | 47 | def reconstruct_all_error_chains(errorregistry, callgraph): 48 | """ 49 | Calculate a dict of vulnerable instruction => [error chains] 50 | """ 51 | 52 | result = dict() 53 | for vulninst, errorlist in errorregistry.forvulninst.items(): 54 | # Get all chains for this vulnerable instruction 55 | chains = reconstruct_error_chain(errorlist, callgraph) 56 | 57 | # Sort by chain length and then alphabetically 58 | result[vulninst] = sorted(chains, key=lambda x: (-len(x), 
"@".join(x))) 59 | 60 | return result 61 | 62 | 63 | ### LEGACY 64 | ##def reconstruct_all_error_chains(errorregistry, callgraph): 65 | ## """ 66 | ## Calculate a dict of vulnerable instruction => [error chains] 67 | ## """ 68 | ## 69 | ## result = dict() 70 | ## for vulninst, errorlist in errorregistry.forvulninst.items(): 71 | ## # Get all chains for this vulnerable instruction 72 | ## chains = reconstruct_error_chain(errorlist, callgraph) 73 | ## 74 | ## # Sort by chain length and then alphabetically 75 | ## result[vulninst] = sorted(chains, key=lambda x: (-len(x), "@".join(x))) 76 | ## 77 | ## return result 78 | ## 79 | ## 80 | ##def reconstruct_error_chain(errorlist, callgraph): 81 | ## """ 82 | ## Calculate a list of all error chains inside a given errorlist 83 | ## """ 84 | ## # Collect a set of affected functions 85 | ## affected = set({error.entryfunction for error in errorlist}) 86 | ## 87 | ## # Find all heads of the chains 88 | ## chains = [[fun] for fun in affected if not any( 89 | ## call in affected for call in callgraph[fun]['calls'])] 90 | ## 91 | ## # If we don't find a head for the error, it must be a circle 92 | ## if not chains: 93 | ## # A bit hacky, because we might circles over multiple functions 94 | ## chains = [[fun] for fun in affected if not any( 95 | ## call in affected and call != fun 96 | ## for call in callgraph[fun]['calls'])] 97 | ## 98 | ## # Extent all chains, until all callers are no longer infected 99 | ## for calleelist in callgraph.group_independent_callees(): 100 | ## # Create storage for old and new chains 101 | ## oldchains = list(chains) 102 | ## newchains = list() 103 | ## 104 | ## # Try to continue each chain 105 | ## for chain in oldchains: 106 | ## extended = False 107 | ## if chain[-1] in calleelist: 108 | ## for nextnode in callgraph[chain[-1]]['calledby']: 109 | ## if nextnode in affected: 110 | ## newchains.append(chain[:] + [nextnode]) 111 | ## extended = True 112 | ## 113 | ## if not extended: 114 | ## # Leave 
the current chain untouched 115 | ## newchains.append(chain[:]) 116 | ## chains = newchains 117 | ## return chains 118 | -------------------------------------------------------------------------------- /macke/ErrorRegistry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Registry for Errors found by KLEE 3 | """ 4 | from os import listdir, path 5 | 6 | from .constants import ERRORFILEEXTENSIONS 7 | from .Error import Error 8 | from .ErrorChain import ErrorChain 9 | 10 | 11 | class ErrorRegistry: 12 | """ 13 | A registry for errors found by KLEE, that allows quick access and filters 14 | """ 15 | 16 | def __init__(self): 17 | # Initialize some hash tables for quick access 18 | # Note: Python stores pointers to objects, not copies of the objects 19 | self.forfunction = dict() 20 | self.forvulninst = dict() 21 | self.forerrfile = dict() 22 | self.mackeforerrfile = dict() 23 | self.list_forerrfile = dict() 24 | 25 | self.forheadstackentry = dict() 26 | 27 | self.errorcounter = 0 28 | self.mackerrorcounter = 0 29 | self.fuzzpropagated = 0 30 | self.fuzzinstpropagated = set() 31 | 32 | self.errorchains = [] 33 | 34 | def count_chains(self): 35 | return len(self.errorchains) 36 | 37 | def get_chains(self): 38 | return self.errorchains 39 | 40 | def create_from_dir(self, kleedir, entryfunction): 41 | """ register all errors from directory """ 42 | try: 43 | assert path.isdir(kleedir) 44 | 45 | for file in listdir(kleedir): 46 | if any(file.endswith(ext) for ext in ERRORFILEEXTENSIONS): 47 | self.create_entry(path.join(kleedir, file), entryfunction) 48 | except AssertionError: 49 | print("%s is not a directory"%(kleedir)) 50 | 51 | def create_entry(self, errfile, entryfunction): 52 | """ Create a new error and add it to the registry """ 53 | err = Error(errfile, entryfunction) 54 | 55 | if not err.is_blacklisted(): 56 | self.register_error(err) 57 | 58 | def build_chain_for_error(self, error): 59 | """ 60 | Used when error 
matched no errorchain, thus create a new one 61 | look for all lower-level errors that also match in the new chain 62 | """ 63 | new_chain = ErrorChain(error) 64 | for index in new_chain.trace.get_indices(): 65 | if index not in self.forheadstackentry: 66 | continue 67 | for e in self.forheadstackentry[index]: 68 | if new_chain.error_matches(e): 69 | new_chain.add_error(e) 70 | self.errorchains.append(new_chain) 71 | return new_chain 72 | 73 | def add_to_chains(self, error): 74 | added = False 75 | for chain in self.errorchains: 76 | if chain.error_matches(error): 77 | increased = chain.add_error(error) 78 | added = True 79 | if not added: 80 | self.build_chain_for_error(error) 81 | 82 | 83 | def register_error(self, error): 84 | """ register an existing error """ 85 | 86 | if error.stacktrace.get_depth() == 0: 87 | print("Error with empty stack: " + error.errfile) 88 | return 89 | 90 | if error.errfile.endswith(".macke.err"): 91 | # Find the prepended error 92 | # "ERROR FROM /path/test0000001.ptr.err" 93 | testfrom = error.reason[len("ERROR FROM "):].strip() 94 | 95 | # Exclude all MACKE errors based on black listed errors 96 | if testfrom not in self.forerrfile: 97 | print("testfrom not found...: " + testfrom) 98 | print("error.errfile: " + error.errfile) 99 | return 100 | 101 | 102 | self.mackerrorcounter += 1 103 | add_to_listdict(self.mackeforerrfile, testfrom, error) 104 | 105 | # Propagate information about the vulnerable instruction 106 | preverr = self.forerrfile[testfrom] 107 | error.vulnerable_instruction = preverr.vulnerable_instruction 108 | error.stacktrace.prepend(preverr.stacktrace) 109 | 110 | 111 | if testfrom.endswith(".fuzz.err"): 112 | self.fuzzpropagated += 1 113 | self.fuzzinstpropagated.add(error.vulnerable_instruction) 114 | 115 | add_to_listdict(self.forfunction, error.entryfunction, error) 116 | self.forerrfile[error.errfile] = error 117 | self.errorcounter += 1 118 | 119 | add_to_listdict(self.forvulninst, error.vulnerable_instruction, 
error) 120 | self.add_to_chains(error) 121 | add_to_listdict(self.forheadstackentry, error.stacktrace.get_head_index(), error) 122 | 123 | def count_vulnerable_instructions(self): 124 | """ 125 | Count the number of vulnerable instructions stored in the registry 126 | """ 127 | return len(self.forvulninst) 128 | 129 | def count_fuzz_vulnerable_instructions(self): 130 | """ 131 | Count the number of vulnerable instructions stored in the registry, that were found 132 | by at least one .fuzz.err 133 | """ 134 | return len(list(filter(lambda x : len(list(filter(lambda e : e.errfile.endswith(".fuzz.err"), self.forvulninst[x]))) > 0, self.forvulninst))) 135 | 136 | def count_functions_with_errors(self): 137 | """ 138 | Count the number of functions with at least one error in the registry 139 | """ 140 | return len(self.forfunction) 141 | 142 | def get_all_vulninst_for_func(self, function): 143 | """ 144 | Returns a set of all vulnerable instructions for a given function 145 | """ 146 | if function not in self.forfunction: 147 | return set() 148 | 149 | result = set() 150 | for error in self.forfunction[function]: 151 | result.add(error.vulnerable_instruction) 152 | 153 | return result 154 | 155 | def get_all_errors_for_func(self, function): 156 | """ 157 | Returns a set of all errors for a given function 158 | """ 159 | if function not in self.forfunction: 160 | return set() 161 | 162 | return self.forfunction[function] 163 | 164 | def to_prepend_in_phase_two(self, caller, callee, exclude_known=True): 165 | """ 166 | Returns a set of .err-files, that should be prepended to callee for the 167 | analysis from caller. 
def add_to_listdict(dictionary, key, value):
    """ Add an entry to a dictionary of lists """

    # Create slot for key, if this is the first entry for the key
    if key not in dictionary:
        dictionary[key] = []
    dictionary[key].append(value)


"""
Logging used in debug mode
"""

import sys
import os
import time
import shutil
import tempfile


class Logger:
    """
    Logs data in debug mode
    """
    VERBOSITY_LEVELS = ["none", "error", "warning", "info", "debug"]
    accepted_verbosity_levels = []
    log_file = None
    log_filename = None

    @staticmethod
    def open(verbosity_level="error", filename=None):
        """
        Configure the accepted verbosity levels and the log destination.
        Unknown levels fall back to 'info' (with a warning on stderr).
        """
        if verbosity_level not in Logger.VERBOSITY_LEVELS:
            sys.stderr.write(
                "warning: Unaccepted verbosity level. Defaulting to 'info'\n")
            index = Logger.VERBOSITY_LEVELS.index("info")
        else:
            index = Logger.VERBOSITY_LEVELS.index(verbosity_level)

        Logger.accepted_verbosity_levels = Logger.VERBOSITY_LEVELS[: index + 1]

        if filename:
            Logger.log_filename = filename
        else:
            # write to stdout
            Logger.log_filename = "STDOUT"

    @staticmethod
    def openfile():
        """ (Re)open the log target; "STDOUT"/unset means sys.stdout. """
        if (not Logger.log_filename) or Logger.log_filename == "STDOUT":
            Logger.log_file = sys.stdout
        else:
            Logger.log_file = open(Logger.log_filename, "a+")

    @staticmethod
    def log(message: str, verbosity_level="info"):
        """
        Write message (prefixed with level and pid) if the level is accepted.
        The file is reopened and closed per call, so concurrent processes
        can share one log file.
        """
        Logger.openfile()
        if verbosity_level in Logger.accepted_verbosity_levels:
            # FIX: was `verbosity_level is not "info"` — identity comparison
            # with a string literal only works by interning accident.
            if verbosity_level != "info":
                Logger.log_file.write("(" + verbosity_level + ") ")
            Logger.log_file.write("[" + str(os.getpid()) + "]: ")
            Logger.log_file.write(message)
        Logger.close()

    @staticmethod
    def close():
        """ Close the log file unless it is stdout. """
        if Logger.log_file is not sys.stdout:
            Logger.log_file.close()


class PlotDataLogger:
    '''
    Logs plot data for KLEE and AFL rounds
    '''

    def __init__(self, output_dir: str, klee_output_dir: str,
                 fuzzer_output_dir: str):
        self.output_dir = output_dir
        self.klee_output_dir = klee_output_dir
        self.fuzzer_output_dir = fuzzer_output_dir

        # We let KLEE generate its own directories
        if self.klee_output_dir != self.fuzzer_output_dir:
            if not os.path.exists(fuzzer_output_dir):
                os.makedirs(fuzzer_output_dir)

        # timestamp -> list of (tool, file, line) tuples
        self.coverage_list = {}
        # timestamps already flushed to coverage.log
        self.written_coverage = []

    def write_coverage(self):
        """
        Append all not-yet-written coverage snapshots to coverage.log,
        ordered by timestamp.
        """
        sorted_keys = sorted(self.coverage_list.keys())
        Logger.log("Writing coverage in " + self.output_dir +
                   "/coverage.log\n", verbosity_level="debug")
        # FIX: use a context manager so the handle is closed on exceptions
        with open(os.path.join(self.output_dir, "coverage.log"),
                  "a+") as coverage_file:
            for stamp in sorted_keys:
                if stamp in self.written_coverage:
                    continue
                for tup in self.coverage_list[stamp]:
                    coverage_file.write("%s, %s, %s, %d\n" % (
                        time.ctime(stamp), tup[0], tup[1], tup[2]))
                self.written_coverage.append(stamp)
85 | if s in self.written_coverage: 86 | continue 87 | for tup in self.coverage_list[s]: 88 | coverage_file.write("%s, %s, %s, %d\n" % (time.ctime(s), tup[0], tup[1], tup[2])) 89 | self.written_coverage.append(s) 90 | coverage_file.close() 91 | 92 | def log_fuzzer_progress(self, coverage: dict): 93 | #Logger.log ("Logging fuzzer progress: " + str(coverage) + "\n", verbosity_level="debug") 94 | new_covered = [] 95 | 96 | for filename in coverage: 97 | for line_no in coverage[filename]["covered"]: 98 | new_covered.append(("AFL", os.path.basename(filename), line_no)) 99 | 100 | #Logger.log("New covered: " + str(new_covered) + "\n", verbosity_level="debug") 101 | 102 | self.coverage_list[time.time()] = new_covered 103 | self.write_coverage() 104 | 105 | """ 106 | def log_fuzzer_progress(self): 107 | if not os.path.exists(os.path.join(self.fuzzer_output_dir, "cov/id-delta-cov")): 108 | return [] 109 | 110 | coverage_file = open(os.path.join(self.fuzzer_output_dir, "cov/id-delta-cov"), "r") 111 | new_covered = [] 112 | 113 | for line in coverage_file: 114 | #Logger.log("delta coverage line: " + line + "\n", verbosity_level="debug") 115 | if line.startswith("#"): 116 | continue 117 | fields = line.strip().split(", ") 118 | 119 | if not fields[2].startswith(self.PREFIXES[1]): 120 | continue 121 | 122 | if not fields[3]=="line": 123 | continue 124 | #file_name = fields[2].split(self.PREFIXES[1])[-1] 125 | file_name = fields[2].strip() 126 | line_no = int(fields[4]) 127 | #Logger.log("Logging fuzzer progress line " + line + "\n", verbosity_level="debug") 128 | 129 | if not(any([ (("AFL", os.path.basename(file_name), line_no) in v or 130 | ("KLEE", os.path.basename(file_name), line_no) in v) for v in self.coverage_list.values() ])): 131 | new_covered.append(("AFL", os.path.basename(file_name), line_no)) 132 | #else: 133 | # Logger.log(line + "already in " + str(self.coverage_list.values()) + "\n", verbosity_level="debug") 134 | 135 | self.coverage_list[time.time()] = 
new_covered 136 | self.write_coverage() 137 | """ 138 | def log_klee_coverage(self): 139 | new_covered = [] 140 | 141 | while ( 142 | not os.path.exists( 143 | os.path.join(self.klee_output_dir, "run.istats"))): # Klee should have at least done something 144 | continue 145 | 146 | tmp_istats_dir = tempfile.mkdtemp() 147 | os.system("cp " + os.path.join(os.path.join(self.klee_output_dir, "run.istats") + " " + tmp_istats_dir)) 148 | covered = self.parse_run_istats(os.path.join(tmp_istats_dir, "run.istats")) 149 | 150 | #Logger.log("log_klee_coverage: " + str(covered.keys()) + "\n", verbosity_level="debug") 151 | 152 | for k in covered.keys(): 153 | if not k: 154 | #Logger.log("log_klee_coverage: covered keys contains None\n", verbosity_level="warning") 155 | continue 156 | Logger.log("klee covered " + k + "\n", verbosity_level="debug") 157 | file_name = os.path.basename(k) 158 | for src in covered[k].keys(): 159 | line_no = src 160 | 161 | if not (any([(("AFL", os.path.basename(file_name), line_no) in v or 162 | ("KLEE", os.path.basename(file_name), line_no) in v) for v in 163 | self.coverage_list.values()])): 164 | new_covered.append(("KLEE", file_name, line_no)) 165 | 166 | shutil.rmtree(tmp_istats_dir) 167 | 168 | """ 169 | for f in glob.glob(klee_out_dir+"/*.cov"): 170 | cov_file = open(f, "r") 171 | for line in cov_file: 172 | #file_name = line.strip().split(":")[0].split(self.PREFIXES[0])[-1] 173 | file_name = line.strip().split(":")[0].strip() 174 | line_no = int(line.strip().split(":")[-1]) 175 | if not(any([ (("AFL", os.path.basename(file_name), line_no) in v or ("KLEE", os.path.basename(file_name), line_no) in v) for v in self.coverage_list.values() ])): 176 | new_covered.append(("KLEE", os.path.basename(file_name), line_no)) 177 | """ 178 | self.coverage_list[time.time()] = new_covered 179 | self.write_coverage() 180 | 181 | def parse_run_istats(self, istats_file): 182 | istats = open(istats_file) 183 | 184 | covered = {} 185 | 186 | cur_file = None 187 | 
class StackTrace:
    """
    A stack trace as a list of (function, location) frames, truncated at the
    entry function of the analysis that produced it.
    """

    def __init__(self, stack, entryfunc):
        # Store the entry func, this is needed for prepending a stack
        self.entryfunction = entryfunc

        # Index of the first frame belonging to entryfunc; equals
        # len(stack) if entryfunc does not occur (then nothing is cut off).
        self.entryFrame = 0
        for (fname, _) in stack:
            if fname == entryfunc:
                break
            self.entryFrame += 1

        # Keep only the frames up to and including the entry function
        self.stack = stack[:self.entryFrame + 1]

    def __eq__(self, other):
        return self.stack == other.stack

    def __str__(self):
        return str(self.stack)

    def is_contained_in(self, other):
        """ True, if self's frames are a prefix of other's frames """
        slen = len(self.stack)
        olen = len(other.stack)
        if slen > olen:
            return False

        for i in range(slen):
            if self.stack[i] != other.stack[i]:
                return False
        return True

    def prepend(self, other):
        """
        Prepend the other stacktrace to self.
        This function assumes, that the self stack has a call to
        other.entryfunction.
        """
        # FIX: removed dead locals (call_pos, old_stack) that were assigned
        # but never used.
        self.stack = other.stack[:other.entryFrame + 1] + self.stack

    def get_depth(self):
        return len(self.stack)

    def get_indices(self):
        """
        Return a list of (depth, function, location), representing each
        entry. This tuple can be hashed to find possible candidates.
        Note, that the last call has depth 0, so that it does not change,
        when prepending stacktraces.
        """
        ret = []
        cur_depth = 0
        for (func, loc) in self.stack:
            ret.append((cur_depth, func, loc))
            cur_depth += 1
        assert cur_depth == self.get_depth()
        return ret

    def get_head(self):
        """ The last (entry-function) frame of the trace """
        return self.stack[-1]

    def get_head_index(self):
        """ The (depth, function, location) index of the head frame """
        func, loc = self.get_head()
        depth = self.get_depth()
        return (depth - 1, func, loc)
def main():
    """ Entry point to run this analysis stand alone """
    generic_main(
        "Extract informations about all AFL runs that aborted in a MACKE run",
        "The AFL aborts were stored in %s",
        "aflabort.json", aflabort
    )

if __name__ == '__main__':
    main()


"""
Details about the error chains found by a MACKE run
"""
from collections import OrderedDict
from os import path
from statistics import mean, stdev

from ..CallGraph import CallGraph
from ..ErrorChain import reconstruct_all_error_chains
from .helper import generic_main, get_error_registry_for_mackedir


def chains(macke_directory):
    """
    Extract the information about the error chains as an OrderedDict
    """
    clg = CallGraph(path.join(macke_directory, "bitcode", "program.bc"))
    registry = get_error_registry_for_mackedir(macke_directory, clg)

    funcs = set(clg.get_flattened_inverted_topology())

    errchains = registry.get_chains()

    # Group chains by vulnerable instruction; keep only the function names
    # of each trace and drop duplicates.
    detail_dict = dict()
    for chain in errchains:
        vulninst = chain.get_vulnerable_instruction()
        if vulninst not in detail_dict:
            detail_dict[vulninst] = []
        fchain = [entry[0] for entry in chain.filtered_trace(funcs)]
        if fchain not in detail_dict[vulninst]:
            detail_dict[vulninst].append(fchain)

    # FIX: loop variable used to shadow this function's own name ("chains")
    for chainlist_for_inst in detail_dict.values():
        chainlist_for_inst.sort(key=lambda x: (-len(x), "@".join(x)))

    chainlengths = [len(set(chain))
                    for _, chainlist in detail_dict.items()
                    for chain in chainlist]

    # Calculate old 1-level-up statistic
    onelevelup = 0
    for caller in clg.get_flattened_inverted_topology():
        for callee in clg[caller]['calls']:
            onelevelup += len(
                registry.get_all_vulninst_for_func(caller) &
                registry.get_all_vulninst_for_func(callee))

    # Count phase ends
    endphaseone, endphasetwo = 0, 0
    for vulninst, chainlist in detail_dict.items():
        for chain in chainlist:
            # candidaterror are all errors, that end this chain
            # normally, this is just one error, but circles can have several
            try:
                if any(candidaterror.vulnerable_instruction == vulninst and
                       candidaterror.errfile.endswith(".macke.err")
                       for candidaterror in registry.forfunction[chain[-1]]):
                    endphasetwo += 1
                else:
                    endphaseone += 1
            except KeyError:
                print("Exception: %s not in registry for some reason. Skipping." % (chain[-1]))

    result = OrderedDict([
        ("count", len(chainlengths)),
        ("length", OrderedDict([
            ("min", min(chainlengths) if chainlengths else -1),
            ("max", max(chainlengths) if chainlengths else -1),
            ("avg", mean(chainlengths) if chainlengths else -1),
            # NOTE(review): `> 2` also returns -1 for exactly two samples,
            # although statistics.stdev accepts two — kept as-is.
            ("std", stdev(chainlengths) if len(chainlengths) > 2 else -1),
        ])),
        ("1-level-up", onelevelup),
        ("longerthanone", len([True for c in chainlengths if c > 1])),
        ("end-found-by-phase-one", endphaseone),
        ("end-found-by-phase-two", endphasetwo),
        ("detail", detail_dict)
    ])
    return result


def main():
    """ Entry point to run this analysis stand alone """
    generic_main(
        "Details about the error chains found by a MACKE run",
        "The details about the error chains were stored in %s",
        "chains.json", chains
    )

if __name__ == '__main__':
    main()


"""
One main function, that just calls all analysis
"""
from .chains import main as chains
from .functions import main as functions
from .helper import arg_parse_mackedir
from .kleecrash import main as kleecrash
from .aflabort import main as aflabort
from .linecoverage import main as linecoverage
from .partial import main as partial
from .runtime import main as runtime
from .vulninsts import main as vulninsts


def main():
    """
    One function, that calls all analysis functions and thereby generates
    multiple jsons inside a MACKE directory
    """
    # Parse the arguments and give corresponding -h information
    arg_parse_mackedir("Adds lots of analyzes to a MACKE directory")

    # Just call all mains from all analyzes scripts
    chains()
    functions()
    kleecrash()
    aflabort()
    linecoverage()
    partial()
    runtime()
    vulninsts()

if __name__ == '__main__':
    main()
"""
Collect information about the functions in program.bc
"""
from collections import OrderedDict
from os import path

from ..CallGraph import CallGraph
from .helper import generic_main, get_error_registry_for_mackedir


def functions(macke_directory):
    """
    Extract all informations about the functions of a program as an
    OrderedDict
    """
    clg = CallGraph(path.join(macke_directory, "bitcode", "program.bc"))

    totalfunccount = sum(1 for _, info in clg.graph.items()
                         if not info["isexternal"])
    symenccount = sum(1 for func in clg.graph
                      if clg.is_symbolic_encapsulable(func))

    registry = get_error_registry_for_mackedir(macke_directory, clg)

    result = OrderedDict([
        ("totalfunctioncount", totalfunccount),
        ("symbolicencapsulable", symenccount),
        ("erroneousfunctioncount", registry.count_functions_with_errors()),
        # sorted(dict) iterates the keys directly — no comprehension needed
        ("functionswitherrors", sorted(registry.forfunction))
    ])
    return result


def main():
    """ Entry point to run this analysis stand alone """
    generic_main(
        "Collect informations about functions in a MACKE run",
        "The function analysis was stored in %s",
        "functions.json", functions
    )

if __name__ == '__main__':
    main()


"""
Some helping functions to reduce the duplicate code for stand alone evaluation
"""
import argparse
import json
from collections import OrderedDict
from os import path, listdir

from ..ErrorRegistry import ErrorRegistry

from ..Error import Error


def arg_parse_mackedir(description):
    """
    Parse command line arguments for macke directory
    """
    parser = argparse.ArgumentParser(
        description=description
    )
    parser.add_argument(
        "mackedir",
        help="The directory of a MACKE run to be analyzed")

    args = parser.parse_args()

    # A MACKE run directory is recognized by its klee.json
    if (path.isdir(args.mackedir) and
            path.isfile(path.join(args.mackedir, 'klee.json'))):
        return args.mackedir
    else:
        raise ValueError("'%s' is not a directory of a MACKE run" %
                         args.mackedir)


def store_as_json(macke_directory, filename, content):
    """
    Store content as json inside filename
    """
    jsonfile = path.join(macke_directory, filename)
    with open(jsonfile, 'w') as file:
        json.dump(content, file)


def get_fuzz_outdirs(macke_directory):
    """
    Return (function, path) pairs for all fuzz_out_* directories of a run
    """
    fuzzdir = path.join(macke_directory, "fuzzer")
    # FIX: reuse the already computed fuzzdir instead of re-joining
    if not path.isdir(fuzzdir):
        return []

    result = []
    prefix = "fuzz_out_"

    for entry in listdir(fuzzdir):
        if not entry.startswith(prefix):
            continue
        fpath = path.join(fuzzdir, entry)
        function = entry[len(prefix):]
        # Skip symlinks and anything that is not a real directory
        if path.islink(fpath) or not path.isdir(fpath):
            continue
        result.append((function, fpath))

    return result


def append_to_registry_from_fuzzdir(registry, macke_directory):
    """
    Register all fuzzer-found errors (macke_errors dirs) into registry
    """
    for (function, fpath) in get_fuzz_outdirs(macke_directory):
        errordir = path.join(fpath, "macke_errors")

        # sanity check
        if path.islink(errordir) or not path.isdir(errordir):
            continue
        registry.create_from_dir(errordir, function)


def get_klee_registry_from_mackedir(macke_directory):
    """
    Build an OrderedDict with informations about all KLEE runs in a MACKE run
    """
    kinfo = OrderedDict()
    with open(path.join(macke_directory, 'klee.json')) as klee_json:
        kinfo = json.load(klee_json, object_pairs_hook=OrderedDict)

    return kinfo
def get_error_registry_for_mackedir(macke_directory, callgraph):
    """
    Build an error Registry for a MACKE run
    """
    Error.set_program_functions(callgraph.get_internal_functions())
    macke_directory = path.abspath(macke_directory)
    registry = ErrorRegistry()
    klees = get_klee_registry_from_mackedir(macke_directory)

    append_to_registry_from_fuzzdir(registry, macke_directory)

    for _, klee in klees.items():
        # Phase-one entries carry "function", phase-two entries "caller"
        if "function" in klee:
            registry.create_from_dir(klee['folder'], klee['function'])
        else:
            registry.create_from_dir(klee['folder'], klee['caller'])

    return registry


def generic_main(description, feedback, filename, callback):
    """
    Entry point to run an analyse-script stand alone. It reads a MACKE
    directory, perform the analysis and store the result as a json file.
    """
    mackedir = arg_parse_mackedir(description)
    store_as_json(mackedir, filename, callback(mackedir))
    if feedback:
        print(feedback % filename)


"""
Extract all KLEE runs, that crashes
"""
from collections import OrderedDict

from ..Klee import reconstruct_from_macke_dir
from .helper import generic_main, get_klee_registry_from_mackedir


def kleecrash(macke_directory):
    """
    Extract all information about KLEE crashes in an OrderedDict
    """
    klees = reconstruct_from_macke_dir(macke_directory)

    kinfo = get_klee_registry_from_mackedir(macke_directory)

    result = []
    for klee in klees:
        if klee.did_klee_crash():
            kresult = OrderedDict(sorted(
                kinfo[klee.get_outname()].items(), key=lambda t: t[0]))
            del kresult["bcfile"]
            kresult["output"] = klee.stdoutput
            result.append(kresult)

    return result


def main():
    """ Entry point to run this analysis stand alone """
    generic_main(
        "Extract informations about all KLEE runs that crashes in a MACKE run",
        "The KLEE crashes were stored in %s",
        "kleecrash.json", kleecrash
    )

if __name__ == '__main__':
    main()


"""
Generate a json file with line coverage information about a MACKE run
"""

from collections import OrderedDict
from os import path

from ..llvm_wrapper import extract_lines_of_code
from ..run_istats import extract_linecoverage
from ..Fuzzer import extract_fuzzer_coverage
from .helper import generic_main, get_klee_registry_from_mackedir


def linecoverage(macke_directory):
    """
    Extract all linecoverage information in an OrderedDict
    """
    # Extract all lines of code in the unoptimized program
    funcovs = extract_lines_of_code(
        path.join(macke_directory, "bitcode", "program.bc"))

    # Collect fuzzing coverage
    # TODO: Coverage dictionary contains both lines and functions
    coverage = extract_fuzzer_coverage(macke_directory)

    # Read klee.json information
    klees = get_klee_registry_from_mackedir(macke_directory)

    # Collect all covered and uncovered lines from all run.istats
    for _, klee in klees.items():
        istatsfile = path.join(klee['folder'], "run.istats")
        if not path.isfile(istatsfile):
            istatsfile = path.join(macke_directory, 'klee',
                                   path.basename(klee['folder']),
                                   "run.istats")

        for func, info in extract_linecoverage(istatsfile).items():
            if func in coverage:
                # Merge existing information with the new information
                coverage[func]['covered'] |= info['covered']
                coverage[func]['uncovered'] |= info['uncovered']
            else:
                # Add a new entry to the overall stats
                coverage[func] = info

    # lines only covered on some runs are considered as covered
    for func in coverage:
        coverage[func]['uncovered'] -= coverage[func]['covered']

    # Categorize the per function informations
    perfunction = dict()

    for function, position in funcovs.items():
        for file, lines in position.items():
            if function not in perfunction:
                perfunction[function] = dict()
            coverageforfile = coverage.get(function, dict())
            perfunction[function][file] = OrderedDict([
                ('covered', sorted(list(
                    set(lines) & coverageforfile.get('covered', set())))),
                ('uncovered', sorted(list(
                    set(lines) & coverageforfile.get('uncovered', set())))),
                # "removed" = lines never reported covered nor uncovered
                ('removed', sorted(list((set(lines) -
                                         coverageforfile.get('covered', set())) -
                                        coverageforfile.get('uncovered', set()))
                                   ))
            ])

    # Count the absolute numbers
    covered, uncovered, removed = 0, 0, 0
    for _, position in perfunction.items():
        for _, status in position.items():
            covered += len(status['covered'])
            uncovered += len(status['uncovered'])
            removed += len(status['removed'])

    # Compose everything in a sorted result
    result = OrderedDict([
        ('total', OrderedDict(
            [
                ('covered', covered),
                ('uncovered', uncovered),
                ('removed', removed)
            ])),
        ('perfunction', OrderedDict(
            sorted(perfunction.items(), key=lambda t: t[0]))),
    ])
    return result
def main():
    """ Entry point to run this analysis stand alone """
    generic_main(
        "Extract line coverage of a MACKE run",
        "The coverage analysis was stored in %s",
        "coverage.json", linecoverage
    )


if __name__ == '__main__':
    main()


"""
Count partial error analysis and track the reasons, why MACKE did no finish
"""
from collections import OrderedDict
from os import path

from ..CallGraph import CallGraph
from ..Klee import reconstruct_from_macke_dir
from .helper import (generic_main, get_error_registry_for_mackedir,
                     get_klee_registry_from_mackedir)


def partial(macke_directory):
    """
    Extract information about all partial error analysis during a MACKE run
    """
    clg = CallGraph(path.join(macke_directory, "bitcode", "program.bc"))
    registry = get_error_registry_for_mackedir(macke_directory, clg)
    klees = reconstruct_from_macke_dir(macke_directory)
    kinfos = get_klee_registry_from_mackedir(macke_directory)

    # Merge KLEEs and kinfos
    for klee in klees:
        kinfos[klee.get_outname()]["klee"] = klee

    # Index structures for easy access:
    # phase one: function -> kleeResult; phase two: (caller, callee) -> it
    kphaseone = dict()
    kphasetwo = dict()
    for _, kinfo in kinfos.items():
        if kinfo["phase"] == 1:
            if "klee" in kinfo:
                kphaseone[kinfo["function"]] = kinfo["klee"]
        elif kinfo["phase"] == 2:
            if "klee" in kinfo:
                kphasetwo[(kinfo["caller"], kinfo["callee"])] = kinfo["klee"]

    # Sanitized I: All leafs are only from phase one and have no errors
    sanatizedone = 0
    # Sanitized II: All leafs have no errors (phase one + two)
    sanatizedtwo = 0
    complete = 0
    kleecrash = 0
    noencapsulation = 0
    outofressources = 0
    targetmissed = 0
    targetmissednoproblem = 0
    incomplete = 0

    for _, errorlist in registry.forvulninst.items():
        # Extract all erroneous functions and caller-callee pairs that is
        # (partial) covered by this error
        erroneous, callpairs = set(), set()
        for error in errorlist:
            erroneous.add(error.entryfunction)
            if clg[error.entryfunction]:
                callpairs.update(
                    {(caller, error.entryfunction)
                     for caller in clg[error.entryfunction]["calledby"]})

        # The "border": callers of erroneous functions that are themselves
        # not erroneous — the frontier where propagation stopped.
        border = {(cler, clee) for (cler, clee) in callpairs
                  if cler not in erroneous}

        if all(cler in kphaseone and
               not kphaseone[cler].did_klee_run_out_of_ressources()
               for cler, _ in border):
            sanatizedone += 1
        elif all(
                (cler in kphaseone and
                 not kphaseone[cler].did_klee_run_out_of_ressources()) or (
                    (cler, clee) in kphasetwo and not kphasetwo[
                        (cler, clee)].did_klee_run_out_of_ressources() and
                    kphasetwo[(cler, clee)].did_klee_reach_error_summary(clee)
                ) for cler, clee in border):
            sanatizedtwo += 1

        isincomplete = False

        # Reason 1: a border caller could not be symbolically encapsulated
        if any(not (clg.is_symbolic_encapsulable(cler) or cler == "main")
               for cler, _ in border):
            noencapsulation += 1
            isincomplete = True

        # Reason 2: a relevant KLEE run crashed
        if any((cler in kphaseone and kphaseone[cler].did_klee_crash()) or
               ((cler, clee) in kphasetwo and
                kphasetwo[(cler, clee)].did_klee_crash())
               for cler, clee in border):
            kleecrash += 1
            isincomplete = True

        # Reason 3: a relevant KLEE run hit its resource limits
        if any((cler in kphaseone and
                kphaseone[cler].did_klee_run_out_of_ressources()) or
               ((cler, clee) in kphasetwo and
                kphasetwo[(cler, clee)].did_klee_run_out_of_ressources())
               for cler, clee in border):
            outofressources += 1
            isincomplete = True

        # Reason 4: phase two never reached the error summary of the callee
        if any((cler, clee) in kphasetwo and
               not kphasetwo[(cler, clee)].did_klee_reach_error_summary(clee)
               for cler, clee in border):
            targetmissed += 1
            isincomplete = True

        # Target missed although KLEE neither crashed nor ran out of
        # resources — counted separately, does not mark incompleteness
        if any((cler, clee) in kphasetwo and
               not kphasetwo[
                   (cler, clee)].did_klee_run_out_of_ressources() and
               not kphasetwo[(cler, clee)].did_klee_crash() and
               not kphasetwo[(cler, clee)].did_klee_reach_error_summary(clee)
               for cler, clee in border):
            targetmissednoproblem += 1

        incomplete += isincomplete
        complete += not isincomplete

    return OrderedDict([
        ("vulnerable-instructions", registry.count_vulnerable_instructions()),
        ("sanatized-one", sanatizedone),
        ("sanatized-two", sanatizedtwo),
        ("complete", complete),
        ("incomplete", incomplete),
        ("reasons", OrderedDict([
            ("kleecrash", kleecrash),
            ("noencapsulation", noencapsulation),
            ("out-of-resources", outofressources),
            ("target-missed", targetmissed),
            ("target-missed-no-problem", targetmissednoproblem),
        ]))
    ])


def main():
    """ Entry point to run this analysis stand alone """
    generic_main(
        "Count partial error analysis and track the reasons for it",
        "The partial analysis were stored in %s",
        "partial.json", partial
    )

if __name__ == '__main__':
    main()
def analyse_runtime(macke_directory):
    """
    Collect and summarize the runtime information of all KLEE runs

    :param macke_directory: root directory of a finished MACKE run
    :return: OrderedDict with the total runtime, the runtime per phase and
             the runtime per entry point function
    """
    # Read klee.json information
    registry = get_klee_registry_from_mackedir(macke_directory)

    result = OrderedDict()
    result['total'] = 0
    result['phase'] = {'1': 0, '2': 0}
    result['entrypoint'] = {}

    for kinfo in registry.values():
        statsfile = path.join(kinfo['folder'], 'run.stats')

        # KLEE runs without a run.stats file are silently skipped
        if not path.isfile(statsfile):
            continue

        with open(statsfile) as stream:
            rows = stream.readlines()

        # The first row is a header tuple naming all columns;
        # locate the UserTime column inside it
        column = rows[0][1:-1].split(",").index("'UserTime'")

        # The last row contains the aggregated values of the whole run
        usertime = float(rows[-1][1:-1].split(",")[column])

        result['total'] += usertime
        result['phase'][str(kinfo['phase'])] += usertime

        phase = kinfo['phase']
        if phase in (1, 2):
            # Phase 1 runs are keyed by their function,
            # phase 2 runs by their caller
            funcname = kinfo['function'] if phase == 1 else kinfo['caller']
            entry = result['entrypoint'].get(
                funcname, OrderedDict([('1', 0), ('2', 0)]))
            entry[str(phase)] += usertime
            result['entrypoint'][funcname] = entry

    # Present the entry points in alphabetical order
    result['entrypoint'] = OrderedDict(
        sorted(result['entrypoint'].items(), key=lambda item: item[0]))

    return result


def main():
    """ Entry point to run this analysis stand alone """
    description = "Add a summary of all KLEE runtimes to the directory of a MACKE run"
    storemessage = "The runtime analysis was stored in %s"
    generic_main(description, storemessage, "runtime.json", analyse_runtime)
def parse_coverage(cov_file):
    """
    Parse an opened callgrind output file into a line-coverage dict.

    :param cov_file: an open file object containing callgrind output
    :return: dict(name: {'covered': set(lines), 'uncovered': set(lines)})
             Only 'covered' is ever filled here, since callgrind reports
             executed positions; 'uncovered' stays empty.
    """
    content = cov_file.readlines()

    # An empty output file yields an empty result
    if not content:
        return dict()

    # One of the first lines must identify callgrind-3 as creator
    isCreatorCallgrind3 = False
    for i in range(4):
        if "creator: callgrind-3" in content[i]:
            isCreatorCallgrind3 = True
            break

    assert isCreatorCallgrind3
    """
    assert ((len(content) >= 3) and
            content[0] == "# callgrind format\n" and
            content[1] == "version: 1\n" and
            content[2].startswith("creator: callgrind-3"))
    """
    # Advance to the positions specification; only line positions
    # (not instruction addresses) are supported by this parser
    i = 3
    while "positions" not in content[i]:
        i += 1
    assert content[i] == "positions: line\n"
    i += 1
    assert len(content) > i and content[i] == "events: Ir\n"

    extract = dict()
    # callgrind compresses repeated names to numeric ids; these dicts map
    # id -> name separately for file names (fl*) and function names (fn*)
    fn_mapping = dict()
    fl_mapping = dict()

    # Skip until content
    while not any(content[i].startswith(pm) for pm in POSITION_SPECS):
        i += 1

    def parse_name(name_str, name_dict):
        # Resolve callgrind name compression: "(id) name" introduces a new
        # mapping, a bare "(id)" refers back to an earlier introduction
        if name_str[0] == '(':
            bracket_end = name_str.index(')')
            id = int(name_str[1:bracket_end])
            if id in name_dict:
                # Known id: the line must be a bare back-reference
                assert("(" + str(id) + ")\n" == name_str)
                #return name_dict[id]
            else:
                # New id: the actual name follows the closing bracket
                assert("(" + str(id) + ")\n" != name_str)
                name_str = name_str[bracket_end+1:].strip()
                name_dict[id] = name_str
            return name_dict[id]
        else:
            # Uncompressed plain name
            return name_str

    currentline = 0
    currentfile = ""
    for line in content[i:]:
        if any(line.startswith(pm) for pm in POSITION_SPECS):
            pm = line[0:line.index('=')]

            if pm == "fl" or pm == "fi" or pm == "fe":
                # Specification of the current source file
                try:
                    currentfile = parse_name(line[3:], fl_mapping)
                except AssertionError as ae:
                    print("Could not parse name: %s"%(line[3:]))
                    currentfile = ""
                if currentfile != "" and currentfile != "???" and currentfile not in extract:
                    extract[currentfile] = {'covered': set(), 'uncovered': set()}
            elif pm == "cfl" or pm == "cfi":
                # File of a called function - only record the id mapping
                parse_name(line[len(pm) + 1:], fl_mapping)
            elif pm == "ob" or pm == "cob":
                # Object files are irrelevant for line coverage
                pass
            else:
                # Remaining specs (fn, cfn) carry function names
                # NOTE(review): this overwrites currentfile with a function
                # name, so extract is keyed by both - confirm with callers
                try:
                    currentfile = parse_name(line[len(pm) + 1:], fn_mapping)
                except AssertionError as ae:
                    print("Could not parse name: %s"%(line[3:]))
                    currentfile = ""

        # Start with number
        elif '0' <= line[0] <= '9':
            # Line with details for the current file
            cols = line.split()
            loc = int(cols[0])
            if loc != 0 and currentfile != "" and currentfile != "???":
                assert int(cols[1]) != 0
                # This line was covered
                if not (currentfile in extract):
                    extract[currentfile] = {'covered': set(), 'uncovered': set()}
                extract[currentfile]['covered'].add(loc)
            currentline = loc
        # Subposition compression
        elif line[0] == "+" or line[0] == "-":
            # Line with details for the current file
            # (position given relative to the previous line number)
            cols = line.split()
            loc = currentline + int(cols[0])
            if loc != 0 and currentfile != "" and currentfile != "???":
                assert int(cols[1]) != 0
                # This line was covered
                if not (currentfile in extract):
                    extract[currentfile] = {'covered': set(), 'uncovered': set()}
                extract[currentfile]['covered'].add(loc)
            currentline = loc
        # means cost on same line, thus nothing new is covered
        elif line[0] == "*":
            pass
        elif line.startswith("calls="):
            pass
        elif not line.strip():
            # Ignore empty lines
            pass
        elif line.startswith("totals:"):
            pass
        else:
            print("Invalid line %s\nSkipping." % line)
            pass
            #raise ValueError("Invalid line %s" % line)
    return extract
def get_coverage(args, inputfile, timeout=1, fileinput=False, tmpfilename=None):
    """
    Run a program under valgrind/callgrind and return its line coverage.

    :param args: argument vector of the program to run (program path first)
    :param inputfile: file whose content is piped to stdin, or - when
                      fileinput is True - appended as last program argument
    :param timeout: seconds to wait for the program before terminating it
    :param fileinput: pass inputfile as argument instead of stdin
    :param tmpfilename: optional path for the callgrind output file;
                        a fresh temporary file is created when None
    :return: coverage dict as produced by parse_coverage (empty on errors)
    """
    if tmpfilename is None:
        fd, tmpfilename = tempfile.mkstemp(prefix="macke_callgrind_")
    else:
        # Bugfix: os.open() takes integer flags, not a stdio mode string
        # ("w" raised TypeError). Create or truncate the file instead.
        fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
    # Only the file name is passed to callgrind - close our descriptor
    os.close(fd)
    if not fileinput:
        try:
            infd = open(inputfile, "r")
        except FileNotFoundError:
            Logger.log("get_coverage: input file " + inputfile + " not found!\n", verbosity_level="error")
            return dict()
    else:
        infd = None
        args.append(inputfile)
    Logger.log("get_coverage: " + str([VALGRIND, "--tool=callgrind", "--callgrind-out-file=" + tmpfilename]) +
               str(args) + "\n", verbosity_level="debug")
    # New session so the whole process group can be killed on timeout
    p = subprocess.Popen([VALGRIND, "--tool=callgrind", "--callgrind-out-file=" + tmpfilename] + args,
                         stdin=infd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid)
    output = b""
    err = b""
    try:
        while p.poll() is None:
            # Bugfix: honour the caller-supplied timeout instead of a
            # hard-coded one second
            (o, e) = p.communicate(None, timeout=timeout)
            output += o
            err += e
    # On hangup terminate the program
    except subprocess.TimeoutExpired:
        p.terminate()
        try:
            o, e = p.communicate(timeout=1)
        # If program does not like to be terminated, kill it.
        except subprocess.TimeoutExpired:
            os.killpg(os.getpgid(p.pid), signal.SIGKILL)
            # Timeout to get instead throw exception instead of just idling forever
            o, e = p.communicate(timeout=1)
        output += o
        err += e

    if not fileinput:
        infd.close()

    with open(tmpfilename, 'r') as tmpfile:
        ret = parse_coverage(tmpfile)
    os.unlink(tmpfilename)
    return ret
def validate_cgroups(ignore_swap):
    """
    Validate whether cgroups are present and we have access to it

    :param ignore_swap: tolerate systems without a swap-limit control file
    :return: True if all expected cgroups exist with the configured limits
    """
    # NOTE(review): looks unused here - confirm before removing
    num_groups = get_num_threads()

    # Limit files store the value in bytes, the config in megabytes
    required_limit = FUZZMEMLIMIT * 1024 * 1024

    for cgrpname in get_cgroups():
        cpath = path.join("/sys/fs/cgroup/memory", cgrpname)
        if not path.exists(cpath):
            print("Some required groups do not exist")
            return False

        if not os.access(cpath, os.R_OK | os.W_OK | os.X_OK):
            print("Lacking access to groups")
            return False
        for p in _limitfilenames:
            fpath = path.join(cpath, p)
            # A missing memsw file means the kernel cannot limit swap
            if not path.exists(fpath) and path.basename(fpath) == "memory.memsw.limit_in_bytes":
                if not ignore_swap:
                    print(
                        "Your system does not allow limiting swap memory with cgroups. Either disable swap or continue "
                        "at your own risk with by adding --ignore-swap to initialization and execution")
                    return False
                else:
                    continue
            with open(fpath, 'r') as f:
                limit = int(f.read())
                if limit != required_limit:
                    print("Some cgroups contain invalid memory limits")
                    return False
    return True


def cgroups_run_checked_silent_subprocess(args, cgroup, **kwargs):
    # Run args inside the given memory cgroup via cgexec; stderr is folded
    # into the returned output, non-zero exit raises CalledProcessError
    return subprocess.check_output(["cgexec", "-g", "memory:" + cgroup, "--sticky"] + args, stderr=subprocess.STDOUT,
                                   **kwargs)


def cgroups_run_subprocess(command, *args, cgroup=None, **kwargs):
    # Same as above, but cgroup is keyword-only and mandatory
    if cgroup is None:
        raise ValueError("No cgroup given")
    return subprocess.check_output(["cgexec", "-g", "memory:" + cgroup, "--sticky"] + command, stderr=subprocess.STDOUT,
                                   *args, **kwargs)


def cgroups_Popen(command, *args, cgroup=None, **kwargs):
    # Popen variant of the cgexec wrappers for callers that manage the
    # process themselves
    if cgroup is None:
        raise ValueError("No cgroup given")
    return subprocess.Popen(["cgexec", "-g", "memory:" + cgroup, "--sticky"] + command, *args, **kwargs)


def cgroups_run_timed_subprocess(command, *args, cgroup=None, timeout=1, **kwargs):
    """
    Starts a subprocess, waits for it and returns (exitcode, output, erroutput)
    """
    if cgroup is None:
        raise ValueError("No cgroup given")
    # setsid puts the child in its own session, so the whole process group
    # can be killed together on timeout
    p = cgroups_Popen(command, *args, cgroup=cgroup, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=setsid,
                      **kwargs)
    output = b""
    err = b""
    try:
        while p.poll() is None:
            (o, e) = p.communicate(None, timeout=timeout)
            output += o
            err += e
    # On hangup kill the program (and children)
    except subprocess.TimeoutExpired:
        killpg(getpgid(p.pid), signal.SIGKILL)

    return p.returncode, output, err
-------------------------------------------------------------------------------- 1 | """ 2 | Load global configurations, if they exist in a config.ini file 3 | """ 4 | 5 | import configparser 6 | import subprocess 7 | from os import path 8 | 9 | CONFIG = configparser.ConfigParser() 10 | CONFIGFILE = path.join(path.dirname(__file__), "..", "config.ini") 11 | CONFIG.read(CONFIGFILE) 12 | 13 | 14 | # for fuzzer 15 | LIBMACKEFUZZPATH = path.expanduser(CONFIG.get("binaries", "libmackefuzzopt")) 16 | LIBMACKEFUZZOPT = path.join(LIBMACKEFUZZPATH, "bin", "libMackeFuzzerOpt.so") 17 | AFLLIB = path.expanduser(CONFIG.get("binaries", "afl-lib")) 18 | AFLBIN = path.expanduser(CONFIG.get("binaries", "afl-bin")) 19 | AFLCC = path.join(AFLBIN, "afl-clang-fast") 20 | AFLFUZZ = path.join(AFLBIN, "afl-fuzz") 21 | AFLTMIN = path.join(AFLBIN, "afl-tmin") 22 | LLVMCONFIG = path.expanduser(CONFIG.get("binaries", "llvm-config")) 23 | LLVMBINDIR = subprocess.check_output([LLVMCONFIG, "--bindir"]).decode("utf-8").strip() 24 | CLANG = path.join(LLVMBINDIR, "clang") 25 | LLVMFUZZOPT = path.join(LLVMBINDIR, "opt") 26 | 27 | VALGRIND = path.expanduser(CONFIG.get("binaries", "valgrind")) 28 | 29 | # for symbolic execution 30 | LIBMACKEOPT = path.expanduser(CONFIG.get("binaries", "libmackeopt")) 31 | LLVMOPT = path.expanduser(CONFIG.get("binaries", "llvmopt", fallback="opt")) 32 | KLEEBIN = path.expanduser(CONFIG.get("binaries", "klee", fallback="klee")) 33 | 34 | # general 35 | THREADNUM = CONFIG.getint("runtime", "threadnum", fallback=None) 36 | FUZZMEMLIMIT = CONFIG.getint("runtime", "memlimit", fallback=50) 37 | 38 | 39 | def __get_output_from(*args, **kwargs): 40 | """ 41 | Starts a subprocess with the given args and returns its output - no matter 42 | if the process exits normally or with an error 43 | """ 44 | # Sadly, some programs return their help pages with non-zero exit code 45 | try: 46 | output = subprocess.check_output(*args, **kwargs) 47 | except subprocess.CalledProcessError as 
def check_config():
    """
    Checks all variables in the config. Especially if all given binaries are
    actually executable, have the correct version and support everything needed

    :raises Exception: describing the first detected configuration problem
    """

    # Check, if LLVMOPT is actually a binary of opt
    if (not path.isfile(LLVMOPT) or b"llvm .bc -> .bc modular optimizer"
            not in __get_output_from([LLVMOPT, "-help"])):
        raise Exception("Config: Invalid opt binary")

    # Check, if LLVMOPT is the correct opt version
    if b"LLVM version 3.4.2" not in __get_output_from([LLVMOPT, "-version"]):
        raise Exception("Config: Invalid opt version")

    # Check, if LIBMACKEOPT actually supports the relevant passes
    if not path.isfile(LIBMACKEOPT):
        raise Exception("Config: Invalid libmackeopt binary")
    mhelp = __get_output_from([LLVMOPT, "-load", LIBMACKEOPT, "-help"])
    if any(t not in mhelp for t in [
            b"-extractcallgraph", b"-listallfuncstopologic",
            b"-encapsulatesymbolic", b"-preprenderror"]):
        raise Exception(
            "Config: limackeopt does not support all required passes")

    # Check, if KLEEBIN is a KLEE binary of a matching LLVM version
    if not path.isfile(KLEEBIN):
        raise Exception("Config: Invalid KLEE binary")
    kvers = __get_output_from([KLEEBIN, "-version"])
    if b"KLEE" not in kvers or b"LLVM version 3.4.2" not in kvers:
        raise Exception("Config: Invalid klee version")
    # MACKE requires a KLEE build with sonar search support
    khelp = __get_output_from([KLEEBIN, "-help"])
    if any(t not in khelp for t in [b"=sonar", b"-sonar-target", b"-sonar-target-info="]):
        raise Exception("Config: klee does not support sonar search")

    # Check, if a reasonable number of threads is used
    if THREADNUM is not None and not 0 < THREADNUM < 128:
        raise Exception("Config: Invalid Number of threads")
def get_current_git_hash_from(directory):
    """
    Returns the git hash of the currently checked out commit in directory

    :param directory: directory inside the repository to query
    :return: commit hash as string, or "unknown" when it cannot be
             determined
    """
    try:
        githash = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD'], cwd=directory,
            stderr=subprocess.DEVNULL).decode("utf-8").rstrip()
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: directory is not inside a git repository.
        # OSError (incl. FileNotFoundError): git binary missing or the
        # directory does not exist - previously this crashed uncaught.
        githash = "unknown"

    return githash


def get_current_git_hash():
    """
    Returns the git hash of the currently checked out commit
    """
    return get_current_git_hash_from(path.join(path.dirname(__file__), ".."))


def get_llvm_opt_git_hash():
    """
    Tries to get the git hash of currently checked out version of
    macke-llvm-opt, if the binary is inside the git repository
    """
    return get_current_git_hash_from(path.dirname(LIBMACKEOPT))


def get_klee_git_hash():
    """
    Tries to get the git hash of currently checked out version of klee,
    if the binary is inside the git repository
    """
    return get_current_git_hash_from(path.dirname(KLEEBIN))
"""
Functions, that wraps all llvm actions and transformation into python functions
"""
import json
import subprocess

from .config import LIBMACKEOPT, LLVMOPT


def __run_subprocess(popenargs):
    """
    Starts a subprocess with popenargs and returns it output

    :param popenargs: complete argument vector for the subprocess
    :return: raw stdout of the subprocess as bytes
    """
    return subprocess.check_output(popenargs)


def __run_subprocess_json_output(popenargs):
    """
    Starts a subprocess with popenargs and returns the output as parsed json

    :param popenargs: complete argument vector for the subprocess
    :return: stdout of the subprocess decoded and parsed as JSON
    """
    out = __run_subprocess(popenargs)
    return json.loads(out.decode("utf-8"))


def list_all_funcs_topological(bitcodefile):
    """
    Wrapper around the list all functions pass. Any circles or strongly
    connected components are listed alphabetically in nested lists

    :param bitcodefile: path to the LLVM bitcode file to analyse
    """
    return __run_subprocess_json_output([
        LLVMOPT, "-load", LIBMACKEOPT,
        "-listallfuncstopologic", bitcodefile,
        "-disable-output"])


def extract_callgraph(bitcodefile):
    """
    Wrapper around the extract callgraph pass

    :param bitcodefile: path to the LLVM bitcode file to analyse
    """
    return __run_subprocess_json_output([
        LLVMOPT, "-load", LIBMACKEOPT,
        "-extractcallgraph", bitcodefile,
        "-disable-output"])


def encapsulate_symbolic(sourcefile, function, destfile=None):
    """
    Wrapper around the encapsulate symbolic pass

    :param sourcefile: bitcode file to transform
    :param function: function to encapsulate for symbolic execution
    :param destfile: output bitcode file; sourcefile is modified in
                     place when None
    """
    # If no destfile is given, just modify the source file
    if destfile is None:
        destfile = sourcefile

    return __run_subprocess([
        LLVMOPT, "-load", LIBMACKEOPT,
        "-encapsulatesymbolic", sourcefile,
        "-encapsulatedfunction", function, "-o", destfile])


def prepend_error_from_dir(sourcefile, function, errordirlist, destfile=None):
    """
    Wrapper around the prepend error pass

    :param sourcefile: bitcode file to transform
    :param function: function the error summaries are prepended to
    :param errordirlist: non-empty list of previous KLEE run directories
    :param destfile: output bitcode file; sourcefile is modified in
                     place when None
    """
    # Reject empty error lists
    assert errordirlist

    # If no destfile is given, just modify the source file
    if destfile is None:
        destfile = sourcefile

    # Each directory is passed with its own flag
    errordirflags = []
    for errordir in errordirlist:
        errordirflags.append("-previouskleerundirectory")
        errordirflags.append(errordir)

    return __run_subprocess([
        LLVMOPT, "-load", LIBMACKEOPT, "-preprenderror", sourcefile,
        "-prependtofunction", function] + errordirflags + ["-o", destfile])


def prepend_error_from_ktest(sourcefile, function, ktestlist, destfile=None):
    """
    Wrapper around the prepend error pass

    :param sourcefile: bitcode file to transform
    :param function: function the error summaries are prepended to
    :param ktestlist: non-empty list of ktest error files
    :param destfile: output bitcode file; sourcefile is modified in
                     place when None
    """
    # Reject empty ktest lists
    assert ktestlist

    # If no destfile is given, just modify the source file
    if destfile is None:
        destfile = sourcefile

    # Each ktest file is passed with its own flag
    ktestflags = []
    for ktest in ktestlist:
        ktestflags.append("-errorfiletoprepend")
        ktestflags.append(ktest)

    return __run_subprocess([
        LLVMOPT, "-load", LIBMACKEOPT, "-preprenderror", sourcefile,
        "-prependtofunction", function] + ktestflags + ["-o", destfile])


def remove_unreachable_from(entrypoint, sourcefile, destfile=None):
    """
    Internalize everything except entrypoint and remove unused code

    :param entrypoint: only function kept externally visible
    :param sourcefile: bitcode file to transform
    :param destfile: output bitcode file; sourcefile is modified in
                     place when None
    """
    # If no destfile is given, just modify the source file
    if destfile is None:
        destfile = sourcefile

    return __run_subprocess([
        LLVMOPT, "-internalize-public-api-list=%s" % entrypoint, sourcefile,
        "-internalize", "-globalopt", "-globaldce", "-o", destfile])


def optimize_redundant_globals(sourcefile, destfile=None):
    """
    Runs an llvm opt pass, that merges all globals with identical content

    :param sourcefile: bitcode file to transform
    :param destfile: output bitcode file; sourcefile is modified in
                     place when None
    """
    # If no destfile is given, just modify the source file
    if destfile is None:
        destfile = sourcefile

    return __run_subprocess([
        LLVMOPT, "-constmerge", sourcefile, "-o", destfile])
def extract_linecoverage(run_istats_file):
    """
    Extract all lines of code, that were mentioned in a run.istats file

    :param run_istats_file: path to a run.istats file written by KLEE
    :return: dict(function: {'covered': set(lines), 'uncovered': set(lines)})
             keyed by function name (taken from the fn= records)
    :raises ValueError: on lines that do not match the expected format
    """

    # no run.istats file means empty result
    if not path.isfile(run_istats_file):
        return dict()

    with open(run_istats_file, 'r') as istats:
        content = istats.readlines()

    # empty files means empty result
    if not content:
        return dict()

    # Check, if the output format matches the expected format
    assert ((len(content) > 10) and
            content[0] == 'version: 1\n' and
            content[1] == 'creator: klee\n' and
            content[6] == 'positions: instr line\n'
            ), "file %s" % run_istats_file

    extract = dict()
    currentfunc = ""

    # Skip the header and read the rest
    for line in content[22:]:
        if '0' <= line[0] <= '9':
            # Line with details for the current function:
            # columns are instruction, source line, cost
            cols = line.split()
            loc = int(cols[1])
            if loc != 0 and currentfunc != "":
                if int(cols[2]) != 0:
                    # This line was covered
                    extract[currentfunc]['covered'].add(loc)
                else:
                    # This line was not covered
                    extract[currentfunc]['uncovered'].add(loc)
        elif line.startswith("fl="):
            # Line with information about a file
            pass
        elif line.startswith("fn="):
            # Line with information about a function name
            currentfunc = line[3:].strip()
            if currentfunc != "" and currentfunc not in extract:
                extract[currentfunc] = {'covered': set(), 'uncovered': set()}
        elif line.startswith("cfl="):
            # Line with the file name of a called function
            pass
        elif line.startswith("cfn="):
            # Line with the name of a called function
            pass
        elif line.startswith("calls="):
            # Line with informations about a call instruction
            pass
        elif not line.strip():
            # Ignore empty lines
            pass
        else:
            raise ValueError("Invalid line %s" % line)

    # The former trailing loop copied extract key by key into a new dict;
    # returning it directly is equivalent
    return extract
60px !important; 63 | } 64 | 65 | .formula { 66 | font-size: 16px; 67 | font-style: italic; 68 | } 69 | 70 | .test-description { 71 | font-size: 16px; 72 | } 73 | 74 | .test-link{ 75 | font-size: 16px; 76 | } 77 | 78 | .test-link:hover{ 79 | -webkit-transition: all 500ms ease; 80 | transition: all 500ms ease; 81 | color: blue; 82 | font-size: 16px; 83 | text-decoration: none; 84 | } 85 | 86 | .break-long-word{ 87 | white-space: pre-wrap; /* CSS3 */ 88 | white-space: -moz-pre-wrap; /* Firefox */ 89 | white-space: -pre-wrap; /* Opera <7 */ 90 | white-space: -o-pre-wrap; /* Opera 7 */ 91 | word-wrap: break-word; /* IE */ 92 | } 93 | 94 | .data-content{ 95 | margin-left:20px; 96 | } 97 | 98 | .case-info-heading { 99 | font-size: 18px; 100 | } 101 | 102 | .sub-heading-italics { 103 | font-style: italic; 104 | } 105 | 106 | .object-info { 107 | margin-left: 20px; 108 | margin-top: 10px; 109 | } -------------------------------------------------------------------------------- /reporting_tool/html/dfaexec.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dfaexec 5 | 6 | 7 | 8 | 9 | 10 | 11 |

12 |

Test case(s) of dfaexec

13 |
14 |

15 | Following test case(s) in dfaexec resulted in memory out-of-bounds errors. Click for more details 16 |

17 |
18 | test000001
19 | test000007
20 | test000009
21 | test000003
22 | test000004
23 | test000010 24 |

// reporting_tool/js/graph.js
//
// Renders the vulnerability graph (vis.js) and recomputes/recolors node
// severities from the weight form. Relies on the page providing jQuery ($),
// vis, xepOnline and the global nodesJSON / edgesJSON arrays.

// Convert the graph canvas into a downloadable PDF via xepOnline.
// NOTE(review): known to misbehave with very big dimensions.
function convertToPdf() {
    // 0.264583333 converts CSS pixels (96 dpi) to millimeters.
    var pdfWidth = Math.round($("#graph").width() * 0.264583333);
    var pdfHeight = Math.round($("#graph").height() * 0.264583333);
    return xepOnline.Formatter.Format('graph', {
        pageWidth: pdfWidth + 'mm',
        pageHeight: pdfHeight + 'mm',
        render: 'download',
        pageMargin: '0in',
        resolution: 100
    });
}

// Apply the width/height from the form to the canvas and redraw the graph.
// (Function name is kept as-is, typo included -- it is part of the public
// interface referenced from the surrounding HTML.)
function changeDimention() {
    $("#graph").html('');
    $("#graph").width($("#canvasWidth").val());
    $("#graph").height($("#canvasHeight").val());
    // Re-center the scroll position on the (usually huge) canvas.
    $("#graphContainer").scrollTop($("#graph").height() / 2 - 200);
    $("#graphContainer").scrollLeft($("#graph").width() / 2 - 500);
    createGraph();
}

$(document).ready(function() {
    // Mirror the current canvas dimensions into the width/height form.
    $("#canvasWidth").val($("#graph").width());
    $("#canvasHeight").val($("#graph").height());
    $("#graphContainer").scrollTop($("#graph").height() / 2 - 200);

    // Default weights for the severity formula.
    var default_L = 3;
    var default_I = 5;
    var default_N = 2;
    var default_D = 4;
    var default_O = 1;
    $('#L').val(default_L);
    $('#I').val(default_I);
    $('#N').val(default_N);
    $('#D').val(default_D);
    $('#O').val(default_O);

    calculateSeverity(default_L, default_I, default_N, default_D, default_O);
    colorNodes();
    createGraph();
});

// Recompute severities with the weights currently entered in the form.
function changeSeverity() {
    $("#graph").html('');
    var new_L = $('#L').val();
    var new_I = $('#I').val();
    var new_N = $('#N').val();
    var new_D = $('#D').val();
    var new_O = $('#O').val();
    calculateSeverity(new_L, new_I, new_N, new_D, new_O);
    colorNodes();
    createGraph();
}

// severity = L*factor_L + I*factor_I + N*factor_N + D*factor_D + O*factor_O
// The weights may arrive as strings from the form; `*` coerces them.
function calculateSeverity(L, I, N, D, O) {
    for (var i = 0; i < nodesJSON.length; i++) {
        var node = nodesJSON[i];
        node.severity =
            (L * node.factor_L) + (I * node.factor_I) +
            (N * node.factor_N) + (D * node.factor_D) +
            (O * node.factor_O);
    }
}

// Map each node's severity onto the four legend color buckets.
function colorNodes() {
    for (var i = 0; i < nodesJSON.length; i++) {
        var node = nodesJSON[i];
        if (node.severity >= 0 && node.severity < 11) {
            // GrayScale alternative: #D9D9D9
            node.color = "#FFFF00";
            // Light background needs a dark font.
            node.fontColor = "black";
        } else if (node.severity > 10 && node.severity < 21) {
            // GrayScale alternative: #8A8A8A
            node.color = "#FFAA00";
            node.fontColor = "black"; // Not for GrayScale
        } else if (node.severity > 20 && node.severity < 41) {
            // GrayScale alternative: #555555
            node.color = "#FF5500";
        } else if (node.severity > 40) {
            // GrayScale alternative: #000000
            node.color = "#CC0000";
        } else {
            // Only reachable for negative severities.
            console.log("Severity cannot be less than 0 for Node " + i);
        }
    }
}

// Build the vis.js network inside #graph from nodesJSON / edgesJSON.
function createGraph() {
    var container = document.getElementById('graph');
    var data = {
        nodes: nodesJSON,
        edges: edgesJSON
    };
    var options = {
        tooltip: {
            delay: 50,
            fontColor: "white",
            fontSize: 14,
            fontFace: "verdana",
            color: {
                border: "white",
                background: "#FFFFC6"
            }
        },
        clustering: {
            enabled: false,
            clusterEdgeThreshold: 50
        },
        physics: {
            enabled: false,
            barnesHut: {
                gravitationalConstant: -5000,
                springConstant: 0.001
            }
        },
        smoothCurves: {
            dynamic: false
        },
        hideEdgesOnDrag: true,
        stabilize: true,
        stabilizationIterations: 1000,
        zoomExtentOnStabilize: true,
        nodes: {
            fontColor: "white",
            borderWidth: 2,
            shadow: true
        },
        edges: {
            style: "arrow",
            width: 1
        }
    };
    var network = new vis.Network(container, data, options);

    // Clicking a node navigates to that function's detail page.
    // Fix: the original misused Array.filter as a forEach with no early
    // exit; a plain loop with break navigates on the first match.
    network.on('click', function(properties) {
        if (properties.nodes.length != 0) {
            for (var i = 0; i < nodesJSON.length; i++) {
                // Loose == on purpose: node ids may be number or string.
                if (nodesJSON[i].id == properties.nodes[0]) {
                    window.location = "./html/" + nodesJSON[i].label + ".html";
                    break;
                }
            }
        }
    });
}
40 |
41 |

Vulnerability Graph

42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |

Legend

52 |
53 |
54 |
55 |
56 | 0 - 10 Severity 57 |
58 |
59 |
60 |
61 |
62 | 11 - 20 Severity 63 |
64 |
65 |
66 |
67 |
68 | 21 - 40 Severity 69 |
70 |
71 |
72 |
73 |
74 | > 40 Severity 75 |
76 |
77 |
78 |
79 |
80 |

s(f) = L x length of bug chain(f) + I x is interface(f) + N x vulnerable inst(f) + D x distance to interface(f) + O x is outlier(f)

81 |
82 |
83 |
84 | 85 |    86 |
87 |
88 | 89 |    90 |
91 |
92 | 93 |    94 |
95 |
96 | 97 |    98 |
99 |
100 | 101 | 102 |
103 |

104 | 105 |
106 |
107 |
108 |
109 | 110 | 111 |
112 |
113 | 114 | 115 |
116 |

117 | 118 |
119 |
120 | 121 |
122 |
123 | 124 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | libclang-py3==3.8.2 2 | pydot==1.2.2 3 | progressbar2==3.10.0 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | if sys.version_info[0] == 3 and sys.version_info[1] < 4: 7 | sys.exit('Sorry, Python < 3.4 is not supported') 8 | 9 | setup( 10 | name='macke', 11 | version='0.1-alpha', 12 | packages=['macke'], 13 | url="https://github.com/tum-i22/macke", 14 | author="Saahil Ognawala", 15 | author_email="ognawala@in.tum.de", 16 | license="Apache Software License", 17 | entry_points={ 18 | 'console_scripts': [ 19 | 'macke = macke.__main__:main', 20 | 'macke-analyze = macke.analyse.everything:main', 21 | 'shamrock = shamrock.__main__:main', 22 | ] 23 | }, 24 | classifiers=[ 25 | 'License :: OSI Approved :: Apache Software License', 26 | 'Development Status :: 3 - Alpha', 27 | 'Environment :: Console', 28 | 'Intended Audience :: Developers', 29 | 'Programming Language :: Python :: 3.4', 30 | 'Programming Language :: Python :: 3.5', 31 | ] 32 | ) 33 | -------------------------------------------------------------------------------- /shamrock/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tum-i4/macke/b096ee194b0861b6581fee140bdf16d8cc60b116/shamrock/__init__.py -------------------------------------------------------------------------------- /shamrock/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Start a complete analysis with the MACKE toolchain on a given bitcode file 3 | """ 4 | import argparse 5 | 6 | from macke.config import 
from macke.config import check_config

from .shamrock import Shamrock


def main():
    """
    Parse command line arguments, then initialize and start a complete
    SHAMROCK run -- a plain KLEE analysis of the program's main function.
    """
    parser = argparse.ArgumentParser(
        description="""\
    Run KLEE and some additional analysis on the given bitcode file.
    """
    )

    parser.add_argument(
        'bcfile',
        metavar=".bc-file",
        type=argparse.FileType('r'),
        help="Bitcode file, that will be analyzed"
    )

    parser.add_argument(
        '--comment',
        nargs='?',
        default="",
        help="Additional comment, that will be stored in the output directory")

    parser.add_argument(
        '--parent-dir',
        nargs='?',
        default="/tmp/macke",
        help="The output directory of the run is put inside this directory")

    parser.add_argument(
        '--max-time',
        nargs='?',
        type=int,
        default=120,
        help="Maximum execution time for one KLEE run"
    )

    parser.add_argument(
        '--max-instruction-time',
        nargs='?',
        type=int,
        default=12,
        help="Maximum execution time KLEE can spend on one instruction"
    )

    parser.add_argument(
        '--sym-args',
        nargs=3,
        metavar=("", "", ""),
        help="Symbolic arguments passed to main function"
    )

    parser.add_argument(
        '--sym-files',
        nargs=2,
        metavar=("", ""),
        help="Symbolic file argument passed to main function"
    )

    parser.add_argument(
        '--sym-stdin',
        type=int,
        metavar="",
        help="Use symbolic stdin with size "
    )

    parser.add_argument(
        '--libraries',
        # Comma separated list -> Python list
        type=lambda s: s.split(','),
        default=None,
        help="Libraries that are needed for linking (fuzzing only)"
    )

    parser.add_argument(
        '--quiet',
        dest='quiet',
        action='store_true'
    )
    parser.set_defaults(quiet=False)

    args = parser.parse_args()

    # Fix: validate macke's config only AFTER argparse ran, so that
    # --help and argument errors are reported even with a broken
    # config.ini (the original called check_config() before parsing).
    check_config()

    # Compose KLEE flags given directly by the user
    flags_user = [
        "--max-time=%d" % args.max_time,
        "--max-instruction-time=%d" % args.max_instruction_time
    ]

    # Compose flags for analysing the main function
    posix4main = []
    if args.sym_args:
        posix4main.append("--sym-args")
        posix4main.extend(args.sym_args)

    # POSIX flags used for every KLEE run
    posixflags = []
    if args.sym_files:
        posixflags.append("--sym-files")
        posixflags.extend(args.sym_files)

    if args.sym_stdin:
        # NOTE(review): single dash, unlike the sibling flags above --
        # KLEE accepts both forms, so it is kept byte-identical here.
        posixflags.append("-sym-stdin")
        posixflags.append(str(args.sym_stdin))

    # And finally pass everything to shamrock
    shamrock = Shamrock(args.bcfile.name, args.comment, args.parent_dir,
                        args.quiet, flags_user, posixflags, posix4main,
                        args.libraries)
    shamrock.run_complete_analysis()


if __name__ == "__main__":
    main()
"""
Core class for SHAMROCK execution: a pure KLEE analysis of a program's
main function, laid out like a MACKE run so the same analysis tooling
can consume the results.
"""

import json
import shutil
import sys
from collections import OrderedDict
from datetime import datetime, timedelta
from os import makedirs, path

from macke.config import (CONFIGFILE, get_current_git_hash,
                          get_klee_git_hash, get_llvm_opt_git_hash)
from macke.constants import UCLIBC_LIBS
from macke.Klee import execute_klee


class Shamrock:
    """
    Main container for all steps of the SHAMROCK analysis.
    """

    def __init__(self, bitcodefile, comment="",
                 parentdir="/tmp/macke", quiet=False,
                 flags_user=None, posixflags=None, posix4main=None,
                 libraries=None):
        """
        :param bitcodefile: path to the LLVM bitcode file to analyze
        :param comment: free-text note stored in the run's info.json
        :param parentdir: directory that will contain the run directory
        :param quiet: suppress all console output when True
        :param flags_user: extra KLEE flags (e.g. "--max-time=...")
        :param posixflags: POSIX runtime flags for the KLEE run
        :param posix4main: POSIX flags only for the main function
        :param libraries: library names needed for linking
        """
        # Only accept valid files and directory
        assert path.isfile(bitcodefile)

        # store the path to the analyzed bitcode file
        self.bitcodefile = bitcodefile

        # Store information from command line
        self.comment = comment
        self.flags_user = flags_user if flags_user is not None else []
        self.posixflags = posixflags if posixflags is not None else []
        self.posix4main = posix4main if posix4main is not None else []

        # Libraries outside klee's uclibc must be loaded explicitly
        if libraries is not None:
            for lib in libraries:
                if lib not in UCLIBC_LIBS:
                    self.flags_user.append("-load=lib" + lib + ".so")

        # generate name of directory with all run results
        self.starttime = datetime.now()
        newdirname = self.starttime.strftime("%Y-%m-%d-%H-%M-%S")
        self.rundir = path.join(parentdir, newdirname)
        self.parentdir = parentdir

        # Generate the path for the bitcode directory
        self.bcdir = path.join(self.rundir, "bitcode")

        # Generate the filename for the copy of the program
        self.program_bc = path.join(self.rundir, "bitcode", "program.bc")

        # Filename, where the index of the klee runs is stored
        self.kleejson = path.join(self.rundir, "klee.json")

        self.kleedir = path.join(self.rundir, "klee")
        self.kleeoutdir = path.join(self.kleedir, "klee-out-1")

        # Some attributes, that are filled later
        self.endtime = None

        # Setting quiet == True suppress all outputs
        self.quiet = quiet

    def generate_klee_json(self):
        """
        Write klee.json describing the single KLEE run of this analysis.
        """
        # No need to wrap the literal in dict() -- it already is one
        info = {"klee-out-1": OrderedDict([
            ("bcfile", self.program_bc),
            ("folder", self.kleeoutdir),
            ("function", "main"),
            ("phase", 1),
        ])}

        with open(self.kleejson, 'w') as file:
            json.dump(info, file)

    def run_complete_analysis(self):
        """
        Run all consecutive steps of the analysis.
        """
        self.run_initialization()
        self.run_klee()
        self.run_finalization()

    def run_initialization(self):
        """
        Create the SHAMROCK output directory structure and store run
        metadata (info.json, klee.json, a copy of bitcode and config).
        """
        # Create some empty directories
        makedirs(self.kleedir)
        makedirs(self.bcdir)

        # Copy the unmodified bitcode file
        shutil.copy2(self.bitcodefile, self.program_bc)

        # Copy macke's config file
        shutil.copy2(CONFIGFILE, self.rundir)

        self.generate_klee_json()

        # Store some basic information about the current run
        with open(path.join(self.rundir, "info.json"), 'w') as file:
            info = OrderedDict()
            info["macke-git-version-hash"] = get_current_git_hash()
            info["llvm-opt-git-version-hash"] = get_llvm_opt_git_hash()
            info["klee-git-version-hash"] = get_klee_git_hash()
            info["analyzed-bitcodefile"] = path.abspath(self.bitcodefile)
            info["run-argv"] = sys.argv
            info["shamrock"] = "This is a pure KLEE analysis of main"
            info["comment"] = self.comment
            json.dump(info, file)

        self.qprint("Analysis started at %s" % self.starttime)

        # Robustness fix: the original blindly parsed flags_user[0],
        # which crashes when no flags were given or when --max-time is
        # not the first flag. Scan for the flag and skip the print if
        # it is absent.
        maxtime = next((flag for flag in self.flags_user
                        if flag.startswith("--max-time=")), None)
        if maxtime is not None:
            self.qprint("KLEE Timeout set to %s" % (
                self.starttime +
                timedelta(seconds=int(maxtime[len("--max-time="):]))))

    def run_klee(self):
        """
        Run KLEE on the given program.
        """
        execute_klee(
            self.program_bc, "main", self.kleeoutdir, flags=self.flags_user,
            posixflags=self.posixflags, posix4main=self.posix4main)

    def run_finalization(self):
        """
        Print a summary and write the result to the MACKE directory.
        """
        self.endtime = datetime.now()

        # Export all the data gathered so far to a json file
        with open(path.join(self.rundir, "timing.json"), 'w') as file:
            info = OrderedDict()
            info["start"] = self.starttime.isoformat()
            # Shamrock has no real phase two; reuse the end time
            info["start-phase-two"] = self.endtime.isoformat()
            info["end"] = self.endtime.isoformat()
            json.dump(info, file)

        self.qprint("Analysis ended at %s" % self.endtime)

    def qprint(self, *args, **kwargs):
        """
        Call pythons print, if SHAMROCK is not set to be quiet.
        """
        if not self.quiet:
            print(*args, **kwargs)
# --- tests/test_CallGraph.py (continued) ---
# NOTE(review): the class header and the first test method of
# TestCallGraph lie outside this chunk; only the fully visible
# methods are reproduced below.
class TestCallGraph(unittest.TestCase):
    """Ordering and grouping checks for the MACKE call graph."""

    def test_candidates_for_symbolic_encapsulation_circle(self):
        # A cyclic call graph must still yield a deterministic order
        encapsulable = CallGraph(
            "./examples/doomcircle.bc").list_symbolic_encapsulable()
        self.assertEqual(encapsulable, ['a', 'b', 'c'])

    def test_candidates_for_symbolic_encapsulation_small(self):
        encapsulable = CallGraph(
            "./examples/small.bc").list_symbolic_encapsulable()
        self.assertEqual(len(encapsulable), 3)
        # Callees must be analyzed before their caller f1
        self.assertTrue(encapsulable.index("f2") < encapsulable.index("f1"))
        self.assertTrue(encapsulable.index("f3") < encapsulable.index("f1"))

    def test_edges_for_call_chain_propagation_divisible(self):
        groups = CallGraph(
            "./examples/divisible.bc").group_independent_calls()

        self.assertEqual(6, len([pair for run in groups for pair in run]))

        # These asserts are specific to the grouping strategy.
        # If you change the strategy, you probably have to change them.
        self.assertEqual(groups[0], [
            ('divby10', 'divby2'), ('divby10', 'divby5'),
            ('divby30', 'divby3'), ('divby6', 'divby2'),
            ('divby6', 'divby3')])
        self.assertEqual([('divby30', 'divby10')], groups[1])

    def test_edges_for_call_chain_propagation_circle(self):
        groups = CallGraph(
            "./examples/doomcircle.bc").group_independent_calls()
        self.assertEqual(groups, [[('c', 'a')], [('a', 'b')], [('b', 'c')]])

    def test_edges_for_call_chain_propagation_small(self):
        groups = CallGraph("./examples/small.bc").group_independent_calls()
        self.assertEqual(groups, [[('f1', 'f2'), ('f1', 'f3')]])

    def test_edges_for_call_chain_propagation_factorial(self):
        # Recursive functions produce a self edge
        groups = CallGraph(
            "./examples/factorial.bc").group_independent_calls()
        self.assertEqual(groups, [[('fac', 'fac')]])


# --- tests/test_Config.py ---
import unittest

from macke.config import check_config


class TestValidConfig(unittest.TestCase):
    """Smoke test: the shipped config.ini must pass validation."""

    def test_valid_config(self):
        # check_config raises on an invalid configuration
        check_config()


# --- tests/test_ErrorChain.py ---
from macke.ErrorChain import reconstruct_all_error_chains
from macke.Macke import Macke


class TestErrorChain(unittest.TestCase):
    """End-to-end checks for error chain reconstruction."""

    def run_macke_test_on_file(self, bcfile):
        """Run both MACKE phases on bcfile and return the Macke object."""
        macke_run = Macke(bcfile, quiet=True,
                          flags_user=["--max-time=60"],
                          posix4main=['--sym-args', '1', '1', '2'])
        macke_run.run_initialization()
        macke_run.run_phase_one()
        macke_run.run_phase_two()
        macke_run.delete_directory()
        return macke_run

    def test_with_not42(self):
        macke_run = self.run_macke_test_on_file("examples/not42.bc")
        chains = reconstruct_all_error_chains(
            macke_run.errorregistry, macke_run.callgraph)

        # Exactly one vulnerable instruction ...
        self.assertEqual(len(chains), 1)
        # ... with a single one-function chain
        self.assertEqual(next(iter(chains.values())), [['not42']])

    def test_with_chain(self):
        macke_run = self.run_macke_test_on_file("examples/chain.bc")
        chains = reconstruct_all_error_chains(
            macke_run.errorregistry, macke_run.callgraph)

        self.assertEqual(len(chains), 1)
        # One chain through all functions up to main
        self.assertEqual(
            next(iter(chains.values())), [['c4', 'c3', 'c2', 'c1', 'main']])

    def test_with_split(self):
        macke_run = self.run_macke_test_on_file("examples/split.bc")
        chains = reconstruct_all_error_chains(
            macke_run.errorregistry, macke_run.callgraph)

        # All errors share the same vulnerable instruction ...
        self.assertEqual(len(chains), 1)
        # ... reachable via two distinct chains to top
        self.assertEqual(
            next(iter(chains.values())),
            [['bottom', 'left', 'top'], ['bottom', 'right', 'top']])

    def test_with_small(self):
        macke_run = self.run_macke_test_on_file("examples/small.bc")
        chains = reconstruct_all_error_chains(
            macke_run.errorregistry, macke_run.callgraph)

        self.assertEqual(len(chains), 3)

        for vulninst, chainlist in chains.items():
            if vulninst.endswith("small.c:4"):
                self.assertEqual(chainlist, [['f2', 'f1']])
            elif vulninst.endswith("small.c:9"):
                self.assertEqual(chainlist, [['f3', 'f1']])
            elif vulninst.endswith("small.c:10"):
                self.assertEqual(chainlist, [['f3', 'f1']])
            else:
                raise Exception("Unexpected vulnerable instruction")

    def test_with_sanatized(self):
        macke_run = self.run_macke_test_on_file("examples/sanatized.bc")
        chains = reconstruct_all_error_chains(
            macke_run.errorregistry, macke_run.callgraph)

        self.assertEqual(len(chains), 1)
        # c3 sanitizes the error, so the chain ends there
        self.assertEqual(next(iter(chains.values())), [['c4', 'c3']])


# --- tests/test_ErrorRegistry.py ---
from macke.Error import (
    get_corresponding_ktest, get_reason_for_error, get_vulnerable_instruction)


class TestErrorRegistry(unittest.TestCase):
    """Checks for the .err-file helper functions in macke.Error."""

    def test_get_corresponding_ktest_file(self):
        self.assertEqual(
            "test000001.ktest",
            get_corresponding_ktest("test000001.assert.err"))

    def test_get_corresponding_ktest_directory(self):
        self.assertEqual(
            "/some/dir/test000001.ktest",
            get_corresponding_ktest("/some/dir/test000001.macke.err"))

    def test_get_corresponding_ktest_dirwithdot(self):
        # Dots inside directory names must not confuse suffix handling
        self.assertEqual(
            "/sub.dir/test000001.ktest",
            get_corresponding_ktest("/sub.dir/test000001.ptr.err"))

    def test_get_reason_for_error(self):
        self.assertEqual(
            "ASSERTION FAIL: i != 42",
            get_reason_for_error("examples/simple.assert.err"))

    def test_get_vulnerable_instruction(self):
        self.assertEqual(
            "/some/path/file.c:21",
            get_vulnerable_instruction("examples/simple.assert.err"))


# --- tests/test_Macke.py ---
class TestMackeGeneral(unittest.TestCase):
    """General sanity checks for the Macke class."""

    def test_invalid_file_assertion(self):
        # A non-existing bitcode file must be rejected immediately
        with self.assertRaises(AssertionError):
            Macke("randomFileThatDoesNot.exists")


# --- tests/test_Macke_PhaseOne.py ---
class TestMackePhaseOne(unittest.TestCase):
    """Phase one (isolated symbolic execution) on the example programs."""

    def run_macke_test_on_file(self, bcfile):
        """Run only phase one on bcfile and return the Macke object."""
        macke_run = Macke(bcfile, quiet=True, flags_user=["--max-time=60"])
        macke_run.run_initialization()
        macke_run.run_phase_one()
        macke_run.delete_directory()
        return macke_run

    def test_with_divisible(self):
        macke_run = self.run_macke_test_on_file("examples/divisible.bc")

        self.assertEqual(macke_run.testcases, 10)
        self.assertEqual(macke_run.errorregistry.errorcounter, 0)
        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 0)

    def test_with_one_asserts(self):
        macke_run = self.run_macke_test_on_file("examples/not42.bc")

        self.assertEqual(macke_run.testcases, 2)
        self.assertEqual(macke_run.errorregistry.errorcounter, 1)
        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 1)

        self.assertEqual(len(macke_run.errorregistry.forfunction), 1)
        self.assertEqual(
            len(macke_run.errorregistry.forfunction['not42']), 1)

    def test_main_generates_no_testcases(self):
        macke_run = self.run_macke_test_on_file("examples/main.bc")

        self.assertEqual(macke_run.testcases, 0)
        self.assertEqual(macke_run.errorregistry.errorcounter, 0)
        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 0)

        self.assertEqual(macke_run.errorregistry.forfunction, {})

    def test_with_several_asserts(self):
        macke_run = self.run_macke_test_on_file("examples/small.bc")

        # 2 for f2, 3 for f3 and sum of both for f1
        self.assertEqual(macke_run.testcases, 10)
        # 3 for f1, 1 for f2, 2 for f3
        self.assertEqual(macke_run.errorregistry.errorcounter, 6)
        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 3)

        self.assertEqual(len(macke_run.errorregistry.forfunction), 3)
        self.assertEqual(len(macke_run.errorregistry.forfunction['f1']), 3)
        self.assertEqual(len(macke_run.errorregistry.forfunction['f2']), 1)
        self.assertEqual(len(macke_run.errorregistry.forfunction['f3']), 2)


# --- tests/test_Macke_PhaseTwo.py ---
class TestMackePhaseTwo(unittest.TestCase):
    """Phase two (error propagation) on the example programs."""

    def run_macke_test_on_file(self, bcfile, excludes_in_phase_two):
        """Run both phases on bcfile and return the Macke object."""
        macke_run = Macke(
            bcfile, quiet=True, flags_user=["--max-time=60"],
            exclude_known_from_phase_two=excludes_in_phase_two)
        macke_run.run_initialization()
        macke_run.run_phase_one()
        macke_run.run_phase_two()
        macke_run.delete_directory()
        return macke_run

    def test_with_small(self):
        macke_run = self.run_macke_test_on_file("examples/small.bc", False)

        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 3)

        # Check, that all three errors were prepended and triggered again
        for vulninst in ["small.c:4", "small.c:9", "small.c:10"]:
            self.assertTrue(any(
                err.vulnerable_instruction.endswith(vulninst) and
                err.errfile.endswith(".macke.err")
                for err in macke_run.errorregistry.forfunction["f1"]),
                "Missing %s" % vulninst)

    def test_with_chain(self):
        macke_run = self.run_macke_test_on_file("examples/chain.bc", False)

        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 4)

        rooterror = macke_run.errorregistry.forfunction['c4'][0]

        # Follow the propagated errors c4 -> c3 -> c2 -> c1 step by step
        chainheads = [rooterror]
        for step in range(1, 4):
            nextheads = list()
            for error in chainheads:
                nextheads.extend(
                    macke_run.errorregistry.mackeforerrfile[error.errfile])
            chainheads = nextheads[:]
            self.assertTrue(chainheads, "Iteration %d failed" % step)
        self.assertEqual(chainheads[0].entryfunction, "c1")

    def test_with_sanatized(self):
        macke_run = self.run_macke_test_on_file("examples/sanatized.bc", True)

        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 2)
        self.assertEqual(macke_run.kleecount, 4 + 1)


# --- tests/test_Macke_WithMain.py ---
class TestMackeWithMain(unittest.TestCase):
    """Runs that include a symbolically executed main function."""

    def run_macke_test_on_file(self, bcfile):
        """Run both phases with POSIX flags and return the Macke object."""
        macke_run = Macke(bcfile, quiet=True,
                          flags_user=["--max-time=60"],
                          posixflags=["--sym-files", "1", "2"],
                          posix4main=['--sym-args', '1', '1', '2'])
        macke_run.run_initialization()
        macke_run.run_phase_one()
        macke_run.run_phase_two()
        macke_run.delete_directory()
        return macke_run

    def test_with_justmain(self):
        macke_run = self.run_macke_test_on_file("examples/justmain.bc")

        self.assertEqual(macke_run.testcases, 2)
        self.assertEqual(macke_run.errorregistry.errorcounter, 1)
        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 1)

    def test_with_chain(self):
        macke_run = self.run_macke_test_on_file("examples/chain.bc")

        self.assertEqual(
            macke_run.errorregistry.count_functions_with_errors(), 5)

        # chain.c just has one vulnerable instruction
        self.assertEqual(
            macke_run.errorregistry.count_vulnerable_instructions(), 1)


# --- tests/test_dummy.py ---
class TestDummy(unittest.TestCase):
    """Trivial check that the test runner itself works."""

    def test_dummy(self):
        self.assertEqual(1 + 1, 2)


# --- tests/test_llvm_wrapper.py ---
from macke import llvm_wrapper


class TestLLVMWrapper(unittest.TestCase):
    """Smoke tests for the LLVM opt pass wrapper."""

    def test_extract_callgraph(self):
        callgraph = llvm_wrapper.extract_callgraph("./examples/divisible.bc")
        # Just a few asserts - the full callgraph is tested in the pass repo
        self.assertIn("main", callgraph)
        self.assertIn("divby5", callgraph)
        self.assertIn("divby10", callgraph)