├── .gitignore ├── dataset.tar.gz ├── .pre-commit-config.yaml ├── setup.cfg ├── docs ├── debugging_tips.md ├── framac_setup.md ├── GNUmakefile_template └── README.md ├── scripts ├── devaide ├── README.md ├── getstats.py ├── devcmds ├── zfpid.py ├── zfpcmds ├── evalcmds └── BinValueSetEval.py ├── LICENSE ├── tools ├── rosette │ ├── synthesize.rkt │ └── perform_synthesis.sh └── frama-c │ └── prettyvsa.ml ├── dockerfile-zfp ├── zfp.py ├── README.md └── core ├── utilities ├── __init__.py └── framac_engineer.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | zfp-0.* 3 | dataset* 4 | -------------------------------------------------------------------------------- /dataset.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yellowbyte/zero-footprint-opaque-predicates/HEAD/dataset.tar.gz -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/pycqa/flake8 2 | rev: '3.9.2' # pick a git hash / tag to point to 3 | hooks: 4 | - id: flake8 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,scripts,*migrations* 3 | max-line-length = 80 4 | per-file-ignores = 5 | core/utilities/*.py: E211 6 | 7 | [darglint] 8 | docstring_style=sphinx 9 | enable=DAR104 10 | strictness=long 11 | -------------------------------------------------------------------------------- /docs/debugging_tips.md: -------------------------------------------------------------------------------- 1 | ## Python Exception? 2 | 3 | Run our program using `zfptest` instead of `zfp`. This allows user to inspect the metadata. 
4 | 5 | Also, set `--no-delmetadata` or else metadata will be deleted. 6 | 7 | Look inside the metadata folder (folder name will always start with "zfp-"): 8 | * Did a ".eva" folder get generated? If not, check that your GNUmakefile actually runs correctly 9 | * Did a vsa.json file get generated? If not, the parsing of Frama-C output failed. Perhaps try setting `value_set_limit` to a smaller value 10 | 11 | If you ran this tool multiple times, multiple "zfp-" folders will be created. To find the most recent one that corresponds to your current exception: `ls -ltr`. The last listed "zfp-" folder will be the most recent one. 12 | -------------------------------------------------------------------------------- /scripts/devaide: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### Docker 4 | # detach from container terminal: Ctrl-P and Ctrl-Q 5 | # how to de-attach: https://stackoverflow.com/questions/19688314/how-do-you-attach-and-detach-from-dockers-process 6 | 7 | ### Flake8 8 | # pretty print number of violations: 9 | # flake8 --statistics src/* 10 | # all locations of a specific violation: 11 | # flake8 --select W293 src/* 12 | 13 | # count line numbers in file excluding comments and spaces 14 | countlines () 15 | { 16 | # to remove blank lines: 17 | # https://stackoverflow.com/questions/114814/count-non-blank-lines-of-code-in-bash 18 | # to remove C comments: 19 | # https://unix.stackexchange.com/questions/317795/remove-comments-in-a-c-file 20 | perl -0777 -pe 's,/\*.*?\*/,,gs' ${1} | sed '/^\s*$/d' | wc -l 21 | } 22 | 23 | # delete trailing whitespaces from a file 24 | trailingno () 25 | { 26 | sed --in-place 's/[[:space:]]\+$//' ${1} 27 | } 28 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | ## Files 2 | 3 | * `BinValueSetEval.py`: given an obfuscated binary, check if 
binary abstract interpretation (available in BinaryNinja) can identify our opaque predicates by evaluating a variable's value set under the corresponding predicate 4 | * `zfpid.py`: print the locations of all the opaque predicates 5 | * We purposely make our opaque predicates' obfuscation (i.e., injected non-executable code) deterministic so we can detect them for evaluation with other deobfuscation tools 6 | * `getstats.py`: given a file containing the correct opaque predicate locations (can be retrieved with zfpid.py) and another file containing the opaque predicate locations identified by a deobfuscation tool, evaluate how well the deobfuscation tool performs 7 | * `zfpcmds`: a host of bash functions to build, run, and clean up this project 8 | * `devcmds`: a host of bash functions to aid development 9 | * `evalcmds`: a host of bash functions to help perform evaluations/obfuscations on multiple binaries 10 | * run this command to use: source scripts/evalcmds 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yu-Jye 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/framac_setup.md: -------------------------------------------------------------------------------- 1 | ## Frama-C Setup 2 | 3 | [Frama-C](https://frama-c.com/) is a powerful program analysis tool. We use it in this project for its implementation of [abstract interpretation](https://www.di.ens.fr/~cousot/AI/IntroAbsInt.html). As for why we need to use abstract interpretation, check out [our project overview](README.md). 4 | 5 | [Here is a template to use for setting up Frama-C](GNUmakefile_template). It is based on the GNUmakefile found in [Frama-C's open source case studies](https://git.frama-c.com/pub/open-source-case-studies). Simply change all occurrences of \ to the filename of the single C file (without the .c extension), rename the template to GNUmakefile, and place the template in the target source code folder. 6 | 7 | Suppose your single C file is called "simple.c", to automate renaming use this command: `sed -i 's//simple/g' GNUmakefile` 8 | 9 | If you are running this tool without using our container, you will also have to change the argument to `-load-script` to the absolute filepath that points to `prettyvsa.ml` on your machine. `prettyvsa.ml` is relative to the root of this project: `tools/frama-c/prettyvsa.ml`. 10 | 11 | Frama-C has some knobs to turn that allows user to tune its analysis. We plan to make another tutorial that explains some of those knobs in the future. 
12 | -------------------------------------------------------------------------------- /scripts/getstats.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | from pprint import pprint 5 | 6 | 7 | 8 | file1 = sys.argv[1] # correct (original) 9 | file2 = sys.argv[2] # identification from tool (e.g., binsec) 10 | 11 | with open(file1, "r") as f: 12 | file1_content = f.readlines() 13 | 14 | with open(file2, "r") as f: 15 | file2_content = f.readlines() 16 | 17 | file1_content = set(int(i.rstrip().rstrip("L"),16) for i in file1_content) 18 | file2_content = set(int(i.rstrip().rstrip("L"),16) for i in file2_content) 19 | 20 | # metrics 21 | fn = len(file1_content.difference(file2_content)) 22 | fp = len(file2_content.difference(file1_content)) 23 | tp = len(file1_content.intersection(file2_content)) 24 | 25 | print("FN: "+str(fn)) # elements present in file1 and not file2 (i.e., inserted opaque predicates not identified) 26 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 27 | #pprint([hex(addr) for addr in list(file1_content.difference(file2_content))]) 28 | #print("") 29 | print("FP: "+str(fp)) # elements present in file2 and not file1 (i.e., bb misidentified as opaque predicates) 30 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 31 | #pprint([hex(addr) for addr in list(file2_content.difference(file1_content))]) 32 | #print("") 33 | print("TP: "+str(tp)) 34 | print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 35 | #pprint([hex(addr) for addr in list(file1_content.intersection(file2_content))]) 36 | -------------------------------------------------------------------------------- /scripts/devcmds: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # remove all stopped containers 4 | drm () 5 | { 6 | docker rm $(docker ps -a -q) 7 | } 8 | # stop and remove all running docker containers 9 | dsrm () 10 | { 11 | docker stop $(docker ps -a -q) 12 | docker rm $(docker ps -a -q) 13 | } 14 | 
15 | # restart docker 16 | # https://stackoverflow.com/questions/31365827/cannot-stop-or-restart-a-docker-container 17 | dfresh () 18 | { 19 | sudo systemctl restart docker.socket docker.service 20 | } 21 | 22 | ### Docker 23 | # detach from container terminal: Ctrl-P and Ctrl-Q 24 | # how to de-attach: https://stackoverflow.com/questions/19688314/how-do-you-attach-and-detach-from-dockers-process 25 | 26 | ### Flake8 27 | # pretty print number of violations: 28 | # flake8 --statistics core/* 29 | # all locations of a specific violation: 30 | # flake8 --select W293 core/* 31 | 32 | # count line numbers in file excluding comments and spaces 33 | clines () 34 | { 35 | # to remove blank lines: 36 | # https://stackoverflow.com/questions/114814/count-non-blank-lines-of-code-in-bash 37 | # to remove C comments: 38 | # https://unix.stackexchange.com/questions/317795/remove-comments-in-a-c-file 39 | perl -0777 -pe 's,/\*.*?\*/,,gs' ${1} | sed '/^\s*$/d' | wc -l 40 | } 41 | 42 | # delete trailing whitespaces from a file 43 | rmt () 44 | { 45 | sed --in-place 's/[[:space:]]\+$//' ${1} 46 | } 47 | -------------------------------------------------------------------------------- /docs/GNUmakefile_template: -------------------------------------------------------------------------------- 1 | # Makefile template for Frama-C/Eva case studies. 2 | # For details and usage information, see the Frama-C User Manual. 3 | 4 | ### Prologue. Do not modify this block. ####################################### 5 | -include path.mk 6 | FRAMAC ?= frama-c 7 | include $(shell $(FRAMAC)-config -print-share-path)/analysis-scripts/prologue.mk 8 | ############################################################################### 9 | 10 | # Edit below as needed. Suggested flags are optional. 
11 | 12 | MACHDEP = x86_32 13 | 14 | ## Preprocessing flags (for -cpp-extra-args) 15 | CPPFLAGS += \ 16 | -D__FRAMAC__EVA \ 17 | 18 | ## General flags 19 | FCFLAGS += \ 20 | -kernel-warn-key typing:implicit-function-declaration=abort \ 21 | 22 | ## Eva-specific flags 23 | EVAFLAGS += \ 24 | -eva-warn-key builtins:missing-spec=abort \ 25 | -eva-no-alloc-returns-null \ 26 | -eva-slevel 100 \ 27 | -eva-auto-loop-unroll 300 \ 28 | -load-script /zfp/tools/frama-c/prettyvsa.ml \ 29 | 30 | ## GUI-only flags 31 | FCGUIFLAGS += \ 32 | 33 | ## Analysis targets (suffixed with .eva) 34 | TARGETS = .eva 35 | 36 | ### Each target .eva needs a rule .parse with source files as prerequisites 37 | .parse: \ 38 | .c 39 | 40 | ### Epilogue. Do not modify this block. ####################################### 41 | include $(shell $(FRAMAC)-config -print-share-path)/analysis-scripts/epilogue.mk 42 | ############################################################################### 43 | -------------------------------------------------------------------------------- /tools/rosette/synthesize.rkt: -------------------------------------------------------------------------------- 1 | #lang rosette 2 | 3 | 4 | (require rosette/lib/angelic ; provides `choose*` 5 | rosette/lib/synthax ; provides `??` 6 | rosette/lib/match) ; provides `match` 7 | ; Tell Rosette we really do want to use integers. 8 | (current-bitwidth #f) 9 | 10 | 11 | (struct expr (identifier op digit) #:transparent) 12 | 13 | (define (neq? a b) 14 | (not (eq? a b))) 15 | 16 | (define (interpret p input) 17 | (let ([acceptable-consts (list 0 1 2 -1)] 18 | [acceptable-ops (list > < >= <= eq? neq?)] 19 | [ismember? (lambda (item lst) 20 | (ormap (lambda (cur) (eq? item cur)) 21 | lst))]) 22 | (match p 23 | [(expr _ op digit) (and (ismember? digit acceptable-consts) 24 | (ismember? op acceptable-ops) 25 | (op input digit))]))) 26 | 27 | (define sketch 28 | (expr 'x [choose > < >= <= eq? neq?] 
[choose 0 1 2 -1])) 29 | 30 | ; command args 31 | (define raw-args (vector->list (current-command-line-arguments))) 32 | (define op-option (first raw-args)) 33 | (define raw-vsa (rest raw-args)) 34 | (define vsa (map (lambda (n) (string->number n)) raw-vsa)) 35 | 36 | 37 | (define identify-op 38 | (solve 39 | (begin 40 | (for ([i vsa]) 41 | (cond 42 | [(string=? op-option "t") (assert (interpret sketch i))] ; always true 43 | [else (assert (not (interpret sketch i)))]) ; always false 44 | ) 45 | ))) 46 | 47 | (cond 48 | ([sat? identify-op] (evaluate sketch identify-op)) 49 | (else (display "unsat"))) 50 | -------------------------------------------------------------------------------- /scripts/zfpid.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # given a binary obfuscated with zfp, identify all their virtual addresses 4 | # We purposely makes our opaque predicates' obfuscation (i.e., injected non-executable code) deterministic so we can detect it for evaluation with other deobfuscation tools 5 | 6 | 7 | from binaryninja import * 8 | 9 | 10 | def main(filepath): 11 | """ 12 | """ 13 | bv = BinaryViewType.get_view_of_file(filepath) 14 | bv.update_analysis_and_wait() 15 | if bv is None: 16 | print("Couldn't open {}".format(filepath)) 17 | sys.exit() 18 | 19 | for func in bv.functions: 20 | for bbl in func.basic_blocks: 21 | instrs = bbl.get_disassembly_text() 22 | # Pattern matching our obfuscation 23 | if not ( 24 | str(instrs[0].tokens[0]) == "xor" and str(instrs[0].tokens[2]) == "eax" and str(instrs[0].tokens[4]) == "eax" and 25 | str(instrs[1].tokens[0]) == "xor" and str(instrs[1].tokens[2]) == "esp" and str(instrs[1].tokens[4]) == "esp" and 26 | str(instrs[2].tokens[0]) == "xor" and str(instrs[2].tokens[2]) == "ebp" and str(instrs[2].tokens[4]) == "ebp" and 27 | str(instrs[3].tokens[0]) == "add" and str(instrs[3].tokens[2]) == "esp" and str(instrs[3].tokens[4]) == "eax" 28 | ): 29 | continue 30 | 
assert len(bbl.incoming_edges) == 1 31 | parent_bbl = bbl.incoming_edges[0].source 32 | print(hex(parent_bbl.start + (parent_bbl.length-parent_bbl[-1][-1]))) 33 | 34 | 35 | if __name__ == "__main__": 36 | main(sys.argv[1]) 37 | -------------------------------------------------------------------------------- /tools/rosette/perform_synthesis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # read json file in as a dictionary 5 | declare -A vsas 6 | while IFS="=" read -r key value 7 | do 8 | vsas[$key]="$value" 9 | done < <(jq -r "to_entries|map(\"\(.key)=\(.value)\")|.[]" $1) 10 | 11 | 12 | # iterate each key in dictionary to try to synthesize an oapque predicate for it 13 | # each key is of this format: loc:var_name 14 | for key in "${!vsas[@]}" 15 | do 16 | # remove array braces and change comma to empty space 17 | cur_vsa=$(echo "${vsas[$key]//,/ }" | tr -d '[],') 18 | 19 | # identify opaque predicate that always evaluate to false 20 | result=$(racket ./tools/rosette/synthesize.rkt f ${cur_vsa}) 21 | if test "${result}" != "unsat" 22 | then 23 | comparator=$(echo ${result} | tr -d '()' | awk '{print $3}') 24 | constant=$(echo ${result} | tr -d '()' | awk '{print $4}') 25 | if test "${comparator}" == "#" 26 | then 27 | comparator="!=" 28 | elif test "${comparator}" == "#" 29 | then 30 | comparator="==" 31 | fi 32 | echo f ${key} ${comparator} ${constant} 33 | 34 | continue 35 | fi 36 | 37 | # if no opaquely falase predicate, identify opaque predicate that 38 | # always evaluate to true instead 39 | result=$(racket ./tools/rosette/synthesize.rkt t ${cur_vsa}) 40 | if test "${result}" != "unsat" 41 | then 42 | comparator=$(echo ${result} | tr -d '()' | awk '{print $3}') 43 | constant=$(echo ${result} | tr -d '()' | awk '{print $4}') 44 | if test "${comparator}" == "#" 45 | then 46 | comparator="!=" 47 | elif test "${comparator}" == "#" 48 | then 49 | comparator="==" 50 | fi 51 | 52 | echo t ${key} 
${comparator} ${constant} 53 | fi 54 | 55 | echo unsat ${key} ${vsas[$key]//,/ } 56 | done 57 | -------------------------------------------------------------------------------- /scripts/zfpcmds: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Build docker images 4 | zfpbuild () 5 | { 6 | docker build . --tag zfp -f dockerfile-zfp 7 | } 8 | 9 | # start analysis 10 | # testing 11 | zfptest () 12 | { 13 | if [ $# -eq 0 ] 14 | then 15 | echo "Need to supply path to folder containing code to obfuscate" 16 | else 17 | if [[ ${1} == /* ]] 18 | then 19 | # absolute filepath provided as argument 20 | filepath=${1%/} 21 | else 22 | # relative filepath provided as argument 23 | filepath=$(pwd)/${1%/} 24 | fi 25 | 26 | # container action 27 | docker run -t -d -v $(pwd):/zfp -v ${filepath}:/tmp/${filepath##*/}/ --name zfp zfp 28 | docker exec -it zfp ln -sf /usr/bin/python3.10 /usr/bin/python3 29 | docker exec -it zfp bash -c 'cd /zfp; exec "${SHELL:-sh}"' 30 | fi 31 | } 32 | 33 | # start analysis 34 | # production 35 | zfp () 36 | { 37 | # commandline argument parsing 38 | # -m, --delmetadata, --no-delmetadata are irrelevant when running `zfp` 39 | # if you want info regarding metadata, use `zfptest` 40 | args="" 41 | while true; do 42 | case "$1" in 43 | -h | --help ) args+="--help "; shift; break ;; 44 | -m | --metadatadir ) args+="--metadatadir ${2} "; shift 2 ;; 45 | -l | --limits ) args+="--limits ${2} "; shift 2 ;; 46 | --delmetadata ) args+="--delmetadata "; shift ;; 47 | --no-delmetadata ) args+="--no-delmetadata "; shift ;; 48 | -- ) shift; break ;; 49 | * ) break ;; # no argument 50 | esac 51 | done 52 | 53 | if [ $# -eq 0 ] 54 | then 55 | echo "Need to supply path to folder containing code to obfuscate" 56 | else 57 | if [[ ${1} == /* ]] 58 | then 59 | # absolute filepath provided as argument 60 | filepath=${1%/} 61 | else 62 | # relative filepath provided as argument 63 | filepath=$(pwd)/${1%/} 64 | fi 65 | 
66 | # container action 67 | # mount the folder containing the obfuscation source, not including the path 68 | docker run -t -d -v ${filepath}:/zfp/${filepath##*/}/ --name zfp zfp 69 | docker exec -it zfp ln -sf /usr/bin/python3.10 /usr/bin/python3 70 | docker exec -it zfp python3 /zfp/zfp.py ${args} /zfp/${filepath##*/}/ 71 | echo "Obfuscation Completed. Delete Container:" 72 | zfpstop 73 | fi 74 | } 75 | 76 | # stop analysis 77 | zfpstop () 78 | { 79 | docker stop $(docker ps -q --filter ancestor=zfp) 80 | docker rm $(docker ps -qa --filter ancestor=zfp) 81 | } 82 | 83 | # attach to container after detach 84 | zfpattach () 85 | { 86 | docker exec -it zfp /bin/bash 87 | } 88 | 89 | # remove zfp metadata 90 | zfprm () 91 | { 92 | rm -rf zfp-0.* 93 | } 94 | -------------------------------------------------------------------------------- /dockerfile-zfp: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | MAINTAINER Yu-Jye Tung 3 | 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | ### Privilege for some installations 7 | USER root 8 | 9 | ### Install common necessary packages 10 | RUN apt-get clean 11 | RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get -y install python3 python3-pip 12 | RUN apt-get install -y build-essential && \ 13 | apt-get update && \ 14 | apt-get install -y pkg-config \ 15 | software-properties-common \ 16 | libgnomecanvas2-dev \ 17 | libgtksourceview2.0-dev \ 18 | gcc-multilib \ 19 | g++-multilib \ 20 | libgtk-3-dev \ 21 | libc6-dev \ 22 | libstdc++6 \ 23 | libffi-dev \ 24 | libz-dev \ 25 | zlib1g-dev \ 26 | libc6 \ 27 | clang \ 28 | wget \ 29 | vim \ 30 | gdb \ 31 | jq \ 32 | opam 33 | 34 | ### Install Rosette (Racket) 35 | RUN mkdir /rosette-install 36 | WORKDIR /rosette-install 37 | # racket install script is too big to store in git 38 | RUN wget https://download.racket-lang.org/installers/8.2/racket-8.2-x86_64-linux-cs.sh 39 | RUN chmod 755 racket-8.2-x86_64-linux-cs.sh 40 | 
RUN echo "yes\n1\n" | sh racket-8.2-x86_64-linux-cs.sh 41 | RUN raco setup 42 | RUN raco pkg install custom-load 43 | RUN raco pkg install rfc6455 44 | RUN raco pkg install rosette 45 | 46 | ### Install Frama-C 47 | RUN opam init --disable-sandboxing --shell-setup 48 | RUN eval $(opam env) && \ 49 | opam install -y depext && \ 50 | opam depext --install -y lablgtk3 lablgtk3-sourceview3 51 | RUN eval $(opam env) && \ 52 | opam depext --install -y frama-c 53 | # the ENVs are replacement for "eval $(opam env)" 54 | # eval $(opam env) doesn't work here since each RUN command runs a new shell in a new container: 55 | # https://stackoverflow.com/questions/56253582/how-do-i-run-the-eval-envkey-source-command-in-docker-using-dockerfile 56 | ENV OPAM_SWITCH_PREFIX='/root/.opam/default' 57 | ENV CAML_LD_LIBRARY_PATH='/root/.opam/default/lib/stublibs:/usr/local/lib/ocaml/4.08.1/stublibs:/usr/lib/ocaml/stublibs' 58 | ENV OCAML_TOPLEVEL_PATH='/root/.opam/default/lib/toplevel' 59 | ENV PKG_CONFIG_PATH='/root/.opam/default/lib/pkgconfig' 60 | ENV MANPATH=':/root/.opam/default/man' 61 | ENV PATH='/root/.opam/default/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' 62 | 63 | ### Install Python 3.10 64 | RUN apt update 65 | RUN apt install software-properties-common 66 | RUN add-apt-repository ppa:deadsnakes/ppa 67 | RUN apt -y install python3.10 68 | 69 | ### Move obfuscation tool to container 70 | RUN mkdir /zfp 71 | COPY ./core /zfp/core 72 | COPY ./zfp.py /zfp/zfp.py 73 | COPY ./tools /zfp/tools 74 | 75 | # since the tool assume this working directory 76 | WORKDIR /zfp 77 | -------------------------------------------------------------------------------- /zfp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | This is the entry point for the ZFP obfuscation tool. 
5 | """ 6 | 7 | import logging 8 | import os 9 | import shutil 10 | import sys 11 | import time 12 | from datetime import timedelta 13 | from random import random, seed 14 | from time import perf_counter 15 | 16 | from core import Zfp 17 | from core import parse_args, shell_exec, get_configs 18 | 19 | 20 | # Logging 21 | logger = logging.getLogger('') 22 | logger.setLevel(logging.DEBUG) 23 | 24 | 25 | def main(wdir, sdir, configs): 26 | """Perform Main. 27 | 28 | Args: 29 | wdir: working directory 30 | sdir: orginal directory that contains the code to obfuscate 31 | 32 | Return: 33 | None 34 | """ 35 | timer_start = perf_counter() 36 | obfuscated = Zfp(wdir, configs) 37 | timer_stop = perf_counter() 38 | 39 | # Compile the obfuscated source code 40 | shell_exec(' make -C '+wdir+' -f Makefile') 41 | 42 | # Statistics 43 | logging.info('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 44 | logging.info('current: ' 45 | + sdir) 46 | logging.info('number of value sets identified: ' 47 | + str(len(obfuscated.value_sets.keys()))) 48 | logging.info('number of opaque predicates: ' 49 | + str(obfuscated.op_nums)) 50 | logging.info('time it takes to obfuscate (seconds): ' 51 | + str(timer_stop-timer_start)) 52 | logging.info('time it takes to obfuscate (formatted): ' 53 | + str(timedelta(seconds=(timer_stop-timer_start)))) 54 | logging.info('Frama-C runtime (seconds): ' 55 | + str(obfuscated.framac_runtime)) 56 | logging.info('Frama-C runtime (formatted): ' 57 | + str(timedelta(seconds=(obfuscated.framac_runtime)))) 58 | 59 | # Move the compiled binary to original directory 60 | # Copy back everything except C files, zfp.log and vsa.json. 61 | # C files are modified by this tool (with OP added in) 62 | # zfp.log in wdir is the old log 63 | for subdir, _, files in os.walk(wdir): 64 | for _file in files: 65 | if _file.endswith('.c'): 66 | continue 67 | if _file == 'vsa.json': 68 | continue 69 | if _file == 'zfp.log': 70 | continue 71 | # analysis results from Frama-C. 
Don't need 72 | # or else can't rerun unless user manually delete these folders 73 | if subdir.endswith('.eva'): 74 | continue 75 | if subdir.endswith('.parse'): 76 | continue 77 | 78 | relative_filepath = os.path.relpath(subdir, wdir) 79 | shutil.move(os.path.join(wdir, relative_filepath, _file), 80 | os.path.join(sdir, relative_filepath, _file)) 81 | 82 | # Remove tmp working dir 83 | if configs['delete_metadata']: 84 | shutil.rmtree(wdir) 85 | 86 | 87 | if __name__ == '__main__': 88 | # Commandline argument parsing 89 | parse_args() 90 | 91 | # Create tmp working dir (wdir) 92 | # wdir: working directory (containing metadata like obfuscated source, etc.) 93 | # sdir: source directory (containing source code to obfuscate) 94 | configs = get_configs() 95 | sdir = configs['srcfolder'] 96 | millis = int(round(time.time() * 1000)) 97 | seed(millis) # Make random() as random as possible 98 | wdir = os.path.join(configs['metadata_dir'], 99 | 'zfp-'+str(random())) # noqa:S311 100 | shutil.copytree(sdir, wdir) 101 | 102 | # Set up logging 103 | fh = logging.FileHandler(os.path.join(sdir, 'zfp.log'), mode='w') 104 | fh.setLevel(logging.INFO) 105 | sh = logging.StreamHandler() 106 | sh.setLevel(logging.DEBUG) 107 | logger.addHandler(fh) 108 | logger.addHandler(sh) 109 | 110 | # Main 111 | main(wdir, sdir, configs) 112 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ## What Is Zero Footprint Opaque Predicates? 2 | 3 | For a detailed explanation, check out our [paper](https://rdcu.be/cpnNf). But if you just want a simplified explanation, this page is for you! The aim of this page is to simplify understanding the "what", "why", and "how" questions regarding this project. 
4 | 5 | It's called Zero Footprint Opaque Predicates because the opaque predicates it constructs aim to resemble every other predicate typically found in an unobfuscated binary. 6 | 7 | Why do we want opaque predicates to resemble real predicates? Because once an opaque predicate is detected, deobfuscation is as simple as changing the conditional branch instruction to an unconditional branch instruction. For opaque predicates' effects to last, they need to be stealthy against manual and automated analysis. To be stealthy against manual analysis (e.g., heuristic attacks, pattern matching attacks), the opaque predicates need to resemble real predicates _syntactically_. To be stealthy against automated analysis, the opaque predicates need to resemble real predicates _semantically_. 8 | 9 | Resembling real predicates syntactically means that they "look" like real predicates in the disassembly, whereas resembling real predicates semantically means that they "behave" like real predicates at program runtime. 10 | 11 | To syntactically resemble real predicates, the problem is twofold. 12 | 13 | Firstly, the construction of the invariants cannot syntactically stand out. To ensure this, our opaque predicates use naturally occurring invariants. An invariant is just a specific program behavior (e.g., `x > 10`) that is always true at a particular line of code, and every opaque predicate requires one invariant in order to disguise itself as a conditional branch instruction. Naturally occurring invariants are just invariants that already exist in the original program. We want to make this distinction because other approaches to constructing opaque predicates inject their own code to create the invariants (i.e., synthetic invariants) for their opaque predicates. 
And because they use synthetic invariants --- which are prone to be constructed in similar manners --- every opaque predicate they insert will likely have distinctive features that they all share and thus can stand out from the real predicates. __Naturally occurring invariants create natural diversity among our opaque predicates' invariants__ since they may not all be constructed in similar manners. 14 | 15 | Secondly, the construction of the opaque predicates using the invariants cannot syntactically stand out either. To ensure this, we first did a study to identify syntactic features that represent real predicates. We then use the tool [Rosette](https://emina.github.io/rosette/) to construct our opaque predicates such that the resulting opaque predicates share the syntactic features we identified. 16 | 17 | To semantically resemble real predicates, we use value sets as our invariants so our opaque predicates may take on different values at program runtime. A value set contains all possible values that can be assigned to a variable at a particular line of code. And we can make sure that the value sets we use DO contain all possible values by using [abstract interpretation](https://www.di.ens.fr/~cousot/AI/IntroAbsInt.html) (for this project we use [Frama-C's implementation](https://frama-c.com/fc-plugins/eva.html)). To use a value set as an invariant, the corresponding opaque predicate has to evaluate each value in the value set to the same truth value (T/F). Thankfully, Rosette can also make sure the constructed opaque predicates have that behavior. Note that the value sets inferred by abstract interpretation can still be easily detected if the value sets are constructed in a local context (i.e., based on a variable's semantics within a basic block). 
However, abstract interpretation performs whole-program analysis so the value sets it infers can also be constructed in a global context (i.e., based on a variable's semantics throughout multiple functions). 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | [Zero Footprint Opaque Predicates](docs/README.md) obfuscate single-file C source code with opaque predicates that aim to syntactically and semantically resemble real predicates in order to prevent heuristic attacks (e.g., pattern matching) while maintaining resilience against automated attacks. 4 | 5 | Currently, our inserted opaque predicates' obfuscation is a deterministic and impossible instruction sequence. This is to allow us to detect our opaque predicates so we can evaluate them with deobfuscation tools. In practice, the obfuscation should not always be the same sequence (or else it is easily detected from the obfuscation). 6 | 7 | ## Getting Started 8 | 9 | #### Requirements 10 | * Docker 11 | * Linux 12 | 13 | #### Installation (assumed in project root directory) 14 | 1. source scripts/zfpcmds 15 | 2. zfpbuild (will build a Docker image that contains everything you need to run this tool) 16 | 17 | #### To Run (assumed you ran `source scripts/zfpcmds`) 18 | [Option 1]: `zfp <path to folder containing code to obfuscate>` 19 | * The use of our container is invisible to the user. 20 | 21 | [Option 2]: `zfptest <path to folder containing code to obfuscate>` 22 | * This will drop the user inside the Docker container and give the user the ability to inspect or debug any potential errors when running the tool. 23 | * To obfuscate from inside the container: `python3 zfp.py /tmp/<folder name>`. 24 | 25 | [Option 3]: `python3 zfp.py <path to folder containing code to obfuscate>` 26 | * This option allows the user to use this tool without leveraging our container given that Frama-C, Rosette, jq, and Python version 3.10 or greater are installed. 
27 | 28 | For any of the three options, the obfuscated binary will be placed in \. 29 | 30 | __NOTE__: Make sure the folder containing the target source code has the following additional files: 31 | * `Makefile`: standard Makefile. The code will call `make` to compile the codebase after obfuscation. This is the default behavior but can be changed. [Here is a simple tutorial on how to write a Makefile](https://gist.github.com/yellowbyte/c23a6b25a4b3edf371777d21bd3dc7d0). 32 | * `GNUmakefile`: a Makefile with instructions on how to run Frama-C for the specified codebase. The number of value sets that can be inferred heavily depends on the settings in this file. [Here is a simple tutorial on how to write the Frama-C tailored GNUmakefile](docs/framac_setup.md). 33 | * Our dependence on Frama-C also means that our tool cannot obfuscate code that contains recursive calls, [as it is a limitation of Frama-C](https://www.frama-c.com/fc-plugins/eva.html). 34 | 35 | #### Settings 36 | The following are settings the user can change: 37 | * `--metadatadir `: filepath to directory where metadata will be stored. Defaults to /tmp. 38 | * `--delmetadata` or `--no-delmetadata`: decides whether to delete the metadata folder. Defaults to delete. 39 | * `--limits `: the max value set length to consider. Too small may lead to few synthesized opaque predicates. Too large may lead to a crash. Defaults to 100000000. In our dataset, for example, "tweetnacl-usable" will fail with the default limits (change to 10000 instead). 40 | 41 | The option `-h` or `--help` will also give the user information on the settings. 42 | 43 | #### Examples 44 | ```bash 45 | ##### [Option 1]: with `zfp` ##### 46 | # if you want to inspect metadata, use `zfptest` and the `--no-delmetadata` option 47 | yellowbyte:~/zero-footprint-opaque-predicates$ source scripts/zfpcmds 48 | yellowbyte:~/zero-footprint-opaque-predicates$ zfp --limits 30000 ./dataset/01_simple_if 49 | ...
#!/bin/bash

# Helper functions for running the ZFP evaluation over a benchmark folder.
# Every function takes the benchmark root as ${1}; each immediate
# sub-directory of that root is treated as one benchmark program.
# All expansions are quoted so that paths containing spaces keep working.


# print the path of the (first) obfuscated executable found under ${1}
# Arg 1: directory of one benchmark program
_zfp_find_exe()
{
    # return relative path
    find "${1}" -executable -type f ! -name "*.orig" ! -name "*.sh" | head -n 1
}

# perform the obfuscation
zfp_main()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        zfp "${filepath}"
    done
}

# find all the inserted OP from the obfuscation (non-executable code)
zfp_identify()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        exe=$(_zfp_find_exe "${filepath}")
        python3 scripts/identify_zfp.py "${exe}" > "${filepath}op_locations.zfp"
    done
}

# create dir for deobfuscation and binary vsa eval
zfp_evaldir()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        # -p: do not fail when the evaluation is re-run on the same dataset
        mkdir -p "${filepath}deobf" "${filepath}vsa"
    done
}

############################################################################################################################################
# deobfuscation with OpaquePredicatePatcher (binja)
zfp_opaquepredicatepatcher()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        exe=$(_zfp_find_exe "${filepath}")
        python3 -mOpaquePredicatePatcher.__main__ "${exe}" | grep -Ev "^(Analysis|start)" > "${filepath}deobf/opaquepredicatepatcher.zfp"
    done
}

# deobfuscation with BINSEC
zfp_binsec()
{
    # NOTE: you may have to change the BINSEC docker image version based on which one is installed on your machine
    # binsec-0.3 is the newest version at the time of experiment
    touch tmp.zfp
    docker run -t -d -v "$(pwd)/dataset:/home/binsec/dataset" --name binsec binsec/binsec-0.3:v1
    BINSEC_ID=$(docker ps -qf "name=^binsec$")
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        exe=$(_zfp_find_exe "${filepath}")
        # one run per backward-analysis bound; the k=16 run keeps the
        # historical un-suffixed output name (binsec.zfp)
        for k in 16 12 20 24; do
            if [ "${k}" -eq 16 ]; then
                outname="binsec.zfp"
            else
                outname="binsec-${k}.zfp"
            fi
            docker exec -it "${BINSEC_ID}" binsec -bw -bw-opaque -bw-k "${k}" "${exe}" > tmp.zfp
            grep "is opaque" tmp.zfp | awk '{ print $6 }' > "${filepath}deobf/${outname}"
        done
    done
    docker rm -f "${BINSEC_ID}"
    rm -f tmp.zfp
}

# get eval for how well the tool (e.g., opaquepredicatepatcher, binsec) deobfuscation performs
# Arg 2 (Type):
#   * opaquepredicatepatcher
#   * binsec
zfp_eval()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        python scripts/perform_eval.py "${filepath}op_locations.zfp" "${filepath}deobf/${2}.zfp" > "${filepath}deobf/eval_${2}.zfp"
    done
}

# get eval for how well the binary-level abstract interpretation performs
# Arg 2 (Type):
#   * binja
zfp_eval2()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        python scripts/perform_eval.py "${filepath}op_locations.zfp" "${filepath}vsa/${2}.zfp" > "${filepath}vsa/eval_${2}.zfp"
    done
}
############################################################################################################################################

# produce summary on benchmark in one file
# Arg 2 (Type):
#   * op_locations.zfp
#   * time.zfp
#   * deobf/opaquepredicatepatcher.zfp
#   * deobf/eval_opaquepredicatepatcher.zfp
#   * deobf/binsec.zfp
#   * deobf/eval_binsec.zfp
#   * vsa/binja.zfp
#   * vsa/eval_binja.zfp
zfp_summary()
{
    summary=$(basename "${2}")
    # start from an empty summary file; -f so a missing file is not an error
    rm -f "${summary}"
    touch "${summary}"
    for filepath in "${1}"/*/; do
        echo "############################################################" >> "${summary}"
        echo "${filepath}${2}" >> "${summary}"
        echo "############################################################" >> "${summary}"
        cat "${filepath}${2}" >> "${summary}"
    done
}

# compile the original binary
zfp_ogcreate()
{
    for filepath in "${1}"/*/; do
        echo "${filepath}"
        # skip the directory instead of running make (and popd) in the
        # wrong place when it cannot be entered
        pushd "${filepath}" || continue
        make -f MakefileOrig
        popd
    done
}
# Configurations
######################
__CONFIGS = {
    'metadata_dir': '/tmp',  # noqa

    'delete_metadata': True,

    'srcfolder': None,

    # Obfuscation for the inserted opaque predicates
    # We purposely make it deterministic so we can detect our
    # opaque predicates for evaluation with other deobfuscation tools.
    # Also in practice, the obfuscation shouldn't always be the same sequence.
    # Or else it can be easily detected from the obfuscation.
    'obfuscation': '__asm__ __volatile__(\"xor %eax, %eax;xor %esp, %esp;xor %ebp, %ebp; add %eax, %esp;\");',  # noqa

    # NOTE: python can only hold so many values in-memory.
    # Higher "value_set_limit" allows you to possibly generate
    # more opaque predicates but will also slow down program or
    # worst-case, prematurely terminate it.
    'value_set_limit': 100000000,  # we found this value to work well for our
    # benchmark. Can choose a larger value.
    # However, if program terminates prematurely, choose a smaller value (10000)

    # Specific to running Frama-C
    'framac_macro': 'Frama_C_show_each_',
    'framac_vars': ['__retres', 'Frama_C_entropy_source',
                    '__fc_', '__realloc_', '__malloc_'],
    # stubs to analyze frama-c
    'ignored_files': ['fc_stubs.c', 'fc_stubs.h'],
    # function specific to the frama-c value analysis
    'ignored_functions': ['eva_main'],
}


def parse_args(argv=None):
    """Parse the commandline and store the result in the global configs.

    Args:
        argv: list of argument strings to parse. Defaults to None, in which
            case argparse reads `sys.argv[1:]`

    Return:
        None
    """
    parser = argparse.ArgumentParser()
    # NOTE: adjacent string literals are concatenated verbatim, so every
    # fragment but the last has to end with a space.
    parser.add_argument('-m',
                        '--metadatadir',
                        type=str,
                        required=False,
                        help=('path to directory where metadata will be '
                              'stored. Default to /tmp'))
    parser.add_argument('-l',
                        '--limits',
                        type=int,
                        required=False,
                        help=('the max value set length to consider. '
                              'Too small may lead to few synthesized opaque '
                              'predicates. Too large may lead to crash. '
                              'Default to 100000000'))
    parser.add_argument('srcfolder',
                        type=str,
                        help='folder containing source code to obfuscate')
    parser.add_argument('--delmetadata',
                        action=argparse.BooleanOptionalAction,
                        required=False,
                        default=True,
                        help=('decides whether to delete the metadata '
                              'folder. Pass --no-delmetadata to keep it'))
    args = parser.parse_args(argv)
    # Set configurations
    set_configs(args)


def get_configs():
    """Return the global configuration dictionary.

    Returns:
        the module-level configs dict itself (not a copy), so changes made
        by `set_configs` are visible through it
    """
    return __CONFIGS


def set_configs(args):
    """Set `configs` based on commandline arguments `args`.

    Only options the user actually supplied override the defaults.

    Args:
        args: commandline arguments

    Return:
        None
    """
    __CONFIGS['srcfolder'] = args.srcfolder

    if args.metadatadir:
        __CONFIGS['metadata_dir'] = args.metadatadir
    if args.delmetadata is False:
        # by default True
        __CONFIGS['delete_metadata'] = False
    if args.limits:
        __CONFIGS['value_set_limit'] = args.limits


def shell_exec(cmd):
    """Run `cmd` (without a shell) and return what it printed to stdout.

    Args:
        cmd: command to run. Either a string, which is split on whitespace
            (so no argument may itself contain whitespace), or an already
            split sequence of arguments

    Returns:
        output from running cmd, decoded as UTF-8 with trailing newlines
        removed
    """
    logging.debug(cmd)
    argv = cmd.split() if isinstance(cmd, str) else list(cmd)
    # Capture_output argument is only available in Python >= 3.7
    result = subprocess.run(argv, capture_output=True)  # noqa
    output = result.stdout.decode('utf-8').rstrip('\n')
    if result.returncode != 0:
        # callers only get stdout back, so at least leave a trace of failures
        logging.warning('SHELL_EXEC: %s exited with status %d: %s',
                        argv[0] if argv else cmd,
                        result.returncode,
                        result.stderr.decode('utf-8', 'replace').rstrip('\n'))
    logging.debug('SHELL_EXEC: '+output)
    return output


def get_file_content(filepath, return_type='list'):
    """Bring content of file at `filepath` to memory.

    Args:
        filepath: file to read content of
        return_type: data structure to represent content in. 'list' gives
            one string per line with the line terminator removed; any other
            value gives the whole file as a single string

    Returns:
        the content of the file in the requested representation
    """
    with open(filepath, 'r') as f:
        if return_type == 'list':
            return [line.rstrip('\n') for line in f]
        return f.read()
# MLIL comparison operation name -> C-like operator used when the opaque
# predicate is rebuilt as a string. Signed and unsigned variants map to the
# same operator.
COMPARE = {
    "MLIL_CMP_E": " == ",
    "MLIL_CMP_NE": " != ",
    "MLIL_CMP_SLT": " < ",
    "MLIL_CMP_ULT": " < ",
    "MLIL_CMP_SLE": " <= ",
    "MLIL_CMP_ULE": " <= ",
    "MLIL_CMP_SGE": " >= ",
    "MLIL_CMP_UGE": " >= ",
    "MLIL_CMP_SGT": " > ",
    "MLIL_CMP_UGT": " > "
}

# Same bound as the default value set limit of the obfuscation pipeline.
VALUE_SET_LIMIT = 100000000


def eval_all_same(value_set, cond):
    """Evaluate `cond` once for every element of `value_set`.

    Args:
        value_set: iterable of values to test
        cond: Python expression over the variable `x` (EX: "x < 5")

    Returns:
        list with the result of `cond` for each value, in iteration order
    """
    # NOTE: `cond` is only ever built by `main` from the COMPARE table and an
    # integer constant. It is still evaluated without builtins so the
    # expression can do nothing but compare `x`.
    predicate = compile(cond, '<opaque-predicate>', 'eval')
    return [eval(predicate, {'__builtins__': {}}, {'x': x})
            for x in value_set]


def unsigned2signed(num):
    """Reinterpret `num` as a signed 32-bit integer.

    Args:
        num: integer to reinterpret

    Returns:
        `num - 2**32` when bit 31 of `num` is set, otherwise `num` unchanged
    """
    if num & 0x80000000:
        num = -0x100000000 + num
    return num


def collect_value_set(possible_values, limit=VALUE_SET_LIMIT):
    """Turn a possible-value-set object into a concrete set of values.

    Args:
        possible_values: object exposing either a `value` attribute (single
            value), a `values` attribute (explicit values) or a `ranges`
            attribute (objects with `start`, `end` and `step`; `end` is
            treated as inclusive)
        limit: a range with at least this many elements is considered
            intractable

    Returns:
        set of values, or None when the values cannot be enumerated (no
        known attribute, or a range that is too large)
    """
    if hasattr(possible_values, 'value'):
        return {possible_values.value}
    if hasattr(possible_values, 'values'):
        return set(possible_values.values)
    ranges = getattr(possible_values, 'ranges', None)
    if ranges is None:
        return None

    collected = set()
    for _range in ranges:
        try:
            span = range(_range.start, _range.end+1, _range.step)
            too_large = len(span) >= limit
        except (OverflowError, ValueError):
            # len() overflows for ranges wider than the platform word, and
            # range() rejects a zero step
            return None
        if too_large:
            # need a bound or else if value set too large Python will crash
            # (intractable). Give up on the whole value set: judging the
            # predicate on only part of its values would be meaningless.
            return None
        collected.update(span)
    return collected


def main(filepath, op_locations, outfile):
    """Write the address of every branch binja can prove to be opaque.

    For each MLIL SSA basic block that ends with a conditional, the
    condition is rebuilt as "x <op> <const>" and evaluated over the value
    set binja inferred for the left operand. If every value gives the same
    truth value, the address of the block's last instruction is written to
    `outfile`, one hexadecimal address per line.

    Args:
        filepath: path to the binary to analyze
        op_locations: lines of op_locations.zfp. Not used by the analysis;
            kept for debugging and for interface compatibility
        outfile: writable text file object receiving the addresses

    Return:
        None
    """
    bv = BinaryViewType.get_view_of_file(filepath)
    if bv is None:
        print("Couldn't open {}".format(filepath))
        # non-zero status so callers can tell that nothing was analyzed
        sys.exit(1)

    for func in bv.functions:
        ssa_func = func.mlil.ssa_form
        for bb in ssa_func:
            # bb ends with OP
            if_cond = bb[-1]  # the if condition (opaque predicate) instruction.
            # Always the last instruction of the bb

            if not hasattr(if_cond, 'condition'):
                # already seen. MLIL SSA splits instruction into multiple
                # instructions so the if statement can be split into
                # multiple instructions
                continue
            addr = hex(bb.get_disassembly_text()[-1].address)

            # const value
            if if_cond.condition.possible_values.type == RegisterValueType.ConstantValue:  # noqa
                # just const value
                # detection successful in this case since value set is of
                # size 1
                outfile.write(addr+'\n')
                continue

            # compare register or memory with a constant
            # in MLIL, stack variable is now a variable
            if len(if_cond.vars_read) == 1 and \
                    not hasattr(if_cond.condition, 'left'):
                # cond variable
                actual_cond = ssa_func.get_ssa_var_definition(
                    if_cond.vars_read[0]).src
            else:
                actual_cond = if_cond.condition

            if not hasattr(actual_cond, 'left'):
                if not hasattr(actual_cond, 'src'):
                    # an unimplemented instruction
                    continue
                # a cond:# or c:# object. Get definition to retrieve condition
                actual_cond = ssa_func.get_ssa_var_definition(
                    actual_cond.src).src
                if not hasattr(actual_cond, 'left'):
                    # the definition is not a binary operation either
                    continue
            if str(actual_cond.left.possible_values) == '':
                # value cannot be determined
                continue
            if str(actual_cond.right.value) == '':
                # not comparing with a constant
                continue

            operation = COMPARE.get(actual_cond.operation.name)
            if operation is None:
                # a binary operation that is not a comparison
                continue
            # recreate the opaque predicate as a string
            const = actual_cond.right.value.value
            op_str = "x"+operation+str(unsigned2signed(const))

            # retrieve the value set
            possiblevalueset = collect_value_set(
                actual_cond.left.possible_values)
            if not possiblevalueset:
                # intractable or empty value set: nothing was proven, so do
                # not count the predicate as identified
                continue

            # evaluate the value set
            eval_result = eval_all_same(possiblevalueset, op_str)
            if (False in eval_result) and (True in eval_result):
                # not all elements in value set evaluated to the same value
                continue
            # binja correctly identify
            outfile.write(addr+'\n')


if __name__ == "__main__":
    filepath = sys.argv[1]  # give path to the binary
    # get path to folder containing the binary
    proj_dir = os.path.dirname(filepath)
    with open(os.path.join(proj_dir, "op_locations.zfp"), "r") as f:
        op_locations = f.readlines()

    # output to binja.zfp the opaque predicates that it can correctly identify
    with open(os.path.join(proj_dir, "vsa", "binja.zfp"), "w") as outfile:
        main(filepath, op_locations, outfile)
(* [contains s1 s2] is [true] when [s2] occurs somewhere inside [s1],
   and [false] otherwise. *)
let contains s1 s2 =
  let needle = Str.regexp_string s2 in
  match Str.search_forward needle s1 0 with
  | _ -> true
  | exception Not_found -> false


(* [true] for the unsigned integer kinds. Variables of these types are not
   reported since they mess with the opaque predicate tool. *)
let is_unsigned_int = function
  | TInt ((IUInt | IUShort | IUChar | IULong | IULongLong), _) -> true
  | _ -> false


(* Shared printer behind [pretty_lval] and [pretty_vi].
   For every state in the callstack after [stmt], prints one entry of the
   form "name:value-set", where the name is produced by [pp_name] and the
   value set is the one found at the location of [lval]. Nothing is printed
   when the type of [vi] is an unsigned integer. *)
let print_value_sets fmt stmt lval pp_name vi =
  let kinstr = Kstmt stmt in (* Make a kinstr from a stmt *)
  let loc = (* Make a location from a kinstr + an lval *)
    !Db.Value.lval_to_loc kinstr ~with_alarms:CilE.warn_none_mode lval
  in
  if not (is_unsigned_int vi.vtype) then
    Db.Value.fold_state_callstack
      (fun state () ->
         (* obtain value for location *)
         let value = Db.Value.find state loc in
         (* Print variable name and corresponding value set *)
         Format.fprintf fmt "\"%t:%a\",@." pp_name
           Locations.Location_Bytes.pretty value)
      () ~after:true kinstr (* value set after execution of statement *)


(* Prints the value associated to a pointer: [lval] is the dereference of
   the pointer variable [vi] and is also what gets printed as the name. *)
let pretty_lval fmt stmt lval vi =
  print_value_sets fmt stmt lval (fun out -> Printer.pp_lval out lval) vi


(* Prints the value associated to variable [vi] after [stmt]. *)
let pretty_vi fmt stmt vi =
  let lval = (Var vi, NoOffset) in (* Make an lval from a varinfo *)
  print_value_sets fmt stmt lval (fun out -> Printer.pp_varinfo out vi) vi


(* Prints one variable according to its type: pointers are printed through
   their dereference so the user can tell them apart, defined arrays are
   skipped, anything else is printed as a normal variable. *)
let pretty_var fmt stmt vi =
  if Cil.isPointerType vi.vtype then
    pretty_lval fmt stmt (Mem (Cil.evar vi), NoOffset) vi
  else if not (Cil.isArrayType vi.vtype) then
    pretty_vi fmt stmt vi


(* [true] when the printed location [where] does not belong to Frama-C
   itself, i.e. it mentions neither root/.opam/default/ nor FRAMAC_SHARE. *)
let is_user_location where =
  not (contains where "root/.opam/default/")
  && not (contains where "FRAMAC_SHARE")


(* Prints the state at statement [stmt] for each local variable in [kf],
   and for each global variable, as a bracketed list that follows an
   END_OF_METADATA marker and is closed by a line of dashes. *)
let pretty_local_and_global_vars kf fmt stmt =
  let locals = Kernel_function.get_locals kf in
  print_endline "END_OF_METADATA";
  print_endline "[";
  (* Handles local variables *)
  List.iter (pretty_var fmt stmt) locals;
  (* Handles global variables, filtering out internal Frama-C variables *)
  Globals.Vars.iter (fun vi _init ->
      let where = Format.asprintf "%a" Printer.pp_location vi.vdecl in
      if is_user_location where then pretty_var fmt stmt vi);
  print_endline "]";
  print_string "----------"


(* Visits each statement in function [kf] and, for every instruction,
   prints the instruction, its source location and the value sets. *)
(* kf represents a function *)
class stmt_val_visitor kf =
  object
    inherit Visitor.frama_c_inplace
    method! vstmt_aux stmt =
      begin match stmt.skind with
        | Instr _ ->
          (* Code at current line *)
          Format.printf "current instruction: %a@." Printer.pp_stmt stmt;
          (* Format: filename and line number, then the value sets *)
          Format.printf "current line: %a@.%a@."
            Printer.pp_location (Cil_datatype.Stmt.loc stmt)
            (pretty_local_and_global_vars kf) stmt
        | _ -> ()
      end;
      Cil.DoChildren
  end


(* [true] when [kf] is located in the original source code. *)
let is_user_function kf =
  is_user_location
    (Format.asprintf "%a" Printer.pp_location (Kernel_function.get_location kf))


(* Start *)
(* EX: frama-c -eva -eva-slevel 100 -eva-warn-key alarm=inactive -eva-auto-loop-unroll 300 -load-script /prettyvsa.ml 2048.c *)
let () =
  Db.Main.extend (fun () ->
      Format.printf "START PRETTY VSA (ZFP) @.";
      !Db.Value.compute ();
      (* First pass: value sets of every instruction of the source functions *)
      Globals.Functions.iter (fun kf ->
          if is_user_function kf then begin
            let fundec = Kernel_function.get_definition kf in
            ignore
              (Visitor.visitFramacFunction (new stmt_val_visitor kf) fundec)
          end);
      (* Second pass: names of the source functions, one per line *)
      print_string "FUNCTIONS IN SOURCE (ZFP)\n";
      Globals.Functions.iter (fun kf ->
          if is_user_function kf then
            Format.printf "%s" (Kernel_function.get_name kf ^ "\n")))
def extract_vars(instr):
    """
    Given a C `instr`, return a set containing all the identifier-like
    tokens it uses (a letter followed by letters, digits or underscores).
    Leading underscores are not part of a match.
    """
    return set(re.findall(r"[a-zA-Z]\w*", instr))


def extract_metadata(metadata):
    """
    Given `metadata` produced from the custom Frama-C script,
    extract the instruction and the location ("<file>:<line>") information.

    The markers are located by searching for them, so text in front of
    "current instruction: " does not shift the result. When the
    "current line: " marker is missing the location is empty.
    """
    instr_marker = "current instruction: "
    instr_part, _, loc = metadata.partition("current line: ")
    marker_at = instr_part.find(instr_marker)
    if marker_at == -1:
        instr = instr_part
    else:
        instr = instr_part[marker_at+len(instr_marker):]
    return instr, loc


def _is_ignored_line(loc, ignored_lines):
    """
    Return True if location `loc` ("<file>:<line>") is listed in
    `ignored_lines`, a mapping from filename to line numbers.

    The file is looked up both as printed and by its basename, and line
    numbers are compared as strings so that entries stored as int and
    entries stored as str both match.
    """
    parts = loc.split(":")
    if len(parts) < 2:
        return False
    filename, line_number = parts[0], parts[1]
    for key in (filename, os.path.basename(filename)):
        # `in` (rather than indexing) so a defaultdict does not grow
        if key in ignored_lines:
            if line_number in {str(n) for n in ignored_lines[key]}:
                return True
    return False


def _parse_variation_domain(domain, limit):
    """
    Convert the whitespace-free Frama-C variation domain `domain` into an
    iterable of values, or None if it should not be used.

    Supported formats: "{1;2;3}", "[1..256]" and "[1..245],0%20".
    Ranges spanning more than `limit` are rejected. All possible formats are
    documented in "3.1.3 Interpreting the variation domains" of the
    Frama-C EVA manual.
    """
    import ast

    # filter out floating point range
    if (domain.startswith("{") and domain.endswith(".}")) or \
            (domain.startswith("[") and domain.endswith(".]")):
        return None
    # filter out formats that do not give manageable intervals
    if any(marker in domain for marker in ("inf", "&", "NaN", "mix of ")):
        return None

    if domain.startswith("{") and domain.endswith("}"):  # EX: {1; 2; 3}
        try:
            # literal_eval only accepts literals, unlike eval
            return ast.literal_eval(domain.replace(";", ","))
        except (ValueError, TypeError, SyntaxError,
                MemoryError, RecursionError):
            return None

    if domain.startswith("[") and domain.endswith("]"):  # EX: [1..256]
        bounds = domain[1:-1].split("..")
        if len(bounds) != 2:
            return None
        try:
            start, end = int(bounds[0]), int(bounds[1])
        except ValueError:
            # a bound that is not a plain integer (EX: --)
            return None
        if end-start > limit:
            return None
        return range(start, end+1)

    if "," in domain and "%" in domain:  # EX: [1..245],0%20
        # range can be negative
        # numbers does not have to be single digit
        range_pattern = r'\[([0-9-]+)\.\.([0-9-]+)\],([0-9]+)%([0-9]+)'  # noqa
        found = re.search(range_pattern, domain)
        if found is None:
            return None
        try:
            start, end, remainder, divisor = (int(g) for g in found.groups())
        except ValueError:
            return None
        if divisor == 0 or end-start > limit:
            return None
        return [i for i in range(start, end+1) if i % divisor == remainder]

    return None


def framac_output_split(framac_out, ignored_lines, params):
    """
    First step in parsing Frama-C value analysis output.

    Args:
        framac_out: stdout of the Frama-C run with the custom script loaded
        ignored_lines: mapping from filename to the line numbers whose value
            sets must not be used
        params: object whose `value_set_limit` attribute bounds the size of
            the ranges that get expanded

    Returns:
        defaultdict mapping "<file>:<line>:<variable>" to the set of values
        the variable may hold after that line

    Raises:
        ValueError: if the "FUNCTIONS IN SOURCE (ZFP)" marker is missing
    """
    pretty_value_sets = defaultdict(set)

    prettyvsa_curfuncs_output = framac_out.split("START PRETTY VSA (ZFP)")[-1]
    prettyvsa_output, marker, curfuncs_output = \
        prettyvsa_curfuncs_output.partition("FUNCTIONS IN SOURCE (ZFP)")
    if not marker:
        raise ValueError('"FUNCTIONS IN SOURCE (ZFP)" marker not found '
                         'in Frama-C output')
    curfuncs = curfuncs_output.split()
    # last item is not part of the result
    # EX: "\nmake: Leaving directory '/tmp/zfp-0.16860656542979857'"
    prettyvsa_output = prettyvsa_output.split("----------")[:-1]

    for value_sets_at_loc in prettyvsa_output:
        value_sets_nonewline = value_sets_at_loc.replace("\n", "")
        end_of_metadata_index = value_sets_nonewline.find("END_OF_METADATA")
        if end_of_metadata_index == -1:
            continue
        # metadata contains instruction, filename, and line number
        metadata = value_sets_nonewline[:end_of_metadata_index]
        # value set from Frama-C script print value set to stdout
        # in the format of a Python list
        vs_index = end_of_metadata_index+len("END_OF_METADATA")
        try:
            import ast
            value_sets = ast.literal_eval(value_sets_nonewline[vs_index:])
        except (ValueError, TypeError, SyntaxError,
                MemoryError, RecursionError):
            # TODO:
            # may fail. Investigate more since my custom script
            # should print to stdout a formatted python list
            logging.debug('unparsable value sets: %s', metadata)
            continue

        # list is empty (no value sets)
        if not value_sets:
            continue

        instr, loc = extract_metadata(metadata)
        var_names = extract_vars(instr)

        # current instruction is a oneliner
        # Oneliner (e.g., one-line if statement, one-line for-loop)
        # will mess up our tool
        # EX loc: 09_loop_for_complex.c:7
        if _is_ignored_line(loc, ignored_lines):
            continue

        # parse content of value_sets
        for vs in value_sets:
            if not isinstance(vs, str):
                continue
            # content of the set may also contain the char ":"
            var_name, separator, rest = vs.partition(":{{")
            if not separator:
                # not of the form "name:{{ ... }}"
                continue

            # only want value sets of variable in current instruction
            if var_name not in var_names:
                continue
            if var_name == "__retres":
                # return value statement
                continue
            if "tmp" in var_name:
                # Frama-C internal variable to keep track of intermediate
                # values. Variable does not exist in original source. Unless
                # developer names the variable to contain "tmp" cannot
                # differentiate so ignore to be safe
                # EX: return value from rand()
                continue

            # expected shape: {{ <base> -> <domain tokens...> }}
            tokens = ("{{" + rest).split()
            if len(tokens) < 4 or tokens[0] != "{{" \
                    or tokens[2] != "->" or tokens[-1] != "}}":
                continue
            if tokens[1:-1] == ["NULL", "->", "[--..--]"]:
                # every value is possible
                continue

            values = _parse_variation_domain("".join(tokens[3:-1]),
                                             params.value_set_limit)
            if values is None:
                continue

            # Boolean variable will be outputted by Frama-C
            # with its function name prepended
            # Boolean variable and the function name is separated by a '_'
            # However, when inserted back into source, we need
            # the original variable name. So here we reconstruct the
            # original variable name by removing that prefix. The function
            # name itself may contain '_', so the whole (longest) matching
            # prefix is removed instead of cutting at the first '_'.
            prefixes = [fun+"_" for fun in curfuncs
                        if var_name.startswith(fun+"_")]
            if prefixes:
                var_name = var_name[len(max(prefixes, key=len)):]

            pretty_value_sets[loc+":"+var_name].update(values)

    return pretty_value_sets
class Zfp:
    """Class to construct zero footprint opaque predicates in src code."""

    PARAMS_STRUCT = namedtuple('PARAMS_STRUCT',
                               ('obfuscation framac_vars ignored_files '
                                'ignored_functions value_set_limit'))

    # Keywords that may start a statement whose body has no braces
    _ONELINER_KEYWORDS = ('for ', 'while ', 'if ', 'else if ', 'else ')

    def __init__(self, wdir, configs):
        """Init variables and perform obfuscation.

        Args:
            wdir: working directory
            configs: dictionary providing the 'obfuscation', 'framac_vars',
                'ignored_files', 'ignored_functions' and 'value_set_limit'
                settings

        Return:
            None
        """
        self.wdir = wdir  # working directory in /tmp folder
        self.ignored_lines = defaultdict(list)
        # Useful statistic
        self.failed_vsa2op = []  # numbers of unsat
        self.failed_vsa = []  # too simple. A value set of just 0
        self.framac_runtime = 0
        self.line_nums = 0
        self.op_nums = 0

        self.params = Zfp.PARAMS_STRUCT(configs['obfuscation'],
                                        configs['framac_vars'],
                                        configs['ignored_files'],
                                        configs['ignored_functions'],
                                        configs['value_set_limit'])

        # ~~~ main operations ~~~
        # (0) Pre-Process or Remove Comments
        self._iterate_c_files(self._remove_comments)

        # (1) One-Liner Identification
        self._iterate_c_files(self._identify_oneliner)

        # (2) Perform Value Analysis
        # Run Frama-C to perform value analysis
        # Parse Frama-C's output to identify value set
        self.value_sets = self._get_value_sets()

        # (3) Perform Synthesis
        # Pass value set to Rosette to perform synthesis
        self.opaque_expressions = self._get_opaque_expressions()
        # Create opaque predicates from synthesized output
        self.opaque_predicates = self._get_opaque_predicates()

        # (4) Perform Injection
        # Perform opaque predicates injection
        self._perform_injection()
        # ~~~ main operations ~~~

    @property
    def vsa_json(self):
        """Filepath to the value analysis output.

        Returns:
            filepath to store the vsa.json file
        """
        return os.path.join(self.wdir, 'vsa.json')

    def _iterate_c_files(self, method, args=()):
        """Call `method` on each C file found.

        The working directory is searched recursively. Files whose name is
        listed in the `ignored_files` parameter are skipped.

        Args:
            method: function to call each c file on. It receives the path of
                the file followed by `args`
            args: arguments to pass to method

        Return:
            None
        """
        for root, _, files in os.walk(self.wdir):
            for filename in files:
                if not filename.endswith('.c'):
                    continue
                if filename in self.params.ignored_files:
                    continue
                # Get path to each c source file
                filepath = os.path.join(root, filename)
                # Call method
                method(filepath, *args)

    def _remove_comments(self, filepath):
        """Comments might mess with later pipeline for identifying oneliner.

        The file is overwritten with whatever gcc prints to stdout.

        Args:
            filepath: file to remove comments from

        Return:
            None
        """
        # NOTE(review): the output of a failed gcc run (presumably empty)
        # is written back as well -- confirm this cannot lose the source.
        cmd = 'gcc -fpreprocessed -dD -E -P ' + filepath
        cmtless_src = shell_exec(cmd)

        with open(filepath, 'w') as f:
            f.write(cmtless_src)

    @staticmethod
    def _first_unbraced_statement(src_code, row, col):
        """Find the statement that forms a brace-less body.

        Scans `src_code` starting at column `col` of line index `row` for
        the first ';' or '{' that is neither inside parentheses (such as a
        condition or a for header) nor inside a string or character
        literal.

        Args:
            src_code: list of source lines without comments
            row: index of the line to start on
            col: column of that line to start on

        Returns:
            1-based number of the line holding the ';' if it comes first,
            None if a '{' comes first or neither is found
        """
        depth = 0  # nesting level of parentheses, may span several lines
        for j in range(row, len(src_code)):
            text = src_code[j][col:] if j == row else src_code[j]
            quote = None  # literals do not continue on the next line
            escaped = False
            for ch in text:
                if quote is not None:
                    if escaped:
                        escaped = False
                    elif ch == '\\':
                        escaped = True
                    elif ch == quote:
                        quote = None
                elif ch in '"\'':
                    quote = ch
                elif ch == '(':
                    depth += 1
                elif ch == ')':
                    depth = max(depth-1, 0)
                elif depth == 0 and ch == '{':
                    return None
                elif depth == 0 and ch == ';':
                    return j+1
        return None

    @staticmethod
    def _find_oneliner_lines(src_code):
        """Find the lines holding the body of a brace-less statement.

        For each line, only the first keyword found (in the order of
        `_ONELINER_KEYWORDS`) is considered.

        Args:
            src_code: list of source lines without comments

        Returns:
            list of 1-based line numbers, one per detected oneliner
        """
        found = []
        for i, line in enumerate(src_code):
            # column right after the keyword that starts the statement
            col = next(
                (line.find(k)+len(k) for k in Zfp._ONELINER_KEYWORDS
                 if k in line),
                None,
            )
            if col is None:
                continue
            body_line = Zfp._first_unbraced_statement(src_code, i, col)
            if body_line is not None:
                found.append(body_line)
        return found

    def _identify_oneliner(self, filepath):
        """Identify oneliner since they are problematic for our pipeline.

        The line numbers found are added to `self.ignored_lines` under the
        basename of `filepath`.

        Args:
            filepath: file to identify oneliner code in

        Return:
            None
        """
        src_code = get_file_content(filepath, return_type='list')
        oneliner_lines = self._find_oneliner_lines(src_code)
        if oneliner_lines:
            basename = os.path.basename(filepath)
            self.ignored_lines[basename].extend(oneliner_lines)

    @staticmethod
    def _inject_ops(src_code, ops_by_line):
        """Append opaque predicates to the lines of `src_code` in place.

        Opaque predicates are only inserted at the end of an instruction
        (signified by ';', '}', '{') or on a blank line. Line numbers that
        do not exist in `src_code` are skipped.

        Args:
            src_code: list of source lines, modified in place
            ops_by_line: mapping from 1-based line number to the opaque
                predicates to append to that line

        Returns:
            number of opaque predicates inserted
        """
        injected = 0
        for line_number, ops in ops_by_line.items():
            index = int(line_number)-1
            if not 0 <= index < len(src_code):
                continue
            stripped = src_code[index].rstrip()
            # an empty `stripped` means a blank line, which has no last
            # character to inspect
            if stripped and stripped[-1] not in (';', '}', '{'):
                continue
            for op in ops:
                src_code[index] += op
                injected += 1
        return injected

    def _perform_injection(self):
        """Insert synthesized opaque predicates back to source."""
        for filepath, ops_by_line in self.opaque_predicates.items():
            # Each `filepath` is a relative path to a C source file
            target = os.path.join(self.wdir, filepath)
            src_code = get_file_content(target, return_type='list')
            self.op_nums += self._inject_ops(src_code, ops_by_line)
            with open(target, 'w') as f:
                # Write back the obfuscated C source file
                f.write('\n'.join(src_code))

    def _get_opaque_expressions(self):
        """Perform synthesis to get the opaque expressions (construction).

        Returns:
            opaque expressions (ex: y > 10)
        """
        # Run Rosette
        cmd = 'tools/rosette/perform_synthesis.sh '+self.vsa_json
        opaque_expressions = shell_exec(cmd)
        return opaque_expressions
219 | 220 | Returns: 221 | fully constructed opaque predicates 222 | """ 223 | # Format synthesized outputs to prime them for injection 224 | opaque_predicates = {} 225 | opaque_expressions = self.opaque_expressions.split('\n') 226 | index = 0 227 | for expression in opaque_expressions: 228 | # t/f 229 | # = :: 230 | label = 'label'+str(index) 231 | try: 232 | opaqueness, key, comparator, constant = expression.split(' ') 233 | filepath, loc, var = key.split(':') 234 | except: # noqa 235 | # Possible faulty expression 236 | # or value sets that didn't make it as opaque predicates (unsat) 237 | # EX: f /tmp/x.c:1722:param{.curve_param; .null_param} != 0 238 | self.failed_vsa2op.append(expression) 239 | continue 240 | 241 | if filepath not in opaque_predicates: 242 | opaque_predicates[filepath] = defaultdict(list) 243 | 244 | content = opaque_predicates[filepath] 245 | 246 | if opaqueness == 't': 247 | content[loc].append(('if('+var+' '+comparator+' '+constant 248 | + '){goto '+label+';}' 249 | + self.params.obfuscation.format(index) 250 | + label+':')) 251 | elif opaqueness == 'f': 252 | content[loc].append(('if('+var+' '+comparator+' '+constant+'){' 253 | + self.params.obfuscation.format(index)+'}')) 254 | index += 1 255 | 256 | return opaque_predicates 257 | 258 | def _get_value_sets(self): 259 | """Parse, beautify, and save Frama-C's value analysis result as JSON. 260 | 261 | Returns: 262 | value sets from Frama-C 263 | """ 264 | value_sets = framac_output_split(self._run_framac(), 265 | self.ignored_lines, 266 | self.params) 267 | 268 | # Save value_sets result (dictionary) as json 269 | # object of type set is not JSON serializable 270 | # the JSON file is for Rosette 271 | value_sets = {k: list(v) for k, v in value_sets.items()} 272 | with open(self.vsa_json, 'w') as f: 273 | json.dump(value_sets, f) 274 | return value_sets 275 | 276 | def _run_framac(self): 277 | """Perform Value Analysis with Frama-C. 
278 | 279 | Returns: 280 | stdout from Frama-C (in str) 281 | """ 282 | # call Frama-C (i.e., content of GNUmakefile) 283 | cmd = 'make -C '+self.wdir 284 | time_before = perf_counter() 285 | framac_raw_output = shell_exec(cmd) 286 | time_after = perf_counter() 287 | self.framac_runtime = time_after-time_before 288 | return framac_raw_output 289 | --------------------------------------------------------------------------------