├── Makefile ├── README.md ├── dynamic_to_static.sh ├── fix_got_plt.py ├── test-clean.sh ├── test.sh └── tests ├── hello.c └── loop.c /Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # execute the whole process from compilation 4 | # 5 | # Antonio Barbalace, Stevens 2019 6 | 7 | INPUT_FILE="loop.c" 8 | # TODO check file extension 9 | PATH_FILE=${INPUT_FILE%.*} 10 | 11 | 12 | 13 | Configure your compiler 14 | first 15 | support both clang and musl -libc 16 | 17 | 18 | 19 | 20 | #compile first 21 | ${COMPILER} -o ${PATH_FILE}.${ARCH} ${INPUT_FILE} 22 | # TODO check if error 23 | 24 | 25 | 26 | 27 | TODO this should be a makefile 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Enables the conversion of a dynamically linked binary to a statically linked binary by removing symbols from the GOT. It requires remill/mcsema, available at https://github.com/systems-nuts/mcsema, to lift the dynamically linked binary to LLVM IR. 2 | 3 | Dependencies: IDA Pro or binary Ninja, llvm, clang, python, binutils, qemu 4 | 5 | It supports binaries compiled with clang and musl-gcc. 6 | 7 | Just run dynamic_to_static.sh with the path to the binary as an argument. 8 | 9 | To see a fully working example just run test.sh after installing the dependencies, and change to the tests/ directory to see the results. 
(The test.sh script will automatically run the staically compiled binarie(s)) 10 | -------------------------------------------------------------------------------- /dynamic_to_static.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # simple script to process from dyamically linked binary to statically linked 4 | # binary, it supports clang, gcc, and musl-gcc 5 | # 6 | # Antonio Barbalace, Stevens 2019 7 | 8 | # sourceCompiler can be automatically identified by this script 9 | # targetTriple to which the code must compiled 10 | # targetCompiler for linking (the LLVM IR is compiled by clang) 11 | # targetCompilerFlags used for compilation and linking 12 | 13 | #remill/mcsema parameters 14 | DISASSEMBLER=~/ida-7.2/idat64 15 | MCSEMA_DIS="mcsema-disass" 16 | MCSEMA_LIFT="mcsema-lift-4.0" 17 | 18 | arch_name() 19 | { 20 | ARCH_NAME_ARG1=$1 21 | #extract homogeneous names for architectures 22 | case "$ARCH_NAME_ARG1" in 23 | arm*) ARCH_NAME="arm" ;; 24 | aarch64*) ARCH_NAME="aarch64" ;; 25 | i?86*) ARCH_NAME="i386" ;; 26 | x86-64*|x86_64*|amd64*) ARCH_NAME="amd64" ;; 27 | unknown) echo "target selection ERROR $ARCH_NAME_ARG1" ; exit 1 ; ;; 28 | *) echo "unknown or unsupported target ERROR $ARCH_NAME_ARG1" ; exit 1 ; ;; 29 | esac 30 | } 31 | 32 | #the compiler for the static linking 33 | DEFAULT_COMPILER="clang" 34 | #the triple for the static linking 35 | DEFAULT_TARGET_TRIPLE=`clang -dumpmachine` 36 | 37 | ############################################################################### 38 | # main 39 | ############################################################################### 40 | 41 | usage() 42 | { 43 | echo "Usage: $0 [-C sourceCompiler] [-t targetTriple] [-c targetCompiler] [-f targetCompilerFlags] binary" 44 | exit 0 45 | } 46 | 47 | #check the number of arguments, at least one 48 | [ $# -le 0 ] && usage 49 | 50 | #parse command line arguments 51 | while getopts "ht:C:c:f:" OPT ; do 52 | case ${OPT} in 
53 | t) TARGET_TRIPLE=$OPTARG ;; 54 | C) SRC_COMPILER=$OPTARG ;; 55 | c) TARGET_COMPILER=$OPTARG ;; 56 | f) TARGET_FLAGS=$OPTARG ;; 57 | *) usage ;; 58 | esac 59 | done 60 | shift "$((OPTIND -1))" 61 | INPUT_FILE="$@" 62 | 63 | #check if the file exists 64 | if [ ! -f "$INPUT_FILE" ] ; then 65 | echo "file ERROR: ${INPUT_FILE} doesn't exist" 66 | exit 1 67 | fi 68 | 69 | #detect input file architecture from the binary, and create a copy 70 | ARCH=`file $INPUT_FILE` 71 | ARCH=${ARCH#*,} 72 | ARCH=${ARCH%%,*} 73 | if [ -z "$ARCH" ] ; then 74 | echo "arch ERROR: ${INPUT_FILE} cannot identify architecture" 75 | exit 1 76 | fi 77 | arch_name ${ARCH} 78 | ARCH=${ARCH_NAME} 79 | PATH_FILE=${INPUT_FILE}.${ARCH} 80 | cp -a ${INPUT_FILE} ${PATH_FILE} 81 | 82 | #check if the input file is a dynamically linked binary 83 | if [ -z "`file ${PATH_FILE} | grep "dynamically linked"`" ] ; then 84 | echo "file ERROR: ${PATH_FILE} is not a dynamically linked binary" 85 | exit 1 86 | fi 87 | 88 | #heuristic to identify the compiler 89 | COMPILERS="clang GCC" 90 | LIBRARIES="musl" 91 | for COMP in $COMPILERS ; do 92 | if [ ! -z "`readelf -p .comment ${PATH_FILE} | grep ${COMP}`" ] ; then 93 | AUTO_COMP=${COMP} 94 | for LIB in $LIBRARIES ; do 95 | if [ ! -z "`grep ${LIB} ${PATH_FILE}`" ] ; then 96 | AUTO_COMP=${LIB}-${AUTO_COMP} 97 | fi 98 | done 99 | break 100 | fi 101 | done 102 | AUTO_COMP=`echo $AUTO_COMP | tr '[:upper:]' '[:lower:]'` 103 | #if heuristic search failed use default compiler 104 | if [ -z "${AUTO_COMP}" ] ; then 105 | AUTO_COMP=$DEFAULT_COMPILER 106 | echo "Default src compiler: ${AUTO_COMP}" #cannot detect compiler 107 | else 108 | echo "Detected src compiler: ${AUTO_COMP}" 109 | fi 110 | #check if the user declared what compiler he used 111 | if [ -z "${SRC_COMPILER}" ] || [ ! 
-f "`which ${SRC_COMPILER}`" ] ; then 112 | SRC_COMPILER=${AUTO_COMP} 113 | fi 114 | 115 | #use the default triple when -t was not given, then derive TARGET_ARCH from the triple 116 | if [ -z "${TARGET_TRIPLE}" ] ; then 117 | TARGET_TRIPLE=${DEFAULT_TARGET_TRIPLE} 118 | fi 119 | arch_name $TARGET_TRIPLE 120 | TARGET_ARCH=$ARCH_NAME 121 | 122 | #use the default compiler when -c was not given 123 | if [ -z "${TARGET_COMPILER}" ] ; then 124 | TARGET_COMPILER=${DEFAULT_COMPILER} 125 | fi 126 | 127 | 128 | ############################################################################### 129 | # Disassembling, lifting, fixing got/plt 130 | ############################################################################### 131 | 132 | #disassemble the copy of the binary; the result is written to ${PATH_FILE}.cfg 133 | OUTPUT=$( ${MCSEMA_DIS} --disassembler ${DISASSEMBLER} --os linux --arch ${ARCH} --output ${PATH_FILE}.cfg --binary ${PATH_FILE} --entrypoint main --log_file ${PATH_FILE}.log 2>&1 ) 134 | if [ $? != 0 ] ; then 135 | echo "disassembler ERROR: $OUTPUT" 136 | exit 1 137 | fi 138 | 139 | #lift the .cfg to LLVM bitcode (${PATH_FILE}.bc) 140 | OUTPUT=$( ${MCSEMA_LIFT} --os linux --arch ${ARCH} --cfg ${PATH_FILE}.cfg --output ${PATH_FILE}.bc --explicit_args 2>&1) 141 | if [ $? != 0 ] ; then 142 | echo "lifter ERROR: $OUTPUT" 143 | exit 1 144 | fi 145 | 146 | #convert the bitcode to textual IR (.ll): the GOT table is looked up in the text form in order to extract the functions 147 | OUTPUT=$( llvm-dis ${PATH_FILE}.bc 2>&1 ) 148 | if [ $? != 0 ] ; then 149 | echo "llvm-dis ERROR: $OUTPUT" 150 | exit 1 151 | fi 152 | #saving the old files so that they can be accessed later 153 | mv ${PATH_FILE}.bc ${PATH_FILE}.bc-orig 154 | mv ${PATH_FILE}.ll ${PATH_FILE}.ll-orig 155 | 156 | #fixing the got table; stdout and stderr of the fixer both go to the new .ll, so on failure that file holds the error text 157 | SCRIPT_PATH=$( readlink -f $0 ) 158 | ${SCRIPT_PATH%/*}/fix_got_plt.py ${SRC_COMPILER} ${PATH_FILE}.ll-orig &> ${PATH_FILE}.ll 159 | if [ $? != 0 ] ; then 160 | echo "fixer ERROR: " `cat ${PATH_FILE}.ll` 161 | rm ${PATH_FILE}.ll 162 | exit 1 163 | fi 164 | 165 | #compiling back to bitcode (I am doing that because I had errors without doing this ...) 
166 | OUTPUT=$( llvm-as ${PATH_FILE}.ll 2>&1 ) 167 | if [ $? != 0 ] ; then 168 | echo "llvm-as ERROR: $OUTPUT" 169 | exit 1 170 | fi 171 | 172 | ############################################################################### 173 | # Recompiling to native 174 | ############################################################################### 175 | 176 | echo Static compilation to ${TARGET_ARCH} 177 | 178 | #recompile it to TARGET_ARCH 179 | OUTPUT=$( clang -c -v ${PATH_FILE}.bc -target ${TARGET_TRIPLE} ${TARGET_FLAGS} -o ${PATH_FILE}.${TARGET_ARCH}.o 2>&1 ) 180 | if [ $? != 0 ] ; then 181 | echo "llc ERROR: $OUTPUT" 182 | exit 1 183 | fi 184 | 185 | if [ ${TARGET_COMPILER} = "*clang*" ] ; then 186 | #static linking to TARGET_ARCH 187 | OUTPUT=$( ${TARGET_COMPILER} -v -o ${PATH_FILE}-${TARGET_ARCH} -target ${TARGET_TRIPLE} ${PATH_FILE}.${TARGET_ARCH}.o ${TARGET_FLAGS} -static 2>&1 ) 188 | if [ $? != 0 ] ; then 189 | echo "compiler ERROR: $OUTPUT" 190 | exit 1 191 | fi 192 | else 193 | # OUTPUT=$( ${TARGET_TRIPLE}-${TARGET_COMPILER} -v -o ${PATH_FILE}-${TARGET_ARCH} ${PATH_FILE}.${TARGET_ARCH}.o ${TARGET_FLAGS} -static 2>&1 ) 194 | OUTPUT=$( ${TARGET_COMPILER} -v -o ${PATH_FILE}-${TARGET_ARCH} ${PATH_FILE}.${TARGET_ARCH}.o ${TARGET_FLAGS} -static 2>&1 ) 195 | if [ $? 
!= 0 ] ; then 196 | echo "compiler ERROR: $OUTPUT" 197 | exit 1 198 | fi 199 | fi 200 | 201 | 202 | echo Output ${PATH_FILE}-${TARGET_ARCH} 203 | -------------------------------------------------------------------------------- /fix_got_plt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Script to get rid of got_plt symbols and declare them external 4 | # currently supports clang and musl-gcc 5 | # 6 | # Antonio Barbalace, Stevens 2019 7 | 8 | import re 9 | import sys 10 | import os 11 | 12 | # check command line arguments, need the file to open as first argument 13 | if (len(sys.argv)<3): 14 | print "Usage: " + sys.argv[0] + " compiler file.ll" 15 | sys.exit(1) 16 | # check if first file exists 17 | file_exist = os.path.isfile(sys.argv[2]) 18 | if (file_exist): 19 | file_name = sys.argv[2] 20 | else: 21 | print "Error: file1 " + sys.argv[2] + " doesn't exist" 22 | sys.exit(1) 23 | # no need to check the compiler 24 | compiler=sys.argv[1] 25 | 26 | #file.close() 27 | file_desc = open(file_name,"r") 28 | 29 | # use patterns based on the compiler, we support clang, musl-gcc 30 | if (compiler=="clang"): 31 | got_plt_pattern = re.compile("got_plt[ \t]+=[ \t]+") 32 | got_plt_member = re.compile("@([a-zA-Z0-9_]+)") 33 | elif (compiler=="musl-gcc"): 34 | got_plt_pattern = re.compile("_got[ \t]+=[ \t]+") 35 | got_plt_member = re.compile("@([a-zA-Z0-9_]+)[ \t]+to[ \t]+") 36 | else: 37 | print "Error: compiler "+compiler+" not supported" 38 | sys.exit(1) 39 | 40 | got_plt_open = re.compile("<{") 41 | #got_plt_close = re.compile("}>") 42 | 43 | XXXpattern="SSSSS" 44 | members =[] 45 | 46 | sysvcc_signature = "x86_64_sysvcc" 47 | sysvcc_pattern = re.compile("declare extern_weak "+sysvcc_signature+" [a-z0-9]+ @") 48 | sysvcc_funcName = re.compile("@([a-zA-Z0-9_]+)\(") 49 | 50 | for line in file_desc: 51 | # search for got_plt line(s) and substitute 52 | got_plt_match = got_plt_pattern.search(line) 53 | if 
got_plt_match: 54 | # find the symbols first 55 | open_match = got_plt_open.search(line, got_plt_match.end()) 56 | if open_match: 57 | end_index = open_match.end() 58 | while True: 59 | member_match = got_plt_member.search(line, end_index) 60 | if member_match: 61 | #print member_match.group(1) + " at " + str(member_match.start()) 62 | members.append(member_match.group(1)) 63 | end_index = member_match.end() 64 | else: 65 | break 66 | # then do substitute the text with the new text 67 | for member in members: 68 | line = re.sub(r" @"+member+" ", r" @"+XXXpattern+member+" ",line.rstrip()) 69 | print line 70 | continue 71 | # search for weak external symbols and add external symbols 72 | sysvcc_match = sysvcc_pattern.search(line) 73 | if sysvcc_match: 74 | # check if the symbol is to be substituted or not 75 | funcName_match = sysvcc_funcName.search(line, sysvcc_match.end() -1) 76 | if funcName_match: 77 | #print "found: "+funcName_match.group(1) 78 | if (funcName_match.group(1) in members): 79 | # substitute previous line 80 | lineSub = re.sub(r" @"+funcName_match.group(1)+"\(", " @"+XXXpattern+funcName_match.group(1)+"(",line.rstrip()) 81 | print lineSub 82 | # create a new line 83 | lineSubNew = re.sub(r" extern_weak "+sysvcc_signature+" "," ",line.rstrip()) 84 | print lineSubNew 85 | continue 86 | # if this is just a normal line, just print it 87 | print line 88 | 89 | sys.exit(0) 90 | 91 | -------------------------------------------------------------------------------- /test-clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # just remove all files but not sources 4 | 5 | cd tests 6 | rm -f `ls | grep -v "\.[ch]$"` 7 | cd - 8 | 9 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # simple example to test compilation 4 | # 5 | # Antonio Barbalace, Stevens 
2019 6 | cd "$(dirname "$0")" || exit 1 #every path below is relative to the directory that holds this script 7 | INPUT_FILE="tests/loop.c" 8 | #INPUT_FILE="tests/hello.c" 9 | PATH_FILE=${INPUT_FILE%.*} 10 | 11 | #config parameters 12 | DISASSEMBLER=~/ida-7.2/idat64 13 | ARCH="amd64" 14 | #COMPILER="musl-gcc" 15 | COMPILER="clang" 16 | TARGET="aarch64-linux-gnu" 17 | 18 | #compile first 19 | ${COMPILER} -o ${PATH_FILE}.${ARCH} ${INPUT_FILE} 20 | 21 | #then disassemble 22 | mcsema-disass --disassembler ${DISASSEMBLER} --os linux --arch ${ARCH} --output ${PATH_FILE}.${ARCH}.cfg --binary ${PATH_FILE}.${ARCH} --entrypoint main --log_file ${PATH_FILE}.${ARCH}.log 23 | 24 | #then lifting 25 | mcsema-lift-4.0 --os linux --arch ${ARCH} --cfg ${PATH_FILE}.${ARCH}.cfg --output ${PATH_FILE}.${ARCH}.bc --explicit_args 26 | 27 | #now need to look for the GOT table in order to extract the functions 28 | llvm-dis ${PATH_FILE}.${ARCH}.bc 29 | mv ${PATH_FILE}.${ARCH}.bc ${PATH_FILE}.${ARCH}.bc-orig 30 | mv ${PATH_FILE}.${ARCH}.ll ${PATH_FILE}.${ARCH}.ll-orig 31 | ./fix_got_plt.py ${COMPILER} ${PATH_FILE}.${ARCH}.ll-orig > ${PATH_FILE}.${ARCH}.ll 32 | llvm-as ${PATH_FILE}.${ARCH}.ll 33 | 34 | #recompile it to ARCH 35 | clang -o ${PATH_FILE}.${ARCH}.o -c -v ${PATH_FILE}.${ARCH}.bc 36 | ${COMPILER} -o ${PATH_FILE}.${ARCH}-${ARCH} ${PATH_FILE}.${ARCH}.o -static 37 | 38 | #recompile it to TARGET 39 | clang -o ${PATH_FILE}.${ARCH}-aarch64.o -c -v ${PATH_FILE}.${ARCH}.bc -target ${TARGET} 40 | ${COMPILER} -o ${PATH_FILE}.${ARCH}-aarch64 ${PATH_FILE}.${ARCH}-aarch64.o -static -target ${TARGET} 41 | 42 | #run the ARCH binary 43 | echo "RUNNING on ${ARCH}" 44 | ${PATH_FILE}.${ARCH}-${ARCH} 45 | 46 | #run the TARGET binary 47 | echo "RUNNING on aarch64" 48 | qemu-aarch64 ${PATH_FILE}.${ARCH}-aarch64 49 | -------------------------------------------------------------------------------- /tests/hello.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | 3 | int main () { 4 | printf("helloworld!\n"); 5 | return 1; 6 | } 7 | 
-------------------------------------------------------------------------------- /tests/loop.c: -------------------------------------------------------------------------------- 1 | /* ghidra test with integers only */ 2 | 3 | #include <stdio.h> 4 | #include <unistd.h> 5 | 6 | #define A 'a' 7 | #define FIVE 5 8 | #define PI 3 9 | 10 | int total =3; 11 | static int average =PI; 12 | 13 | long long int mypow(long x, long y) { 14 | return x*y; 15 | } 16 | 17 | int bob(int a, long b, char c) { 18 | a++; 19 | b+=a; 20 | char string[200]; 21 | sprintf(string, "a=%d, b=%ld, c=%c\n", a, b, c); 22 | printf(string); 23 | total++; 24 | average /= total; 25 | fprintf(stderr, "total=%d average=%d\n", total, average); 26 | return a; 27 | } 28 | 29 | int hello() { 30 | printf("Hello World!\n"); 31 | bob(42, PI, A); 32 | return FIVE; 33 | } 34 | 35 | float goodbye() { 36 | printf("Goodbye World!\n"); 37 | bob(42, PI, A); 38 | return FIVE; 39 | } 40 | 41 | int main () { 42 | int i = 0; 43 | int looper = 0; 44 | hello(); 45 | for (; i < 5; ++i) { 46 | printf("\ti = %d\n", i); 47 | } 48 | goodbye(); 49 | bob(42, 2, 'x'); 50 | printf("mypow: %lld\n", mypow(2,3)); 51 | for (int j =0; j< 5; j++) { 52 | printf("sleep looper=%d\n", ++looper); 53 | sleep(1); 54 | } 55 | return 1; 56 | } 57 | --------------------------------------------------------------------------------