#!/usr/bin/perl

# mutate.pl - write flag-use mutants of an AT&T-syntax x86 assembly listing.
#
# Every instruction that consumes a condition flag (cmovCC, setCC, jCC, adc,
# sbb) yields two mutant copies of the input file, one per mutation direction
# (see mutate()).  Usage and installation notes are in the POD at the end of
# this file.

use strict;
use warnings;

# Output directory for the generated mutants, relative to the working directory.
my $MUTANT_DIR = './mutants';

# Splits one assembly line into four fields:
#   1. leading indentation plus an optional "label :",
#   2. an optional "lock" prefix,
#   3. the mnemonic,
#   4. the operand text (which keeps the trailing newline).
# Directives (".globl"), comments and bare labels do not match, because the
# mnemonic must start with a lowercase letter and must not be followed by a
# colon.
my $INSN_RE = qr{
    \A
    ( \s* (?: [\w.\$]+ \s* : \s* )? )
    ( (?: lock \s+ )? )
    ( [a-z][a-z0-9]* ) (?! \s* : )
    ( (?: \s .* )? )
    \z
}xms;

# Parse $line and classify it.  Returns the empty list unless the line is a
# flag-consuming instruction; otherwise returns
# ($lead, $lock, $mnemonic, $operands, $kind) with $kind one of
# 'cmov', 'set', 'jcc', 'adc', 'sbb'.
sub _parse_flag_use {
    my ($line) = @_;
    return if !defined $line;

    my @fields = $line =~ $INSN_RE;
    return if !@fields;

    # Only the mnemonic is inspected, so operands such as "memset" or
    # "obj_free" can no longer make a call/mov look like a flag use.
    # jmp is unconditional; jcxz/jecxz/jrcxz test (e/r)cx rather than EFLAGS.
    my $mnemonic = $fields[2];
    my $kind
        = $mnemonic =~ m{\A cmov [a-z]+ \z}xms                      ? 'cmov'
        : $mnemonic =~ m{\A set [a-z]+ \z}xms                       ? 'set'
        : $mnemonic =~ m{\A adc [bwlq]? \z}xms                      ? 'adc'
        : $mnemonic =~ m{\A sbb [bwlq]? \z}xms                      ? 'sbb'
        : $mnemonic =~ m{\A j (?! mp | e?cxz | rcxz ) [a-z]+ \z}xms ? 'jcc'
        :                                                             undef;
    return if !defined $kind;

    return (@fields, $kind);
}

# is_flag_use($insn) -> 1 if $insn is an instruction whose result depends on a
# condition flag (cmovCC, setCC, jCC, adc, sbb), 0 otherwise.
sub is_flag_use {
    my ($insn) = @_;
    my @parsed = _parse_flag_use($insn);
    return @parsed ? 1 : 0;
}

# mutate($insn, $mutate_direction) -> mutated instruction text.
#
#   kind    direction 0                  direction 1
#   cmovCC  mov   (always move)          nop          (never move)
#   jCC     jmp   (always taken)         nop          (never taken)
#   setCC   movb $0, dest                movb $1, dest
#   adc     stc + adc (carry forced 1)   add          (carry ignored)
#   sbb     stc + sbb (borrow forced 1)  sub          (borrow ignored)
#
# A label sharing the line is kept in front of the replacement.  Lines that
# are not flag uses, and directions other than 0 or 1, are returned unchanged.
sub mutate {
    my ($insn, $mutate_direction) = @_;

    my @parsed = _parse_flag_use($insn);
    return $insn if !@parsed;
    return $insn
        if !defined $mutate_direction
        || ($mutate_direction ne '0' && $mutate_direction ne '1');

    my ($lead, $lock, $mnemonic, $operands, $kind) = @parsed;

    if ($kind eq 'set') {
        # Replacing the whole mnemonic handles one-, two- and three-letter
        # condition codes alike (sete, setne, setnbe, ...).
        (my $dest = $operands) =~ s{\A \s+}{}xms;
        return "${lead}movb \$$mutate_direction, $dest";
    }

    if ($kind eq 'cmov' || $kind eq 'jcc') {
        return "${lead}nop\n" if $mutate_direction == 1;
        my $unconditional = $kind eq 'cmov' ? 'mov' : 'jmp';
        return $lead . $unconditional . $operands;
    }

    # adc / sbb.  Direction 1 drops the carry input; the operand-size suffix
    # (b/w/l/q), if any, is carried over to add/sub.
    if ($mutate_direction == 1) {
        my $carryless = $kind eq 'adc' ? 'add' : 'sub';
        return $lead . $lock . $carryless . substr($mnemonic, 3) . $operands;
    }

    # Direction 0 forces the carry input to 1.  Setting CF with stc keeps the
    # original instruction, so no operand-size or destination parsing is
    # needed.
    return "${lead}stc\n" . $lock . $mnemonic . $operands;
}

# Write a copy of @$lines_ref to $path with element $idx replaced by
# $replacement.  Dies if the file cannot be written.
sub _write_mutant {
    my ($path, $lines_ref, $idx, $replacement) = @_;

    open my $out, '>', $path
        or die "Cannot write $path: $!\n";
    print {$out} @{$lines_ref}[ 0 .. $idx - 1 ], $replacement,
        @{$lines_ref}[ $idx + 1 .. $#{$lines_ref} ]
        or die "Cannot write $path: $!\n";
    close $out
        or die "Cannot close $path: $!\n";
    return;
}

# Entry point: main(<assembly-file>, [<function-name>], <debug>).
# Reads the assembly once and, for every flag-use instruction (optionally
# restricted to the function most recently named by a .globl directive),
# writes "<label>#<n>.0.s" and "<label>#<n>.1.s" into ./mutants, where <label>
# is the most recent "name:" label seen and <n> counts mutated instructions in
# file order.  Returns 0.
sub main {
    my @args = @_;
    die "Usage: mutate.pl <assembly-file> [<function-name>] <debug>\n"
        if @args < 2 || @args > 3;

    my $assembly_file = $args[0];
    my $fn_name       = @args == 3 ? $args[1] : q{};
    my $debug         = $args[-1];
    my $verbose       = $debug eq '1';
    print "debug = $debug\n";

    open my $in, '<', $assembly_file
        or die "Cannot open $assembly_file: $!\n";
    my @lines = <$in>;
    close $in
        or die "Cannot close $assembly_file: $!\n";

    if (!-d $MUTANT_DIR) {
        mkdir $MUTANT_DIR
            or die "Cannot create $MUTANT_DIR: $!\n";
    }

    my $curr_fn_name   = q{};
    my $actual_fn_name = q{};
    my $mutant_num     = 0;

    LINE:
    for my $idx (0 .. $#lines) {
        my $line = $lines[$idx];

        if ($line =~ m{\A [.]globl}xms) {
            $actual_fn_name = (split ' ', $line)[-1];
            print "$actual_fn_name\n" if $verbose;
        }
        if ($line =~ m{\A [_a-zA-Z0-9.]+ :}xms && $line !~ m{\A [.]}xms) {
            ($curr_fn_name) = $line =~ m{\A ([^:]*) :}xms;
            print "curr_fn_name = $curr_fn_name\n" if $verbose;
        }

        next LINE if $fn_name ne q{} && $actual_fn_name ne $fn_name;
        next LINE if !is_flag_use($line);

        my @mutant_names
            = map { sprintf '%s#%d.%d.s', $curr_fn_name, $mutant_num, $_ } 0, 1;
        my @mutated = map { mutate($line, $_) } 0, 1;
        if ($verbose) {
            print "new_insn_0 = $mutated[0], new_insn_1 = $mutated[1]\n";
        }

        for my $direction (0, 1) {
            _write_mutant("$MUTANT_DIR/$mutant_names[$direction]",
                \@lines, $idx, $mutated[$direction]);
        }
        print "Wrote to mutants/$mutant_names[0] && mutants/$mutant_names[1]\n";
        $mutant_num++;
    }

    return 0;
}

# Run only when executed as a script, so the subs can be loaded with require.
exit main(@ARGV) unless caller;

1;

=head1 NAME

mutate.pl - binary mutation based on Uroboros

=head1 SYNOPSIS

    mutate.pl <assembly-file> <debug>
    mutate.pl <assembly-file> <function-name> <debug>

=head1 DESCRIPTION

Generates mutants of an assembly listing by altering, one instruction at a
time, every instruction that uses a condition flag.  Pass C<1> as the debug
argument for verbose output, C<0> otherwise.  Mutants are written to the
F<mutants> directory under the current working directory.

=head1 INSTALLATION

=over 4

=item 1.

Install Uroboros from L<https://github.com/s3team/uroboros.git>.

=item 2.

Copy the F<uroboros.py> script into the F<uroboros/src> directory.

=item 3.

Copy F<mutate.pl> into the F<uroboros/src> directory.

=back

=head1 USAGE

Go to the F<uroboros/src> directory and generate the mutants with:

    python uroboros.py <binary>

The mutants are written into the F<mutants> directory created in the
directory the command is run from.

=cut
import argparse
import datetime
import os
import pdb
import shutil
import subprocess
import sys
import time

from argparse import RawTextHelpFormatter
from argparse import ArgumentParser


# keep the intermediate binary/final.s or not.
k = False
# directory that receives the kept intermediate files (set by fold_withtamp)
f_dic = ""

# number of iterations requested on the command line (0 when -i is not given)
iter_num = 0


def _elf_info():
    """Return the first line of elf.info ('' if the file is empty)."""
    with open("elf.info") as f:
        return f.readline()


def check_exe():
    """Return False if elf.info describes an 'LSB shared object', else True."""
    return "LSB shared object" not in _elf_info()


def check_32():
    """Return True if elf.info describes a 32-bit binary."""
    return "32-bit" in _elf_info()


def check_strip():
    """Return True if elf.info says the binary is 'not stripped'."""
    return "not stripped" in _elf_info()


def reassemble():
    """Assemble final.s with gcc into a.out, adding -m32 for 32-bit inputs."""
    if check_32() == True:
        # 32-bit binary
        os.system('gcc final.s -lm -lrt -lpthread -lcrypt -m32')
    else:
        # 64-bit binary
        os.system('gcc final.s -lm -lrt -lpthread -lcrypt')


def process(f, i):
    """Run one disassemble/reassemble round on binary f and build its mutants.

    f is the binary's file name in the current directory and i the zero-based
    iteration index.  Returns True on success, False if final.s was not
    produced or a step raised an exception.
    """
    try:
        os.system("rm final_*.txt")

        # suppose we use this method to obtain function information
        os.system("cp " + f + " func_discover/")
        os.system("python func_discover/func_addr.py func_discover/"+f + " " + str(i))
        os.system("rm final_data.s")
        os.system('rm useless_func.info')
        if i > 0:
            os.system("python useless_func_discover.py " + f)

        os.system('echo \"' + str(i) + '\" > count.txt')
        os.system("strip " + f)
        os.system("python main_discover.py " + f)

        os.system("./init.native " + f)
        if not os.path.isfile("final.s"):
            return False

        os.system("python post_process_data.py")

        os.system('echo ".section .eh_frame" >> final_data.s')
        os.system('cat eh_frame_split.info >> final_data.s')
        os.system('echo ".section .eh_frame_hdr" >> final_data.s')
        os.system('cat eh_frame_hdr_split.info >> final_data.s')

        os.system('cat final_data.s >> final.s')

        if k:
            os.system("cp final.s final.s." + str(i))

        if "gobmk" in f:
            # FIXME!
            os.system("python gobmk_sub.py")

        os.system("python compile_process.py")
        os.system("python label_adjust.py")

        reassemble()
        ######################################
        create_mutants()
        ######################################
        # iter_num stays None/0 when -i was not given; only multi-iteration
        # runs feed the rebuilt binary back in as the next input.
        if iter_num and iter_num > 0:
            os.system("cp a.out " + f)

        if k:
            print(f_dic)
            os.system("cp a.out " + f_dic + "/" + f + "." + str(i+1))
            os.system("mv final.s." + str(i) + " " + f_dic)

    except Exception as e:
        # Report the cause instead of hiding it; Ctrl-C is no longer swallowed.
        print("processing failed: " + str(e))
        return False
    else:

        os.system('rm ' + "faddr_old.txt." + str(i))
        os.system('rm ' + "faddr.txt." + str(i))

    return True


def create_mutants():
    """Generate and assemble mutants of final.s.

    Copies final.s to final-init.s, puts labels on their own lines, builds the
    unmutated 'original-bin', runs mutate.pl to fill ./mutants, then assembles
    every file found there into '<file>-bin'.
    """
    init_assembly = "final-init.s"
    shutil.copyfile("final.s", init_assembly)
    break_labels(init_assembly)
    myReassemble(init_assembly, "original-bin")

    mutant_dir = os.path.join(os.getcwd(), "mutants")
    if os.path.isdir(mutant_dir):
        shutil.rmtree(mutant_dir)  # clean up leftovers from the previous run
    # Always (re)create the directory: mutate.pl writes into it.
    os.mkdir(mutant_dir)

    # mutate.pl is installed next to this script (see the README).
    mutate_script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 "mutate.pl")
    subprocess.call(["perl", mutate_script, init_assembly, "0"])
    # To mutate a single function, pass its name before the debug flag:
    #subprocess.call(["perl", mutate_script, init_assembly, "func_name_here", "0"])

    for dirpath, dirnames, filenames in os.walk(mutant_dir):
        for ma in filenames:
            print("#" * 20)
            print(ma)
            ma_path = os.path.join(dirpath, ma)
            myReassemble(ma_path, ma_path + "-bin")
            print("%" * 20)


def break_labels(inFile):
    """Rewrite inFile so every 'label : insn' line becomes two lines.

    Lines are stripped of surrounding whitespace; the label line keeps its
    ' : ' suffix.  Only the first ' : ' splits the line, so the remainder is
    never dropped.  The file is rewritten in place without a trailing newline.
    """
    outContent = []
    with open(inFile, "r") as f:
        for con in f:
            parts = con.strip().split(" : ", 1)
            if len(parts) > 1:
                outContent.append(parts[0] + " : ")
                outContent.append(parts[1])
            else:
                outContent.append(parts[0])

    with open(inFile, "w") as ff:
        ff.write('\n'.join(outContent))


def _use_m32():
    """Return True when gcc should get -m32.

    Follows check_32(); falls back to True when elf.info cannot be read.
    """
    try:
        return check_32()
    except (IOError, OSError):
        return True


def myReassemble(inputF, outputF):
    """Assemble inputF into outputF with gcc and report a failure on stdout."""
    cmd = ["gcc", inputF, "-lm", "-lrt", "-lpthread", "-lcrypt"]
    if _use_m32():
        cmd.append("-m32")
    cmd += ["-o", outputF]
    ret = subprocess.call(cmd)
    if ret != 0:
        print("assembling "+inputF + " failed!")


def iterate (f, iterations):
    """Run process() on f for the given number of rounds; stop at the first failure."""
    print("start to process binary: " + f)

    for i in range(0, iterations):
        print ("########## iteration round "+str(i+1) + " begin ! ###########")
        if not process(f, i):
            return False
        print ("########## iteration round "+str(i+1) + " finish ! ##########")

    return True


def check (b, f, al):
    """Validate the input binary and record `file` output in elf.info.

    b is the path given on the command line, f its base name and al the list
    of requested assumptions (may be None).  Copies b into the current
    directory when the path contains a '/'.  Returns False for a missing
    binary, a shared library, or a stripped binary combined with assumption
    three.
    """
    if not al:
        al = []

    if not os.path.isfile(b):
        print("cannot find input binary")
        return False

    if '/' in b:
        # not in current directory
        try:
            shutil.copy(b, '.')
        except shutil.Error:
            # source and destination are the same file (e.g. "./ls")
            pass
        except (IOError, OSError) as e:
            print("cannot copy input binary: " + str(e))
            return False

    with open('elf.info', 'w') as info:
        subprocess.call(['file', f], stdout=info)
    if check_exe() == False:
        print("Uroboros doesn't support shared library")
        return False

    # if assumption three is utilized, then input binary must be unstripped.
    if '3' in al and check_strip() == False:
        print('''Uroboros doesn't support stripped binaries when using assumption three''')
        return False

    return True


def fold_withtamp (f):
    """Create the timestamped directory 'test_fold_<f>_<time>' and store its name in f_dic."""
    global f_dic
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H:%M:%S')

    f_dic = "test_fold_" + f + '_' + st

    if not os.path.isdir(f_dic):
        os.mkdir(f_dic)


def set_assumption (l):
    """Write the selected assumptions to assumption_set.info.

    l is the list of -a values (or None).  With no values the file contains
    "1"; otherwise it contains the distinct values, sorted and separated by
    spaces.  Returns False, without writing, if any value is not '2' or '3'.
    """
    # 2 -> assumption two: fix data section starting address
    # Note that assumption two require linker script to reassemble!
    # Some of the examples can be found at ./ld_scripts/*
    # 3 -> assumption three: function starting address + jump table
    # _ -> not defined.

    if not l:
        with open('assumption_set.info', 'w') as f:
            f.write("1\n")
        return True

    # Every requested assumption must be defined, not merely one of them.
    if not all(i in ('2', '3') for i in l):
        print("assumption undefined!")
        print("accecpt assumptions: 2 for assumption two and 3 for assumption three")
        return False

    with open('assumption_set.info', 'w') as f:
        f.write(' '.join(sorted(set(l))) + "\n")

    return True


if __name__ == "__main__":
    p = ArgumentParser(formatter_class=RawTextHelpFormatter)
    p.add_argument("binary",
                   help="path to the input binary, for example, /home/szw175/ls")
    p.add_argument("-i", "--iteration", type=int,
                   help="the number of disassemble-(instrument)-reassemble iterations")
    p.add_argument("-k", "--keep", action="count",
                   help="if multiple iteration processing, whether to keep itermediate binaries")
    p.add_argument("-a", "--assumption", action="append",
                   help='''this option configures three addtional assumption,
note that two basic assumptions and addtional assumption one
(n-byte alignment) are set by default,
while assumption two and three need to be configured. For example, setting
assumption two and three: -a 2 -a 3''')
    p.add_argument('--version', action='version', version='Uroboros 0.11')

    args = p.parse_args()
    b = args.binary
    i = args.iteration
    iter_num = i or 0          # -i omitted -> None -> 0
    k = bool(args.keep)        # -k omitted -> None -> False

    f = os.path.basename(b)
    if check(b, f, args.assumption) == False or set_assumption(args.assumption) == False:
        pass

    else:
        if k:
            fold_withtamp(f)

        if args.iteration:
            if iterate(f, i):
                print("processing succeeded")
            else:
                print("exception, processing failed")
        else:
            if process(f, 0):
                print("processing succeeded")
            else:
                print("exception, processing failed")