├── .gitignore ├── .gitmodules ├── Dockerfile ├── README.md ├── artifact ├── .gitignore ├── 1_get_reassembled_code.py ├── 1_print_rewrite_result.py ├── 2_make_set.py ├── 2_run_testsuite.py ├── 2_run_testsuite_spec.py ├── 4_get_br_stat.py ├── 4_get_code_size.py ├── 4_get_runtime_overhead.py ├── 4_get_suri_overhead.py ├── 4_get_table_size.py ├── 4_print_br_overhead.py ├── 4_print_code_size_overhead.py ├── 4_print_runtime_overhead.py ├── 4_print_suri_overhead.py ├── 4_print_table_overhead.py ├── Dockerfile ├── EXPERIMENT.md ├── PREPARATION.md ├── README.md ├── Reassessor │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE.md │ ├── README.md │ ├── artifact │ │ └── run_reassessor.py │ ├── example │ │ ├── Makefile │ │ └── src │ │ │ └── hello.c │ ├── reassessor │ │ ├── __init__.py │ │ ├── differ │ │ │ ├── __init__.py │ │ │ ├── diff.py │ │ │ ├── ereport.py │ │ │ └── statistics.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── asmfile.py │ │ │ ├── parser.py │ │ │ └── types.py │ │ ├── normalizer │ │ │ ├── __init__.py │ │ │ ├── ddisasm.py │ │ │ ├── gt.py │ │ │ ├── ramblr.py │ │ │ ├── retro.py │ │ │ └── tool_base.py │ │ ├── preprocessing.py │ │ └── reassessor.py │ ├── requirements.txt │ └── setup.py ├── application │ ├── rewrite_juliet.py │ ├── run_juliet.py │ └── summary.py ├── build_script │ ├── build_spec2006.py │ ├── build_spec2017.py │ ├── script │ │ ├── base2006.cfg │ │ ├── base2017.cfg │ │ ├── binutils-2.40_list.txt │ │ ├── binutils_copy.sh │ │ ├── build-spec2006.sh │ │ ├── build-spec2006_no_ehframe.sh │ │ ├── build-spec2017.sh │ │ ├── build-spec2017_no_ehframe.sh │ │ ├── copy-spec2017.sh │ │ ├── copy.sh │ │ ├── coreutils-9.1_list.txt │ │ ├── coreutils_copy.sh │ │ ├── spec_cpu2006_list.txt │ │ └── spec_cpu2017_list.txt │ ├── test_suite_script │ │ ├── Dockerfile │ │ └── script │ │ │ ├── case1_spec2006.cfg │ │ │ └── case1_spec2017.cfg │ └── test_suite_script_ubuntu18.04 │ │ ├── Dockerfile │ │ └── script │ │ ├── case1_spec2006.cfg │ │ └── case1_spec2017.cfg ├── consts.py ├── filter_utils.py ├── install_reassessor.sh ├── make_gt.py ├── realworld │ ├── client │ │ ├── build.py │ │ ├── build.sh │ │ ├── epiphany │ │ ├── filezilla │ │ ├── git │ │ ├── openssh │ │ ├── putty │ │ ├── run_docker.sh │ │ ├── run_epiphany.sh │ │ └── vim │ └── phoronix │ │ ├── 7zip │ │ ├── apache │ │ ├── build.py │ │ ├── build.sh │ │ ├── copy.sh │ │ ├── mariadb │ │ ├── nginx │ │ ├── run_docker.sh │ │ └── sqlite3 ├── table_size.py ├── terminate_suri_docker.sh └── ubuntu18.04 │ ├── Dockerfile │ └── build_script │ └── script │ ├── binutils-2.40_list.txt │ ├── binutils_copy.sh │ ├── coreutils-9.1_list.txt │ └── coreutils_copy.sh ├── emitter.py ├── setup.py ├── superCFGBuilder ├── .gitignore ├── BinEssence │ ├── BinEssence.fs │ ├── BinEssence.fsproj │ └── DisasmLens.fs ├── BinGraph │ ├── BinGraph.fsproj │ ├── DiGraph.fs │ ├── Dominator.fs │ ├── DummyVertex.fs │ ├── Edges.fs │ ├── Graph.fs │ ├── GraphCore.fs │ ├── Imperative.fs │ ├── Loop.fs │ ├── Persistent.fs │ ├── RangedDiGraph.fs │ ├── SCC.fs │ ├── Traversal.fs │ └── Vertices.fs ├── ControlFlowAnalysis │ ├── BBLInfo.fs │ ├── BBLManager.fs │ ├── CFGBuilder.fs │ ├── CFGError.fs │ ├── CFGEvents.fs │ ├── CFGHelper.fs │ ├── CodeManager.fs │ ├── ControlFlowAnalysis.fsproj │ ├── CoverageMaintainer.fs │ ├── DataManager.fs │ ├── EvalHelper.fs │ ├── ExceptionTable.fs │ ├── FortranRegularJmpResolution.fs │ ├── Function.fs │ ├── FunctionMaintainer.fs │ ├── HistoryManager.fs │ ├── ICFGBuildable.fs │ ├── IPluggableAnalysis.fs │ ├── IRHelper.fs │ ├── IndirectCallResolution.fs │ ├── IndirectJumpResolution.fs │ ├── JumpTableMaintainer.fs │ ├── LowUIRHelper.fs │ ├── NoReturnFunctionIdentification.fs │ ├── PerFunctionAnalysis.fs │ ├── RegularJmpResolution.fs │ ├── SSAPromotion.fs │ └── TblResolution.fs ├── ControlFlowGraph │ ├── BasicBlock.fs │ ├── CFGEdgeKind.fs │ ├── ControlFlowGraph.fs │ ├── ControlFlowGraph.fsproj │ ├── DisasmBasicBlock.fs │ ├── DisasmCFG.fs │ ├── FakeBlockInfo.fs │ ├── IRBasicBlock.fs │ ├── IRCFG.fs │ ├── InlinedAssembly.fs │ ├── InstructionInfo.fs │ ├── SSABasicBlock.fs │ ├── SSACFG.fs │ ├── SSAEdges.fs │ ├── SSATypes.fs │ ├── SSAUtils.fs │ ├── SyscallTailInfo.fs │ └── VisualBlock.fs ├── DataFlow │ ├── CPState.fs │ ├── Chains.fs │ ├── ConstantPropagation.fs │ ├── DFHelper.fs │ ├── DataFlow.fsproj │ ├── DataFlowAnalysis.fs │ ├── ReachingDefinitions.fs │ ├── SCPTransfer.fs │ ├── SCPValue.fs │ ├── SPTransfer.fs │ ├── SPValue.fs │ ├── SparseConstantPropagation.fs │ ├── StackPointerPropagation.fs │ ├── UVTransfer.fs │ ├── UVValue.fs │ ├── UntouchedValuePropagation.fs │ └── Utils.fs ├── SSA │ ├── SSA.AST.fs │ ├── SSA.Pp.fs │ ├── SSA.fs │ └── SSA.fsproj ├── superCFGBuilder.sln └── superCFGBuilder │ ├── ASanGen.fs │ ├── MetaGen.fs │ ├── Program.fs │ └── superCFGBuilder.fsproj ├── superSymbolizer ├── CustomCompiler.py ├── ElfBricks.py ├── README.md ├── SuperAsan.py ├── SuperSymbolizer.py ├── lib │ ├── CFGSerializer.py │ ├── CFIInfo.py │ ├── ElfDef.py │ ├── ExceptTable.py │ ├── LocalSymbolizer.py │ └── Misc.py ├── linker.ld └── requirements.txt └── suri.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | build/ 3 | dist/ 4 | suri.egg-info/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "B2R2"] 2 | path = B2R2 3 | url = https://github.com/B2R2-org/B2R2.git 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND="noninteractive" 4 | ENV DEBIAN_FRONTEND="Etc/UTC" 5 | 6 | RUN apt update && \ 7 | apt install -y git wget software-properties-common python3-pip 8 | 9 | # Install compilers 10 | RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y && \ 11 | apt update && \ 12 | apt install -y gcc-13 g++-13 gcc-11 g++-11 clang-10 clang-11 gfortran-11 gfortran-13 13 | 14 | # Install dotnet9 15 | RUN wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \ 16 | dpkg -i packages-microsoft-prod.deb && \ 17 | rm packages-microsoft-prod.deb && \ 18 | apt-get update && \ 19 | apt-get install -y dotnet-sdk-9.0 20 | 21 | # Install Python3 dependency 22 | RUN pip install pyelftools 23 | 24 | RUN mkdir -p /project 25 | 26 | # Add SURI 27 | RUN cd /project/ && git clone https://github.com/SoftSec-KAIST/SURI.git && \ 28 | cd SURI && git submodule update --init && \ 29 | python3 setup.py install 30 | 31 | # Build superCFGBuilder 32 | RUN cd /project/SURI/superCFGBuilder && dotnet build -c Release 33 | -------------------------------------------------------------------------------- /artifact/.gitignore: -------------------------------------------------------------------------------- 1 | benchmark/ 2 | gt/ 3 | log/ 4 | output/ 5 | setA/ 6 | setB/ 7 | setC/ 8 | stat/ 9 | spec2006_image/ 10 | spec2017_image/ 11 | -------------------------------------------------------------------------------- /artifact/2_make_set.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, os 2 | from collections import namedtuple 3 | from ctypes import * 4 | from filter_utils import check_exclude_files 5 | from consts import * 6 | 7 | ExpTask = namedtuple('ExpTask', ['dataset', 'input_dir', 'output_dir', 'set_dir', 'bin_name']) 8 | 9 | def parse_arguments(): 10 | parser = argparse.ArgumentParser(description='manager') 11 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setB, setC)') 12 | parser.add_argument('--input_dir', type=str, default='benchmark') 13 | parser.add_argument('--output_dir', type=str, default='output') 14 | parser.add_argument('--package', type=str, help='Package') 15 | parser.add_argument('--core', type=int, default=1, help='Number of cores to use') 16 | parser.add_argument('--blacklist', nargs='+') 17 | parser.add_argument('--whitelist', nargs='+') 18 | args = parser.parse_args() 19 | 20 | # Sanitizing arguments 21 | assert args.dataset in ['setA', 'setB', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 22 | 23 | return args 24 | 25 | ################################ 26 | 27 | def prepare_tasks(args, package): 28 | tasks = [] 29 | for comp in COMPILERS: 30 | for opt in OPTIMIZATIONS: 31 | for lopt in LINKERS: 32 | input_base = os.path.join(args.input_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 33 | output_base = os.path.join(args.output_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 34 | strip_dir = os.path.join(input_base, 'stripbin', '*') 35 | set_dir = os.path.join('.', args.dataset, package, comp, '%s_%s' % (opt, lopt)) 36 | 37 | for target in glob.glob(strip_dir): 38 | filename = os.path.basename(target) 39 | 40 | # Filter binaries 41 | if args.blacklist and filename in args.blacklist: 42 | continue 43 | if args.whitelist and filename not in args.whitelist: 44 | continue 45 | if check_exclude_files(args.dataset, package, comp, opt, filename): 46 | continue 47 | 48 | bin_dir = os.path.join(input_base, 'stripbin') 49 | out_dir = os.path.join(output_base, filename) 50 | tasks.append(ExpTask(args.dataset, bin_dir, out_dir, set_dir, filename)) 51 | 52 | return tasks 53 | 54 | ################################ 55 | 56 | def copy(src, dst): 57 | if os.path.exists(src) and not os.path.exists(dst): 58 | print('cp %s %s' % (src, dst)) 59 | os.system('cp %s %s' % (src, dst)) 60 | 61 | def build_set(task): 62 | orig_src_path = os.path.join(task.input_dir, task.bin_name) 63 | orig_dst_dir = os.path.join(task.set_dir, 'original') 64 | os.system('mkdir -p %s' % orig_dst_dir) 65 | orig_dst_path = os.path.join(orig_dst_dir, task.bin_name) 66 | copy(orig_src_path, orig_dst_path) 67 | 68 | suri_src_path = os.path.join(task.output_dir, 'super', 'my_%s' % task.bin_name) 69 | suri_dst_dir = os.path.join(task.set_dir, 'suri') 70 | os.system('mkdir -p %s' % suri_dst_dir) 71 | suri_dst_path = os.path.join(suri_dst_dir, task.bin_name) 72 | copy(suri_src_path, suri_dst_path) 73 | 74 | if task.dataset == 'setA': 75 | ddisasm_src_path = os.path.join(task.output_dir, 'ddisasm', task.bin_name) 76 | ddisasm_dst_dir = os.path.join(task.set_dir, 'ddisasm') 77 | os.system('mkdir -p %s' % ddisasm_dst_dir) 78 | ddisasm_dst_path = os.path.join(ddisasm_dst_dir, task.bin_name) 79 | copy(ddisasm_src_path, ddisasm_dst_path) 80 | elif task.dataset == 'setB': 81 | egalito_src_path = os.path.join(task.output_dir, 'egalito', task.bin_name) 82 | egalito_dst_dir = os.path.join(task.set_dir, 'egalito') 83 | os.system('mkdir -p %s' % egalito_dst_dir) 84 | egalito_dst_path = os.path.join(egalito_dst_dir, task.bin_name) 85 | copy(egalito_src_path, egalito_dst_path) 86 | 87 | ################################ 88 | 89 | def run(args, package): 90 | tasks = prepare_tasks(args, package) 91 | for task in tasks: 92 | build_set(task) 93 | 94 | if __name__ == '__main__': 95 | args = parse_arguments() 96 | for package in PACKAGES: 97 | run(args, package) 98 | -------------------------------------------------------------------------------- /artifact/4_get_br_stat.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, multiprocessing, os, sys 2 | from collections import namedtuple 3 | from filter_utils import check_exclude_files 4 | sys.path.append("../superSymbolizer") 5 | from SuperSymbolizer import SuperSymbolizer 6 | from consts import * 7 | 8 | ExpTask = namedtuple('ExpTask', ['dataset', 'input_dir', 'output_dir', 'prefix', 'bin_name']) 9 | 10 | def parse_arguments(): 11 | parser = argparse.ArgumentParser(description='manager') 12 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 13 | parser.add_argument('--input_dir', type=str, default='benchmark') 14 | parser.add_argument('--output_dir', type=str, default='output') 15 | parser.add_argument('--package', type=str, help='Select package (coreutils-9.1, binutils-2.40, spec_cpu2017, spec_cpu2006)') 16 | parser.add_argument('--core', type=int, default=1, help='Number of cores to use') 17 | parser.add_argument('--target', type=str) 18 | parser.add_argument('--blacklist', nargs='+') 19 | parser.add_argument('--whitelist', nargs='+') 20 | args = parser.parse_args() 21 | 22 | # Sanitizing arguments 23 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 24 | if args.package: 25 | assert args.package in PACKAGES, 'Invalid package: "%s"'%(args.package) 26 | 27 | return args 28 | 29 | ################################ 30 | 31 | def prepare_tasks(args, package): 32 | tasks = [] 33 | for comp in COMPILERS: 34 | for opt in OPTIMIZATIONS: 35 | for lopt in LINKERS: 36 | input_base = os.path.join(args.input_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 37 | output_base = os.path.join(args.output_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 38 | strip_dir = os.path.join(input_base, 'bin', '*') 39 | prefix = '_'.join([package, comp, opt, lopt]) 40 | 41 | for target in glob.glob(strip_dir): 42 | filename = os.path.basename(target) 43 | 44 | # Filter binaries 45 | if args.blacklist and filename in args.blacklist: 46 | continue 47 | if args.whitelist and filename not in args.whitelist: 48 | continue 49 | if check_exclude_files(args.dataset, package, comp, opt, filename): 50 | continue 51 | 52 | bin_dir = os.path.join(input_base, 'bin') 53 | out_dir = os.path.join(output_base, filename) 54 | tasks.append(ExpTask(args.dataset, bin_dir, out_dir, prefix, filename)) 55 | 56 | return tasks 57 | 58 | ################################ 59 | 60 | def run_task(task): 61 | bin_path = os.path.join(task.input_dir, task.bin_name) 62 | 63 | b2r2_func_path = os.path.join(task.output_dir, 'super', 'b2r2_meta.json') 64 | if not os.path.exists(b2r2_func_path): 65 | return 66 | 67 | stat_dir = os.path.join('stat', 'bbl', task.dataset) 68 | os.system('mkdir -p %s' % stat_dir) 69 | out_path = os.path.join(stat_dir, task.prefix + '_' + task.bin_name) 70 | if os.path.exists(out_path): 71 | return 72 | print(out_path) 73 | 74 | sym = SuperSymbolizer(bin_path, b2r2_func_path, 0, 'intel') 75 | sym.symbolize(True) 76 | sym.report_statistics(out_path) 77 | 78 | def run_package(args, package): 79 | tasks = prepare_tasks(args, package) 80 | p = multiprocessing.Pool(args.core) 81 | p.map(run_task, tasks) 82 | 83 | def run(args): 84 | if args.package: 85 | run_package(args, args.package) 86 | else: 87 | for package in PACKAGES: 88 | run_package(args, package) 89 | 90 | if __name__ == '__main__': 91 | args = parse_arguments() 92 | run(args) 93 | -------------------------------------------------------------------------------- /artifact/4_get_code_size.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, multiprocessing, os 2 | from collections import namedtuple 3 | from filter_utils import check_exclude_files 4 | from consts import * 5 | 6 | ExpTask = namedtuple('ExpTask', ['dataset', 'input_dir', 'output_dir', 'prefix', 'bin_name']) 7 | 8 | def parse_arguments(): 9 | parser = argparse.ArgumentParser(description='manager') 10 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 11 | parser.add_argument('--input_dir', type=str, default='benchmark') 12 | parser.add_argument('--output_dir', type=str, default='output') 13 | parser.add_argument('--package', type=str, help='Select package (coreutils-9.1, binutils-2.40, spec_cpu2017, spec_cpu2006)') 14 | parser.add_argument('--core', type=int, default=1, help='Number of cores to use') 15 | parser.add_argument('--blacklist', nargs='+') 16 | parser.add_argument('--whitelist', nargs='+') 17 | args = parser.parse_args() 18 | 19 | # Sanitizing arguments 20 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 21 | if args.package: 22 | assert args.package in PACKAGES, 'Invalid package: "%s"'%(args.package) 23 | 24 | return args 25 | 26 | ################################ 27 | 28 | def prepare_tasks(args, package): 29 | tasks = [] 30 | for comp in COMPILERS: 31 | for opt in OPTIMIZATIONS: 32 | for lopt in LINKERS: 33 | input_base = os.path.join(args.input_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 34 | output_base = os.path.join(args.output_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 35 | strip_dir = os.path.join(input_base, 'bin', '*') 36 | prefix = '_'.join([package, comp, opt, lopt]) 37 | 38 | for target in glob.glob(strip_dir): 39 | filename = os.path.basename(target) 40 | 41 | # Filter binaries 42 | if args.blacklist and filename in args.blacklist: 43 | continue 44 | if args.whitelist and filename not in args.whitelist: 45 | continue 46 | if check_exclude_files(args.dataset, package, comp, opt, filename): 47 | continue 48 | 49 | bin_dir = os.path.join(input_base, 'bin') 50 | out_dir = os.path.join(output_base, filename) 51 | tasks.append(ExpTask(args.dataset, bin_dir, out_dir, prefix, filename)) 52 | 53 | return tasks 54 | 55 | ################################ 56 | 57 | def run_task(task): 58 | bin_path = os.path.join(task.input_dir, task.bin_name) 59 | 60 | b2r2_func_path = os.path.join(task.output_dir, 'super', 'tmp_%s' % task.bin_name) 61 | 62 | stat_dir = os.path.join('stat', 'size', task.dataset) 63 | os.system('mkdir -p %s' % stat_dir) 64 | out_path = os.path.join(stat_dir, task.prefix + '_' + task.bin_name) 65 | if os.path.exists(out_path): 66 | return 67 | print(out_path) 68 | 69 | os.system("objdump -d %s -j .text --no-show-raw-insn | grep '^ \s*[0-9a-f]' | grep -v 'xor %%eax,%%eax' | grep -v 'xor %%eax,%%eax' | grep -v 'data16 nop' | grep -v '\snop' | wc -l > %s" % (bin_path, out_path)) 70 | os.system("objdump -d %s -j .text --no-show-raw-insn | grep '^ \s*[0-9a-f]' | grep -v 'xor %%eax,%%eax' | grep -v 'xor %%eax,%%eax' | grep -v 'data16 nop' | grep -v '\snop' | wc -l >> %s" % (b2r2_func_path, out_path)) 71 | 72 | def run_package(args, package): 73 | tasks = prepare_tasks(args, package) 74 | p = multiprocessing.Pool(args.core) 75 | p.map(run_task, tasks) 76 | 77 | def run(args): 78 | if args.package: 79 | run_package(args, args.package) 80 | else: 81 | for package in PACKAGES: 82 | run_package(args, package) 83 | 84 | if __name__ == '__main__': 85 | args = parse_arguments() 86 | run(args) 87 | -------------------------------------------------------------------------------- /artifact/4_get_table_size.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, multiprocessing, os, sys 2 | from collections import namedtuple 3 | from filter_utils import check_exclude_files 4 | from consts import * 5 | 6 | ExpTask = namedtuple('ExpTask', ['dataset', 'output_dir', 'gt_dir', 'prefix', 'bin_name']) 7 | 8 | def parse_arguments(): 9 | parser = argparse.ArgumentParser(description='manager') 10 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 11 | parser.add_argument('--input_dir', type=str, default='benchmark') 12 | parser.add_argument('--output_dir', type=str, default='output') 13 | parser.add_argument('--package', type=str, help='Select package (coreutils-9.1, binutils-2.40, spec_cpu2017, spec_cpu2006)') 14 | parser.add_argument('--core', type=int, default=1, help='Number of cores to use') 15 | parser.add_argument('--target', type=str) 16 | parser.add_argument('--blacklist', nargs='+') 17 | parser.add_argument('--whitelist', nargs='+') 18 | args = parser.parse_args() 19 | 20 | # Sanitizing arguments 21 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 22 | if args.package: 23 | assert args.package in PACKAGES, 'Invalid package: "%s"'%(args.package) 24 | 25 | return args 26 | 27 | ################################ 28 | 29 | def prepare_tasks(args, package): 30 | tasks = [] 31 | for comp in COMPILERS: 32 | for opt in OPTIMIZATIONS: 33 | for lopt in LINKERS: 34 | input_base = os.path.join(args.input_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 35 | output_base = os.path.join(args.output_dir, args.dataset, package, comp, '%s_%s' % (opt, lopt)) 36 | gt_base = os.path.join('gt', args.dataset, package, comp, '%s_%s' % (opt, lopt)) 37 | strip_dir = os.path.join(input_base, 'bin', '*') 38 | prefix = '_'.join([package, comp, opt, lopt]) 39 | 40 | for target in glob.glob(strip_dir): 41 | filename = os.path.basename(target) 42 | 43 | # Filter binaries 44 | if args.blacklist and filename in args.blacklist: 45 | continue 46 | if args.whitelist and filename not in args.whitelist: 47 | continue 48 | if check_exclude_files(args.dataset, package, comp, opt, filename): 49 | continue 50 | 51 | out_dir = os.path.join(output_base, filename) 52 | gt_dir = os.path.join(gt_base, filename) 53 | tasks.append(ExpTask(args.dataset, out_dir, gt_dir, prefix, filename)) 54 | 55 | return tasks 56 | 57 | ################################ 58 | 59 | def job(task): 60 | b2r2_func_path = os.path.join(task.output_dir, 'super', 'b2r2_meta.json') 61 | if not os.path.exists(b2r2_func_path): 62 | return 63 | 64 | gt_func_path = os.path.join(task.gt_dir, 'norm_db', 'func.json') 65 | if not os.path.exists(gt_func_path): 66 | #print(' [-] %s does not exist'%(gt_func_path)) 67 | return 68 | 69 | stat_dir = os.path.join('stat', 'table', task.dataset) 70 | os.system('mkdir -p %s' % stat_dir) 71 | out_path = os.path.join(stat_dir, task.prefix + '_' + task.bin_name) 72 | if os.path.exists(out_path): 73 | return 74 | print(out_path) 75 | 76 | sys.stdout.flush() 77 | os.system("python3 table_size.py %s %s > %s" % (gt_func_path, b2r2_func_path, out_path)) 78 | 79 | def run_package(args, package): 80 | tasks = prepare_tasks(args, package) 81 | p = multiprocessing.Pool(args.core) 82 | p.map(job, tasks) 83 | 84 | def run(args): 85 | if args.package: 86 | run_package(args, args.package) 87 | else: 88 | for package in PACKAGES: 89 | run_package(args, package) 90 | 91 | if __name__ == '__main__': 92 | args = parse_arguments() 93 | run(args) 94 | -------------------------------------------------------------------------------- /artifact/4_print_br_overhead.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, re, os 2 | from consts import * 3 | 4 | def parse_arguments(): 5 | parser = argparse.ArgumentParser(description='counter') 6 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 7 | args = parser.parse_args() 8 | 9 | # Sanitizing arguments 10 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 11 | 12 | return args 13 | 14 | ################################ 15 | 16 | def is_valid_data(br1): 17 | if br1 == 0: 18 | return False 19 | return True 20 | 21 | def read_branch_data(filepath): 22 | with open(filepath) as f: 23 | data = f.read() 24 | (br1, br2) = re.findall('Indirect Branch Sites (.*) \((.*)\)', data.split('\n')[1])[0] 25 | br1 = int(br1) 26 | br2 = int(br2) 27 | if not is_valid_data(br1): 28 | return None 29 | 30 | return br2 / br1 31 | 32 | # Collect data generated by 4_get_br_overhead.py. 33 | def collect(args): 34 | data = {} 35 | for package in PACKAGES: 36 | stat_files = os.path.join('stat', 'bbl', args.dataset, '%s_*' % package) 37 | for filepath in glob.glob(stat_files): 38 | overhead = read_branch_data(filepath) 39 | if overhead is None: 40 | continue 41 | 42 | if package not in data: 43 | data[package] = 0, 0 44 | 45 | num_bins, sum_overhead = data[package] 46 | num_bins += 1 47 | sum_overhead += overhead 48 | data[package] = num_bins, sum_overhead 49 | 50 | return data 51 | 52 | ################################ 53 | 54 | # Report the percentage of average branch count overheads for Section 4.3.1 of our 55 | # paper. 56 | def report(data): 57 | print(FMT_BRANCH_HEADER) 58 | 59 | total_num_bins = 0 60 | total_overhead = 0.0 61 | for package in PACKAGES: 62 | if package not in data: 63 | continue 64 | 65 | num_bins, overhead = data[package] 66 | total_num_bins += num_bins 67 | total_overhead += overhead 68 | if num_bins > 0: 69 | avg_overhead = overhead / num_bins * 100 70 | print(FMT_OVERHEAD % (package, num_bins, avg_overhead)) # Report individual data per package 71 | 72 | if total_num_bins > 0: 73 | total_avg_overhead = total_overhead / total_num_bins * 100 74 | print(FMT_OVERHEAD % ('[+]All', total_num_bins, total_avg_overhead)) # Report overall data 75 | 76 | if __name__ == '__main__': 77 | args = parse_arguments() 78 | data = collect(args) 79 | report(data) 80 | -------------------------------------------------------------------------------- /artifact/4_print_code_size_overhead.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, os 2 | from consts import * 3 | 4 | def parse_arguments(): 5 | parser = argparse.ArgumentParser(description='counter') 6 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 7 | args = parser.parse_args() 8 | 9 | # Sanitizing arguments 10 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 11 | 12 | return args 13 | 14 | ################################ 15 | 16 | def is_valid_data(data): 17 | if len(data.split()) != 2: 18 | return False 19 | return True 20 | 21 | def read_code_size_data(filepath): 22 | with open(filepath) as fd: 23 | data = fd.read() 24 | if not is_valid_data(data): 25 | return None 26 | 27 | old_size, new_size = data.split() 28 | old_size = int(old_size) 29 | new_size = int(new_size) 30 | return (new_size - old_size) / old_size 31 | 32 | # Collect data generated by 4_get_code_size_overhead.py. 33 | def collect(args): 34 | data = {} 35 | for package in PACKAGES: 36 | stat_files = os.path.join('stat', 'size', args.dataset, '%s_*' % package) 37 | for filepath in glob.glob(stat_files): 38 | overhead = read_code_size_data(filepath) 39 | if overhead is None: 40 | continue 41 | 42 | if package not in data: 43 | data[package] = 0, 0 44 | 45 | num_bins, sum_overhead = data[package] 46 | num_bins += 1 47 | sum_overhead += overhead 48 | data[package] = num_bins, sum_overhead 49 | 50 | return data 51 | 52 | ################################ 53 | 54 | # Report the percentage of average code size overheads for Section 4.3.1 of our 55 | # paper. 56 | def report(data): 57 | print(FMT_CODE_HEADER) 58 | 59 | total_num_bins = 0 60 | total_overhead = 0.0 61 | for package in PACKAGES: 62 | if package not in data: 63 | continue 64 | 65 | num_bins, overhead = data[package] 66 | total_num_bins += num_bins 67 | total_overhead += overhead 68 | if num_bins > 0: 69 | avg_overhead = overhead / num_bins * 100 70 | print(FMT_OVERHEAD % (package, num_bins, avg_overhead)) # Report individual data per package 71 | 72 | if total_num_bins > 0: 73 | total_avg_overhead = total_overhead / total_num_bins * 100 74 | print(FMT_OVERHEAD % ('[+]All', total_num_bins, total_avg_overhead)) # Report overall data 75 | 76 | if __name__ == '__main__': 77 | args = parse_arguments() 78 | data = collect(args) 79 | report(data) 80 | -------------------------------------------------------------------------------- /artifact/4_print_suri_overhead.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, os 2 | from collections import namedtuple 3 | from consts import * 4 | 5 | ExpTask = namedtuple('ExpTask', ['dataset', 'log_dir', 'bin_name']) 6 | 7 | def parse_arguments(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA)') 10 | args = parser.parse_args() 11 | 12 | # Sanitizing arguments 13 | assert args.dataset in ['setA'], 'Invalid dataset: "%s"'%(args.dataset) 14 | 15 | return args 16 | 17 | ################################ 18 | 19 | def prepare_tasks(args, package): 20 | tasks = [] 21 | for comp in COMPILERS: 22 | for opt in OPTIMIZATIONS: 23 | for lopt in LINKERS: 24 | data_dir = os.path.join(args.dataset, package, comp, '%s_%s' % (opt, lopt)) 25 | log_dir = os.path.join('stat', 'suri_runtime', args.dataset, package, comp, '%s_%s' % (opt, lopt)) 26 | if os.path.exists(data_dir): 27 | orig_dir = os.path.join(data_dir, 'original', '*') 28 | for target in glob.glob(orig_dir): 29 | filename = os.path.basename(target) 30 | tasks.append(ExpTask(args.dataset, log_dir, filename)) 31 | 32 | return tasks 33 | 34 | ################################ 35 | 36 | def is_valid_data(line): 37 | if 'seconds' not in line: 38 | return False 39 | # FIXME: maybe it should be 'total'? 40 | if line.split(';')[1].split()[1] not in ['total', 'seconds']: 41 | return False 42 | return True 43 | 44 | def read_time_data(filepath): 45 | with open(filepath) as f: 46 | line = f.read().split('\n')[-2] 47 | if not is_valid_data(line): 48 | return None 49 | 50 | time = line.split(';')[1].split()[0] 51 | return int(time) 52 | 53 | def get_data(task, package, tool_name): 54 | log_path = os.path.join(task.log_dir, tool_name, '%s.txt' % task.bin_name) 55 | return read_time_data(log_path) 56 | 57 | # Collect data generated by 4_get_suri_overhead.py. 58 | def collect(args): 59 | data = {} 60 | for package in PACKAGES_SPEC: 61 | tasks = prepare_tasks(args, package) 62 | 63 | num_bins = 0 64 | overhead = 0.0 65 | for task in tasks: 66 | d_original = get_data(task, package, 'original') 67 | d_suri = get_data(task, package, 'suri') 68 | if d_original is None or d_suri is None: 69 | continue 70 | 71 | num_bins += 1 72 | overhead += (d_suri - d_original) / d_original 73 | 74 | data[package] = num_bins, overhead 75 | 76 | return data 77 | 78 | ################################ 79 | 80 | # Report the percentage of average runtime overheads on the overall dataset. 81 | # This will report the number mentioned in Section 1 (0.2% on average) of our 82 | # paper. 83 | def report(data): 84 | print(FMT_SURI_HEADER % ('', 'suri')) 85 | print(FMT_LINE) 86 | 87 | total_num_bins = 0 88 | total_overhead = 0.0 89 | for package in PACKAGES_SPEC: 90 | if package not in data: 91 | continue 92 | 93 | num_bins, overhead = data[package] 94 | total_num_bins += num_bins 95 | total_overhead += overhead 96 | if num_bins > 0: 97 | avg_overhead = overhead / num_bins * 100 98 | print(FMT_SURI % (package, num_bins, avg_overhead)) # Report individual data per package 99 | 100 | if total_num_bins > 0: 101 | print(FMT_LINE) 102 | total_avg_overhead = total_overhead / total_num_bins * 100 103 | print(FMT_SURI % ('Total', total_num_bins, total_avg_overhead)) # Report overall data 104 | 105 | if __name__ == '__main__': 106 | args = parse_arguments() 107 | data = collect(args) 108 | report(data) 109 | -------------------------------------------------------------------------------- /artifact/4_print_table_overhead.py: -------------------------------------------------------------------------------- 1 | import argparse, glob, os 2 | from consts import * 3 | 4 | def parse_arguments(): 5 | parser = argparse.ArgumentParser(description='counter') 6 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 7 | args = parser.parse_args() 8 | 9 | # Sanitizing arguments 10 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 11 | 12 | return args 13 | 14 | ################################ 15 | 16 | def is_valid_data(lines): 17 | if len(lines) < 7: 18 | return False 19 | elif 'Size Overhead:' not in lines[-7]: 20 | return False 21 | return True 22 | 23 | def read_table_data(filepath): 24 | with open(filepath) as f: 25 | data = f.read() 26 | lines = data.split('\n') 27 | if not is_valid_data(lines): 28 | return None 29 | 30 | entries_gt = int(lines[-5].split()[-1]) 31 | entries_suri = int(lines[-4].split()[-1]) 32 | if entries_gt > 0: 33 | return (entries_suri - entries_gt) / entries_gt 34 | else: 35 | return None # We do not count the cases where there are no jump tables. 36 | 37 | # Collect data generated by 4_get_table_overhead.py. 38 | def collect(args): 39 | data = {} 40 | for package in PACKAGES: 41 | stat_files = os.path.join('stat', 'table', args.dataset, '%s_*' % package) 42 | for filepath in glob.glob(stat_files): 43 | overhead = read_table_data(filepath) 44 | if overhead is None: 45 | continue 46 | 47 | if package not in data: 48 | data[package] = 0, 0 49 | 50 | num_bins, sum_overhead = data[package] 51 | num_bins += 1 52 | sum_overhead += overhead 53 | data[package] = num_bins, sum_overhead 54 | 55 | return data 56 | 57 | ################################ 58 | 59 | # Report the percentage of average table size overheads for Section 4.3.1 of our 60 | # paper. 61 | def report(data): 62 | print(FMT_TABLE_HEADER) 63 | 64 | total_num_bins = 0 65 | total_overhead = 0.0 66 | for package in PACKAGES: 67 | if package not in data: 68 | continue 69 | 70 | num_bins, overhead = data[package] 71 | total_num_bins += num_bins 72 | total_overhead += overhead 73 | if num_bins > 0: 74 | avg_overhead = overhead / num_bins * 100 75 | print(FMT_OVERHEAD % (package, num_bins, avg_overhead)) # Report individual data per package 76 | 77 | if total_num_bins > 0: 78 | total_avg_overhead = total_overhead / total_num_bins * 100 79 | print(FMT_OVERHEAD % ('[+]All', total_num_bins, total_avg_overhead)) # Report overall data 80 | 81 | if __name__ == '__main__': 82 | args = parse_arguments() 83 | data = collect(args) 84 | report(data) 85 | -------------------------------------------------------------------------------- /artifact/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM suri:v1.0 2 | 3 | # Install additional dependencies 4 | RUN apt -y install time flex texinfo bison dejagnu 5 | 6 | RUN wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ 7 | add-apt-repository 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main' && \ 8 | apt update && \ 9 | apt install -y clang-13 libomp-dev && \ 10 | apt install -y libomp-13-dev 11 | 12 | RUN apt -y install php-cli php-xml 13 | 14 | # Install Reassessor 15 | RUN cd /project && \ 16 | git clone https://github.com/SoftSec-KAIST/Reassessor.git && \ 17 | cd /project/Reassessor && \ 18 | git checkout suri && \ 19 | pip install -r requirements.txt && \ 20 | python3 setup.py install 21 | 22 | # Install RetroWrite 23 | RUN git clone https://github.com/witbring/retrowrite_ef4e541.git /project/retrowrite && \ 24 | cd /project/retrowrite && \ 25 | pip3 install -r requirements.txt 26 | 27 | # Build Coreutils for test suite 28 | RUN wget https://ftp.gnu.org/gnu/coreutils/coreutils-9.1.tar.gz && \ 29 | tar -xzf coreutils-9.1.tar.gz && \ 30 | cd /coreutils-9.1 && \ 31 | FORCE_UNSAFE_CONFIGURE=1 ./configure && \ 32 | make 33 | 34 | COPY ./build_script/script/coreutils_copy.sh /coreutils-9.1/copy.sh 35 | COPY ./build_script/script/coreutils-9.1_list.txt /coreutils-9.1/coreutils-9.1_list.txt 36 | 37 | # Build Binutils for test suite 38 | RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.40.tar.gz && \ 39 | tar -xzf binutils-2.40.tar.gz && \ 40 | cd /binutils-2.40 && \ 41 | ./configure && \ 42 | make 43 | 44 | COPY ./build_script/script/binutils_copy.sh /binutils-2.40/copy.sh 45 | COPY ./build_script/script/binutils-2.40_list.txt /binutils-2.40/binutils-2.40_list.txt 46 | 47 | # Install phoronix test suites 48 | RUN git clone https://github.com/phoronix-test-suite/phoronix-test-suite.git && \ 49 | cd /phoronix-test-suite && \ 50 | ./install-sh && \ 51 | phoronix-test-suite install sqlite && \ 52 | phoronix-test-suite intsall nginx && \ 53 | phoronix-test-suite install apache && \ 54 | phoronix-test-suite install compress-7zip && \ 55 | phoronix-test-suite install mysqlslap 56 | -------------------------------------------------------------------------------- /artifact/README.md: -------------------------------------------------------------------------------- 1 | # SURI Artifact 2 | 3 | This artifact is intended to reproduce the experimental results presented in 4 | our paper, "Towards Sound Reassembly of Modern x86-64 Binaries", published at 5 | ASPLOS '25. It contains scripts for running experiments and datasets we used. 6 | 7 | ## Overview 8 | 9 | ### Experiments 10 | 11 | This artifact will answer all three research questions from our paper: 12 | - RQ1: How well does SURI compare to the state-of-the-art reassembly tools in 13 | terms of reliability? 14 | - RQ2: How big is the performance overhead introduced by SURI for rewritten 15 | binaries? 16 | - RQ3: Is SURI applicable to real-world scenarios, such as runtime memory 17 | sanitization? 18 | 19 | To answer these questions, we conducted total 5 experiments as follows: 20 | - Exp1: Reassembly completion comparison (RQ1) 21 | - Exp2: Test suite pass rate comparison (RQ1) 22 | - Exp3: Reliability test on real-world programs (RQ1) 23 | - Exp4: Reassembly overhead measurement (RQ2) 24 | - Exp5: Application of SURI (RQ3) 25 | 26 | ### Comparison Targets 27 | 28 | We have three comparison targets for the comparative study of SURI. 29 | - [Ddisasm](https://github.com/GrammaTech/ddisasm): a binary reassembler based 30 | on datalog disassembly (USENIX Security '20) 31 | - [Egalito](https://github.com/columbia/egalito): a binary recompiler based on 32 | layout-agnostifc binary recompilation (ASPLOS '20) 33 | - BASan: a binary-only address sanitizer implemented on top of 34 | [RetroWrite](https://github.com/HexHive/retrowrite) (S&P '20) 35 | 36 | This table is a brief summary of each tool: 37 | | Tool | Running Env. | Exp1 | Exp2 | Exp3 | Exp4 | Exp5 | 38 | | ------- | ------------ | ---- | ---- | ---- | ---- | ---- | 39 | | Ddisasm | Ubuntu 20.04 | :o: | :o: | | :o: | | 40 | | Egalito | Ubuntu 18.04 | :o: | :o: | | :o: | | 41 | | BASan | Ubuntu 20.04 | | | | | :o: | 42 | 43 | ### Dataset 44 | 45 | We used 5 different kinds of benchmark programs to evaluate SURI: 46 | - Coreutils v9.1 47 | - Binutils v2.40 48 | - SPEC CPU 2006 v1.2 and 2017 v1.1.5 49 | - 10 real-world programs 50 | - Apache v2.4.56 51 | - MariaDB v11.5.0 52 | - Nginx v1.23.3 53 | - SQLitev 3.31.2 54 | - 7-Zip-24.05 55 | - Epiphany-3.36.4 56 | - Filezilla v3.46.3 57 | - Openssh v8.2p1 58 | - Putty v0.73 59 | - Vim v8.1 60 | - Juliet Test Suite v1.3 61 | 62 | Coreutils, Binutils, and SPEC are used for Exp1, Exp2, and Exp4, real-world 63 | programs are used for Exp3, and Juliet Test Suite is used for Exp5. 64 | 65 | For Coreutils, Binutils and SPEC benchmarks, we further make three different 66 | datasets: 67 | - setA: binaries compiled on Ubuntu 20.04 (SURI vs. Ddisasm) 68 | - setB: binaries compiled on Ubuntu 18.04 (SURI vs. Egalito) 69 | - setC: binaries compiled on Ubuntu 20.04 w/o call frame information (ablation 70 | study - see Section 4.3.3 of the paper) 71 | 72 | Below table sumarizes our datasets: 73 | | Dataset | Language | Exp1 | Exp2 | Exp3 | Exp4 | Exp5 | 74 | | ---------- | -------------- | ---- | ---- | ---- | ---- | ---- | 75 | | setA | C/C++, Fortran | :o: | :o: | | :o: | | 76 | | setB | C | :o: | :o: | | :o: | | 77 | | setC | C/C++, Fortran | :o: | :o: | | :o: | | 78 | | Real-world | C/C++ | | | :o: | | | 79 | | Juliet | C/C++ | | | | | :o: | 80 | 81 | :warning: We exclude SPEC benchmark binaries from our dataset because they are 82 | proprietary. However, we prepare benchmark building scripts for SPEC 83 | benchmarks, in case you have a valid license of SPEC CPU 2006 or 2017. Our 84 | experimental scripts will work well regardless of the existence of SPEC 85 | binaries, though. 86 | 87 | ## Links 88 | 89 | These are the links that explain how to set up our artifact and how to run the 90 | experiments. 91 | - [Preparation](PREPARATION.md) 92 | - [Experiment](EXPERIMENT.md) 93 | -------------------------------------------------------------------------------- /artifact/Reassessor/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | #lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | *.pyc 131 | *.swp 132 | temp* 133 | 134 | nohup.out 135 | triage/ 136 | -------------------------------------------------------------------------------- /artifact/Reassessor/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update && apt-get upgrade -y && \ 4 | apt-get install -y git python3-setuptools python3-pip 5 | 6 | RUN git clone https://github.com/SoftSec-KAIST/Reassessor.git 7 | 8 | RUN cd Reassessor; \ 9 | pip3 install -r requirements.txt; \ 10 | python3 setup.py install;\ 11 | cd - 12 | 13 | 14 | -------------------------------------------------------------------------------- /artifact/Reassessor/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 SoftSec Lab, KAIST 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /artifact/Reassessor/example/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: hello 3 | 4 | # This rule tells make how to build hello from hello.cpp 5 | hello: src/hello.c 6 | clang -o src/hello src/hello.c -save-temps=obj -g -pie -fPIE 7 | mkdir -p bin/ 8 | mv src/hello bin/ 9 | mkdir -p asm/ 10 | mv src/*.s asm/ 11 | 12 | # This rule tells make to delete hello and hello.o 13 | .PHONY: clean 14 | clean: 15 | rm src/hello *.bc *.i *.o *.s 16 | -------------------------------------------------------------------------------- /artifact/Reassessor/example/src/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | int output; 3 | const int bar[]={-0xd0,-0xe0,-0xf0,-0x100}; 4 | 5 | void foo(int input){ 6 | switch(input){ 7 | case 0: output = bar[0]; break; 8 | case 1: output = bar[1]; break; 9 | case 2: output = bar[2]; break; 10 | case 3: output = bar[3]; break; 11 | default: break; 12 | } 13 | } 14 | int main() 15 | { 16 | int i; 17 | foo(i); 18 | for (i=0; i < 4; ++i){ 19 | foo(i); 20 | printf("Hello World [In:%d, Out:%d]\n", i, output); 21 | } 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /artifact/Reassessor/reassessor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/Reassessor/reassessor/__init__.py -------------------------------------------------------------------------------- /artifact/Reassessor/reassessor/differ/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/Reassessor/reassessor/differ/__init__.py -------------------------------------------------------------------------------- /artifact/Reassessor/reassessor/differ/diff.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from reassessor.normalizer import retro, ramblr, ddisasm, gt 4 | from .statistics import Statistics 5 | from .ereport import Report 6 | 7 | 8 | def diff(bin_path, pickle_gt_path, pickle_tool_dict, save_dir, error_check=True, disasm_check=True, reset=False): 9 | 10 | 11 | # Load GT 12 | if not os.path.exists(pickle_gt_path): 13 | print('No gt ' + pickle_gt_path) 14 | return 15 | 16 | pickle_gt_f = open(pickle_gt_path, 'rb') 17 | prog_c = pickle.load(pickle_gt_f) 18 | stat = Statistics(prog_c) 19 | 20 | for tool, pickle_tool_path in pickle_tool_dict.items(): 21 | pickle_tool_f = open(pickle_tool_path, 'rb') 22 | prog_r = pickle.load(pickle_tool_f) 23 | 24 | if error_check: 25 | sym_diff_file_path = '%s/%s/sym_diff.txt'%(save_dir,tool) 26 | error_json_file_path = '%s/%s/sym_errors.json'%(save_dir, tool) 27 | error_pickle_file_path = '%s/%s/sym_errors.dat'%(save_dir, tool) 28 | 29 | if os.path.exists(sym_diff_file_path) and not reset: 30 | pass 31 | else: 32 | report = Report(bin_path, prog_c) 33 | report.compare(prog_r) 34 | report.save_file(sym_diff_file_path) 35 | report.save_file(error_json_file_path, option='json') 36 | report.save_pickle(error_pickle_file_path) 37 | 38 | if disasm_check: 39 | disasm_file_path = '%s/%s/disasm_diff.txt'%(save_dir, tool) 40 | if os.path.exists(disasm_file_path) and not reset: 41 | pass 42 | else: 43 | stat.count_disasm(prog_r, disasm_file_path) 44 | 45 | pickle_tool_f.close() 46 | 47 | pickle_gt_f.close() 48 | 49 | 50 | import argparse 51 | 52 | if __name__ == '__main__': 53 | parser = argparse.ArgumentParser(description='differ') 54 | parser.add_argument('bin_path', type=str) 55 | parser.add_argument('pickle_gt_path', type=str) 56 | parser.add_argument('save_dir', type=str) 57 | parser.add_argument('--ddisasm', type=str) 58 | parser.add_argument('--ramblr', type=str) 59 | parser.add_argument('--retro', type=str) 60 | parser.add_argument('--error', action='store_true') 61 | parser.add_argument('--disasm', action='store_true') 62 | args = parser.parse_args() 63 | 64 | pickle_tool_dict = dict() 65 | if args.ddisasm: 66 | pickle_tool_dict['ddisasm'] = args.ddisasm 67 | if args.ramblr: 68 | pickle_tool_dict['ramblr'] = args.ramblr 69 | if args.retro: 70 | pickle_tool_dict['retrowrite'] = args.retro 71 | 72 | if pickle_tool_dict: 73 | if args.error and not args.disasm: 74 | diff(args.bin_path, args.pickle_gt_path, pickle_tool_dict, args.save_dir, error_check=True, disasm_check=False, reset=True ) 75 | elif not args.error and args.disasm: 76 | diff(args.bin_path, args.pickle_gt_path, pickle_tool_dict, args.save_dir, error_check=False, disasm_check=True, reset=True ) 77 | else: 78 | diff(args.bin_path, args.pickle_gt_path, pickle_tool_dict, args.save_dir, reset=True) 79 | 80 | 81 | -------------------------------------------------------------------------------- /artifact/Reassessor/reassessor/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/Reassessor/reassessor/lib/__init__.py -------------------------------------------------------------------------------- /artifact/Reassessor/reassessor/normalizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/Reassessor/reassessor/normalizer/__init__.py -------------------------------------------------------------------------------- /artifact/Reassessor/requirements.txt: -------------------------------------------------------------------------------- 1 | capstone 2 | pyelftools>=0.29 3 | bitstring 4 | numpy 5 | -------------------------------------------------------------------------------- /artifact/Reassessor/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='reassessor', 5 | version='1.0.0', 6 | author='Hyungseok Kim', 7 | description='Reassessor', 8 | packages=['reassessor', 'reassessor.lib', 'reassessor.normalizer', 'reassessor.differ'], 9 | ) 10 | -------------------------------------------------------------------------------- /artifact/application/rewrite_juliet.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import glob 3 | import multiprocessing 4 | from collections import namedtuple 5 | 6 | CWES = [124, 126, 127, 122, 121, 129] 7 | 8 | failures = set() 9 | 10 | def run_docker(cmd): 11 | pwd = os.getcwd() 12 | dock_cmd = 'docker run --rm -v %s:/input -v %s:/output suri_artifact:v1.0 sh -c " %s"'%(pwd, pwd, cmd) 13 | os.system(dock_cmd) 14 | 15 | 16 | ConfRetro = namedtuple('ConfRetro', ['file_name', 'asm_file', 'bin_file']) 17 | 18 | def make_retro(args): 19 | target, asm_file, bin_file = args 20 | if not os.path.exists(asm_file): 21 | run_docker('python3 /project/retrowrite/retrowrite --asan /input/%s /output/%s'%(target, asm_file)) 22 | 23 | if not os.path.exists(bin_file): 24 | os.system('g++ %s -lasan -lpthread -o %s'%(asm_file, bin_file)) 25 | 26 | print(bin_file) 27 | 28 | def build_retro(core): 29 | 30 | conf_list = [] 31 | for target in glob.glob('bin_original/CWE*/*/CWE*'): 32 | dir_name = os.path.dirname(target).replace('bin_original', 'bin_retro') 33 | base_name = os.path.basename(target) 34 | if not base_name.endswith('.bin'): 35 | continue 36 | 37 | if not os.path.exists(dir_name): 38 | os.makedirs(dir_name) 39 | 40 | name = base_name[:-4] 41 | asm_file = os.path.join(dir_name, name + '.s') 42 | bin_file = os.path.join(dir_name, base_name) 43 | conf_list.append(ConfRetro(target, asm_file, bin_file)) 44 | 45 | if core and core > 1: 46 | p = multiprocessing.Pool(core) 47 | p.map(make_retro, conf_list) 48 | else: 49 | for conf in conf_list: 50 | make_retro(conf) 51 | 52 | ConfSURI = namedtuple('ConfSURI', ['target', 'b2r2_meta', 'b2r2_asan', 'asm_file', 'dirname']) 53 | 54 | def make_suri(args): 55 | 56 | target, b2r2_meta, b2r2_asan, asm_file, dirname = args 57 | 58 | cmd = 'python3 /project/SURI/suri.py /input/%s --ofolder /output/%s --asan --without-compile --with-stack-poisoning'%(target, dirname) 59 | run_docker(cmd) 60 | 61 | src_dir = os.path.dirname(target) 62 | target_name = target.split('/')[-1] 63 | tmp_name = 'tmp_' + target_name 64 | new_name = 'my_' + target_name 65 | 66 | cmd_list = [] 67 | pwd = os.getcwd() 68 | cmd_list.append('python3 ../../emitter.py %s %s --ofolder %s --asan'%(target, asm_file, dirname)) 69 | cmd_list.append('mv %s/%s %s/%s'%(dirname, new_name, dirname, target_name )) 70 | cmd_list.append('rm %s/%s'%(dirname, tmp_name)) 71 | cmd = ';'.join(cmd_list) 72 | os.system(cmd) 73 | 74 | def build_suri(core): 75 | 76 | conf_list = [] 77 | for target in glob.glob('bin_original/CWE*/*/CWE*'): 78 | dir_name = os.path.dirname(target).replace('bin_original', 'bin_suri') 79 | base_name = os.path.basename(target) 80 | if not base_name.endswith('.bin'): 81 | continue 82 | 83 | if not os.path.exists(dir_name): 84 | os.makedirs(dir_name) 85 | 86 | name = base_name[:-4] 87 | b2r2_meta = os.path.join(dir_name, name + '.json') 88 | b2r2_asan = os.path.join(dir_name, name + '_asan.json') 89 | asm_file = os.path.join(dir_name, base_name + '.s') 90 | conf_list.append(ConfSURI(target, b2r2_meta, b2r2_asan, asm_file, dir_name)) 91 | 92 | if core and core > 1: 93 | p = multiprocessing.Pool(core) 94 | p.map(make_suri, conf_list) 95 | else: 96 | for conf in conf_list: 97 | make_suri(conf) 98 | 99 | 100 | def run(tool, core): 101 | if tool == 'retrowrite': 102 | build_retro(core) 103 | elif tool == 'suri': 104 | build_suri(core) 105 | 106 | 107 | 108 | import argparse 109 | if __name__ == '__main__': 110 | parser = argparse.ArgumentParser(description='manager') 111 | parser.add_argument('tool', type=str, help='Tool') 112 | parser.add_argument('--core', type=int, default=1, help='Number of cores to use') 113 | args = parser.parse_args() 114 | 115 | run(args.tool, args.core) 116 | -------------------------------------------------------------------------------- /artifact/application/run_juliet.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import glob 3 | import subprocess 4 | 5 | 6 | # gcc -I./testcasesupport -DINCLUDEMAIN -o test.bin testcasesupport/io.c testcasesupport/std_thread.c testcases/CWE78_OS_Command_Injection/s02/CWE78_OS_Command_Injection__char_console_system_01.c 7 | # CWE-121/122/124/126/127 8 | 9 | CWES = [122, 124, 126, 127, 121, 129] 10 | 11 | failures = set() 12 | 13 | 14 | from collections import namedtuple 15 | BuildConf = namedtuple('BuildConf', ['file_name', 'tc_dir', 'sub_name', 'dir_name']) 16 | 17 | def single(target): 18 | 19 | log_file = '/'.join(target.split('/')[:2]) + '/logs/' + os.path.basename(target) 20 | 21 | dirname = os.path.dirname(log_file) 22 | if not os.path.exists(dirname): 23 | os.system('mkdir -p %s'%(dirname)) 24 | 25 | if not os.path.exists(log_file): 26 | cmd = 'timeout 5 %s > %s 2>&1 '%(target, log_file) 27 | print(cmd) 28 | os.system(cmd) 29 | elif os.path.getsize(log_file) == 0: 30 | cmd = "script -qc 'timeout 5 %s' %s "%(target, log_file) 31 | print(log_file) 32 | os.system(cmd) 33 | 34 | 35 | def job(conf): 36 | target, multiple = conf 37 | 38 | log_file = '/'.join(target.split('/')[:2]) + '/logs/' + os.path.basename(target) 39 | 40 | dirname = os.path.dirname(log_file) 41 | if not os.path.exists(dirname): 42 | os.system('mkdir -p %s'%(dirname)) 43 | 44 | cmd = 'timeout 5 %s > %s 2>&1 '%(target, log_file) 45 | if not os.path.exists(log_file): 46 | os.system('echo timeout > %s'%(log_file)) 47 | os.system(cmd) 48 | 49 | res = subprocess.getoutput('grep "==ERROR" %s | wc -l'%(log_file)) 50 | if res != '0': 51 | print(log_file) 52 | return 53 | for idx in range(multiple): 54 | os.system(cmd) 55 | res = subprocess.getoutput('grep "==ERROR" %s | wc -l'%(log_file)) 56 | if res != '0': 57 | return 58 | 59 | print('not found') 60 | 61 | import multiprocessing 62 | 63 | def run_cwe(out_dir, dir_name, core, dataset, multiple): 64 | base_dir = os.path.join('./C/testcases', dir_name) 65 | conf_list = [] 66 | 67 | 68 | pattern = '%s/*/*/*.bin'%(out_dir) 69 | 70 | print(pattern) 71 | for sub_name in glob.glob(pattern): 72 | 73 | target = os.path.abspath (sub_name) 74 | 75 | filename = os.path.basename(target) 76 | conf_list.append(sub_name) 77 | 78 | if core and core > 1: 79 | p = multiprocessing.Pool(core) 80 | if dataset in ['original']: 81 | p.map(single, [(conf) for conf in conf_list]) 82 | else: 83 | p.map(job, [(conf,multiple) for conf in conf_list]) 84 | else: 85 | for conf in conf_list: 86 | if dataset in ['original']: 87 | single(conf) 88 | else: 89 | job((conf, multiple)) 90 | 91 | def main(out_dir, core, dataset, multiple): 92 | for cwe in CWES: 93 | for dir_name in os.listdir('./C/testcases'): 94 | if dir_name.startswith('CWE%d' % cwe): 95 | run_cwe(out_dir, dir_name, core, dataset, multiple) 96 | 97 | with open('./failed.txt', 'w') as f: 98 | for case in failures: 99 | f.write('%s\n' % case) 100 | 101 | import argparse 102 | 103 | if __name__ == '__main__': 104 | 105 | parser = argparse.ArgumentParser(description='manager') 106 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setB, setC)') 107 | parser.add_argument('--core', type=int, default=1, help='Number of cores to use') 108 | parser.add_argument('--multiple', type=int, default=1, help='Number of cores to use') 109 | 110 | args = parser.parse_args() 111 | 112 | assert args.dataset in ['original', 'asan', 'suri', 'retrowrite'], '"%s" is invalid.'%(args.dataset) 113 | 114 | 115 | out_dir = './bin_%s'%(args.dataset) 116 | 117 | os.system('mkdir -p %s' % out_dir) 118 | main(out_dir, args.core, args.dataset, args.multiple) 119 | -------------------------------------------------------------------------------- /artifact/application/summary.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | errors = [ 5 | b"bad(", 6 | b"Segmentation fault", 7 | b"Aborted", 8 | b"timeout: the monitored command dumped core" 9 | ] 10 | 11 | def get_gt(): 12 | bad = [] 13 | good = [] 14 | for log in glob.glob('bin_original/logs/*'): 15 | with open(log, 'rb') as fd: 16 | data = fd.read() 17 | filename = os.path.basename(log) 18 | for error in errors: 19 | if error in data: 20 | bad.append(filename) 21 | break 22 | if filename not in bad: 23 | good.append(filename) 24 | 25 | return (bad, good) 26 | 27 | def get_res(dataset): 28 | bad = [] 29 | good = [] 30 | base = 'bin_%s'%(dataset) 31 | for log in glob.glob('%s/logs/*'%(base)): 32 | with open(log, 'rb') as fd: 33 | filename = os.path.basename(log) 34 | if filename.startswith('my_'): 35 | filename = filename[3:] 36 | 37 | data = fd.read() 38 | if b'=ERROR' in data: 39 | bad.append(filename) 40 | else: 41 | good.append(filename) 42 | 43 | return (bad, good) 44 | 45 | def get_tp(gt, res1, res2, res3): 46 | tp1 = gt.intersection(res1) 47 | tp2 = gt.intersection(res2) 48 | tp3 = gt.intersection(res3) 49 | return len(tp1), len(tp2), len(tp3) 50 | 51 | def get_fp(gt, res1, res2, res3): 52 | fp1 = res1 - gt 53 | fp2 = res2 - gt 54 | fp3 = res3 - gt 55 | return len(fp1), len(fp2), len(fp3) 56 | 57 | def get_fn(gt, res1, res2, res3): 58 | fn1 = gt - res1 59 | fn2 = gt - res2 60 | fn3 = gt - res3 61 | return len(fn1), len(fn2), len(fn3) 62 | 63 | def get_tn(gt, res1, res2, res3): 64 | tn1 = gt.intersection(res1) 65 | tn2 = gt.intersection(res2) 66 | tn3 = gt.intersection(res3) 67 | return len(tn1), len(tn2), len(tn3) 68 | 69 | def summary(gt, suri, retro, asan): 70 | gt_bad_set = set(gt[0]) 71 | suri_bad_set = set(suri[0]) 72 | retro_bad_set = set(retro[0]) 73 | asan_bad_set = set(asan[0]) 74 | 75 | gt_good_set = set(gt[1]) 76 | suri_good_set = set(suri[1]) 77 | retro_good_set = set(retro[1]) 78 | asan_good_set = set(asan[1]) 79 | 80 | print('%15s %10s %10s %10s'%('', 'Ours', 'BASan', 'ASan')) 81 | 82 | res1, res2, res3 = get_tp(gt_bad_set, suri_bad_set, retro_bad_set, asan_bad_set) 83 | print('%15s %10d %10d %10d'%('True Positive', res1, res2, res3) ) 84 | 85 | res1, res2, res3 = get_fp(gt_bad_set, suri_bad_set, retro_bad_set, asan_bad_set) 86 | print('%15s %10d %10d %10d'%('False Positive', res1, res2, res3) ) 87 | 88 | res1, res2, res3 = get_fn(gt_bad_set, suri_bad_set, retro_bad_set, asan_bad_set) 89 | print('%15s %10d %10d %10d'%('False Negative', res1, res2, res3) ) 90 | 91 | res1, res2, res3 = get_tn(gt_good_set, suri_good_set, retro_good_set, asan_good_set) 92 | print('%15s %10d %10d %10d'%('True Negative', res1, res2, res3) ) 93 | 94 | print('-------------------------------------------------') 95 | print('%15s %10d %10d %10d'%('Total Binaries', 96 | len(suri_bad_set | suri_good_set), 97 | len(retro_bad_set | retro_good_set), 98 | len(asan_bad_set | asan_good_set))) 99 | 100 | if __name__ == '__main__': 101 | 102 | gt = get_gt() 103 | 104 | suri = get_res('suri') 105 | retro = get_res('retro') 106 | asan = get_res('asan') 107 | 108 | summary(gt, suri, retro, asan) 109 | 110 | 111 | -------------------------------------------------------------------------------- /artifact/build_script/build_spec2006.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | 5 | OPTIMIZATIONS = ['-O0', '-O1', '-O2', '-O3', '-Os', '-Ofast'] 6 | LINKERS = ['bfd', 'gold'] 7 | 8 | def parse_arguments(): 9 | parser = argparse.ArgumentParser('setup spec_cpu2006') 10 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setB, setC)') 11 | parser.add_argument('--spec', type=str, default='spec2006_image') 12 | 13 | args = parser.parse_args() 14 | 15 | assert args.dataset in ['setA', 'setB', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 16 | assert os.path.exists(os.path.join(args.spec, 'install.sh')), 'Invalid SPEC path: "%s"' % args.spec 17 | 18 | return args 19 | 20 | def get_docker_image(dataset): 21 | if dataset in ['setA', 'setC']: 22 | return 'suri_artifact:v1.0' 23 | else: 24 | return 'suri_artifact_ubuntu18.04:v1.0' 25 | 26 | ################################ 27 | 28 | def setup(dataset, spec_path): 29 | image = get_docker_image(dataset) 30 | 31 | cmd = 'docker run --rm -v %s:/spec2006_image -v $(pwd)/src:/data %s sh -c "chmod -R +x /spec2006_image && /spec2006_image/install.sh -d /data/spec_cpu2006 -f"' % (spec_path, image) 32 | print(cmd) 33 | os.system(cmd) 34 | 35 | shutil.copyfile('script/base2006.cfg', './src/spec_cpu2006/config/base2006.cfg') 36 | print('[+] Successfully finish setup SPEC CPU2006') 37 | 38 | ################################ 39 | 40 | def get_script_name(dataset): 41 | if dataset in ['setA', 'setB']: 42 | return 'build-spec2006.sh' 43 | else: 44 | return 'build-spec2006_no_ehframe.sh' 45 | 46 | def build(dataset, out_dir, opt, lopt): 47 | image = get_docker_image(dataset) 48 | script = get_script_name(dataset) 49 | 50 | cmd = 'docker run --rm -v $(pwd)/src:/data -v $(pwd)/script:/script -v %s:/output %s sh -c "/script/%s %s %s"' % (out_dir, image, script, opt, lopt) 51 | print(cmd) 52 | os.system(cmd) 53 | 54 | ################################ 55 | 56 | def run(args): 57 | spec_path = os.path.abspath(args.spec) 58 | out_dir = os.path.join(os.path.abspath("../benchmark"), args.dataset) 59 | 60 | setup(args.dataset, spec_path) 61 | for opt in OPTIMIZATIONS: 62 | for lopt in LINKERS: 63 | build(args.dataset, out_dir, opt, lopt) 64 | 65 | if __name__ == '__main__': 66 | args = parse_arguments() 67 | run(args) 68 | -------------------------------------------------------------------------------- /artifact/build_script/build_spec2017.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | 5 | OPTIMIZATIONS = ['-O0', '-O1', '-O2', '-O3', '-Os', '-Ofast'] 6 | LINKERS = ['bfd', 'gold'] 7 | 8 | def parse_arguments(): 9 | parser = argparse.ArgumentParser('setup spec_cpu2017') 10 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setB, setC)') 11 | parser.add_argument('--spec', type=str, default='spec2017_image') 12 | 13 | args = parser.parse_args() 14 | 15 | assert args.dataset in ['setA', 'setB', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 16 | assert os.path.exists(os.path.join(args.spec, 'install.sh')), 'Invalid SPEC path: "%s"' % args.spec 17 | 18 | return args 19 | 20 | def get_docker_image(dataset): 21 | if dataset in ['setA', 'setC']: 22 | return 'suri_artifact:v1.0' 23 | else: 24 | return 'suri_artifact_ubuntu18.04:v1.0' 25 | 26 | ################################ 27 | 28 | def setup(dataset, spec_path): 29 | image = get_docker_image(dataset) 30 | 31 | cmd = 'docker run --rm -v %s:/spec2017_image -v $(pwd)/src:/data %s sh -c "chmod -R +x /spec2017_image && /spec2017_image/install.sh -d /data/spec_cpu2017 -f"' % (spec_path, image) 32 | print(cmd) 33 | os.system(cmd) 34 | 35 | shutil.copyfile('script/base2017.cfg', './src/spec_cpu2017/config/base2017.cfg') 36 | print('[+] Successfully finish setup SPEC CPU2017') 37 | 38 | ################################ 39 | 40 | def get_script_name(dataset): 41 | if dataset in ['setA', 'setB']: 42 | return 'build-spec2017.sh' 43 | else: 44 | return 'build-spec2017_no_ehframe.sh' 45 | 46 | def build(dataset, out_dir, opt, lopt): 47 | image = get_docker_image(dataset) 48 | script = get_script_name(dataset) 49 | 50 | cmd = 'docker run --rm -v $(pwd)/src:/data -v $(pwd)/script:/script -v %s:/output %s sh -c "/script/%s %s %s"' % (out_dir, image, script, opt, lopt) 51 | print(cmd) 52 | os.system(cmd) 53 | 54 | ################################ 55 | 56 | def run(args): 57 | spec_path = os.path.abspath(args.spec) 58 | out_dir = os.path.join(os.path.abspath("../benchmark"), args.dataset) 59 | 60 | setup(args.dataset, spec_path) 61 | for opt in OPTIMIZATIONS: 62 | for lopt in LINKERS: 63 | build(args.dataset, out_dir, opt, lopt) 64 | 65 | if __name__ == '__main__': 66 | args = parse_arguments() 67 | run(args) 68 | -------------------------------------------------------------------------------- /artifact/build_script/script/binutils-2.40_list.txt: -------------------------------------------------------------------------------- 1 | binutils/addr2line 2 | binutils/ar 3 | gas/as-new 4 | binutils/cxxfilt 5 | binutils/elfedit 6 | gprof/gprof 7 | ld/ld-new 8 | binutils/nm-new 9 | binutils/objcopy 10 | binutils/objdump 11 | binutils/ranlib 12 | binutils/readelf 13 | binutils/size 14 | binutils/strings 15 | binutils/strip-new 16 | -------------------------------------------------------------------------------- /artifact/build_script/script/binutils_copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function copy 4 | { 5 | list_file=$1 6 | while IFS='' read -r line || [[ -n "$line" ]]; do 7 | name=$(echo $line | awk -F' ' '{print $1}') 8 | filename=$(basename "${line}") 9 | 10 | echo cp /dataset/$filename $name 11 | cp /dataset/$filename $name 12 | 13 | 14 | done < "$list_file" 15 | 16 | } 17 | copy binutils-2.40_list.txt 18 | -------------------------------------------------------------------------------- /artifact/build_script/script/build-spec2006.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PACKAGE="spec_cpu2006" 4 | 5 | SRCDIR=/data/ 6 | 7 | WORKDIR=/script/ 8 | 9 | MYDIR="$(dirname "$0")" 10 | OPTS=("-O0" "-O1" "-O2" "-O3" "-Ofast" "-Os") 11 | PIEOPTS=("-pie") 12 | TARGETOPTS=("x64" ) 13 | LINKEROPTS=("-fuse-ld=gold" "-fuse-ld=bfd") 14 | 15 | build_bin(){ 16 | PAKAGE=$1 17 | TARGET=$2 18 | OPT=$3 19 | PIEOPT=$4 20 | LINKEROPT=$5 21 | 22 | LINKEROPTSTR=`echo $LINKEROPT | sed 's/-fuse-ld=//g'` 23 | BINDOPT="-Wl,-z,lazy" 24 | 25 | #echo $TARGETOPT $OPT $PIEOPT $BINDOPT $LINKEROPT 26 | 27 | 28 | OPTSTR=`echo ${OPT:1} | tr '[:upper:]' '[:lower:]'` 29 | PIEOPTSTR=`echo $PIEOPT | tr '[:upper:]' '[:lower:]' | tr -d '-'` 30 | 31 | GCCOPT="$GCC" 32 | GPPOPT="$GPP" 33 | FORTRANOPT="$GFORTRAN" 34 | 35 | COMPILER=$(basename $GCCOPT) 36 | 37 | 38 | if [ "$TARGET" = "x86" ]; then 39 | EXTRAFLAGS="--build=i386-pc-linux TIME_T_32_BIT_OK=yes" 40 | EXTRAOPT="-m32" 41 | else 42 | EXTRAFLAGS="" 43 | EXTRAOPT="" 44 | fi 45 | 46 | if [ "$PIEOPT" = "-pie" ]; then 47 | PIEFLAGS="-fPIE" 48 | else 49 | PIEFLAGS="-fno-PIC" 50 | fi 51 | 52 | COMPILERSTR=`echo "$COMPILER" | sed 's$-.*$$g'` 53 | 54 | if [ "$COMPILER" = "clang-13" ]; then 55 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT -L/usr/lib/llvm-13/lib/" 56 | elif [ "$COMPILER" = "clang-10" ]; then 57 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT -L/usr/lib/llvm-10/lib/" 58 | else 59 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT " 60 | fi 61 | 62 | OFOLDER=$SRCDIR/$PACKAGE 63 | 64 | OUTPUT=case1_$COMPILER\_$OPTSTR\_$LINKEROPTSTR.cfg 65 | 66 | if [ -f "$OFOLDER/config/$OUTPUT" ]; then 67 | echo "$OUTPUT exists" 68 | cd $OFOLDER/ 69 | source shrc 70 | runspec --config=$OUTPUT --action=build --tune=base all 71 | cd $WORKDIR 72 | else 73 | cd $OFOLDER/config/ 74 | sed "s|_OPT_FLAGS_|COPTIMIZE = $OPT \nCXXOPTIMIZE = $OPT\nFOPTIMIZE = $OPT |g" base2006.cfg | sed "s|_COMPILE_OPTION_|CC = $GCCOPT $ALL_FLAGS -std=gnu89\nCXX = $GPPOPT $ALL_FLAGS -std=gnu++98\nFC = $FORTRANOPT $ALL_FLAGS $FFLAGS -std=legacy --save-temps |g" | sed "s|_OUTPUT_FOLDER_|ext = $OUTPUT|g" | sed "s|wrf_data_header_size|#wrf_data_header_size|g" > $OUTPUT 75 | cd .. 76 | source shrc 77 | runspec --config=$OUTPUT --action=build --tune=base all 78 | cd $WORKDIR 79 | fi 80 | /bin/bash ./copy.sh spec_cpu2006 case1 $COMPILER $OPTSTR $LINKEROPTSTR 81 | } 82 | 83 | arg_opt=$1 84 | arg_lopt=$2 85 | 86 | GCC=/usr/bin/gcc-11 87 | GPP=/usr/bin/g++-11 88 | GFORTRAN=/usr/bin/gfortran-11 89 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 90 | 91 | build_bin spec_cpu2006 x64 $arg_opt -pie -fuse-ld=$arg_lopt 92 | 93 | GCC=/usr/bin/gcc-13 94 | GPP=/usr/bin/g++-13 95 | GFORTRAN=/usr/bin/gfortran-13 96 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 97 | 98 | build_bin spec_cpu2006 x64 $arg_opt -pie -fuse-ld=$arg_lopt 99 | 100 | GCC=/usr/bin/clang-10 101 | GPP=/usr/bin/clang++-10 102 | GFORTRAN=/usr/bin/gfortran-11 103 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 104 | 105 | build_bin spec_cpu2006 x64 $arg_opt -pie -fuse-ld=$arg_lopt 106 | 107 | GCC=/usr/bin/clang-13 108 | GPP=/usr/bin/clang++-13 109 | GFORTRAN=/usr/bin/gfortran-11 110 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 111 | 112 | build_bin spec_cpu2006 x64 $arg_opt -pie -fuse-ld=$arg_lopt 113 | 114 | -------------------------------------------------------------------------------- /artifact/build_script/script/build-spec2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PACKAGE="spec_cpu2017" 4 | 5 | SRCDIR=/data/ 6 | 7 | WORKDIR=/script/ 8 | 9 | MYDIR="$(dirname "$0")" 10 | OPTS=("-Os" "-Ofast" "-O3" "-O2" "-O1" "-O0") 11 | PIEOPTS=("-pie") 12 | TARGETOPTS=("x64" ) 13 | LINKEROPTS=("-fuse-ld=gold" "-fuse-ld=bfd") 14 | 15 | build_bin(){ 16 | PACKAGE=$1 17 | TARGET=$2 18 | OPT=$3 19 | PIEOPT=$4 20 | LINKEROPT=$5 21 | 22 | LINKEROPTSTR=`echo $LINKEROPT | sed 's/-fuse-ld=//g'` 23 | BINDOPT="-Wl,-z,lazy" 24 | 25 | #echo $TARGETOPT $OPT $PIEOPT $BINDOPT $LINKEROPT 26 | 27 | 28 | OPTSTR=`echo ${OPT:1} | tr '[:upper:]' '[:lower:]'` 29 | PIEOPTSTR=`echo $PIEOPT | tr '[:upper:]' '[:lower:]' | tr -d '-'` 30 | 31 | GCCOPT="$GCC" 32 | GPPOPT="$GPP" 33 | FORTRANOPT="$GFORTRAN" 34 | 35 | COMPILER=$(basename $GCCOPT) 36 | 37 | 38 | if [ "$TARGET" = "x86" ]; then 39 | EXTRAFLAGS="--build=i386-pc-linux TIME_T_32_BIT_OK=yes" 40 | EXTRAOPT="-m32" 41 | else 42 | EXTRAFLAGS="" 43 | EXTRAOPT="" 44 | fi 45 | 46 | if [ "$PIEOPT" = "-pie" ]; then 47 | PIEFLAGS="-fPIE" 48 | else 49 | PIEFLAGS="-fno-PIC" 50 | fi 51 | 52 | COMPILERSTR=`echo "$COMPILER" | sed 's$-.*$$g'` 53 | 54 | if [ "$COMPILE" = "clang-10" ]; then 55 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT $LINKEROPT2 -L/usr/lib/llvm-10/lib/" 56 | elif [ "$COMPILE" = "clang-13" ]; then 57 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT $LINKEROPT2 -L/usr/lib/llvm-13/lib/" 58 | else 59 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT $LINKEROPT2" 60 | fi 61 | 62 | OFOLDER=$SRCDIR/$PACKAGE 63 | 64 | OUTPUT=case1_$COMPILER\_$OPTSTR\_$LINKEROPTSTR.cfg 65 | 66 | if [ -f "$OFOLDER/config/$OUTPUT" ]; then 67 | echo "$OUTPUT exists" 68 | 69 | cd $OFOLDER/ 70 | source shrc 71 | runcpu --config=$OUTPUT --action=build --tune=base all 72 | cd $WORKDIR 73 | 74 | else 75 | cd $OFOLDER/config/ 76 | sed "s|_OPT_FLAGS_|OPTIMIZE = $OPT |g" base2017.cfg | sed "s|_COMPILE_OPTION_|CC = $GCCOPT $ALL_FLAGS -std=c99\nCXX = $GPPOPT $ALL_FLAGS -std=c++11\nFC = $FORTRANOPT $ALL_FLAGS $FFLAGS -std=legacy |g" | sed "s|_OUTPUT_FOLDER_|$OUTPUT|g" > $OUTPUT 77 | 78 | cd .. 79 | source shrc 80 | runcpu --config=$OUTPUT --action=build --tune=base all 81 | cd $WORKDIR 82 | fi 83 | /bin/bash ./copy.sh spec_cpu2017 case1 $COMPILER $OPTSTR $LINKEROPTSTR 84 | } 85 | 86 | arg_opt=$1 87 | arg_lopt=$2 88 | 89 | GCC=/usr/bin/gcc-11 90 | GPP=/usr/bin/g++-11 91 | GFORTRAN=/usr/bin/gfortran-11 92 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 93 | 94 | build_bin spec_cpu2017 x64 $arg_opt -pie -fuse-ld=$arg_lopt 95 | 96 | GCC=/usr/bin/gcc-13 97 | GPP=/usr/bin/g++-13 98 | GFORTRAN=/usr/bin/gfortran-13 99 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 100 | 101 | build_bin spec_cpu2017 x64 $arg_opt -pie -fuse-ld=$arg_lopt 102 | 103 | GCC=/usr/bin/clang-10 104 | GPP=/usr/bin/clang++-10 105 | GFORTRAN=/usr/bin/gfortran-11 106 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 107 | 108 | build_bin spec_cpu2017 x64 $arg_opt -pie -fuse-ld=$arg_lopt 109 | 110 | GCC=/usr/bin/clang-13 111 | GPP=/usr/bin/clang++-13 112 | GFORTRAN=/usr/bin/gfortran-11 113 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2" 114 | 115 | build_bin spec_cpu2017 x64 $arg_opt -pie -fuse-ld=$arg_lopt 116 | 117 | -------------------------------------------------------------------------------- /artifact/build_script/script/build-spec2017_no_ehframe.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PACKAGE="spec_cpu2017" 4 | 5 | SRCDIR=/data4/src 6 | WORKDIR=/data4/build_script 7 | MYDIR="$(dirname "$0")" 8 | 9 | OPTS=("-Os" "-Ofast" "-O3" "-O2" "-O1" "-O0") 10 | PIEOPTS=("-pie") 11 | TARGETOPTS=("x64" ) 12 | LINKEROPTS=("-fuse-ld=gold" "-fuse-ld=bfd") 13 | 14 | COMMON="-ggdb -save-temps=obj -fverbose-asm -Wl,--emit-relocs -fcf-protection=full" 15 | COMMON="-ggdb -save-temps=obj -fverbose-asm -fcf-protection=full -mno-avx512f -mno-avx2 -fno-unwind-tables -fno-asynchronous-unwind-tables" 16 | 17 | build_bin(){ 18 | PACKAGE=$1 19 | TARGET=$2 20 | OPT=$3 21 | PIEOPT=$4 22 | LINKEROPT=$5 23 | 24 | LINKEROPTSTR=`echo $LINKEROPT | sed 's/-fuse-ld=//g'` 25 | BINDOPT="-Wl,-z,lazy" 26 | 27 | echo $TARGETOPT $OPT $PIEOPT $BINDOPT $LINKEROPT 28 | 29 | 30 | OPTSTR=`echo ${OPT:1} | tr '[:upper:]' '[:lower:]'` 31 | PIEOPTSTR=`echo $PIEOPT | tr '[:upper:]' '[:lower:]' | tr -d '-'` 32 | 33 | GCCOPT="$GCC" 34 | GPPOPT="$GPP" 35 | FORTRANOPT="$GFORTRAN" 36 | 37 | COMPILER=$(basename $GCCOPT) 38 | 39 | 40 | if [ "$TARGET" = "x86" ]; then 41 | EXTRAFLAGS="--build=i386-pc-linux TIME_T_32_BIT_OK=yes" 42 | EXTRAOPT="-m32" 43 | else 44 | EXTRAFLAGS="" 45 | EXTRAOPT="" 46 | fi 47 | 48 | if [ "$PIEOPT" = "-pie" ]; then 49 | PIEFLAGS="-fPIE" 50 | else 51 | PIEFLAGS="-fno-PIC" 52 | fi 53 | 54 | COMPILERSTR=`echo "$COMPILER" | sed 's$-.*$$g'` 55 | 56 | if [ "$COMPILERSTR" = "clang" ]; then 57 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT $LINKEROPT2 -L/usr/lib/llvm-13/lib/" 58 | else 59 | ALL_FLAGS="$COMMON $OPT $PIEOPT $BINDOPT $LOPT $EXTRAOPT $PIEFLAGS $LINKEROPT $LINKEROPT2" 60 | fi 61 | 62 | OFOLDER=$SRCDIR/$PACKAGE 63 | 64 | OUTPUT=case6_no_ehframe_$COMPILER\_$OPTSTR\_$LINKEROPTSTR.cfg 65 | 66 | if [ -f "$OFOLDER/config/$OUTPUT" ]; then 67 | echo "$OUTPUT exists" 68 | 69 | cd $OFOLDER/config/ 70 | sed "s|_OPT_FLAGS_|OPTIMIZE = $OPT |g" base.cfg | sed "s|_COMPILE_OPTION_|CC = $GCCOPT $ALL_FLAGS -std=c99\nCXX = $GPPOPT $ALL_FLAGS -std=c++11\nFC = $FORTRANOPT $ALL_FLAGS $FFLAGS -std=legacy |g" | sed "s|_OUTPUT_FOLDER_|$OUTPUT|g" > $OUTPUT 71 | 72 | cd .. 73 | source shrc 74 | runcpu --config=$OUTPUT --action=build --tune=base all 75 | cd $WORKDIR 76 | 77 | else 78 | cd $OFOLDER/config/ 79 | sed "s|_OPT_FLAGS_|OPTIMIZE = $OPT |g" base.cfg | sed "s|_COMPILE_OPTION_|CC = $GCCOPT $ALL_FLAGS -std=c99\nCXX = $GPPOPT $ALL_FLAGS -std=c++11\nFC = $FORTRANOPT $ALL_FLAGS $FFLAGS -std=legacy |g" | sed "s|_OUTPUT_FOLDER_|$OUTPUT|g" > $OUTPUT 80 | 81 | cd .. 82 | source shrc 83 | runcpu --config=$OUTPUT --action=build --tune=base all 84 | cd $WORKDIR 85 | fi 86 | 87 | } 88 | 89 | build_all(){ 90 | for TARGETOPT in ${TARGETOPTS[@]} 91 | do 92 | for OPT in ${OPTS[@]} 93 | do 94 | for PIEOPT in ${PIEOPTS[@]} 95 | do 96 | for LINKEROPT in ${LINKEROPTS[@]} 97 | do 98 | echo build_bin $TARGETOPT $OPT $PIEOPT $LINKEROPT 99 | build_bin $PACKAGE $TARGETOPT $OPT $PIEOPT $LINKEROPT 100 | done 101 | done 102 | done 103 | done 104 | } 105 | 106 | GCC=/usr/bin/gcc-11 107 | GPP=/usr/bin/g++-11 108 | GFORTRAN=/usr/bin/gfortran-11 109 | 110 | GCC=/usr/bin/gcc-13 111 | GPP=/usr/bin/g++-13 112 | GFORTRAN=/usr/bin/gfortran-13 113 | 114 | build_all 115 | #build_bin spec_cpu2017 x64 -Os -pie -fuse-ld=bfd 116 | 117 | GCC=/usr/bin/clang-13 118 | GPP=/usr/bin/clang++-13 119 | 120 | GCC=/usr/bin/clang-10 121 | GPP=/usr/bin/clang++-10 122 | GFORTRAN=/usr/bin/gfortran-11 123 | 124 | build_all 125 | #build_bin spec_cpu2017 x64 -O1 -pie -fuse-ld=bfd 126 | -------------------------------------------------------------------------------- /artifact/build_script/script/coreutils-9.1_list.txt: -------------------------------------------------------------------------------- 1 | src/[ 2 | src/b2sum 3 | src/base32 4 | src/base64 5 | src/basename 6 | src/basenc 7 | src/cat 8 | src/chcon 9 | src/chgrp 10 | src/chmod 11 | src/chown 12 | src/chroot 13 | src/cksum 14 | src/comm 15 | src/cp 16 | src/csplit 17 | src/cut 18 | src/date 19 | src/dd 20 | src/df 21 | src/dir 22 | src/dircolors 23 | src/dirname 24 | src/du 25 | src/echo 26 | src/env 27 | src/expand 28 | src/expr 29 | src/factor 30 | src/false 31 | src/fmt 32 | src/fold 33 | src/getlimits 34 | src/ginstall 35 | src/groups 36 | src/head 37 | src/hostid 38 | src/id 39 | src/join 40 | src/kill 41 | src/link 42 | src/ln 43 | src/logname 44 | src/ls 45 | src/make-prime-list 46 | src/md5sum 47 | src/mkdir 48 | src/mkfifo 49 | src/mknod 50 | src/mktemp 51 | src/mv 52 | src/nice 53 | src/nl 54 | src/nohup 55 | src/nproc 56 | src/numfmt 57 | src/od 58 | src/paste 59 | src/pathchk 60 | src/pinky 61 | src/pr 62 | src/printenv 63 | src/printf 64 | src/ptx 65 | src/pwd 66 | src/readlink 67 | src/realpath 68 | src/rm 69 | src/rmdir 70 | src/runcon 71 | src/seq 72 | src/sha1sum 73 | src/sha224sum 74 | src/sha256sum 75 | src/sha384sum 76 | src/sha512sum 77 | src/shred 78 | src/shuf 79 | src/sleep 80 | src/sort 81 | src/split 82 | src/stat 83 | src/stdbuf 84 | src/stty 85 | src/sum 86 | src/sync 87 | src/tac 88 | src/tail 89 | src/tee 90 | src/test 91 | src/timeout 92 | src/touch 93 | src/tr 94 | src/true 95 | src/truncate 96 | src/tsort 97 | src/tty 98 | src/uname 99 | src/unexpand 100 | src/uniq 101 | src/unlink 102 | src/uptime 103 | src/users 104 | src/vdir 105 | src/wc 106 | src/who 107 | src/whoami 108 | src/yes 109 | -------------------------------------------------------------------------------- /artifact/build_script/script/coreutils_copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function copy 4 | { 5 | list_file=$1 6 | while IFS='' read -r line || [[ -n "$line" ]]; do 7 | name=$(echo $line | awk -F' ' '{print $1}') 8 | filename=$(basename "${line}") 9 | 10 | echo cp /dataset/$filename $name 11 | cp /dataset/$filename $name 12 | 13 | done < "$list_file" 14 | 15 | } 16 | copy coreutils-9.1_list.txt 17 | -------------------------------------------------------------------------------- /artifact/build_script/script/spec_cpu2006_list.txt: -------------------------------------------------------------------------------- 1 | 400.perlbench 2 | 401.bzip2 3 | 403.gcc 4 | 410.bwaves 5 | 416.gamess 6 | 429.mcf 7 | 433.milc 8 | 434.zeusmp 9 | 435.gromacs 10 | 436.cactusADM 11 | 437.leslie3d 12 | 444.namd 13 | 445.gobmk 14 | 447.dealII 15 | 450.soplex 16 | 453.povray 17 | 454.calculix 18 | 456.hmmer 19 | 458.sjeng 20 | 459.GemsFDTD 21 | 462.libquantum 22 | 464.h264ref 23 | 465.tonto 24 | 470.lbm 25 | 471.omnetpp 26 | 473.astar 27 | 481.wrf 28 | 482.sphinx3 sphinx_livepretend 29 | 483.xalancbmk Xalan 30 | 998.specrand 31 | 999.specrand 32 | -------------------------------------------------------------------------------- /artifact/build_script/script/spec_cpu2017_list.txt: -------------------------------------------------------------------------------- 1 | 500.perlbench_r perlbench_r 2 | 502.gcc_r cpugcc_r 3 | 503.bwaves_r bwaves_r 4 | 505.mcf_r mcf_r 5 | 507.cactuBSSN_r cactusBSSN_r 6 | 508.namd_r namd_r 7 | 510.parest_r parest_r 8 | 511.povray_r povray_r 9 | 519.lbm_r lbm_r 10 | 520.omnetpp_r omnetpp_r 11 | 521.wrf_r wrf_r 12 | 523.xalancbmk_r cpuxalan_r 13 | 525.x264_r x264_r 14 | 526.blender_r blender_r 15 | 527.cam4_r cam4_r 16 | 531.deepsjeng_r deepsjeng_r 17 | 538.imagick_r imagick_r 18 | 541.leela_r leela_r 19 | 544.nab_r nab_r 20 | 548.exchange2_r exchange2_r 21 | 549.fotonik3d_r fotonik3d_r 22 | 554.roms_r roms_r 23 | 557.xz_r xz_r 24 | 600.perlbench_s perlbench_s 25 | 602.gcc_s sgcc 26 | 603.bwaves_s speed_bwaves 27 | 605.mcf_s mcf_s 28 | 607.cactuBSSN_s cactuBSSN_s 29 | 619.lbm_s lbm_s 30 | 620.omnetpp_s omnetpp_s 31 | 621.wrf_s wrf_s 32 | 623.xalancbmk_s xalancbmk_s 33 | 625.x264_s x264_s 34 | 627.cam4_s cam4_s 35 | 628.pop2_s speed_pop2 36 | 631.deepsjeng_s deepsjeng_s 37 | 638.imagick_s imagick_s 38 | 641.leela_s leela_s 39 | 644.nab_s nab_s 40 | 648.exchange2_s exchange2_s 41 | 649.fotonik3d_s fotonik3d_s 42 | 654.roms_s sroms 43 | 657.xz_s xz_s 44 | 996.specrand_fs specrand_fs 45 | 997.specrand_fr specrand_fr 46 | 998.specrand_is specrand_is 47 | 999.specrand_ir specrand_ir 48 | -------------------------------------------------------------------------------- /artifact/build_script/test_suite_script/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM suri_artifact:v1.0 2 | 3 | SHELL ["/bin/bash", "-c"] 4 | 5 | COPY ./spec2006_image /spec_cpu2006 6 | COPY ./script/case1_spec2006.cfg /spec_cpu2006/config/case1_bfd.cfg 7 | 8 | # Set up SPEC CPU2006 9 | RUN chmod -R +x /spec_cpu2006 && \ 10 | /spec_cpu2006/install.sh -d /spec_cpu2006 -f && \ 11 | cd /spec_cpu2006/ && \ 12 | source shrc && \ 13 | runspec --config=case1_bfd --action=build --tune=base all 14 | 15 | COPY ./spec2017_image /spec_cpu2017 16 | COPY ./script/case1_spec2017.cfg /spec_cpu2017/config/case1_bfd.cfg 17 | 18 | # Set up SPEC CPU2017 19 | RUN chmod -R +x /spec_cpu2017 && \ 20 | /spec_cpu2017/install.sh -d /spec_cpu2017 -f && \ 21 | cd /spec_cpu2017/ && \ 22 | source shrc && \ 23 | runcpu --config=case1_bfd --action=build --tune=base all 24 | -------------------------------------------------------------------------------- /artifact/build_script/test_suite_script_ubuntu18.04/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM suri_artifact_ubuntu18.04:v1.0 2 | 3 | SHELL ["/bin/bash", "-c"] 4 | 5 | COPY ./spec2006_image /spec_cpu2006 6 | COPY ./script/case1_spec2006.cfg /spec_cpu2006/config/case1_bfd.cfg 7 | 8 | # Set up SPEC CPU2006 9 | RUN chmod -R +x /spec_cpu2006 && \ 10 | /spec_cpu2006/install.sh -d /spec_cpu2006 -f && \ 11 | cd /spec_cpu2006/ && \ 12 | source shrc && \ 13 | runspec --config=case1_bfd --action=build --tune=base all 14 | 15 | COPY ./spec2017_image /spec_cpu2017 16 | COPY ./script/case1_spec2017.cfg /spec_cpu2017/config/case1_bfd.cfg 17 | 18 | # Set up SPEC CPU2017 19 | RUN chmod -R +x /spec_cpu2017 && \ 20 | /spec_cpu2017/install.sh -d /spec_cpu2017 -f && \ 21 | cd /spec_cpu2017/ && \ 22 | source shrc && \ 23 | runcpu --config=case1_bfd --action=build --tune=base all 24 | -------------------------------------------------------------------------------- /artifact/filter_utils.py: -------------------------------------------------------------------------------- 1 | setB_blacklist = [ 2 | '444.namd', 3 | '447.dealII', 4 | '450.soplex', 5 | '453.povray', 6 | '471.omnetpp', 7 | '473.astar', 8 | '483.xalancbmk', 9 | '520.omnetpp_r', '620.omnetpp_s', 10 | '523.xalancbmk_r', '623.xalancbmk_s', 11 | '531.deepsjeng_r', '631.deepsjeng_s', 12 | '541.leela_r', '641.leela_s', 13 | '507.cactuBSSN_r', '607.cactuBSSN_s', 14 | '508.namd_r', 15 | '510.parest_r', 16 | '511.povray_r', 17 | '526.blender_r' 18 | ] 19 | 20 | 21 | def check_exclude_files(dataset, package, comp, opt, filename): 22 | 23 | # Exclude C++ for Egalito 24 | if dataset in ['setB'] and filename in setB_blacklist: 25 | return True 26 | 27 | # Exclude Errornous Binaries 28 | 29 | if package in ['coreutils-9.1']: 30 | # 1 (opt) * 2 (comp) * 2 (linker) = 4 31 | if comp in ['gcc-11', 'gcc-13']: 32 | if opt in ['ofast']: 33 | if filename in ['seq']: 34 | return True 35 | 36 | if package in ['spec_cpu2006']: 37 | # 5 (opt) * 4 (comp) * 2 (linker) = 40 38 | if filename in ['416.gamess'] and opt not in ['o0']: 39 | return True 40 | # 1 (opt) * 1 (comp) * 2 (linker) = 2 41 | if filename in ['453.povray'] and opt in ['ofast'] and comp in ['gcc-13']: 42 | return True 43 | 44 | if package in ['spec_cpu2017']: 45 | # 1 (opt) * 1 (comp) * 2 (linker) = 2 46 | if filename in ['511.povray_r'] and opt in ['ofast'] and comp in ['gcc-13']: 47 | return True 48 | 49 | return False 50 | 51 | 52 | -------------------------------------------------------------------------------- /artifact/install_reassessor.sh: -------------------------------------------------------------------------------- 1 | cd ./Reassessor 2 | pip3 install -r requirements.txt 3 | python3 setup.py install --user 4 | cd - 5 | -------------------------------------------------------------------------------- /artifact/make_gt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | def parse_arguments(): 5 | parser = argparse.ArgumentParser(description='GT') 6 | parser.add_argument('dataset', type=str, default='setA', help='Select dataset (setA, setC)') 7 | parser.add_argument('--input_dir', type=str, default='benchmark') 8 | parser.add_argument('--output_dir', type=str, default='gt') 9 | 10 | args = parser.parse_args() 11 | 12 | assert args.dataset in ['setA', 'setC'], 'Invalid dataset: "%s"'%(args.dataset) 13 | 14 | return args 15 | 16 | def run(args): 17 | in_dir = os.path.join(args.input_dir, args.dataset) 18 | out_dir = os.path.join(args.output_dir, args.dataset) 19 | os.system('mkdir -p %s' % out_dir) 20 | 21 | in_dir = os.path.abspath(in_dir) 22 | out_dir = os.path.abspath(out_dir) 23 | 24 | cmd = 'docker run --rm -v %s:/input -v %s:/output suri_artifact:v1.0 sh -c "python3 /project/Reassessor/artifact/run_reassessor.py /input /output"' % (in_dir, out_dir) 25 | print(cmd) 26 | os.system(cmd) 27 | 28 | if __name__ == '__main__': 29 | args = parse_arguments() 30 | run(args) 31 | -------------------------------------------------------------------------------- /artifact/realworld/client/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Builder: 4 | def __init__(self, target, verbose): 5 | self.target =target 6 | self.input_dir = os.path.dirname(target) 7 | self.output_dir = os.getcwd() 8 | self.filename = os.path.basename(target) 9 | self.verbose = verbose 10 | 11 | 12 | def run_docker(self, cmd): 13 | if self.verbose: 14 | docker_cmd = 'docker run --rm -v %s:/input -v %s:/output suri_artifact:v1.0 sh -c " %s; "'%(self.input_dir, self.output_dir, cmd ) 15 | else: 16 | docker_cmd = 'docker run --rm -v %s:/input -v %s:/output suri_artifact:v1.0 sh -c " %s 2> /dev/null "'%(self.input_dir, self.output_dir, cmd ) 17 | os.system(docker_cmd) 18 | 19 | def run(self): 20 | cmd= 'python3 /project/SURI/suri.py /input/%s --ofolder /output/output --without-compile'%(self.filename) 21 | self.run_docker(cmd) 22 | 23 | import argparse 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser(description='Builder') 26 | parser.add_argument('target', type=str, help='Target Binary') 27 | parser.add_argument('--verbose', action='store_true') 28 | 29 | args = parser.parse_args() 30 | 31 | target = os.path.abspath(args.target) 32 | 33 | suri = Builder(target, args.verbose) 34 | suri.run() 35 | -------------------------------------------------------------------------------- /artifact/realworld/client/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function build() 4 | { 5 | target=$1 6 | python3 build.py $target --verbose 7 | python3 ../../../emitter.py $target output/$target.s --ofolder output 8 | } 9 | mkdir -p output 10 | build epiphany 11 | build filezilla 12 | build openssh 13 | build putty 14 | build vim 15 | 16 | -------------------------------------------------------------------------------- /artifact/realworld/client/epiphany: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/client/epiphany -------------------------------------------------------------------------------- /artifact/realworld/client/filezilla: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/client/filezilla -------------------------------------------------------------------------------- /artifact/realworld/client/git: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/client/git -------------------------------------------------------------------------------- /artifact/realworld/client/openssh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/client/openssh -------------------------------------------------------------------------------- /artifact/realworld/client/putty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/client/putty -------------------------------------------------------------------------------- /artifact/realworld/client/run_docker.sh: -------------------------------------------------------------------------------- 1 | docker run -it --rm -v $(pwd):/data/ suri_artifact:v1.0 /bin/bash 2 | -------------------------------------------------------------------------------- /artifact/realworld/client/run_epiphany.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | str=$(ldd epiphany | grep libephymain.so) 3 | if [[ "$str" == *not* ]]; then 4 | echo "Could not find library path for libephymain.so" 5 | else 6 | libpath=$(ldd epiphany | grep libephymain.so | awk '{print $3}' | awk -F'libephymain' '{print $1}') 7 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$libpath 8 | ./my_epiphany 9 | fi 10 | -------------------------------------------------------------------------------- /artifact/realworld/client/vim: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/client/vim -------------------------------------------------------------------------------- /artifact/realworld/phoronix/7zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/phoronix/7zip -------------------------------------------------------------------------------- /artifact/realworld/phoronix/apache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/phoronix/apache -------------------------------------------------------------------------------- /artifact/realworld/phoronix/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Builder: 4 | def __init__(self, target, verbose): 5 | self.target =target 6 | self.input_dir = os.path.dirname(target) 7 | self.output_dir = os.getcwd() 8 | self.suri_dir = os.path.dirname(os.path.realpath(__file__)) 9 | self.filename = os.path.basename(target) 10 | self.verbose = verbose 11 | 12 | 13 | def run_docker(self, cmd): 14 | if self.verbose: 15 | print(cmd) 16 | docker_cmd = 'docker run --rm -v %s:/input -v %s:/output suri_artifact:v1.0 sh -c " %s; "'%(self.input_dir, self.output_dir, cmd ) 17 | else: 18 | docker_cmd = 'docker run --rm -v %s:/input -v %s:/output suri_artifact:v1.0 sh -c " %s 2> /dev/null "'%(self.input_dir, self.output_dir, cmd ) 19 | os.system(docker_cmd) 20 | 21 | def run(self): 22 | cmd= 'python3 /project/SURI/suri.py /input/%s --ofolder /output/output'%(self.filename) 23 | self.run_docker(cmd) 24 | 25 | import argparse 26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser(description='Builder') 28 | parser.add_argument('target', type=str, help='Target Binary') 29 | parser.add_argument('--verbose', action='store_true') 30 | 31 | args = parser.parse_args() 32 | 33 | target = os.path.abspath(args.target) 34 | 35 | suri = Builder(target, args.verbose) 36 | suri.run() 37 | -------------------------------------------------------------------------------- /artifact/realworld/phoronix/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p output 4 | python3 build.py 7zip 5 | python3 build.py apache 6 | python3 build.py mariadb 7 | python3 build.py nginx 8 | python3 build.py sqlite3 9 | -------------------------------------------------------------------------------- /artifact/realworld/phoronix/copy.sh: -------------------------------------------------------------------------------- 1 | 2 | function copy() 3 | { 4 | src=$1 5 | dst=$2 6 | name=$(basename $src | awk -F'my_' '{print $2}' ) 7 | if [ -f $dst ]; then 8 | cp $src $dst 9 | echo "[+] Replaced `$name` file in phoronix directory." 10 | else 11 | echo "[-] Error: Could not copy '$name' as the destination file does not exist." 12 | echo " Please run: phoronix-test-suite benchmark $name" 13 | fi 14 | } 15 | copy /data/output/my_7zip /var/lib/phoronix-test-suite/installed-tests/pts/compress-7zip-1.11.0/CPP/7zip/Bundles/Alone2/_o/7zz 16 | # phoronix-test-suite benchmark 7zip 17 | 18 | copy /data/output/my_apache /var/lib/phoronix-test-suite/installed-tests/pts/apache-3.0.0/httpd_/bin/httpd 19 | # phoronix-test-suite benchmark apache 20 | 21 | copy /data/output/my_mariadb /var/lib/phoronix-test-suite/installed-tests/pts/mysqlslap-1.5.0/mysql_/bin/mariadb 22 | # phoronix-test-suite benchmark mysqlslap 23 | 24 | copy /data/output/my_nginx /var/lib/phoronix-test-suite/installed-tests/pts/nginx-3.0.1/nginx_/sbin/nginx 25 | # phoronix-test-suite benchmark nginx 26 | 27 | copy /data/output/my_sqlite3 /var/lib/phoronix-test-suite/installed-tests/pts/sqlite-2.2.0/sqlite_/bin/sqlite3 28 | # phoronix-test-suite benchmark sqlite 29 | 30 | -------------------------------------------------------------------------------- /artifact/realworld/phoronix/mariadb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/phoronix/mariadb -------------------------------------------------------------------------------- /artifact/realworld/phoronix/nginx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/phoronix/nginx -------------------------------------------------------------------------------- /artifact/realworld/phoronix/run_docker.sh: -------------------------------------------------------------------------------- 1 | docker run -it --rm -v $(pwd):/data/ suri_artifact:v1.0 /bin/bash 2 | -------------------------------------------------------------------------------- /artifact/realworld/phoronix/sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftSec-KAIST/SURI/4f66872a9597f0c18cd3ff45516ac25d600b4576/artifact/realworld/phoronix/sqlite3 -------------------------------------------------------------------------------- /artifact/terminate_suri_docker.sh: -------------------------------------------------------------------------------- 1 | echo 'ps -aux | grep docker | grep "suri.*:v1.0" | awk '{print \$2}' | sudo xargs kill -9' 2 | ps -aux | grep docker | grep "suri.*:v1.0" | awk '{print $2}' | sudo xargs kill -9 3 | -------------------------------------------------------------------------------- /artifact/ubuntu18.04/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ENV DEBIAN_FRONTEND="noninteractive" 4 | ENV DEBIAN_FRONTEND="Etc/UTC" 5 | 6 | RUN apt update && \ 7 | apt install -y git wget software-properties-common python3-pip 8 | 9 | # Install compilers 10 | RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y && \ 11 | apt update && \ 12 | apt install -y gcc-11 g++-11 clang-10 gfortran-11 13 | 14 | # Install dotnet7 15 | RUN wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \ 16 | dpkg -i packages-microsoft-prod.deb && \ 17 | rm packages-microsoft-prod.deb && \ 18 | apt-get update && \ 19 | apt-get install -y dotnet-sdk-7.0 20 | 21 | # Install Python3 dependency 22 | RUN pip3 install pyelftools 23 | 24 | # Install Egalito dependencies 25 | RUN apt install -y make g++ libreadline-dev gdb lsb-release unzip \ 26 | libc6-dbg libstdc++6-7-dbg 27 | 28 | # Install additional dependencies 29 | RUN apt install -y time flex texinfo bison dejagnu libcap-dev 30 | 31 | RUN mkdir -p /project 32 | 33 | # Add SURI 34 | RUN cd /project/ && git clone https://github.com/SoftSec-KAIST/SURI.git && \ 35 | cd SURI && python3 setup.py install 36 | 37 | # Build superCFGBuilder 38 | RUN cd /project/SURI/superCFGBuilder && dotnet build -c Release 39 | 40 | # Install Egalito 41 | RUN cd /project && \ 42 | git clone https://github.com/columbia/egalito.git && \ 43 | cd egalito && \ 44 | git reset --hard c5bccb4 && \ 45 | sed -i 's|git@github.com:|https://github.com/|' .gitmodules && \ 46 | git submodule update --init --recursive && \ 47 | make 48 | 49 | # Build Coreutils for test suite 50 | RUN wget https://ftp.gnu.org/gnu/coreutils/coreutils-9.1.tar.gz && \ 51 | tar -xzf coreutils-9.1.tar.gz && \ 52 | cd /coreutils-9.1 && \ 53 | FORCE_UNSAFE_CONFIGURE=1 ./configure && \ 54 | make 55 | 56 | COPY ./build_script/script/coreutils_copy.sh /coreutils-9.1/copy.sh 57 | COPY ./build_script/script/coreutils-9.1_list.txt /coreutils-9.1/coreutils-9.1_list.txt 58 | 59 | # Build Binutils for test suite 60 | RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.40.tar.gz && \ 61 | tar -xzf binutils-2.40.tar.gz && \ 62 | cd /binutils-2.40 && \ 63 | ./configure && \ 64 | make 65 | 66 | COPY ./build_script/script/binutils_copy.sh /binutils-2.40/copy.sh 67 | COPY ./build_script/script/binutils-2.40_list.txt /binutils-2.40/binutils-2.40_list.txt 68 | -------------------------------------------------------------------------------- /artifact/ubuntu18.04/build_script/script/binutils-2.40_list.txt: -------------------------------------------------------------------------------- 1 | binutils/addr2line 2 | binutils/ar 3 | gas/as-new 4 | binutils/cxxfilt 5 | binutils/elfedit 6 | gprof/gprof 7 | ld/ld-new 8 | binutils/nm-new 9 | binutils/objcopy 10 | binutils/objdump 11 | binutils/ranlib 12 | binutils/readelf 13 | binutils/size 14 | binutils/strings 15 | binutils/strip-new 16 | -------------------------------------------------------------------------------- /artifact/ubuntu18.04/build_script/script/binutils_copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function copy 4 | { 5 | list_file=$1 6 | while IFS='' read -r line || [[ -n "$line" ]]; do 7 | name=$(echo $line | awk -F' ' '{print $1}') 8 | filename=$(basename "${line}") 9 | 10 | echo cp /dataset/$filename $name 11 | cp /dataset/$filename $name 12 | 13 | 14 | done < "$list_file" 15 | 16 | } 17 | copy binutils-2.40_list.txt 18 | -------------------------------------------------------------------------------- /artifact/ubuntu18.04/build_script/script/coreutils-9.1_list.txt: -------------------------------------------------------------------------------- 1 | src/[ 2 | src/b2sum 3 | src/base32 4 | src/base64 5 | src/basename 6 | src/basenc 7 | src/cat 8 | src/chcon 9 | src/chgrp 10 | src/chmod 11 | src/chown 12 | src/chroot 13 | src/cksum 14 | src/comm 15 | src/cp 16 | src/csplit 17 | src/cut 18 | src/date 19 | src/dd 20 | src/df 21 | src/dir 22 | src/dircolors 23 | src/dirname 24 | src/du 25 | src/echo 26 | src/env 27 | src/expand 28 | src/expr 29 | src/factor 30 | src/false 31 | src/fmt 32 | src/fold 33 | src/getlimits 34 | src/ginstall 35 | src/groups 36 | src/head 37 | src/hostid 38 | src/id 39 | src/join 40 | src/kill 41 | src/link 42 | src/ln 43 | src/logname 44 | src/ls 45 | src/make-prime-list 46 | src/md5sum 47 | src/mkdir 48 | src/mkfifo 49 | src/mknod 50 | src/mktemp 51 | src/mv 52 | src/nice 53 | src/nl 54 | src/nohup 55 | src/nproc 56 | src/numfmt 57 | src/od 58 | src/paste 59 | src/pathchk 60 | src/pinky 61 | src/pr 62 | src/printenv 63 | src/printf 64 | src/ptx 65 | src/pwd 66 | src/readlink 67 | src/realpath 68 | src/rm 69 | src/rmdir 70 | src/runcon 71 | src/seq 72 | src/sha1sum 73 | src/sha224sum 74 | src/sha256sum 75 | src/sha384sum 76 | src/sha512sum 77 | src/shred 78 | src/shuf 79 | src/sleep 80 | src/sort 81 | src/split 82 | src/stat 83 | src/stdbuf 84 | src/stty 85 | src/sum 86 | src/sync 87 | src/tac 88 | src/tail 89 | src/tee 90 | src/test 91 | src/timeout 92 | src/touch 93 | src/tr 94 | src/true 95 | src/truncate 96 | src/tsort 97 | src/tty 98 | src/uname 99 | src/unexpand 100 | src/uniq 101 | src/unlink 102 | src/uptime 103 | src/users 104 | src/vdir 105 | src/wc 106 | src/who 107 | src/whoami 108 | src/yes 109 | -------------------------------------------------------------------------------- /artifact/ubuntu18.04/build_script/script/coreutils_copy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function copy 4 | { 5 | list_file=$1 6 | while IFS='' read -r line || [[ -n "$line" ]]; do 7 | name=$(echo $line | awk -F' ' '{print $1}') 8 | filename=$(basename "${line}") 9 | 10 | echo cp /dataset/$filename $name 11 | cp /dataset/$filename $name 12 | 13 | done < "$list_file" 14 | 15 | } 16 | copy coreutils-9.1_list.txt 17 | -------------------------------------------------------------------------------- /emitter.py: -------------------------------------------------------------------------------- 1 | import os 2 | from superSymbolizer import SuperSymbolizer, CustomCompiler, SuperAsan 3 | 4 | 5 | class Emitter: 6 | def __init__(self, target, asm, new_out_dir, asan, use_docker, verbose): 7 | self.target = target 8 | self.input_dir = os.path.dirname(target) 9 | if new_out_dir: 10 | self.output_dir = os.path.abspath(new_out_dir) 11 | else: 12 | self.output_dir = os.getcwd() 13 | self.suri_dir = os.path.dirname(os.path.realpath(__file__)) 14 | self.filename = os.path.basename(target) 15 | self.use_docker = use_docker 16 | self.verbose = verbose 17 | 18 | if asan: 19 | self.asan = '%s_asan.json'%(self.filename) 20 | else: 21 | self.asan = '' 22 | self.asm = asm 23 | self.tmp = 'tmp_%s'%(self.filename) 24 | self.myfile = 'my_%s'%(self.filename) 25 | 26 | def run_docker(self, cmd): 27 | if self.verbose: 28 | print(cmd) 29 | docker_cmd = 'docker run --rm -v %s:/input -v %s:/output suri:v1.0 sh -c " %s; "'%(self.input_dir, self.output_dir, cmd ) 30 | else: 31 | docker_cmd = 'docker run --rm -v %s:/input -v %s:/output suri:v1.0 sh -c " %s 2> /dev/null "'%(self.input_dir, self.output_dir, cmd ) 32 | os.system(docker_cmd) 33 | 34 | 35 | def compile_suri(self): 36 | if self.use_docker: 37 | input_path = '/input/%s'%(self.filename) 38 | asm_path = '/output/%s'%(self.asm) 39 | output_path = '/output/%s'%(self.filename) 40 | 41 | if self.asan: 42 | cmd = 'python3 /project/SURI/superSymbolizer/CustomCompiler.py %s %s %s --asan'%(input_path, asm_path, output_path) 43 | else: 44 | cmd = 'python3 /project/SURI/superSymbolizer/CustomCompiler.py %s %s %s'%(input_path, asm_path, output_path) 45 | 46 | if self.verbose: 47 | print(cmd) 48 | 49 | self.run_docker(cmd) 50 | else: 51 | input_path = '%s/%s'%(self.input_dir, self.filename) 52 | asm_path = self.asm 53 | output_path = '%s/%s'%(self.output_dir, self.filename) 54 | 55 | if self.asan: 56 | CustomCompiler.emitter(input_path, asm_path, output_path, asan=True) 57 | else: 58 | CustomCompiler.emitter(input_path, asm_path, output_path) 59 | 60 | 61 | def run(self): 62 | my_path = '%s/%s'%(self.output_dir, self.myfile) 63 | 64 | if os.path.exists(my_path): 65 | os.remove(self.myfile) 66 | 67 | self.compile_suri() 68 | 69 | if os.path.exists(my_path): 70 | print('[+] Generate rewritten binary: %s'%(my_path)) 71 | 72 | import argparse 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser(description='Emitter') 75 | parser.add_argument('target', type=str, help='Target Binary') 76 | parser.add_argument('assembly', type=str, help='Assembly File') 77 | parser.add_argument('--ofolder', type=str, help='Output Dir') 78 | parser.add_argument('--asan', action='store_true') 79 | parser.add_argument('--usedocker', action='store_true') 80 | parser.add_argument('--verbose', action='store_true') 81 | 82 | args = parser.parse_args() 83 | 84 | target = os.path.abspath(args.target) 85 | 86 | emitter = Emitter(target, args.assembly, args.ofolder, args.asan, args.usedocker, args.verbose) 87 | emitter.run() 88 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='suri', 5 | version='1.0.0', 6 | author='Hyungseok Kim', 7 | description='suri', 8 | packages=['superSymbolizer', 'superSymbolizer.lib'], 9 | ) 10 | -------------------------------------------------------------------------------- /superCFGBuilder/.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | obj/ 3 | .vs/ 4 | -------------------------------------------------------------------------------- /superCFGBuilder/BinEssence/BinEssence.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | true 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/BinGraph.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | LICENSE.md 5 | b2r2-240x240.png 6 | README.md 7 | B2R2 graph library. 8 | net9.0 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/DummyVertex.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.BinGraph 26 | 27 | type DummyEntry = 28 | /// Temporarily connect entry dummy node with the given root node. We do not 29 | /// touch the Graph, but simply connect two vertices temporarily for the 30 | /// convenience of analysis. 31 | static member Connect g (root: Vertex<_>) = 32 | if root.IsDummy () then root, g 33 | else 34 | let dummyEntry, g = DiGraph.AddDummyVertex g 35 | let g = DiGraph.AddDummyEdge (g, dummyEntry, root) 36 | dummyEntry, g 37 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/Edges.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.BinGraph 26 | 27 | /// Missing edge. 28 | exception EdgeNotFoundException 29 | 30 | /// Edge ID is a tuple of two node IDs (source node ID, destination node ID). 31 | type EdgeID = VertexID * VertexID 32 | 33 | /// An edge in a directed graph. 34 | type Edge<'E> = Edge of 'E 35 | 36 | type E<'E> = Edge<'E> 37 | 38 | // vim: set tw=80 sts=2 sw=2: 39 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/GraphCore.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.BinGraph 26 | 27 | /// GraphCore is an internal representation for the core graph operations, and 28 | /// this should not be directly accessed by the user. 29 | [] 30 | type GraphCore<'D, 'E, 'G 31 | when 'D :> VertexData and 'G :> Graph<'D, 'E, 'G>> internal () = 32 | 33 | abstract ImplementationType: GraphImplementationType 34 | 35 | abstract InitGraph: GraphCore<'D, 'E, 'G> option -> 'G 36 | 37 | abstract Vertices: Set> 38 | 39 | abstract Unreachables: Vertex<'D> list 40 | 41 | abstract Exits: Vertex<'D> list 42 | 43 | abstract GetSize: unit -> int 44 | 45 | abstract AddDummyVertex: 'G -> Vertex<'D> * 'G 46 | 47 | abstract AddVertex: 'G -> 'D -> Vertex<'D> * 'G 48 | 49 | abstract GetVertex: VertexID -> Vertex<'D> 50 | 51 | abstract ContainsVertex: VertexID -> bool 52 | 53 | abstract RemoveVertex: 'G -> Vertex<'D> -> 'G 54 | 55 | abstract FoldVertex: ('a -> Vertex<'D> -> 'a) -> 'a -> 'a 56 | 57 | abstract IterVertex: (Vertex<'D> -> unit) -> unit 58 | 59 | abstract FindVertexBy: (Vertex<'D> -> bool) -> Vertex<'D> 60 | 61 | abstract TryFindVertexBy: (Vertex<'D> -> bool) -> Vertex<'D> option 62 | 63 | abstract GetPreds: Vertex<'D> -> Vertex<'D> list 64 | 65 | abstract GetSuccs: Vertex<'D> -> Vertex<'D> list 66 | 67 | abstract AddDummyEdge: 'G -> Vertex<'D> -> Vertex<'D> -> 'G 68 | 69 | abstract AddEdge: 'G -> Vertex<'D> -> Vertex<'D> -> 'E -> 'G 70 | 71 | abstract RemoveEdge: 'G -> Vertex<'D> -> Vertex<'D> -> 'G 72 | 73 | abstract FoldEdge: ('a -> Vertex<'D> -> Vertex<'D> -> 'E -> 'a) -> 'a -> 'a 74 | 75 | abstract IterEdge: (Vertex<'D> -> Vertex<'D> -> 'E -> unit) -> unit 76 | 77 | abstract FindEdge: Vertex<'D> -> Vertex<'D> -> 'E 78 | 79 | abstract TryFindEdge: Vertex<'D> -> Vertex<'D> -> 'E option 80 | 81 | abstract Clone: unit -> GraphCore<'D, 'E, 'G> 82 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/Loop.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | module SuperCFG.BinGraph.Loop 26 | 27 | open System.Collections.Generic 28 | 29 | let private getBackEdges g root = 30 | let ctx = Dominator.initDominatorContext g root 31 | let doms = 32 | [] 33 | |> g.FoldVertex (fun acc v -> 34 | (v, Dominator.doms ctx v) :: acc) 35 | |> Map.ofList 36 | [] 37 | |> g.FoldEdge (fun acc s d e -> 38 | match doms[s] with 39 | | l when l |> List.exists (fun v -> v = d) -> (s, d) :: acc 40 | | _ -> acc) 41 | 42 | let private findIn g v = DiGraph<_, _>.FindVertexByID (g, Vertex<_>.GetID v) 43 | 44 | let getNaturalLoops g root = 45 | let rev = DiGraph.Reverse g 46 | getBackEdges g root 47 | |> List.fold (fun acc (s, d) -> 48 | let s = findIn rev s 49 | let d = findIn rev d 50 | let vertices = 51 | [ d ] 52 | |> Traversal.foldPreorderExcept rev [ s ] [ d ] (fun acc v -> 53 | (findIn g v) :: acc) 54 | |> HashSet 55 | vertices :: acc) [] 56 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/RangedDiGraph.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.BinGraph 26 | 27 | type RangedDiGraph<'D, 'E 28 | when 'D :> RangedVertexData and 'D : equality> (core) = 29 | inherit DiGraph<'D, 'E> (core: GraphCore<'D, 'E, DiGraph<'D, 'E>>) 30 | 31 | member __.FindVertexByRange range = 32 | core.FindVertexBy (fun (v: Vertex<'D>) -> v.VData.AddrRange = range) 33 | 34 | [] 35 | module RangedDiGraph = 36 | let private initializer core = RangedDiGraph<'D, 'E> (core) :> DiGraph<'D, 'E> 37 | 38 | let private initImperative edgeData = 39 | let core = ImperativeRangedCore<'D, 'E> (initializer, edgeData) 40 | RangedDiGraph<'D, 'E> (core) :> DiGraph<'D, 'E> 41 | 42 | let private initPersistent edgeData = 43 | let core = PersistentRangedCore<'D, 'E> (initializer, edgeData) 44 | RangedDiGraph<'D, 'E> (core) :> DiGraph<'D, 'E> 45 | 46 | /// Initialize RangedDiGraph based on the implementation type. 47 | let init edgeData = function 48 | | ImperativeGraph -> initImperative edgeData 49 | | PersistentGraph -> initPersistent edgeData 50 | -------------------------------------------------------------------------------- /superCFGBuilder/BinGraph/Vertices.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.BinGraph 26 | 27 | open B2R2 28 | 29 | /// Missing vertex. 30 | exception VertexNotFoundException 31 | 32 | /// Multiple vertices found when looking for a vertex containing certain data 33 | exception MultipleVerticesFoundException 34 | 35 | /// Trying to access dummy node's data 36 | exception DummyDataAccessException 37 | 38 | /// A unique ID for a vertex. 39 | type VertexID = int 40 | 41 | /// A data type for vertex. A VertexData should have an ID. 42 | [] 43 | type VertexData (id) = 44 | member __.ID: VertexID = id 45 | 46 | module VertexData = 47 | let private freshID = ref 0 48 | 49 | let genID () = System.Threading.Interlocked.Increment (freshID) 50 | 51 | type RangedVertexData (range: AddrRange) = 52 | inherit VertexData(VertexData.genID ()) 53 | member __.AddrRange = range 54 | 55 | /// A vertex of a graph. The vertex data (v) is optional, and if it is None, we 56 | /// will consider the vertex as a dummy node. Dummy nodes are useful for 57 | /// representing entry/exit node in a CFG. 58 | [] 59 | type Vertex<'V when 'V :> VertexData> (v: 'V option) = 60 | let myid = 61 | match v with 62 | | Some v -> v.ID 63 | | None -> 0 64 | 65 | /// Create a dummy vertex. 66 | new () = Vertex (None) 67 | /// Create a regular vertex. 68 | new (v: 'V) = Vertex (Some v) 69 | 70 | abstract Preds : Vertex<'V> list with get, set 71 | abstract Succs : Vertex<'V> list with get, set 72 | 73 | /// Data attached to the vertex. 74 | member __.VData = 75 | match v with 76 | | Some v -> v 77 | | None -> raise DummyDataAccessException 78 | 79 | /// Check whether vertex is a dummy node. 80 | member __.IsDummy () = Option.isNone v 81 | 82 | /// Each vertex has a unique ID attached to it. We sometimes need to access ID 83 | /// of dummy vertex for example calculating dominators. 84 | member __.GetID () = myid 85 | 86 | /// Return the ID of the given vertex. 87 | static member GetID (v: Vertex<#VertexData>) = v.GetID () 88 | 89 | // Each vertex has a unique ID, so ID can be used to check equality. 90 | override __.Equals obj = 91 | match obj with 92 | | :? Vertex<'V> as obj -> __.GetID () = obj.GetID () 93 | | _ -> false 94 | 95 | override __.GetHashCode () = __.GetID () 96 | 97 | override __.ToString () = 98 | match v with 99 | | Some v -> sprintf "Vertex<%s>" <| v.ToString () 100 | | None -> "DummyVertex" 101 | 102 | // Each vertex has a unique ID, so ID can be used for comparison. 103 | interface System.IComparable with 104 | member __.CompareTo obj = 105 | match obj with 106 | | :? Vertex<'V> as v -> compare (__.GetID ()) (v.GetID ()) 107 | | _ -> failwith "Invalid comparison" 108 | 109 | type V<'V when 'V :> VertexData> = Vertex<'V> 110 | 111 | // vim: set tw=80 sts=2 sw=2: 112 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/BBLInfo.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open B2R2 28 | 29 | /// Represents an instruction-level basic block. 30 | type BBLInfo = { 31 | /// The range (start addr, end addr) of the basic block. 32 | BlkRange: AddrRange 33 | /// Instruction addresses in the basic block. 34 | InstrAddrs: Set 35 | /// IR-level leaders (program points) within the bbl. 36 | IRLeaders: Set 37 | /// Function entry point. 38 | FunctionEntry: Addr 39 | } 40 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/CFGError.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open B2R2 28 | 29 | /// Error occured from a CFG analysis. 30 | type CFGError = 31 | /// This error occurs while resolving an indirect branch. 32 | | ErrorBranchRecovery of fnAddr: Addr 33 | * brAddr: Addr 34 | * rollbackFuncs: Set 35 | /// Nested switch is found and we found the existence of a jump-table overlap 36 | /// late. So we rollback. 37 | | ErrorLateDetection 38 | /// This error occurs while parsing an invalid basic block. 39 | | ErrorParsing 40 | /// This error occurs while connecting an invalid edge; src/dst node is 41 | /// invalid, e.g., when an edge is intruding an instruction boundary. 42 | | ErrorConnectingEdge 43 | /// This error occurs when the recovered entry exists prior to 44 | /// confirmedEndPoint 45 | | ErrorVisitedSwitchEntry 46 | 47 | [] 48 | module CFGError = 49 | let toString = function 50 | | ErrorBranchRecovery (fnAddr, brAddr, _) -> 51 | (nameof ErrorBranchRecovery) 52 | + "(" + fnAddr.ToString("x") + "," + brAddr.ToString("x") + ")" 53 | | ErrorLateDetection -> nameof ErrorLateDetection 54 | | ErrorParsing -> nameof ErrorParsing 55 | | ErrorConnectingEdge -> nameof ErrorConnectingEdge 56 | | _ -> "" 57 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/CFGHelper.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | [] 26 | module internal SuperCFG.ControlFlowAnalysis.CFGHelper 27 | 28 | open B2R2 29 | open SuperCFG.BinGraph 30 | 31 | #if CFGDEBUG 32 | open System.IO 33 | 34 | [] 35 | module internal Dbg = 36 | let logger = 37 | let path = Path.Combine (Directory.GetCurrentDirectory (), "cfg.log") 38 | FileLogger (path) :> ILogger 39 | 40 | let inline dbglog (locationName: string) fmt = 41 | let after str = logger.LogLine (locationName.PadRight 20 + "| " + str) 42 | Printf.ksprintf after fmt 43 | #endif 44 | 45 | /// Categorize neighboring edges of a given vertex (v) in the graph (g). This 46 | /// function returns three different groups of edges: (1) incoming edges, (2) 47 | /// outgoing edges, and (3) self-cycle edge. 48 | let categorizeNeighboringEdges g v = 49 | let incomings, cycle = 50 | DiGraph.GetPreds (g, v) 51 | |> List.fold (fun (incomings, cycle) p -> 52 | let e = DiGraph.FindEdgeData (g, p, v) 53 | if p.GetID () = v.GetID () then incomings, Some e 54 | else (p, e) :: incomings, cycle) ([], None) 55 | let outgoings = 56 | DiGraph.GetSuccs (g, v) 57 | |> List.fold (fun outgoings s -> 58 | let e = DiGraph.FindEdgeData (g, v, s) 59 | if s.GetID () = v.GetID () then outgoings 60 | else (s, e) :: outgoings) [] 61 | incomings, outgoings, cycle 62 | 63 | /// Get reachable vertices and edges from v in g. 64 | let getReachables g v = 65 | let reachables = 66 | Set.empty |> Traversal.foldPostorder g [v] (fun acc v -> Set.add v acc) 67 | let edges = (* Collect corresponding edges. *) 68 | g.FoldEdge (fun acc src dst e -> 69 | (* Collect only when both src and dst belong to vertices. *) 70 | if Set.contains src reachables && Set.contains dst reachables then 71 | Set.add (src, dst, e) acc 72 | else acc) Set.empty 73 | reachables, edges 74 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/ControlFlowAnalysis.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | true 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/DataManager.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | open B2R2.FrontEnd 30 | open B2R2.FrontEnd.BinFile 31 | open B2R2.FrontEnd.BinFile.ELF 32 | 33 | [] 34 | module private DataManager = 35 | let parseRelocatableFunctionSymbols reloc = 36 | let dict = Dictionary () 37 | let iter (KeyValue (addr, rel: RelocationEntry)) = 38 | match rel.RelType with 39 | | RelocationX86 RelocationX86.R_386_32 40 | | RelocationX86 RelocationX86.R_386_PC32 41 | | RelocationX64 RelocationX64.R_X86_64_PLT32 -> 42 | match rel.RelSymbol with 43 | | Some sym when sym.SymType = SymbolType.STT_FUNC -> 44 | dict.Add (addr, sym) 45 | | _ -> () 46 | | _ -> () 47 | reloc.RelocByAddr |> Seq.iter iter 48 | dict 49 | 50 | let parseRelocatableFuncs (hdl: BinHandle) = 51 | match hdl.File with 52 | | :? ELFBinFile as efi -> parseRelocatableFunctionSymbols efi.RelocationInfo 53 | | _ -> Dictionary () 54 | 55 | type FunDataManager () = 56 | 57 | let jmpTables = JumpTableMaintainer () 58 | 59 | let visitedTbls = SortedSet () 60 | 61 | /// Return the JumpTableMaintainer. 62 | member __.JumpTables with get() = jmpTables 63 | 64 | member __.MarkVisitedTbl tblAddr = 65 | visitedTbls.Add tblAddr |> ignore 66 | 67 | member __.IsVisitedTbl tblAddr = 68 | visitedTbls.Contains tblAddr 69 | 70 | type DataManager (hdl) = 71 | 72 | let funDataDict = Dictionary () 73 | 74 | let relocatableFuncs = parseRelocatableFuncs hdl 75 | 76 | //let jmpTables = JumpTableMaintainer () 77 | 78 | /// Return the JumpTableMaintainer. 79 | //member __.JumpTables with get() = jmpTables 80 | /// Return a map from a relocatable offset to its corresponding symbol. This 81 | /// map considers relocatable functions only. 82 | member __.RelocatableFuncs with get() = relocatableFuncs 83 | 84 | 85 | member __.GetOrAddFunDataMgr entry = 86 | match funDataDict.TryGetValue entry with 87 | | true, funCodeMgr -> funCodeMgr 88 | | _, _ -> 89 | let funDataMgr = FunDataManager() 90 | funDataDict[entry] <- funDataMgr 91 | funDataMgr 92 | member __.GetFunDataMgr entry = 93 | funDataDict[entry] 94 | 95 | member __.RemoveFunDataMgr (entry:Addr) = 96 | funDataDict.Remove entry 97 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/EvalHelper.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | module SuperCFG.ControlFlowAnalysis.EvalHelper 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | open B2R2.FrontEnd 30 | open B2R2.MiddleEnd.ConcEval 31 | open SuperCFG.BinGraph 32 | open SuperCFG.ControlFlowGraph 33 | 34 | let private memoryReader (hdl: BinHandle) _pc addr typ _e = 35 | let len = RegType.toByteWidth typ 36 | let fileInfo = hdl.File 37 | if addr < System.UInt64.MaxValue && fileInfo.IsValidAddr addr then 38 | match hdl.TryReadBytes (addr, len) with 39 | | Ok v -> Ok (BitVector.OfArr v) 40 | | Error e -> Error e 41 | else Error ErrorCase.InvalidMemoryRead 42 | 43 | let private stackAddr t = BitVector.OfInt32 0x1000000 t 44 | 45 | let private obtainStackDef (hdl: BinHandle) isa = 46 | match hdl.RegisterFactory.StackPointer with 47 | | Some r -> Some (r, isa.WordSize |> WordSize.toRegType |> stackAddr) 48 | | None -> None 49 | 50 | let private obtainFramePointerDef (hdl: BinHandle) isa = 51 | match hdl.RegisterFactory.FramePointer with 52 | | Some r -> Some (r, isa.WordSize |> WordSize.toRegType |> BitVector.Zero) 53 | | None -> None 54 | 55 | let private initState hdl isa pc = 56 | let st = EvalState (true) 57 | st.LoadFailureEventHandler <- memoryReader hdl 58 | [ obtainStackDef hdl isa; obtainFramePointerDef hdl isa ] 59 | |> List.choose id 60 | |> st.InitializeContext pc 61 | st 62 | 63 | let evalBlock hdl isa (blk: Vertex) = 64 | let pc = blk.VData.PPoint.Address 65 | let st = initState hdl isa pc 66 | st.SideEffectEventHandler <- fun _ st -> st.AbortInstr () 67 | match blk.VData.IRStatements |> SafeEvaluator.evalBlock st pc with 68 | | Ok st -> st 69 | | Error _ -> Utils.impossible () 70 | 71 | let evalFunctionUntilStopFn hdl isa (fn: RegularFunction) stopFn = 72 | let visited = HashSet () 73 | let rec evalLoop (blk: Vertex) st stopFn = 74 | let pp = blk.VData.PPoint 75 | if visited.Contains pp then None 76 | else 77 | visited.Add pp |> ignore 78 | let result = 79 | blk.VData.IRStatements 80 | |> SafeEvaluator.evalBlock st pp.Address 81 | match result with 82 | | Ok st' -> 83 | if stopFn blk then Some st' 84 | else 85 | match fn.TryFindVertex (ProgramPoint (st'.PC, 0)) with 86 | | Some v -> evalLoop v st' stopFn 87 | | None -> None 88 | | Error _ -> None 89 | let pc = fn.EntryPoint 90 | let st = initState hdl isa pc 91 | let root = fn.FindVertex (ProgramPoint (pc, 0)) 92 | evalLoop root st stopFn 93 | 94 | let readReg (st: EvalState) regID = 95 | match st.TryGetReg regID with 96 | | Def v -> Some v 97 | | Undef -> None 98 | 99 | let readMem (st: EvalState) addr endian size = 100 | let addr = BitVector.ToUInt64 addr 101 | match st.Memory.Read addr endian size with 102 | | Ok bs -> BitVector.ToUInt64 bs |> Some 103 | | Error _ -> None 104 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/FortranRegularJmpResolution.fs: -------------------------------------------------------------------------------- 1 | module SuperCFG.ControlFlowAnalysis.FortranRegularJmpResolution 2 | 3 | open SuperCFG.ControlFlowGraph 4 | open SuperCFG.ControlFlowAnalysis.LowUIRHelper 5 | /// Indirect jump resolution. 6 | type FortranRegularJmpResolution () = 7 | inherit PerFunctionAnalysis () 8 | override __.Name = "FortranIndirectJumpResolution" 9 | 10 | 11 | member private __.Analyze hdl (codeMgr: CodeManager) dataMgr fn addrs res evts = 12 | let rootBBL, bblDict, noV = ConstructBBLInfo fn 13 | 14 | match addrs with 15 | | iAddr :: rest -> 16 | let funCodeMgr = codeMgr.GetFunCodeMgr (fn: RegularFunction).EntryPoint 17 | let bblInfo = funCodeMgr.GetBBL iAddr 18 | let blkAddr = Set.minElement bblInfo.InstrAddrs 19 | let brInfo = ResolveFortranIndBr 20 | codeMgr fn blkAddr iAddr rootBBL bblDict noV 21 | let _, src = (fn: RegularFunction).IndJmpBBLs.TryGetValue iAddr 22 | if brInfo |> List.length > 0 then 23 | let evts = 24 | brInfo 25 | |> List.fold(fun evts target -> CFGEvents.addEdgeEvt fn src target 26 | IndirectJmpEdge evts) evts 27 | __.Analyze hdl codeMgr dataMgr fn addrs true evts 28 | else res, evts 29 | | [] -> res, evts 30 | 31 | 32 | member __.Resolve hdl codeMgr dataMgr fn evts = 33 | 34 | let addrs = (fn: RegularFunction).IndirectJumps 35 | |> Seq.choose( 36 | fun (KeyValue(addr, kinds)) -> 37 | if kinds |> Set.contains(UnknownIndJmp) then Some addr 38 | else None) 39 | |> Seq.toList 40 | __.Analyze hdl codeMgr dataMgr fn addrs false evts 41 | 42 | 43 | 44 | override __.Run hdl isa codeMgr dataMgr fn evts = 45 | let res, evts = __.Resolve hdl codeMgr dataMgr fn evts 46 | 47 | match res with 48 | | true -> 49 | Ok (evts |> CFGEvents.addPerFuncAnalysisEvt fn.EntryPoint) 50 | | false -> 51 | Ok evts 52 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/HistoryManager.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | 30 | /// To support the rollback mechanism, we remember inter-function edges created 31 | /// during the recovery of an indirect branch. If a vertex has been promoted due 32 | /// to such an edge, and the edge has turned out to be invalid, then we should 33 | /// be able to revert the promotion. 34 | type HistoricalFact = 35 | | CreatedFunction of func: Addr 36 | 37 | module private HistoricalFact = 38 | let toString = function 39 | | CreatedFunction (func) -> "CreatedFunction(" + func.ToString("x") + ")" 40 | 41 | /// Record and manage the CFG recovery history. 42 | type HistoryManager () = 43 | let functionStack = Stack () 44 | let history = Dictionary> () 45 | 46 | /// Check if the given function address exists in the function stack excluding 47 | /// the stack top. 48 | member __.HasFunctionLater addr = 49 | if functionStack.Count = 0 then false 50 | else Seq.tail functionStack |> Seq.exists (fun a -> addr = a) 51 | 52 | /// Record the historical fact. 53 | member __.Record fact = 54 | match functionStack.TryPeek () with 55 | | true, funcAddr -> 56 | match history.TryGetValue funcAddr with 57 | | true, stack -> stack.Push fact 58 | | false, _ -> 59 | let stack = Stack() 60 | stack.Push fact 61 | history[funcAddr] <- stack 62 | | false, _ -> () 63 | 64 | /// Peek the history of the current function. 65 | member __.PeekFunctionHistory fnAddr = 66 | let arr = 67 | match history.TryGetValue fnAddr with 68 | | true, stack -> 69 | history[fnAddr] <- Stack () 70 | stack |> Seq.toArray 71 | | false, _ -> [||] 72 | Array.append arr [| CreatedFunction (fnAddr) |] 73 | 74 | #if CFGDEBUG 75 | /// Debug print the history. 76 | member __.DebugPrint () = 77 | match functionStack.TryPeek () with 78 | | true, funcAddr -> 79 | match history.TryGetValue funcAddr with 80 | | true, stack -> 81 | stack |> Seq.iter (fun fact -> 82 | HistoricalFact.toString fact 83 | |> dbglog (nameof HistoryManager) "%s") 84 | | false, _ -> () 85 | | false, _ -> () 86 | #endif 87 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/ICFGBuildable.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | /// The main interface for building a CFG. 28 | type ICFGBuildable = 29 | /// Update CFGs based on the given CFGEvents. This function will run our CFG 30 | /// analysis by consuming the CFGEvents until there's no more event to 31 | /// consume. When everything is done well, this function returns "Ok ()". 32 | abstract Update: CFGEvents -> Result 33 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/IPluggableAnalysis.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open B2R2.FrontEnd 28 | open SuperCFG.ControlFlowAnalysis 29 | 30 | /// Return types of pluggable analysis interface. 31 | type PluggableAnalysisReturnType = 32 | /// Analysis done ok. 33 | | PluggableAnalysisOk 34 | /// Analysis done with error. 35 | | PluggableAnalysisError 36 | /// Analysis done and a new BinHandle has been created. 37 | | PluggableAnalysisNewBinary of BinHandle 38 | 39 | /// Pluggable analysis interface. Any CFG-related analysis implementing this 40 | /// interface can be plugged in or unplugged from the BinEssence. 41 | type IPluggableAnalysis = 42 | 43 | /// The name of the analysis (for debugging purpose). 44 | abstract Name: string 45 | 46 | /// Run the analysis, which will return whether it needs further iteration. 47 | abstract Run: 48 | CFGBuilder 49 | -> BinHandle 50 | -> CodeManager 51 | -> DataManager 52 | -> PluggableAnalysisReturnType 53 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/JumpTableMaintainer.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | 30 | /// Indirect branch jump table information. 31 | type JumpTable = { 32 | /// The address of the owner function of the indirect branch. 33 | HostFunctionEntry: Addr 34 | /// The indirect branch instruction address. 35 | InstructionAddr: Addr 36 | /// The base address used to compute the final target address. 37 | BranchBaseAddr: Addr 38 | /// Start address of the jump table, i.e., the first table entry's address. 39 | JTStartAddr: Addr 40 | /// Jump table's entry size. Typically this is 4-byte. 41 | JTEntrySize: int 42 | } 43 | with 44 | static member Init entry ins bAddr tAddr rt = 45 | { HostFunctionEntry = entry 46 | InstructionAddr = ins 47 | BranchBaseAddr = bAddr 48 | JTStartAddr = tAddr 49 | JTEntrySize = RegType.toByteWidth rt } 50 | 51 | type JumpTableMaintainer () = 52 | let jumpTables = SortedList<(Addr*Addr), JumpTable> () 53 | let potentialEndPoints = Dictionary<(Addr*Addr), Addr> () 54 | let confirmedEndPoints = Dictionary<(Addr*Addr), Addr> () 55 | 56 | /// Register a new jump table. 57 | member __.Register funcEntry insAddr bAddr tAddr rt = 58 | let jt = JumpTable.Init funcEntry insAddr bAddr tAddr rt 59 | if jumpTables.ContainsKey (tAddr, insAddr) then 60 | (* We had another jump table at the exactly the same location earlier. 61 | This means our rollback mechanism removed some history, and we just 62 | encountered the same indirect branch again. In this case, we will just 63 | reuse it. *) 64 | Ok () 65 | else 66 | confirmedEndPoints[(tAddr, insAddr)] <- tAddr 67 | potentialEndPoints[(tAddr, insAddr)] <- System.UInt64.MaxValue 68 | jumpTables[(tAddr, insAddr)] <- jt 69 | Ok () 70 | 71 | /// Update the potential end-point information. 72 | member __.UpdatePotentialEndPoint (tAddr, insAddr) pAddr = 73 | potentialEndPoints[(tAddr, insAddr)] <- pAddr 74 | 75 | /// Find the current potential end-point for the given table address. 76 | member __.FindPotentialEndPoint (tAddr, insAddr) = 77 | potentialEndPoints[(tAddr, insAddr)] 78 | 79 | /// Update the confirmed end-point of the jump table located at the tAddr. 80 | member __.UpdateConfirmedEndPoint (tAddr, insAddr) epAddr = 81 | confirmedEndPoints[(tAddr, insAddr)] <- epAddr 82 | 83 | /// Find the currently confirmed end-point for the given table address. 84 | member __.FindConfirmedEndPoint (tAddr, insAddr) = 85 | confirmedEndPoints[(tAddr, insAddr)] 86 | 87 | member __.Item 88 | with get((addr, insAddr)) = jumpTables[(addr, insAddr)] 89 | and set (addr, insAddr) jt = jumpTables[(addr, insAddr)] <- jt 90 | 91 | member __.ToSeq () = jumpTables |> seq 92 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/PerFunctionAnalysis.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowAnalysis 26 | 27 | open B2R2 28 | open B2R2.FrontEnd 29 | open SuperCFG.DataFlow 30 | open SuperCFG.ControlFlowAnalysis 31 | 32 | /// PerFunctionAnalysis implements a core CFG-recovery algorithm, which modifies 33 | /// a function-level CFG by analyzing the function. Though it works per 34 | /// function, It can modify other functions (thus, the entire CFGInfo). An 35 | /// analysis appends CFGEvents to modify function, but it can also modify the 36 | /// function directly. 37 | [] 38 | type PerFunctionAnalysis () = 39 | 40 | /// Name of the analysis. This is for debugging. 41 | abstract Name: string 42 | 43 | /// Run the analysis. 44 | abstract Run: 45 | BinHandle 46 | -> ISA 47 | -> CodeManager 48 | -> DataManager 49 | -> RegularFunction 50 | -> CFGEvents 51 | -> Result 52 | 53 | /// Helper module for per-function analyses. 54 | [] 55 | module PerFunctionAnalysis = 56 | 57 | /// Run constant propagation on the function. 58 | let runCP hdl isa (func: RegularFunction) reader = 59 | #if CFGDEBUG 60 | let stopWatch = System.Diagnostics.Stopwatch.StartNew() 61 | #endif 62 | let ssaCFG, ssaRoot = func.GetSSACFG hdl isa 63 | let cp = 64 | match reader with 65 | | Some reader -> SparseConstantPropagation (hdl, isa, ssaCFG, reader) 66 | | None -> SparseConstantPropagation (hdl, isa, ssaCFG) 67 | let cpState = cp.Compute ssaRoot 68 | #if CFGDEBUG 69 | stopWatch.Stop() 70 | printfn "%f" stopWatch.Elapsed.TotalMilliseconds 71 | dbglog "PerFunctionAnalysis" "@%x runs runCP (%f msec)" func.EntryPoint 72 | stopWatch.Elapsed.TotalMilliseconds 73 | #endif 74 | struct (cpState, ssaCFG) 75 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowAnalysis/TblResolution.fs: -------------------------------------------------------------------------------- 1 | namespace SuperCFG.ControlFlowAnalysis 2 | 3 | open SuperCFG.ControlFlowAnalysis.LowUIRHelper 4 | 5 | [] 6 | module TblAnalyzer = 7 | 8 | let [] Myname = "TblAnalyzer" 9 | 10 | type TblAnalyzer () = 11 | inherit PerFunctionAnalysis () 12 | override __.Name = Myname 13 | 14 | override __.Run _hdl _isa _codeMgr _dataMgr func evts = 15 | // Add per-function analysis event to resolve discovered tables 16 | if evts.FunctionAnalysisAddrs |> List.contains(func.EntryPoint) then 17 | (Ok evts) 18 | else 19 | (Ok (CFGEvents.addPerFuncAnalysisEvt func.EntryPoint evts)) 20 | 21 | member private __.Analyze2 hdl (codeMgr: CodeManager) dataMgr fn addrs res = 22 | let rootBBL, bblDict, noV = ConstructBBLInfo fn 23 | 24 | match addrs with 25 | | iAddr :: rest -> 26 | let funCodeMgr = codeMgr.GetFunCodeMgr (fn: RegularFunction).EntryPoint 27 | let bblInfo = funCodeMgr.GetBBL iAddr 28 | let blkAddr = Set.minElement bblInfo.InstrAddrs 29 | let brInfo = ResolveIndBr codeMgr fn blkAddr iAddr rootBBL bblDict noV 30 | funCodeMgr.UpdateBrInfo iAddr brInfo 31 | let tbls 32 | = brInfo 33 | |> List.map(fun info -> info.TblAddr) |> List.distinct 34 | |> List.filter(fun addr -> 35 | if fn.IsRegisteredCandidate iAddr addr then false 36 | else 37 | (fn: RegularFunction).RegisterNewIndJump iAddr 38 | true 39 | ) 40 | 41 | if tbls.IsEmpty then 42 | __.Analyze2 hdl codeMgr dataMgr fn rest res 43 | else 44 | __.Analyze2 hdl codeMgr dataMgr fn rest true 45 | | [] -> res 46 | 47 | member __.HasTblCandidates 48 | hdl (codeMgr: CodeManager) (dataMgr: DataManager) (fn: RegularFunction) = 49 | let indJmps = fn.IndirectJumps |> Seq.map(fun (KeyValue(addr,_))-> addr ) 50 | __.Analyze2 hdl codeMgr dataMgr fn (indJmps|> List.ofSeq) false 51 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/BasicBlock.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open B2R2 28 | open SuperCFG.BinGraph 29 | 30 | /// The base type for basic block. 31 | [] 32 | type BasicBlock (pp: ProgramPoint) = 33 | inherit VertexData (VertexData.genID ()) 34 | 35 | /// The start position (ProgramPoint) of the basic block. 36 | member __.PPoint with get() = pp 37 | 38 | /// The instruction address range of the basic block. 39 | abstract Range: AddrRange with get 40 | 41 | /// Check if this is a fake basic block inserted by our analysis. We create a 42 | /// fake block to represent call target vertices in a function-level CFG. 43 | abstract IsFakeBlock: unit -> bool 44 | 45 | /// Convert this basic block to a visual representation. 46 | abstract ToVisualBlock: unit -> VisualBlock 47 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/ControlFlowGraph.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open SuperCFG.BinGraph 28 | 29 | type ControlFlowGraph<'D, 'E when 'D :> BasicBlock and 'D : equality> 30 | (core: GraphCore<'D, 'E, DiGraph<'D, 'E>>) = 31 | inherit DiGraph<'D, 'E> (core) 32 | 33 | // vim: set tw=80 sts=2 sw=2: 34 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/ControlFlowGraph.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | true 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/DisasmBasicBlock.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open B2R2 28 | open B2R2.FrontEnd.BinLifter 29 | open SuperCFG.BinGraph 30 | 31 | /// Basic block type for a disassembly-based CFG (DisasmCFG). 32 | type DisasmBasicBlock (instrs: Instruction [], pp(*, ?funcID*)) = 33 | inherit BasicBlock (pp) 34 | 35 | let mutable instructions = instrs 36 | 37 | /// Temporarily disable this 38 | (* 39 | let symbolize (words: AsmWord []) = 40 | match funcID with 41 | | Some funcID -> 42 | words[words.Length - 1] <- 43 | { AsmWordKind = AsmWordKind.Value; AsmWordValue = funcID } 44 | | None -> () 45 | words 46 | *) 47 | 48 | override __.Range = 49 | let last = instructions[instructions.Length - 1] 50 | AddrRange (pp.Address, last.Address + uint64 last.Length - 1UL) 51 | 52 | override __.IsFakeBlock () = Array.isEmpty instructions 53 | 54 | override __.ToVisualBlock () = 55 | instructions 56 | |> Array.mapi (fun idx i -> 57 | if idx = Array.length instructions - 1 then 58 | i.Decompose (true)(* |> symbolize *) 59 | else i.Decompose (true)) 60 | 61 | member __.Instructions 62 | with get () = instructions 63 | and set (i) = instructions <- i 64 | 65 | member __.Disassemblies 66 | with get () = 67 | instructions |> Array.map (fun i -> i.Disasm ()) 68 | 69 | override __.ToString () = 70 | if instrs.Length = 0 then "DisasmBBLK(Dummy)" 71 | else $"DisasmBBLK({__.PPoint.Address:x})" 72 | 73 | type DisasmVertex = Vertex 74 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/DisasmCFG.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open SuperCFG.BinGraph 28 | open SuperCFG.ControlFlowGraph 29 | 30 | /// Disassembly-based CFG, where each node contains disassembly code. 31 | type DisasmCFG = ControlFlowGraph 32 | 33 | [] 34 | module DisasmCFG = 35 | let private initializer core = 36 | DisasmCFG (core) :> DiGraph 37 | 38 | let private initImperative () = 39 | ImperativeCore (initializer, UnknownEdge) 40 | |> DisasmCFG 41 | :> DiGraph 42 | 43 | let private initPersistent () = 44 | PersistentCore (initializer, UnknownEdge) 45 | |> DisasmCFG 46 | :> DiGraph 47 | 48 | /// Initialize IRCFG based on the implementation type. 49 | let init = function 50 | | ImperativeGraph -> initImperative () 51 | | PersistentGraph -> initPersistent () 52 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/FakeBlockInfo.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open B2R2 28 | 29 | /// Is this a get-pc-thunk function? 30 | type GetPCThunkInfo = 31 | /// It is not a get-pc-thunk. 32 | | NoGetPCThunk 33 | /// It is a get-pc-thunk, and the register wlil be assigned after this 34 | /// function. 35 | | YesGetPCThunk of RegisterID 36 | 37 | module GetPCThunkInfo = 38 | let isGetPCThunk = function 39 | | YesGetPCThunk _ -> true 40 | | _ -> false 41 | 42 | /// IRBasicBlock can be either a fake block or a regular block. FakeBlockInfo 43 | /// exists only for fake blocks. 44 | type FakeBlockInfo = { 45 | /// Call site address, i.e., the call instruction's address. 46 | CallSite: Addr 47 | /// How many bytes of the stack does this function unwind when return? 48 | UnwindingBytes: int64 49 | /// What is the distance between the caller's stack frame (activation record) 50 | /// and the callee's stack frame? If the distance is always constant, we 51 | /// remember the value here. 52 | FrameDistance: int option 53 | /// If this fake block represents a "get_pc" thunk, then return the register 54 | /// ID holding the current PC value after this function returns. 55 | GetPCThunkInfo: GetPCThunkInfo 56 | /// Is this fake block points to a PLT entry? 57 | IsPLT: bool 58 | /// Is this fake block represents a tail call? So, this fake block is 59 | /// connected with a regular jump edge, not with a call edge. 60 | IsTailCall: bool 61 | /// Is the caller invoke this fake block as an indirect call? 62 | IsIndirectCall: bool 63 | /// Is this a system call? This is possible when a `call` instruction is used 64 | /// to make a system call. For example, in x86, `call dword ptr [GS:0x10]` 65 | /// will be a system call. 66 | IsSysCall: bool 67 | } 68 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/IRCFG.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open SuperCFG.BinGraph 28 | 29 | type IRVertex = Vertex 30 | 31 | type IRCFG = ControlFlowGraph 32 | 33 | [] 34 | module IRCFG = 35 | let private initializer core = 36 | IRCFG (core) :> DiGraph 37 | 38 | let private initImperative () = 39 | ImperativeCore (initializer, UnknownEdge) 40 | |> IRCFG 41 | :> DiGraph 42 | 43 | let private initPersistent () = 44 | PersistentCore (initializer, UnknownEdge) 45 | |> IRCFG 46 | :> DiGraph 47 | 48 | /// Initialize IRCFG based on the implementation type. 49 | let init = function 50 | | ImperativeGraph -> initImperative () 51 | | PersistentGraph -> initPersistent () 52 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/InstructionInfo.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open B2R2 28 | open B2R2.BinIR.LowUIR 29 | open B2R2.FrontEnd.BinLifter 30 | 31 | /// Abstract information about the instruction and its corresponding IR 32 | /// statements. 33 | type InstructionInfo = { 34 | /// Instruction. 35 | Instruction: Instruction 36 | /// IR. 37 | Stmts: Stmt [] 38 | /// Corresponding BBL's address. 39 | BBLAddr: Addr 40 | } 41 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/SSATypes.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open B2R2 28 | open SuperCFG.SSA 29 | open System.Collections.Generic 30 | 31 | /// A mapping from an address to a SSACFG vertex. 32 | type SSAVMap = Dictionary 33 | 34 | /// This is a mapping from an edge to a dummy vertex (for external function 35 | /// calls). We first separately create dummy vertices even if they are 36 | /// associated with the same node (address) in order to compute dominance 37 | /// relationships without introducing incorrect paths or cycles. For 38 | /// convenience, we will always consider as a key "a return edge" from a fake 39 | /// vertex to a fall-through vertex. 40 | type FakeVMap = Dictionary 41 | 42 | /// Mapping from a variable to a set of defining SSA basic blocks. 43 | type DefSites = Dictionary> 44 | 45 | /// Defined variables per node in a SSACFG. 46 | type DefsPerNode = Dictionary> 47 | 48 | /// Counter for each variable. 49 | type VarCountMap = Dictionary 50 | 51 | /// Variable ID stack. 52 | type IDStack = Dictionary 53 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/SyscallTailInfo.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | /// A basic block may end with a syscall instruction, and the syscall may 28 | /// terminate the program or not. We store such information for each basic 29 | /// block to easily track them. 30 | type SyscallTailInfo = 31 | /// The basic block has no syscall. 32 | | NoSyscallTail 33 | /// The basic block has a syscall, but we didn't yet analyzed its type. 34 | | UnknownSyscallTail 35 | /// The basic block has a regular (non-exit) syscall. 36 | | RegularSyscallTail 37 | /// The basic block has an exit syscall. 38 | | ExitSyscallTail 39 | -------------------------------------------------------------------------------- /superCFGBuilder/ControlFlowGraph/VisualBlock.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.ControlFlowGraph 26 | 27 | open B2R2 28 | open B2R2.FrontEnd.BinLifter 29 | 30 | /// A visual line of a basic block. 31 | type VisualLine = AsmWord [] 32 | 33 | module VisualLine = 34 | [] 35 | let lineWidth terms = 36 | terms |> Array.fold (fun width term -> width + AsmWord.Width term) 0 37 | 38 | [] 39 | let toString terms = 40 | terms |> Array.map AsmWord.ToString |> String.concat " " 41 | 42 | /// A visual representation of a basic block. 43 | type VisualBlock = VisualLine [] 44 | 45 | module VisualBlock = 46 | let empty (addr: Addr): VisualBlock = 47 | [| 48 | [| { AsmWordKind = AsmWordKind.String 49 | AsmWordValue = $"# fake block @ {addr:x}" } 50 | |] 51 | |] 52 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/ConstantPropagation.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open SuperCFG.SSA 28 | open SuperCFG.BinGraph 29 | open SuperCFG.ControlFlowGraph 30 | 31 | /// The constant propagation framework, which is a modified version of sparse 32 | /// conditional constant propagation of Wegman et al. 33 | [] 34 | type ConstantPropagation<'L when 'L: equality> (ssaCFG) = 35 | inherit DataFlowAnalysis<'L, SSABasicBlock> () 36 | 37 | /// Constant propagation state. 38 | abstract State: CPState<'L> 39 | 40 | member private __.GetNumIncomingExecutedEdges st (blk: SSAVertex) = 41 | let myid = blk.GetID () 42 | DiGraph.GetPreds (ssaCFG, blk) 43 | |> List.map (fun p -> p.GetID (), myid) 44 | |> List.filter (fun (src, dst) -> CPState.isExecuted st src dst) 45 | |> List.length 46 | 47 | member private __.ProcessSSA st = 48 | while st.SSAWorkList.Count > 0 do 49 | let def = st.SSAWorkList.Pop () 50 | match Map.tryFind def st.SSAEdges.Uses with 51 | | Some uses -> 52 | uses 53 | |> Set.iter (fun (vid, idx) -> 54 | let v = DiGraph.FindVertexByID (ssaCFG, vid) 55 | if __.GetNumIncomingExecutedEdges st v > 0 then 56 | let ppoint, stmt = v.VData.SSAStmtInfos[idx] 57 | st.CPCore.Transfer st ssaCFG v ppoint stmt 58 | else ()) 59 | | None -> () 60 | 61 | member private __.ProcessFlow st = 62 | if st.FlowWorkList.Count > 0 then 63 | let parentid, myid = st.FlowWorkList.Dequeue () 64 | st.ExecutedEdges.Add (parentid, myid) |> ignore 65 | let blk = DiGraph.FindVertexByID (ssaCFG, myid) 66 | blk.VData.SSAStmtInfos 67 | |> Array.iter (fun (ppoint, stmt) -> 68 | st.CPCore.Transfer st ssaCFG blk ppoint stmt) 69 | if blk.VData.IsFakeBlock () then () 70 | else 71 | match blk.VData.GetLastStmt () with 72 | | Jmp _ -> () 73 | | _ -> (* Fall-through cases. *) 74 | DiGraph.GetSuccs (ssaCFG, blk) 75 | |> List.iter (fun succ -> 76 | let succid = succ.GetID () 77 | CPState.markExecutable st myid succid) 78 | else () 79 | 80 | member __.Compute (root: Vertex<_>) = 81 | __.State.FlowWorkList.Enqueue (0, root.GetID ()) 82 | while __.State.FlowWorkList.Count > 0 || __.State.SSAWorkList.Count > 0 do 83 | __.ProcessFlow __.State 84 | __.ProcessSSA __.State 85 | __.State 86 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/DFHelper.fs: -------------------------------------------------------------------------------- 1 | module SuperCFG.DataFlow.DFHelper 2 | 3 | open B2R2 4 | open B2R2.FrontEnd.BinLifter 5 | open SuperCFG.DataFlow 6 | 7 | let getDefs cfg root count = 8 | ///let cfg, root = BinEssence.getFunctionCFG ess 0UL |> Result.get 9 | let chain = DataFlowChain.init cfg root false 10 | let vp = 11 | { ProgramPoint = count //ProgramPoint (0x0UL, 1) 12 | VarExpr = Regular (Intel.Register.toRegID Intel.Register.RDX) } 13 | let res = chain.UseDefChain |> Map.find vp 14 | 15 | let vp2 = 16 | { ProgramPoint = ProgramPoint (0x685UL, 1) 17 | VarExpr = Regular (Intel.Register.toRegID Intel.Register.RDX) } 18 | 19 | ///let res2 = chain.UseDefChain |> Map.find res.[0] |> Set.toArray 20 | let res2 = chain.UseDefChain |> Map.find vp2 |> Set.toArray 21 | 22 | ///let res4 = chain.UseDefChain |> Map.find res2.[0] |> Set.toArray 23 | res2 24 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/DataFlow.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | true 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/ReachingDefinitions.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | open B2R2.BinIR 30 | open SuperCFG.BinGraph 31 | open SuperCFG.ControlFlowGraph 32 | 33 | [] 34 | type ReachingDefinitions<'Expr, 'BBL when 'Expr: comparison 35 | and 'BBL: equality 36 | and 'BBL :> BasicBlock> 37 | (_cfg: DiGraph<'BBL, CFGEdgeKind>) = 38 | inherit TopologicalDataFlowAnalysis>, 'BBL> (Forward) 39 | 40 | let gens = Dictionary>> () 41 | let kills = Dictionary>> () 42 | 43 | member __.Gens with get() = gens 44 | 45 | member __.Kills with get() = kills 46 | 47 | override __.Meet a b = Set.union a b 48 | 49 | override __.Top = Set.empty 50 | 51 | override __.Transfer i v = 52 | let vid = v.GetID () 53 | Set.union gens[vid] (Set.difference i kills[vid]) 54 | 55 | /// Reaching definition analysis with a LowUIR-based CFG. 56 | type LowUIRReachingDefinitions (cfg) as this = 57 | inherit ReachingDefinitions (cfg) 58 | 59 | do this.Initialize this.Gens this.Kills 60 | 61 | member private __.FindDefs (v: Vertex) = 62 | v.VData.InsInfos 63 | |> Array.fold (fun list info -> 64 | info.Stmts 65 | |> Array.foldi (fun list idx stmt -> 66 | match stmt.S with 67 | | LowUIR.Put ({ LowUIR.E = LowUIR.TempVar (_, n) }, _) -> 68 | let pp = ProgramPoint (info.Instruction.Address, idx) 69 | { ProgramPoint = pp; VarExpr = Temporary n } :: list 70 | | LowUIR.Put ({ LowUIR.E = LowUIR.Var (_, id, _) }, _) -> 71 | let pp = ProgramPoint (info.Instruction.Address, idx) 72 | { ProgramPoint = pp; VarExpr = Regular id } :: list 73 | | _ -> list) list 74 | |> fst) [] 75 | 76 | member private __.Initialize 77 | (gens: Dictionary<_, _>) (kills: Dictionary<_, _>) = 78 | let vpPerVar = Dictionary>> () 79 | let vpPerVertex = Dictionary list> () 80 | (cfg: DiGraph<_, _>).IterVertex (fun v -> 81 | let vid = v.GetID () 82 | let defs = __.FindDefs v 83 | gens[vid] <- defs |> Set.ofList 84 | vpPerVertex[vid] <- defs 85 | defs |> List.iter (fun ({ VarExpr = v } as vp) -> 86 | if vpPerVar.ContainsKey v then vpPerVar[v] <- Set.add vp vpPerVar[v] 87 | else vpPerVar[v] <- Set.singleton vp 88 | ) 89 | ) 90 | cfg.IterVertex (fun v -> 91 | let vid = v.GetID () 92 | let defVarPoints = vpPerVertex[vid] 93 | let vars = defVarPoints |> List.map (fun vp -> vp.VarExpr) 94 | let vps = defVarPoints |> Set.ofList 95 | let alldefs = 96 | vars |> List.fold (fun set v -> Set.union set vpPerVar[v]) Set.empty 97 | kills[vid] <- Set.difference alldefs vps 98 | ) 99 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/SPTransfer.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | module SuperCFG.DataFlow.SPTransfer 26 | 27 | open B2R2 28 | open B2R2.BinIR 29 | open B2R2.FrontEnd 30 | open SuperCFG.SSA 31 | open SuperCFG.ControlFlowGraph 32 | 33 | let evalBinOp op c1 c2 = 34 | match op with 35 | | BinOpType.ADD -> SPValue.add c1 c2 36 | | BinOpType.SUB -> SPValue.sub c1 c2 37 | | BinOpType.AND -> SPValue.``and`` c1 c2 38 | | _ -> NotAConst 39 | 40 | let isStackRelatedRegister (st: CPState) regid = 41 | st.BinHandle.RegisterFactory.IsStackPointer regid 42 | || st.BinHandle.RegisterFactory.IsFramePointer regid 43 | 44 | let evalReturn (st: CPState) (blk: SSAVertex) var = 45 | match var.Kind with 46 | | RegVar (rt, rid, _) -> 47 | let hdl = st.BinHandle 48 | if isStackRelatedRegister st rid then 49 | if hdl.RegisterFactory.IsStackPointer rid then 50 | let value = CPState.findReg st var 51 | let shiftAmount = Const (Utils.computeStackShift rt blk) 52 | evalBinOp BinOpType.ADD value shiftAmount 53 | else CPState.findReg st var 54 | else NotAConst 55 | | _ -> Utils.impossible () 56 | 57 | let rec evalExpr st blk = function 58 | | Num bv -> Const bv 59 | | Var v -> CPState.findReg st v 60 | | Nil -> NotAConst 61 | | Load _ -> NotAConst 62 | | UnOp _ -> NotAConst 63 | | FuncName _ -> NotAConst 64 | | BinOp (op, _, e1, e2) -> 65 | let c1 = evalExpr st blk e1 66 | let c2 = evalExpr st blk e2 67 | evalBinOp op c1 c2 68 | | RelOp _ -> NotAConst 69 | | Ite _ -> NotAConst 70 | | Cast _ -> NotAConst 71 | | Extract _ -> NotAConst 72 | | Undefined _ -> NotAConst 73 | | ReturnVal (addr, _, v) -> evalReturn st blk v 74 | | _ -> Utils.impossible () 75 | 76 | let evalDef (st: CPState) blk v e = 77 | match v.Kind with 78 | | RegVar (_, regid, _) when isStackRelatedRegister st regid -> 79 | evalExpr st blk e |> CPState.updateConst st v 80 | | RegVar _ -> CPState.updateConst st v NotAConst 81 | | TempVar _ -> evalExpr st blk e |> CPState.updateConst st v 82 | | _ -> () 83 | 84 | let evalPhi st cfg blk dst srcIDs = 85 | match CPState.getExecutableSources st cfg blk srcIDs with 86 | | [||] -> () 87 | | executableSrcIDs -> 88 | match dst.Kind with 89 | | RegVar _ | TempVar _ -> 90 | executableSrcIDs 91 | |> Array.choose (fun i -> 92 | { dst with Identifier = i } |> CPState.tryFindReg st true) 93 | |> Array.reduce st.CPCore.Meet 94 | |> fun merged -> CPState.updateConst st dst merged 95 | | _ -> () 96 | 97 | let evalJmp st cfg blk = function 98 | | InterJmp _ -> CPState.markExceptCallFallThrough st cfg blk 99 | | _ -> CPState.markAllSuccessors st cfg blk 100 | 101 | let evalStmt st cfg blk = function 102 | | Def (v, e) -> evalDef st blk v e 103 | | Phi (v, ns) -> evalPhi st cfg blk v ns 104 | | Jmp jmpTy -> evalJmp st cfg blk jmpTy 105 | | LMark _ | ExternalCall _ | SideEffect _ -> () 106 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/SPValue.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open B2R2 28 | 29 | /// StackPointerPropagation values. 30 | type SPValue = 31 | | NotAConst 32 | | Const of BitVector 33 | | Undef 34 | 35 | module SPValue = 36 | 37 | let goingUp fromV toV = 38 | match fromV, toV with 39 | | Const _, Undef 40 | | NotAConst, Undef 41 | | NotAConst, Const _ -> true 42 | | _ -> false 43 | 44 | let meet c1 c2 = 45 | match c1, c2 with 46 | | Undef, c | c, Undef -> c 47 | | Const bv1, Const bv2 -> if bv1 = bv2 then c1 else NotAConst 48 | | _ -> NotAConst 49 | 50 | let add c1 c2 = 51 | match c1, c2 with 52 | | Undef, _ | _, Undef -> Undef 53 | | Const bv1, Const bv2 -> Const (BitVector.Add (bv1, bv2)) 54 | | _ -> NotAConst 55 | 56 | let sub c1 c2 = 57 | match c1, c2 with 58 | | Undef, _ | _, Undef -> Undef 59 | | Const bv1, Const bv2 -> Const (BitVector.Sub (bv1, bv2)) 60 | | _ -> NotAConst 61 | 62 | let ``and`` c1 c2 = 63 | match c1, c2 with 64 | | Undef, _ | _, Undef -> Undef 65 | | Const bv1, Const bv2 -> Const (BitVector.BAnd (bv1, bv2)) 66 | | _ -> NotAConst 67 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/SparseConstantPropagation.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | open B2R2.FrontEnd 30 | open SuperCFG.SSA 31 | open SuperCFG.DataFlow.Utils 32 | 33 | [] 34 | module private SparseConstantPropagation = 35 | 36 | let initRegister (hdl: BinHandle) = 37 | let dict = Dictionary () 38 | match hdl.RegisterFactory.StackPointer with 39 | | Some sp -> 40 | let rt = hdl.RegisterFactory.RegIDToRegType sp 41 | let str = hdl.RegisterFactory.RegIDToString sp 42 | let var = { Kind = RegVar (rt, sp, str); Identifier = 0 } 43 | dict[var] <- Const (BitVector.OfUInt64 InitialStackPointer rt) 44 | dict 45 | | None -> dict 46 | 47 | /// The most basic constant propagation algorithm, which can track stack-based 48 | /// memory objects and GOT pointers. The reader is to enable reading data 49 | /// from external sections, e.g., rodata. If the reader is not given, we simply 50 | /// ignore such global data. 51 | type SparseConstantPropagation (hdl, isa, ssaCFG, ?reader) as this = 52 | 53 | inherit ConstantPropagation (ssaCFG) 54 | 55 | let reader = defaultArg reader (fun _ _ -> None) 56 | let st = CPState.initState hdl isa ssaCFG (initRegister hdl) (initMemory ()) this 57 | 58 | override __.State = st 59 | 60 | override __.Top = Undef 61 | 62 | interface IConstantPropagationCore with 63 | member __.Bottom = NotAConst 64 | member __.GoingUp a b = SCPValue.goingUp a b 65 | member __.Meet a b = SCPValue.meet a b 66 | member __.Transfer st cfg blk _ stmt = SCPTransfer.evalStmt st cfg blk stmt 67 | member __.MemoryRead addr rt = reader addr rt 68 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/StackPointerPropagation.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | open B2R2.FrontEnd 30 | open SuperCFG.SSA 31 | 32 | [] 33 | module private StackPointerPropagation = 34 | 35 | let initRegister (hdl: BinHandle) = 36 | let dict = Dictionary () 37 | match hdl.RegisterFactory.StackPointer with 38 | | Some sp -> 39 | let rt = hdl.RegisterFactory.RegIDToRegType sp 40 | let str = hdl.RegisterFactory.RegIDToString sp 41 | let var = { Kind = RegVar (rt, sp, str); Identifier = 0 } 42 | dict[var] <- Const (BitVector.OfUInt64 Utils.InitialStackPointer rt) 43 | dict 44 | | None -> dict 45 | 46 | /// This is a variant of the SparseConstantPropagation, which only tracks 47 | /// the stack pointer used in a function. We initiate the stack pointer with a 48 | /// constant first, and check how it propagates within the function. 49 | /// StackPointerPropagation is generally much faster than 50 | /// SparseConstantPropagation due to its simplicity. 51 | type StackPointerPropagation (hdl, isa, ssaCFG) as this = 52 | inherit ConstantPropagation (ssaCFG) 53 | 54 | let st = CPState.initState hdl isa ssaCFG (initRegister hdl) (Dictionary ()) this 55 | 56 | override __.State = st 57 | 58 | override __.Top = Undef 59 | 60 | interface IConstantPropagationCore with 61 | member __.Bottom = NotAConst 62 | member __.GoingUp a b = SPValue.goingUp a b 63 | member __.Meet a b = SPValue.meet a b 64 | member __.Transfer st cfg v _ stmt = SPTransfer.evalStmt st cfg v stmt 65 | member __.MemoryRead _addr _rt = None 66 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/UVTransfer.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | module SuperCFG.DataFlow.UVTransfer 26 | 27 | open B2R2.BinIR 28 | open SuperCFG.SSA 29 | 30 | let evalVar st v = 31 | match CPState.tryFindReg st false v with 32 | | None -> 33 | if v.Identifier = 0 then Untouched (RegisterTag v) (* Initialize here. *) 34 | else Touched 35 | | Some c -> c 36 | 37 | let rec evalExpr st blk = function 38 | | Var v -> evalVar st v 39 | | Extract (e, _, _) 40 | | Cast (CastKind.ZeroExt, _, e) 41 | | Cast (CastKind.SignExt, _, e) -> evalExpr st blk e 42 | | _ -> Touched (* Any other operations will be considered "touched". *) 43 | 44 | let evalDef st blk dstVar e = 45 | match dstVar.Kind with 46 | | MemVar 47 | | PCVar _ -> () (* Just ignore PCVar as it will always be "touched". *) 48 | | _ -> evalExpr st blk e |> CPState.updateConst st dstVar 49 | 50 | let evalPhi st cfg blk dst srcIDs = 51 | match CPState.getExecutableSources st cfg blk srcIDs with 52 | | [||] -> () 53 | | executableSrcIDs -> 54 | match dst.Kind with 55 | | MemVar | PCVar _ -> () 56 | | _ -> 57 | match CPState.tryFindReg st true dst with 58 | | Some Touched -> () 59 | | _ -> 60 | executableSrcIDs 61 | |> Array.choose (fun i -> 62 | { dst with Identifier = i } |> CPState.tryFindReg st true) 63 | |> Array.reduce UVValue.meet 64 | |> fun merged -> CPState.updateConst st dst merged 65 | 66 | let evalJmp st cfg blk = function 67 | | InterJmp _ -> CPState.markExceptCallFallThrough st cfg blk 68 | | _ -> CPState.markAllSuccessors st cfg blk 69 | 70 | let evalStmt st cfg blk = function 71 | | Def (v, e) -> evalDef st blk v e 72 | | Phi (v, ns) -> evalPhi st cfg blk v ns 73 | | Jmp jmpTy -> evalJmp st cfg blk jmpTy 74 | | LMark _ | ExternalCall _ | SideEffect _ -> () 75 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/UVValue.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open B2R2 28 | open SuperCFG.SSA 29 | 30 | type UntouchedTag = 31 | | RegisterTag of Variable 32 | | MemoryTag of Addr 33 | 34 | /// Untouched value propagation value. 35 | type UVValue = 36 | /// Touched means the value is redefined. 37 | | Touched 38 | /// This value is never defined within the function. 39 | | Untouched of UntouchedTag 40 | | Undef 41 | 42 | module UVValue = 43 | 44 | let goingUp fromV toV = 45 | match fromV, toV with 46 | | Untouched _, Undef 47 | | Touched, Undef 48 | | Touched, Untouched _ -> true 49 | | _ -> false 50 | 51 | let meet c1 c2 = 52 | match c1, c2 with 53 | | Undef, c | c, Undef -> c 54 | | Untouched t1, Untouched t2 when t1 = t2 -> c1 55 | | Untouched _, Untouched _ -> Touched 56 | | _ -> Touched 57 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/UntouchedValuePropagation.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | namespace SuperCFG.DataFlow 26 | 27 | open System.Collections.Generic 28 | open B2R2.FrontEnd 29 | open SuperCFG.SSA 30 | open SuperCFG.DataFlow.Utils 31 | 32 | [] 33 | module private UntouchedValuePropagation = 34 | 35 | let initRegister (hdl: BinHandle) = 36 | let dict = Dictionary () 37 | hdl.RegisterFactory.GetGeneralRegExprs () 38 | |> List.iter (fun regExpr -> 39 | let rid = hdl.RegisterFactory.RegIDFromRegExpr regExpr 40 | let rt = hdl.RegisterFactory.RegIDToRegType rid 41 | let str = hdl.RegisterFactory.RegIDToString rid 42 | let var = { Kind = RegVar (rt, rid, str); Identifier = 0 } 43 | dict[var] <- Untouched (RegisterTag var) 44 | ) 45 | match hdl.RegisterFactory.StackPointer with 46 | | Some sp -> 47 | let rt = hdl.RegisterFactory.RegIDToRegType sp 48 | let str = hdl.RegisterFactory.RegIDToString sp 49 | let var = { Kind = RegVar (rt, sp, str); Identifier = 0 } 50 | dict[var] <- Touched 51 | dict 52 | | None -> dict 53 | 54 | /// This is a variant of the SparseConstantPropagation, which computes which 55 | /// registers or memory cells are not re-defined (i.e., are untouched) within a 56 | /// function. This algorithm assumes that the SSA has been promoted. 57 | type UntouchedValuePropagation (hdl, isa, ssaCFG) as this = 58 | inherit ConstantPropagation (ssaCFG) 59 | 60 | let st = CPState.initState hdl isa ssaCFG (initRegister hdl) (initMemory ()) this 61 | 62 | override __.State = st 63 | 64 | override __.Top = Undef 65 | 66 | interface IConstantPropagationCore with 67 | member __.Bottom = Touched 68 | member __.GoingUp a b = UVValue.goingUp a b 69 | member __.Meet a b = UVValue.meet a b 70 | member __.Transfer st cfg v _ppoint stmt = UVTransfer.evalStmt st cfg v stmt 71 | member __.MemoryRead _addr _rt = None 72 | -------------------------------------------------------------------------------- /superCFGBuilder/DataFlow/Utils.fs: -------------------------------------------------------------------------------- 1 | (* 2 | B2R2 - the Next-Generation Reversing Platform 3 | 4 | Copyright (c) SoftSec Lab. @ KAIST, since 2016 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | *) 24 | 25 | module SuperCFG.DataFlow.Utils 26 | 27 | open System.Collections.Generic 28 | open B2R2 29 | open B2R2.BinIR.LowUIR 30 | open SuperCFG.ControlFlowGraph 31 | 32 | /// We use this constant for our data-flow analyses. 33 | let [] InitialStackPointer = 0x80000000UL 34 | 35 | let rec private extractUseFromExpr e = 36 | match e.E with 37 | | Var (_, id, _) -> [ Regular id ] 38 | | TempVar (_, n) -> [ Temporary n ] 39 | | UnOp (_, e) -> extractUseFromExpr e 40 | | BinOp (_, _, e1, e2) -> extractUseFromExpr e1 @ extractUseFromExpr e2 41 | | RelOp (_, e1, e2) -> extractUseFromExpr e1 @ extractUseFromExpr e2 42 | | Load (_, _, e) -> extractUseFromExpr e 43 | | Ite (c, e1, e2) -> 44 | extractUseFromExpr c @ extractUseFromExpr e1 @ extractUseFromExpr e2 45 | | Cast (_, _, e) -> extractUseFromExpr e 46 | | Extract (e, _, _) -> extractUseFromExpr e 47 | | _ -> [] 48 | 49 | let private extractUseFromStmt s = 50 | match s.S with 51 | | Put (_, e) 52 | | Store (_, _, e) 53 | | Jmp (e) 54 | | CJmp (e, _, _) 55 | | InterJmp (e, _) -> extractUseFromExpr e 56 | | InterCJmp (c, e1, e2) -> 57 | extractUseFromExpr c @ extractUseFromExpr e1 @ extractUseFromExpr e2 58 | | _ -> [] 59 | 60 | let extractUses stmt = 61 | extractUseFromStmt stmt 62 | |> Set.ofList 63 | 64 | let filterRegularVars vars = 65 | vars |> Set.filter (function 66 | | Regular _ -> true 67 | | _ -> false) 68 | 69 | let inline initMemory () = 70 | let dict = Dictionary () 71 | dict[0] <- (Map.empty, Set.empty) 72 | dict 73 | 74 | let computeStackShift rt (blk: SSAVertex) = 75 | let retAddrSize = RegType.toByteWidth rt |> int64 76 | let adj = blk.VData.FakeBlockInfo.UnwindingBytes 77 | BitVector.OfInt64 (retAddrSize + adj) rt 78 | -------------------------------------------------------------------------------- /superCFGBuilder/SSA/SSA.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | true 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /superCFGBuilder/superCFGBuilder/ASanGen.fs: -------------------------------------------------------------------------------- 1 | module SupersetCFG.ASanGen 2 | 3 | open System.Collections 4 | open B2R2 5 | open B2R2.FrontEnd 6 | open SuperCFG.BinEssence 7 | open B2R2.FrontEnd.BinLifter.Intel 8 | open SuperCFG.BinGraph 9 | open SuperCFG.ControlFlowAnalysis 10 | open SuperCFG.ControlFlowGraph 11 | open SuperCFG.ControlFlowAnalysis.LowUIRHelper 12 | open SupersetCFG.MetaGen 13 | 14 | type MemInfo = { 15 | Addr: string 16 | MemAccType: string 17 | MemAccSize: uint list 18 | } 19 | type ASanInfo = { 20 | Addr: string 21 | InstList: MemInfo list 22 | } 23 | 24 | let isMemAccess = function 25 | | OprMem (_, _, _, size) -> uint size 26 | | _ -> uint 0 27 | 28 | let getMemAccess = function 29 | | NoOperand -> [] 30 | | OneOperand opr -> [(isMemAccess opr)] 31 | | TwoOperands (opr1, opr2) -> 32 | [(isMemAccess opr1); (isMemAccess opr2)] 33 | | ThreeOperands (opr1, opr2, opr3) -> 34 | [(isMemAccess opr1); (isMemAccess opr2); (isMemAccess opr3)] 35 | | FourOperands (opr1, opr2, opr3, opr4) -> 36 | [(isMemAccess opr1); (isMemAccess opr2); 37 | (isMemAccess opr3); (isMemAccess opr4)] 38 | 39 | let getMemAccessType instInfo memAcc = 40 | let operands = memAcc |> List.filter(fun x -> x > uint 0) 41 | if operands |> List.length = 1 then 42 | let result = instInfo.Stmts 43 | |> Seq.fold(fun acc stmt -> GetAccType stmt acc) List.Empty 44 | |> List.distinct |> List.ofSeq 45 | 46 | result 47 | else List.Empty 48 | 49 | let rec memAccCheckLoop (hdl: BinHandle) (lu: LiftingUnit) funCodeMgr (addr:Addr) (eAddr:Addr) acc = 50 | if addr = eAddr then acc 51 | else 52 | let ins = lu.ParseInstruction (addr) 53 | let memAcc = getMemAccess (ins :?> IntelInstruction).Operands 54 | 55 | let insInfo = (funCodeMgr: FunCodeManager).InsMap[addr] 56 | let memAccType = getMemAccessType insInfo memAcc 57 | let aType = if memAccType.Length > 1 then 58 | "S" 59 | elif memAccType.Length = 1 then memAccType[0] 60 | else "" 61 | let nextAddr = addr + uint64 ins.Length 62 | if aType <> "" then 63 | let addrStr = sprintf "0x%x" addr 64 | let code = {Addr=addrStr; MemAccType=aType; MemAccSize=memAcc} 65 | memAccCheckLoop hdl lu funCodeMgr nextAddr eAddr (code::acc) 66 | else 67 | memAccCheckLoop hdl lu funCodeMgr nextAddr eAddr (acc) 68 | 69 | 70 | let memAccCheck hdl lu funCodeMgr (vertex: Vertex) = 71 | let sAddr = vertex.VData.PPoint.Address 72 | if vertex.VData.IsFakeBlock() then 73 | None 74 | else if vertex.VData.PPoint.Position >= 0 then 75 | let eAddr = vertex.VData.Range.Count + sAddr 76 | let code = memAccCheckLoop hdl lu funCodeMgr sAddr eAddr [] |> List.rev 77 | Some code 78 | else 79 | None 80 | 81 | let ASanMetaGen (ess: BinEssence) hdl = 82 | let fnList = 83 | ess.CodeManager.FunctionMaintainer.RegularFunctions 84 | |> List.ofArray 85 | |> List.filter (fun fn -> 86 | if ess.CodeManager.HasFunCodeMgr fn.EntryPoint then true 87 | else 88 | printf "Unresolved Entry point: %x" fn.EntryPoint 89 | false ) 90 | |> List.map(fun fn -> 91 | let funCodeMgr = ess.CodeManager.GetFunCodeMgr fn.EntryPoint 92 | let cfg, root = BinEssence.getFunctionCFG ess fn.EntryPoint 93 | |> Result.get 94 | let disasmcfg, root2 = DisasmLens.filter2 funCodeMgr cfg root 95 | let allVertices = CollectVertices disasmcfg root2 96 | let instList 97 | = allVertices 98 | |> List.fold(fun acc (v, edges) -> 99 | match memAccCheck hdl ess.LiftingUnit funCodeMgr v with 100 | | Some code -> code@acc 101 | | _ -> acc 102 | ) List.Empty 103 | {Addr = $"0x%x{fn.EntryPoint}"; InstList = instList} ) 104 | 105 | () 106 | fnList 107 | -------------------------------------------------------------------------------- /superCFGBuilder/superCFGBuilder/Program.fs: -------------------------------------------------------------------------------- 1 | module SupersetCFG.Main 2 | 3 | open System.Collections.Generic 4 | open B2R2 5 | open B2R2.FrontEnd 6 | open B2R2.FrontEnd.BinLifter 7 | open B2R2.FrontEnd.BinLifter.Intel 8 | open SuperCFG.BinEssence 9 | open System.IO 10 | #if DEBUG_META_FILE 11 | open System.Text.Json 12 | #else 13 | open Newtonsoft.Json 14 | #endif 15 | open SupersetCFG.MetaGen 16 | open SupersetCFG.ASanGen 17 | 18 | type SupersetRecord = { 19 | FunDict: IDictionary 20 | PLTDict: IDictionary 21 | FalseFunList: string list 22 | SuspiciousFunList: string list 23 | } 24 | 25 | let ConstructCFG ess hdl = 26 | #if DEBUG 27 | let startTime = System.DateTime.Now 28 | #endif 29 | let fnList = MetaGen ess hdl 30 | #if DEBUG 31 | let endTime = System.DateTime.Now 32 | endTime.Subtract(startTime).TotalSeconds 33 | |> printfn "[*] Construct CFG %f sec." 34 | #endif 35 | fnList 36 | 37 | let CreateASanMeta ess hdl = 38 | #if DEBUG 39 | let startTime = System.DateTime.Now 40 | #endif 41 | let fnList = ASanMetaGen ess hdl 42 | #if DEBUG 43 | let endTime = System.DateTime.Now 44 | endTime.Subtract(startTime).TotalSeconds 45 | |> printfn "[*] Construct CFG %f sec." 46 | #endif 47 | fnList 48 | 49 | let MakeB2R2Meta ess (fnList: FnInfo list) = 50 | #if DEBUG 51 | let startTime = System.DateTime.Now 52 | #endif 53 | let falseFnList = 54 | ess.CodeManager.FalseFunSet 55 | |> Seq.distinct |> Seq.map(fun x-> $"0x%x{x}") |> List.ofSeq 56 | 57 | let suspiciousFnList = 58 | ess.CodeManager.SuspiciousFunSet 59 | |> Seq.distinct |> Seq.map(fun x-> $"0x%x{x}") |> List.ofSeq 60 | |> List.filter (fun entry -> not <| List.contains entry falseFnList) 61 | 62 | let fnDict = fnList |> Seq.map (fun funInfo -> funInfo.Addr, funInfo ) 63 | |> dict 64 | let pltDict = ess.CodeManager.FunctionMaintainer.PLTFunctions 65 | |> Seq.map(fun (KeyValue(k, v)) -> ( $"0x%x{k}"), v) 66 | |> dict 67 | let superSetRec = {FunDict = fnDict; PLTDict=pltDict 68 | FalseFunList=falseFnList 69 | SuspiciousFunList = suspiciousFnList} 70 | #if DEBUG 71 | let endTime = System.DateTime.Now 72 | endTime.Subtract(startTime).TotalSeconds 73 | |> printfn "[*] Extract data %f sec." 74 | #endif 75 | superSetRec 76 | 77 | let MakeBioAsanMeta fnList = 78 | let fnDict = fnList |> Seq.map (fun funInfo -> funInfo.Addr, funInfo ) 79 | |> dict 80 | fnDict 81 | 82 | let SaveJSON fileName data = 83 | #if DEBUG 84 | let startTime = System.DateTime.Now 85 | #endif 86 | 87 | #if DEBUG_META_FILE 88 | let mutable options = JsonSerializerOptions() 89 | options.WriteIndented <- true 90 | let serialized_data = JsonSerializer.Serialize (data, options) 91 | File.WriteAllText(fileName, serialized_data); 92 | #else 93 | let fileStream = new StreamWriter(fileName: string) 94 | let serializer = new JsonSerializer() 95 | serializer.Serialize(fileStream, data) 96 | fileStream.Close() 97 | #endif 98 | 99 | #if DEBUG 100 | let endTime = System.DateTime.Now 101 | endTime.Subtract(startTime).TotalSeconds 102 | |> printfn "[*] JsonSerializer %f sec." 103 | #endif 104 | 105 | [] 106 | let main args = 107 | let path = args[0] 108 | if File.Exists(path) then 109 | let isa = ISA.DefaultISA 110 | let hdl = BinHandle (args[0], isa) 111 | let fileName = args[1] 112 | let ess = BinEssence.init hdl isa [] [] [] 113 | if args.Length = 2 then 114 | let fnList = ConstructCFG ess hdl 115 | let data = MakeB2R2Meta ess fnList 116 | SaveJSON fileName data 117 | elif args.Length > 2 && args[2] = "att" then 118 | Disasm.setDisassemblyFlavor ATTSyntax 119 | let fnList = ConstructCFG ess hdl 120 | let data = MakeB2R2Meta ess fnList 121 | SaveJSON fileName data 122 | elif args.Length > 2 && args[2] = "asan" then 123 | let fnList = CreateASanMeta ess hdl 124 | SaveJSON fileName fnList 125 | 0 126 | else 1 127 | -------------------------------------------------------------------------------- /superCFGBuilder/superCFGBuilder/superCFGBuilder.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net9.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /superSymbolizer/README.md: -------------------------------------------------------------------------------- 1 | # superSymbolizer 2 | -------------------------------------------------------------------------------- /superSymbolizer/requirements.txt: -------------------------------------------------------------------------------- 1 | pyelftools>=0.29 2 | --------------------------------------------------------------------------------