├── .gitignore ├── LICENSE ├── README.md ├── creal.py ├── databaseconstructor ├── IOGenerator.py ├── __init__.py ├── displayfunc.py ├── functioner.py ├── functionextractor │ ├── CMakeLists.txt │ ├── README.md │ ├── extractor.py │ └── src │ │ ├── CMakeLists.txt │ │ ├── FunctionExtractor.cpp │ │ ├── FunctionExtractor.hpp │ │ ├── FunctionPrinter.cpp │ │ ├── FunctionPrinter.hpp │ │ ├── FunctionProcess.cpp │ │ ├── FunctionProcess.hpp │ │ ├── ProcessCall.cpp │ │ ├── ProcessCall.hpp │ │ ├── RenameFunction.cpp │ │ ├── RenameFunction.hpp │ │ ├── RenameGlobal.cpp │ │ ├── RenameGlobal.hpp │ │ ├── RuleActionCallback.cpp │ │ ├── RuleActionCallback.hpp │ │ ├── Utils.cpp │ │ ├── Utils.hpp │ │ └── tool │ │ ├── CMakeLists.txt │ │ └── FunctionExtract.cpp ├── functions_pointer_global_io.json ├── generate.py ├── proxy.py └── variable.py ├── generate_mutants.py ├── profiler ├── CMakeLists.txt └── src │ ├── CMakeLists.txt │ ├── GlobalMacro.cpp │ ├── GlobalMacro.hpp │ ├── ProfilerEntry.cpp │ ├── ProfilerEntry.hpp │ ├── RuleActionCallback.cpp │ ├── RuleActionCallback.hpp │ ├── TagExpression.cpp │ ├── TagExpression.hpp │ └── tool │ ├── CMakeLists.txt │ └── Profiler.cpp ├── synthesizer └── synthesizer.py └── utils ├── __init__.py └── compcert.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | build/ 3 | tests/ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2024 Shaohua Li 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Creal 2 | 3 | This is the tool for our PLDI 2024 paper "*Boosting Compiler Testing by Injecting Real-World Code*". The code and data is also available in our [artifact](https://doi.org/10.5281/zenodo.10951313). 4 | 5 | **Creal** is an automated program generator for C. Given a valid C program as the seed, Creal can inject new functions into it and produce new valid programs. By default, Creal uses [Csmith](https://github.com/csmith-project/csmith) to produce seed programs. 6 | 7 | ## Structure of the project 8 | 9 | ``` 10 | |-- creal.py # The default script for applying Creal on Csmith 11 | |-- generate_mutants.py # The script for applying Creal on a given seed program 12 | |-- generate_csmith_seed.py # An auxilary script for generating Csmith programs 13 | |-- synthesizer # The synthesizer implementation directory 14 | | |-- synthesizer.py # The synthesizer implementation of Creal 15 | |-- profiler # Profiling tools 16 | | |-- src # The code for the profiler 17 | | |-- build # The compiled profiler(./build/bin/profile) used by synthesizer.py 18 | |-- databaseconstructor # Constructing function database 19 | | |-- functionextractor 20 | | | |-- extractor.py # For extracting valid functions from a C/C++ project 21 | | |-- generate.py # For generating IO for functions 22 | ``` 23 | 24 | 25 | ## Use Creal 26 | 27 | **Step 1: Install necessary packages** 28 | - **Python** >= 3.10 29 | - **Csmith** (Please install it following [Csmith](https://github.com/csmith-project/csmith)) 30 | - **CSMITH_HOME**: After installing Csmith, please set the environment variable `CSMITH_HOME` to the installation path, with which we can locate `$CSMITH_HOME/include/csmith.h`. 31 | - **CompCert** (Please install it following [CompCert](https://compcert.org/man/manual002.html#install)) 32 | - **clang** >= 14, **libclang-dev** 33 | - **diopter** == 0.0.24 (`pip install diopter==0.0.24`) 34 | - **termcolor** (`pip install termcolor`) 35 | 36 | **Step 2: Compile the profiler** 37 | ```shell 38 | $ cd profiler 39 | $ mkdir build 40 | $ cd build 41 | $ cmake .. 42 | $ make 43 | ``` 44 | 45 | **Step 3: Use Creal** 46 | To generate new programs from Csmith programs, run 47 | ```shell 48 | ./creal.py --dst ./tmp --syn-prob 20 --num-mutants 5 49 | ``` 50 | This script will first invoke Csmith to generate a seed program and then generate mutated programs. 51 | The used function database is `databaseconstructor/functions_pointer_global_io.json`. 52 | The seed program will be saved in the directory specified by ``"--dst"``, which is ``"./tmp"`` in the command above. 53 | Parameter explanation: 54 | - `--dst`: path to the directory where programs will be saved. 55 | - `--syn-prob`: synthesis probabiliy (0~100). 56 | - `--num-mutants`: number of mutants per seed. 57 | 58 | 59 | ## More 60 | 61 | ### Run Creal on a given seed program 62 | We also provide a script for generating new programs by mutating a given seed program. 63 | Note that the given program should satisfy the following requirements: 64 | 65 | - It is executable, i.e., contains a main function. 66 | 67 | - It has at least another function with some statements and this function is reachable from the main function. 68 | 69 | - It returns 0, i.e., normal exit. 70 | 71 | For example, the following is a valid seed program: 72 | 73 | ```C 74 | 75 | int foo(int x) { 76 | int a = 0; 77 | a = a + x; 78 | return a; 79 | } 80 | int main(){ 81 | foo(1); 82 | return 0; 83 | } 84 | ``` 85 | Suppose you save this program as `a.c`. 86 | You can invoke Creal on this program by running 87 | 88 | ```shell 89 | $ ./generate_mutants.py --seed /path/to/a.c --dst ./tmp --syn-prob 20 --num-mutants 5 90 | ``` 91 | 92 | ### Build new function database 93 | 94 | All the 50K functions used by ``creal.py`` and ``generate_mutants.py`` are available at ``databaseconstructor/functions_pointer_global_io.json``. 95 | 96 | You can generate a new function database as follows: 97 | 98 | **Step 0**, build the function extractor 99 | 100 | ```shell 101 | $ cd ./databaseconstructor/functionextractor/ 102 | $ mkdir build && cd build 103 | $ cmake .. && make 104 | ``` 105 | 106 | **Step 1**, prepare a C/C++ project and extract all valid functions from it by running: 107 | 108 | ```shell 109 | $ cd ./databaseconstructor/functionextractor/ 110 | $ ./extract.py --src /path/to/your/project --dst functions.json --cpu 10 111 | ``` 112 | Parameters: 113 | - ``--src``: path to the prepared C/C++ project. 114 | - ``--dst``: the extracted functions will be saved in the specified json files 115 | 116 | **Step 2**, generate IO for the extracted functions by running: 117 | 118 | ```shell 119 | $ cd ./databaseconstructor/ 120 | $ ./generate.py --src /path/to/functions.json --dst functions_io.json --num 5 --cpu 10 121 | ``` 122 | Parameters: 123 | - ``--src``: the extracted functions.json 124 | - ``--dst``: the new functions_io.json with generated IO pairs 125 | - ``--num``: number of IO for each function 126 | 127 | **Step 3**, after generating the new function database (``functions_io.json``), you can modify ``creal.py`` or ``generate_mutants.py`` to change the path t 128 | o the function database by modifying the value ``FUNCTION_DB_FILE``. 129 | 130 | **Step 4**, now you can following the above guidelines to use Creal on the new function database. 131 | -------------------------------------------------------------------------------- /creal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys, shutil, re, time, tempfile, signal, random, string, argparse 3 | from datetime import datetime 4 | from glob import glob 5 | from enum import Enum, auto 6 | from diopter.compiler import ( 7 | CompilationSetting, 8 | CompilerExe, 9 | OptLevel, 10 | SourceProgram, 11 | Language, 12 | ObjectCompilationOutput 13 | ) 14 | from diopter.sanitizer import Sanitizer 15 | from diopter.utils import TempDirEnv 16 | import subprocess as sp 17 | from synthesizer.synthesizer import Synthesizer, SynthesizerError 18 | from utils.compcert import CComp as this_CComp 19 | from pathlib import Path 20 | from datetime import datetime 21 | from termcolor import colored 22 | 23 | def print_red(msg): 24 | print(colored(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' >', 'yellow'), colored(msg, 'red'), flush=True) 25 | def print_green(msg): 26 | print(colored(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' >', 'yellow'), colored(msg, 'green'), flush=True) 27 | def print_blue(msg): 28 | print(colored(datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' >', 'yellow'), colored(msg, 'blue'), flush=True) 29 | 30 | def id_generator(size=6, chars=string.ascii_uppercase + string.digits): 31 | return ''.join(random.choice(chars) for _ in range(size)) 32 | 33 | 34 | DEBUG = 0 35 | """CONFIG""" 36 | FUNCTION_DB_FILE = os.path.join(os.path.dirname(__file__), './databaseconstructor/functions_pointer_global_io.json') 37 | MIN_PROGRAM_SIZE = 8000 # programs shorter than this many bytes are too boring to test 38 | NUM_MUTANTS = 10 # number of mutants generated by the synthesizer per seed. 39 | COMPILER_TIMEOUT = 200 40 | PROG_TIMEOUT = 10 41 | CCOMP_TIMEOUT = 60 # compcert timeout 42 | CSMITH_USER_OPTIONS = "--no-volatiles --no-volatile-pointers --no-unions" 43 | CSMITH_TIMEOUT = 20 44 | CREDUCE_JOBS = 1 45 | """TOOL""" 46 | CSMITH_HOME = os.environ["CSMITH_HOME"] 47 | 48 | if not os.path.exists(os.path.join(CSMITH_HOME, 'include/csmith.h')): 49 | print_red('CSMITH_HOME is not set correctly, cannot find csmith.h in "$CSMITH_HOME/include/".') 50 | sys.exit(1) 51 | 52 | CC = CompilationSetting( 53 | compiler=CompilerExe.get_system_gcc(), 54 | opt_level=OptLevel.O3, 55 | flags=("-march=native",f"-I{CSMITH_HOME}/include"), 56 | ) 57 | SAN_SAN = Sanitizer(checked_warnings=False, use_ccomp_if_available=False) # sanitizers only 58 | SAN_CCOMP = this_CComp.get_system_ccomp() # CompCert only 59 | 60 | """Global vars""" 61 | 62 | class CompCode(Enum): 63 | """Compile status 64 | """ 65 | OK = auto() # ok 66 | Timeout = auto() # timeout during compilation 67 | Sanfail = auto() # sanitization failed 68 | Crash = auto() # compiler crash 69 | Error = auto() # compiler error 70 | WrongEval= auto() # inconsistent results across compilers but consistent within the same compiler 71 | Wrong = auto() # inconsistent results across compilers/opts 72 | 73 | def generate_random_string(len:int=5) -> str: 74 | """Generate a random string of length len""" 75 | return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(len)) 76 | 77 | def run_cmd(cmd, timeout): 78 | if type(cmd) is not list: 79 | cmd = cmd.split(' ') 80 | cmd = list(filter(lambda x: x!='', cmd)) 81 | # Start the subprocess 82 | process = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) 83 | # Wait for the subprocess to finish or timeout 84 | try: 85 | output, error = process.communicate(timeout=timeout) 86 | output = output.decode("utf-8") 87 | except sp.TimeoutExpired: 88 | # Timeout occurred, kill the process 89 | try: 90 | os.killpg(process.pid, signal.SIGTERM) 91 | except ProcessLookupError: 92 | pass 93 | finally: 94 | output = '' 95 | # A workaround to tmpxxx.exe as it sometimes escapes from os.killpg 96 | cmd_str = " ".join(cmd) 97 | time.sleep(2) 98 | if '.exe' in cmd_str: 99 | os.system(f"pkill -9 -f {cmd_str}") 100 | return 124, output 101 | 102 | # Return the exit code and stdout of the process 103 | return process.returncode, output 104 | 105 | def write_bug_desc_to_file(to_file, data): 106 | with open(to_file, "a") as f: 107 | f.write(f"/* {data} */\n") 108 | 109 | def read_checksum(data): 110 | res = re.findall(r'checksum = (.*)', data) 111 | if len(res) > 0: 112 | return res[0] 113 | return 'NO_CKSUM' 114 | 115 | def check_sanitizers(src): 116 | """Check validity with sanitizers""" 117 | with open(src, 'r') as f: 118 | code = f.read() 119 | prog = SourceProgram(code=code, language=Language.C) 120 | preprog = CC.preprocess_program(prog, make_compiler_agnostic=True) 121 | if DEBUG: 122 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), "SAN.sanitize", flush=True) 123 | if not SAN_SAN.sanitize(preprog): 124 | return False 125 | return True 126 | 127 | def check_ccomp(src, random_count=1): 128 | """ 129 | Check validity with CompCert. 130 | src:str -> source file 131 | random_count:int -> the number of times using ccomp -random for checking 132 | """ 133 | with open(src, 'r') as f: 134 | code = f.read() 135 | prog = SourceProgram(code=code, language=Language.C) 136 | preprog = CC.preprocess_program(prog, make_compiler_agnostic=True) 137 | if DEBUG: 138 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), "SAN.ccomp", flush=True) 139 | with TempDirEnv(): 140 | try: 141 | ccomp_result = SAN_CCOMP.check_program(preprog, timeout=CCOMP_TIMEOUT, debug=DEBUG) 142 | except sp.TimeoutExpired: 143 | return False 144 | if ccomp_result is False: 145 | return False 146 | with TempDirEnv(): 147 | for _ in range(random_count): 148 | try: 149 | ccomp_result_random = SAN_CCOMP.check_program(preprog, timeout=CCOMP_TIMEOUT, debug=DEBUG, additional_flags=["-random"]) 150 | except sp.TimeoutExpired: 151 | return False 152 | if ccomp_result_random is False: 153 | return False 154 | # check for unspecified behavior 155 | if ccomp_result.stdout != ccomp_result_random.stdout: 156 | return False 157 | return True 158 | 159 | def compile_and_run(compiler, src): 160 | cksum = '' 161 | tmp_f = tempfile.NamedTemporaryFile(suffix=".exe", delete=False) 162 | tmp_f.close() 163 | exe = tmp_f.name 164 | cmd = f"{compiler} {src} -I{CSMITH_HOME}/include -o {exe}" 165 | ret, out = run_cmd(cmd, COMPILER_TIMEOUT) 166 | if ret == 124: # another compile chance when timeout 167 | time.sleep(1) 168 | ret, out = run_cmd(cmd, COMPILER_TIMEOUT) 169 | if ret == 124: # we treat timeout as crash now. 170 | write_bug_desc_to_file(src, f"Compiler timeout! Can't compile with {compiler}") 171 | if os.path.exists(exe): os.remove(exe) 172 | return CompCode.Timeout, cksum 173 | if ret != 0: 174 | write_bug_desc_to_file(src, f"Compiler crash! Can't compile with {compiler}") 175 | if os.path.exists(exe): os.remove(exe) 176 | return CompCode.Crash, cksum 177 | ret, out = run_cmd(f"{exe}", PROG_TIMEOUT) 178 | cksum = read_checksum(out) 179 | write_bug_desc_to_file(src, f"EXITof {compiler}: {ret}") 180 | write_bug_desc_to_file(src, f"CKSMof {compiler}: {cksum}") 181 | if os.path.exists(exe): os.remove(exe) 182 | return CompCode.OK, cksum 183 | 184 | def check_compile(src:str, compilers:list) -> CompCode: 185 | """Compile the program with a list of compilers and check their status 186 | """ 187 | cksum_list = [] 188 | for comp in compilers: 189 | if DEBUG: 190 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), "compiler_and_run: ", comp, flush=True) 191 | ret, cksum = compile_and_run(comp, src) 192 | if ret == CompCode.Crash: 193 | return CompCode.Crash 194 | if ret == CompCode.Timeout: 195 | return CompCode.Timeout 196 | if ret != CompCode.OK: 197 | return CompCode.Error 198 | cksum_list.append(cksum) 199 | if len(cksum_list) != len(compilers) or len(set(cksum_list)) != 1: 200 | maybe_WrongEval = True 201 | for i in range(len(compilers)): 202 | for j in range(i+1, len(compilers)): 203 | if compilers[i].split(' ')[0] == compilers[j].split(' ')[0] and cksum_list[i] != cksum_list[j]: 204 | maybe_WrongEval = False 205 | if maybe_WrongEval: 206 | return CompCode.WrongEval 207 | return CompCode.Wrong 208 | return CompCode.OK 209 | 210 | def run_one(compilers:list[str], dst_dir:Path, SYNER:Synthesizer) -> Path | None: 211 | """Run compiler testing 212 | """ 213 | save_realsmith_dir = (dst_dir) 214 | succ_file_id = id_generator() 215 | src = str((dst_dir / f'{succ_file_id}_seed.c').absolute()) 216 | print_blue('Generating seed...') 217 | while True: 218 | cmd = f"{CSMITH_HOME}/bin/csmith {CSMITH_USER_OPTIONS} --output {src}" 219 | ret, out = run_cmd(cmd, CSMITH_TIMEOUT) 220 | if ret != 0: 221 | print("csmith failed: generation.") 222 | continue 223 | # check size 224 | if os.path.getsize(src) < MIN_PROGRAM_SIZE: 225 | print("csmith failed: small program.") 226 | continue 227 | # check sanitization 228 | if check_sanitizers(src) and check_ccomp(src): 229 | break 230 | print("csmith failed: sanitization.") 231 | print_blue(f"Seed generated: {src}") 232 | ret = check_compile(src, compilers) 233 | print_blue('Synthesizing mutants...') 234 | # synthesize 235 | try: 236 | syn_files = SYNER.synthesizer(src_filename=src, num_mutant=NUM_MUTANTS, DEBUG=DEBUG) 237 | except: 238 | print('SynthesizerError!') 239 | os.remove(src) 240 | return 0 241 | 242 | print_green(f'Synthesizing done! Programs saved as {src.replace(".c", "_syn*.c")}') 243 | 244 | return 0 245 | 246 | 247 | if __name__=='__main__': 248 | parser = argparse.ArgumentParser(description="Generate a number of realsmith mutants for evaluation.") 249 | parser.add_argument("--dst", required=True, type=Path, help="Destination directory for generated seeds.") 250 | parser.add_argument("--syn-prob", required=True, type=int, help="Synthesis probability") 251 | parser.add_argument("--num-mutants", required=True, type=int, help="The number of mutants per seed by realsmith") 252 | args = parser.parse_args() 253 | 254 | dst_dir = Path(args.dst) 255 | dst_dir.mkdir(parents=True, exist_ok=True) 256 | 257 | NUM_MUTANTS = args.num_mutants 258 | 259 | compilers = [ 260 | "gcc -O0", 261 | "clang -O0" 262 | ] 263 | SYNER = Synthesizer(func_database=FUNCTION_DB_FILE, prob=args.syn_prob) 264 | with TempDirEnv() as tmp_dir: 265 | os.environ['TMPDIR'] = tmp_dir.absolute().as_posix() 266 | total = 0 267 | ret = run_one(compilers, dst_dir, SYNER) 268 | -------------------------------------------------------------------------------- /databaseconstructor/IOGenerator.py: -------------------------------------------------------------------------------- 1 | import re 2 | from shutil import which 3 | from pathlib import Path 4 | from diopter.compiler import ( 5 | CompilationSetting, 6 | CompilerExe, 7 | CompilerProject, 8 | ExeCompilationOutput, 9 | OptLevel, 10 | SourceProgram, 11 | Language 12 | ) 13 | from diopter.sanitizer import Sanitizer 14 | from variable import * 15 | from functioner import * 16 | from proxy import generate_proxy_function, generate_closure_program 17 | 18 | 19 | class IOGenerator(): 20 | """An IO generator for a C function 21 | """ 22 | def __init__(self) -> None: 23 | """Generate a valid input for the input function and return its IO pair 24 | Args: 25 | input_func (Function): the input function 26 | max_try_time (int): the max number of times we try to find a valid input 27 | """ 28 | self.compilers = [] 29 | for opt in ['O0', 'O1', 'O2', 'O3', 'Os']: 30 | self.compilers.append( 31 | CompilationSetting( 32 | compiler=CompilerExe.get_system_gcc(), 33 | opt_level=OptLevel.from_str(opt), 34 | flags=("-march=native",), 35 | ) 36 | ) 37 | self.compilers.append( 38 | CompilationSetting( 39 | compiler=CompilerExe.get_system_clang(), 40 | opt_level=OptLevel.from_str(opt), 41 | flags=("-march=native",), 42 | ) 43 | ) 44 | 45 | 46 | self.sanitizer = Sanitizer(use_ub_address_sanitizer=True, use_memory_sanitizer=True, use_ccomp_if_available=True) 47 | 48 | if which("typesanitizer") is None: 49 | self.typesanitizer = None 50 | else: 51 | self.typesanitizer = CompilationSetting( 52 | compiler=CompilerExe(CompilerProject.LLVM, Path(which("typesanitizer")), "main"), 53 | opt_level=OptLevel.from_str("O0"), 54 | flags=("-march=native", "-fsanitize=type",), 55 | ) 56 | 57 | def generate(self, input_func:Function, max_try_time:int=5, debug:bool=False) -> tuple[Optional[list], Optional[Function]]: 58 | """Generate a valid input for the input function and return its IO pair 59 | Args: 60 | input_func (Function): the input function 61 | max_try_time (int): the max number of times we try to find a valid input 62 | Return: 63 | None: failed to find a valid IO pair within max_try_time 64 | list: the [input, output] pair 65 | """ 66 | try_time = 0 67 | # try with random input ranges 68 | while try_time < max_try_time: 69 | inp = self.get_input(input_func) 70 | try: 71 | out, new_func = self.synthesize_proxy(input_func, inp, debug) 72 | return [inp, out], new_func # succeeded to generate a valida IO pair 73 | except ValidateError: 74 | try_time += 1 75 | # try with a small input range 76 | try_time = 0 77 | while try_time < max_try_time: 78 | inp = self.get_input(input_func, given_min=-5, given_max=5) 79 | try: 80 | out, new_func = self.synthesize_proxy(input_func, inp) 81 | return [inp, out], new_func # succeeded to generate a valida IO pair 82 | except ValidateError: 83 | try_time += 1 84 | 85 | return None, None # failed to generate a valid IO pair 86 | 87 | def get_input(self, input_func:Function, to_string=False, given_min=None, given_max=None): 88 | """Randomly select a valid for each input arg 89 | """ 90 | inp = [] 91 | for arg_type in input_func.args_type: 92 | if arg_type == VarType.VOID: 93 | continue 94 | v = VarType.get_random_value(arg_type, given_min=given_min, given_max=given_max) 95 | if to_string and type(v) is str: 96 | v = f'"{v}"' 97 | else: 98 | v = str(v) 99 | inp.append(v) 100 | return inp 101 | 102 | def parse_output(self, out:str) -> Optional[str]: 103 | res = re.findall(r'ret=(.*)', out.strip()) 104 | if len(res) == 0: 105 | return None 106 | return res[0] 107 | 108 | def compile_and_run(self, compiler:CompilationSetting, program:SourceProgram) -> str: 109 | """Compile and run the program 110 | """ 111 | try: 112 | comp_out = compiler.compile_program(program, ExeCompilationOutput(), timeout=5) 113 | except: 114 | raise ValidateError 115 | try: 116 | out = comp_out.output.run(timeout=5) 117 | except: 118 | raise ValidateError 119 | out = comp_out.output.run(timeout=5) 120 | return self.parse_output(out.stdout) 121 | 122 | def check_type_sanitizer(self, program:SourceProgram) -> bool: 123 | """Validate if the program violates the strict aliasing rule 124 | """ 125 | # when typesanitizer is not available 126 | if self.typesanitizer is None: 127 | return True 128 | try: 129 | comp_out = self.typesanitizer.compile_program(program, ExeCompilationOutput(), timeout=5) 130 | except: 131 | return True 132 | try: 133 | out = comp_out.output.run(timeout=5) 134 | except: 135 | return False 136 | out = comp_out.output.run(timeout=5) 137 | if "TypeSanitizer" in out.stderr: 138 | return False 139 | return True 140 | 141 | def execute_program(self, src:str, debug:bool=False) -> str: 142 | """ 143 | Validate if the function is valid under the given input; if yes, return the output 144 | """ 145 | prog = SourceProgram(code=src, language=Language.C) 146 | sanitizer_ret = self.sanitizer.sanitize(prog) 147 | if not sanitizer_ret: 148 | raise ValidateError 149 | # verify with TypeSanitizer 150 | typesanitizer_ret = self.check_type_sanitizer(prog) 151 | if not typesanitizer_ret: 152 | raise ValidateError 153 | 154 | # verify consistent outputs from all compilers 155 | out_list = [] 156 | for compiler in self.compilers: 157 | out = self.compile_and_run(compiler, prog) 158 | if out_list != []: 159 | if out_list[0] != out: 160 | raise InconsistentOutputError 161 | out_list.append(out) 162 | if len(set(out_list)) != 1: 163 | raise InconsistentOutputError 164 | out = out_list[0] 165 | 166 | return out 167 | 168 | def synthesize_proxy(self, input_func:Function, inp:list[str], debug:bool=False) -> tuple[str, Function]: 169 | """ 170 | Synthesize a proxy function if needed and generate output from the given input on the new proxy function. 171 | """ 172 | # get the proxy function 173 | proxy_function = generate_proxy_function(input_func, inp) 174 | closure_program, new_input_function = generate_closure_program(proxy_function, input_func, inp) 175 | if debug: 176 | with open('debug.c', 'w') as f: 177 | f.write(closure_program) 178 | out = self.execute_program(closure_program, debug) 179 | return out, new_input_function 180 | 181 | 182 | class ValidateError(Exception): 183 | """Validate failure 184 | """ 185 | pass 186 | 187 | class InconsistentOutputError(Exception): 188 | """The output across compilers is inconsistent 189 | This is a workaround for the strict aliasing issue 190 | """ 191 | pass 192 | -------------------------------------------------------------------------------- /databaseconstructor/__init__.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.path.dirname(__file__)) 3 | import functioner, IOGenerator, variable 4 | 5 | 6 | __all__ = [ 7 | "functioner", 8 | "IOGenerator", 9 | "variable", 10 | ] -------------------------------------------------------------------------------- /databaseconstructor/displayfunc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import io, json, random, sys 3 | from pygments import highlight 4 | from pygments.lexers import CppLexer, PythonLexer 5 | from pygments.formatters import TerminalFormatter 6 | from functioner import * 7 | 8 | if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]): 9 | raise ValueError("Please input the function database json file and make sure it exist.") 10 | 11 | functiondb = FunctionDB(sys.argv[1]) 12 | 13 | while True: 14 | rand_id = random.randint(0, len(functiondb)-1) 15 | # if functiondb[rand_id].has_io: 16 | break 17 | 18 | # display_message = highlight(functiondb[rand_id].function_body, PythonLexer(), TerminalFormatter()) 19 | io_str = '' 20 | # for each_io in functiondb[rand_id].io_list: 21 | # inp = each_io[0] 22 | # out = each_io[1] 23 | # io_str += f"input: {inp}\noutput: {out}\n" 24 | display_message = f"{functiondb[rand_id].function_body}\n\n/*\n{io_str}*/" 25 | print(display_message) 26 | -------------------------------------------------------------------------------- /databaseconstructor/functioner.py: -------------------------------------------------------------------------------- 1 | import os, json 2 | from typing import Optional 3 | from variable import * 4 | 5 | 6 | class Function: 7 | """A container for a function 8 | """ 9 | call_name = '' 10 | args_type = '' 11 | return_type = '' 12 | function_body = '' 13 | io_list = [] 14 | misc = [] 15 | src_file = '' 16 | include_headers = [] 17 | include_sources = [] 18 | is_valid:bool = False 19 | has_io:bool = False 20 | load_from_file:bool = False 21 | def __init__(self, func_json:dict) -> None: 22 | if 'function_name' in func_json: 23 | self.call_name = func_json['function_name'] 24 | else: 25 | return 26 | if 'parameter_types' in func_json: 27 | self.args_type = VarType.from_list(func_json['parameter_types']) 28 | else: 29 | return 30 | if 'return_type' in func_json: 31 | if isinstance(func_json['return_type'], VarType): 32 | self.return_type = func_json['return_type'] 33 | else: 34 | self.return_type = VarType.from_str(func_json['return_type']) 35 | else: 36 | return 37 | if 'function' in func_json: 38 | self.function_body = func_json['function'] 39 | else: 40 | return 41 | # no io_list is fine as we may add it later 42 | if 'io_list' in func_json: 43 | self.set_io(func_json['io_list']) 44 | self.is_valid = True 45 | if 'misc' in func_json: 46 | self.misc = func_json['misc'] 47 | if 'src_file' in func_json: 48 | self.src_file = func_json['src_file'] 49 | self.load_from_file = True 50 | if 'include_headers' in func_json: 51 | self.include_headers = func_json['include_headers'] 52 | if 'include_sources' in func_json: 53 | self.include_sources = func_json['include_sources'] 54 | 55 | def to_json(self): 56 | out_str = { 57 | "function_name": self.call_name, 58 | "parameter_types": [VarType.to_str(t) for t in self.args_type], 59 | "return_type": VarType.to_str(self.return_type), 60 | "function": self.function_body, 61 | "io_list": self.io_list, 62 | "misc": self.misc, 63 | "src_file": self.src_file, 64 | "include_headers": self.include_headers, 65 | "include_sources": self.include_sources 66 | } 67 | return out_str 68 | 69 | def set_io(self, io_list:list) -> None: 70 | self.io_list = io_list 71 | self.has_io = True if len(io_list) > 0 else False 72 | self.num_io = len(io_list) 73 | 74 | def get_random_io(self): 75 | return random.choice(self.io_list) 76 | 77 | 78 | class FunctionDB: 79 | """A database class that contains a set of Function() 80 | """ 81 | def __init__(self, func_db_file:Optional[str]=None) -> None: 82 | self.all_functions = [] 83 | if func_db_file is None: 84 | return 85 | if not os.path.exists(func_db_file): 86 | ValueError(f"{func_db_file} does not exist!") 87 | with open(func_db_file, 'r') as f: 88 | raw_function_db = json.loads(f.read()) 89 | for func_json in raw_function_db: 90 | func = Function(func_json) 91 | if func.is_valid: 92 | self.all_functions.append(func) 93 | 94 | def from_list(self, function_list:list[Function])->None: 95 | for function in function_list: 96 | assert isinstance(function, Function) 97 | self.all_functions = function_list 98 | 99 | def __len__(self): 100 | return len(self.all_functions) 101 | 102 | def __iter__(self): 103 | self.curr = 0 104 | return self 105 | 106 | def __next__(self): 107 | if self.curr < len(self): 108 | self.curr += 1 109 | return self.all_functions[self.curr-1] 110 | else: 111 | raise StopIteration 112 | 113 | def __getitem__(self, i): 114 | return self.all_functions[i] 115 | 116 | def to_json(self): 117 | return [func.to_json() for func in self] 118 | 119 | def append(self, function:Function): 120 | assert isinstance(function, Function) 121 | self.all_functions.append(function) 122 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | project(FunctionExtractor LANGUAGES CXX C) 3 | 4 | find_package(LLVM REQUIRED CONFIG) 5 | 6 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 7 | message(STATUS "Using LLVMConfig.cmake in ${LLVM_DIR}") 8 | 9 | list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) 10 | 11 | find_package(Clang REQUIRED CONFIG) 12 | message(STATUS "Using ClangConfig.cmake in ${Clang_DIR}") 13 | list(APPEND CMAKE_MODULE_PATH ${CLANG_CMAKE_DIR}) 14 | include(${CLANG_CMAKE_DIR}/AddClang.cmake) 15 | 16 | if("${LLVM_VERSION_MAJOR}" VERSION_LESS 13) 17 | message(FATAL_ERROR "Found LLVM ${LLVM_VERSION_MAJOR}, but need LLVM >= 13") 18 | endif() 19 | 20 | find_package(nlohmann_json REQUIRED) 21 | 22 | 23 | if (NOT CMAKE_BUILD_TYPE) 24 | set(CMAKE_BUILD_TYPE Release) 25 | endif () 26 | 27 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -march=native") 28 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -w") 29 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -w") 30 | set(CMAKE_CXX_STANDARD 17) 31 | 32 | if(${LLVM_REQUIRES_RTTI}) 33 | else() 34 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") 35 | endif(${LLVM_REQUIRES_RTTI}) 36 | 37 | separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) 38 | add_definitions(${LLVM_DEFINITIONS_LIST}) 39 | 40 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 41 | add_compile_options (-fdiagnostics-color=always) 42 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 43 | add_compile_options (-fcolor-diagnostics) 44 | endif () 45 | 46 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 47 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 48 | 49 | add_subdirectory(src) 50 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/README.md: -------------------------------------------------------------------------------- 1 | #### Function filter 2 | 3 | This tool checks all defined functions in an input file and prints the names of 4 | these with the following properties: 5 | 6 | - All arguments are of type `int` 7 | - The return value is of type `int` 8 | - All references variables are locals 9 | - No other functions are called 10 | 11 | #### Prerequisites 12 | 13 | LLVM, clang, cmake, parallel, ninja 14 | 15 | 16 | #### Build 17 | 18 | ``` 19 | make build 20 | cd build 21 | cmake .. -G Ninja 22 | ninja 23 | ``` 24 | 25 | ### Run 26 | 27 | ``` 28 | ./build/bin/function-filter input_file.c -- 29 | ``` 30 | The output is of the form: 31 | ``` 32 | File:/path/to/input/file Function:function 33 | ``` 34 | 35 | `./find_functions.sh dir` will filter all functions in files under `dir` 36 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, argparse, json, tempfile, re, random, string 3 | from pathlib import Path 4 | import subprocess as sp 5 | from copy import deepcopy 6 | import multiprocessing as mp 7 | from tqdm import tqdm 8 | from diopter.compiler import ( 9 | CompilationSetting, 10 | CompilerExe, 11 | ExeCompilationOutput, 12 | OptLevel, 13 | SourceProgram, 14 | Language 15 | ) 16 | 17 | # path of functionextractor 18 | FUNCTION_EXTRACTOR_PATH = os.path.join(os.path.dirname(__file__), 'build/bin/functionextractor') 19 | # compiler args such as -I$CSMITH_HOME/include 20 | CC_ARGS = '' 21 | # minimum size of extracted function in tokens separated by space 22 | MIN_FUNC_SIZE = 0 23 | 24 | def run_cmd(cmd, timeout=5): 25 | if type(cmd) is not list: 26 | cmd = cmd.split(' ') 27 | try: 28 | proc = sp.run(cmd, timeout=timeout, capture_output=True) 29 | return True if proc.returncode == 0 else False, proc.stdout.decode("utf-8") 30 | except: 31 | return False, '' 32 | 33 | def is_interesting_function(function_text): 34 | """Returns True if the function body is long enough (>MIN_FUNC_SIZE) 35 | """ 36 | body_start = function_text.find("{")+1 37 | body_end = function_text.rfind("}") 38 | if len([ x for x in function_text[body_start:body_end].replace("\t", " ").replace("\n", " ").replace("(1);", " ").split(" ") if x != ""]) < MIN_FUNC_SIZE: 39 | return False 40 | return True 41 | 42 | def extract_one_file(src_file): 43 | # preprocess the file 44 | with open(src_file, 'r') as f: 45 | prog = SourceProgram(code=f.read(), language=Language.C) 46 | comp = CompilationSetting( 47 | compiler=CompilerExe.get_system_clang(), 48 | opt_level=OptLevel.O0, 49 | ) 50 | pre_prog = comp.preprocess_program(prog) 51 | with tempfile.NamedTemporaryFile(suffix=".c", mode="w", delete=False) as tmp_f: 52 | tmp_f.write(pre_prog.get_modified_code()) 53 | tmp_f.close() 54 | # --mode process 55 | ret, _ = run_cmd(f'{FUNCTION_EXTRACTOR_PATH} --mode process {tmp_f.name} -- -w {CC_ARGS}') 56 | # --mode rename 57 | ret, _ = run_cmd(f'{FUNCTION_EXTRACTOR_PATH} --mode rename {tmp_f.name} -- -w {CC_ARGS}') 58 | # --mode rename-global 59 | ret, _ = run_cmd(f'{FUNCTION_EXTRACTOR_PATH} --mode rename-global {tmp_f.name} -- -w {CC_ARGS}') 60 | # --mode extract 61 | ret, res = run_cmd(f'{FUNCTION_EXTRACTOR_PATH} --mode extract {tmp_f.name} -- -w {CC_ARGS}') 62 | os.remove(tmp_f.name) 63 | if ret == False or res == '': 64 | return '' 65 | extracted_json = {"misc": [], "function": ""} 66 | to_replace_typedef_list = [] 67 | for item in res.split('\n'): 68 | if item.strip() == '': 69 | continue 70 | item_json = json.loads(item) 71 | if "typedef" in item_json: 72 | # extracted_json["misc"].append(item_json["typedef"] + ';') 73 | matched_typedef = re.findall(r'typedef\s+([\w|\_|\s|\*]+)\s([\w|\_]+)', item_json["typedef"]) 74 | if len(matched_typedef) > 0: 75 | to_replace_typedef_list.append(matched_typedef[0]) 76 | elif "global" in item_json: 77 | extracted_json["misc"].append(item_json["global"] + ' = ' + str(random.randint(-10, 20)) + ';') 78 | else: 79 | for key in item_json: 80 | extracted_json[key] = item_json[key] 81 | 82 | extracted_json["src_file"] = str(src_file) 83 | 84 | if not is_interesting_function(extracted_json["function"]): 85 | extracted_json = '' 86 | else: 87 | matched_realsmith_name = re.findall(r'(realsmith\_\w+)', extracted_json["function"]) 88 | if len(matched_realsmith_name) > 0: 89 | matched_realsmith_name = matched_realsmith_name[0] 90 | else: 91 | matched_realsmith_name = 'realsmith_' + ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(5)) 92 | while True: 93 | has_type_change = False 94 | for type_replace, type_orig in to_replace_typedef_list[::-1]: 95 | for key in extracted_json: 96 | if type(extracted_json[key]) is list: 97 | for i in range(len(extracted_json[key])): 98 | orig_extracted_json_value = deepcopy(extracted_json[key][i]) 99 | extracted_json[key][i] = re.sub(r'\b' + type_orig + r'\b', type_replace, extracted_json[key][i]) 100 | if orig_extracted_json_value != extracted_json[key][i]: 101 | has_type_change = True 102 | else: 103 | orig_extracted_json_value = deepcopy(extracted_json[key]) 104 | extracted_json[key] = re.sub(r'\b' + type_orig + r'\b', type_replace, extracted_json[key]) 105 | if orig_extracted_json_value != extracted_json[key]: 106 | has_type_change = True 107 | if not has_type_change: 108 | break 109 | 110 | return extracted_json 111 | 112 | if __name__=='__main__': 113 | 114 | parser = argparse.ArgumentParser(description='Extract closed functions from files.') 115 | parser.add_argument('--src', dest='SRC', required=True, help='C/C++ source directory.') 116 | parser.add_argument('--dst', dest='DST', default='./functions.json', help='(json) filename to store the extracted functions.') 117 | parser.add_argument('--cpu', dest='CPU', default=-1, type=int, help='number of io pairs generated for each function. (default=#ALL_CPUs)') 118 | parser.add_argument('--min', dest='MIN_SIZE', default=5, type=int, help='minimum size of a function in tokens. (default=5)') 119 | args = parser.parse_args() 120 | if not os.path.exists(args.SRC): 121 | print(f"Directory {args.SRC} does not exist!") 122 | parser.print_help() 123 | exit(1) 124 | MIN_FUNC_SIZE = args.MIN_SIZE 125 | 126 | src_files = list(Path(args.SRC).rglob('*.c')) 127 | src_files.extend(list(Path(args.SRC).rglob('*.cpp'))) 128 | 129 | cpu_count = mp.cpu_count() 130 | cpu_use = cpu_count if args.CPU == -1 else min(cpu_count, args.CPU) 131 | results = [] 132 | with tqdm(total=len(src_files)) as pbar, mp.Pool(cpu_use) as pool: 133 | for res in pool.imap(extract_one_file, src_files): 134 | pbar.update() 135 | if res != '': 136 | results.append(res) 137 | with open(args.DST, 'w') as f: 138 | json.dump(results, f) 139 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(FunctionExtractorLib 2 | Utils.cpp 3 | RuleActionCallback.cpp 4 | FunctionPrinter.cpp 5 | FunctionExtractor.cpp 6 | FunctionProcess.cpp 7 | ProcessCall.cpp 8 | RenameFunction.cpp 9 | RenameGlobal.cpp 10 | ) 11 | target_include_directories(FunctionExtractorLib PUBLIC ${CLANG_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 12 | 13 | if(CLANG_LINK_CLANG_DYLIB) 14 | target_link_libraries(FunctionExtractorLib PUBLIC LLVM) 15 | clang_target_link_libraries(FunctionExtractorLib PUBLIC) 16 | else() 17 | llvm_map_components_to_libnames(llvm_libs support core) 18 | target_link_libraries(FunctionExtractorLib PUBLIC ${llvm_libs} 19 | clangASTMatchers 20 | clangTransformer 21 | clangTooling) 22 | endif(CLANG_LINK_CLANG_DYLIB) 23 | 24 | add_subdirectory(tool) 25 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/FunctionExtractor.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "FunctionExtractor.hpp" 8 | 9 | namespace extractor { 10 | 11 | auto functionMatcher = functionDecl( 12 | isExpansionInMainFile(), 13 | isDefinition(), 14 | anyOf( 15 | returns(isInteger()), 16 | returns(isAnyCharacter()), 17 | returns(pointsTo(isInteger())), 18 | returns(pointsTo(isAnyCharacter())) 19 | ), 20 | anyOf( 21 | unless(hasDescendant(parmVarDecl(unless(anyOf( 22 | hasType(isInteger()), 23 | hasType(isAnyCharacter()), 24 | hasType(pointsTo(isInteger())), 25 | hasType(pointsTo(isAnyCharacter())) 26 | ))))), 27 | unless(hasDescendant(parmVarDecl())) 28 | ), 29 | unless(hasDescendant(declRefExpr(to(varDecl(unless(anyOf(hasLocalStorage(), hasType(isInteger())))))))), 30 | unless(hasDescendant(invocation())) 31 | ).bind("function"); 32 | 33 | auto typedefMatcher = typedefDecl( 34 | isExpansionInMainFile(), 35 | anyOf( 36 | hasType(isInteger()), 37 | hasType(isAnyCharacter()), 38 | hasType(realFloatingPointType()), 39 | hasType(pointsTo(isInteger())), 40 | hasType(pointsTo(isAnyCharacter())), 41 | hasType(pointsTo(realFloatingPointType())) 42 | ) 43 | ).bind("typedef"); 44 | 45 | auto globalDeclMatcher = varDecl( 46 | isExpansionInMainFile(), 47 | hasGlobalStorage(), 48 | anyOf( 49 | hasType(isInteger()), 50 | hasType(isAnyCharacter()), 51 | hasType(realFloatingPointType()), 52 | hasType(pointsTo(isInteger())), 53 | hasType(pointsTo(isAnyCharacter())), 54 | hasType(pointsTo(realFloatingPointType())) 55 | ) 56 | ).bind("globalDecl"); 57 | 58 | } //namespace extractor 59 | 60 | 61 | extractor::FunctionExtractor::FunctionExtractor( 62 | std::map &FileToReplacements) 63 | : FileToReplacements{FileToReplacements} {} 64 | 65 | void extractor::FunctionExtractor::registerMatchers(clang::ast_matchers::MatchFinder &Finder) { 66 | Finder.addMatcher(functionMatcher, &Printer); 67 | Finder.addMatcher(typedefMatcher, &Printer); 68 | Finder.addMatcher(globalDeclMatcher, &Printer); 69 | } 70 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/FunctionExtractor.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "FunctionPrinter.hpp" 15 | 16 | namespace extractor { 17 | 18 | class FunctionExtractor { 19 | public: 20 | FunctionExtractor(std::map &FileToReplacements); 21 | FunctionExtractor(const FunctionExtractor &) = delete; 22 | FunctionExtractor(FunctionExtractor &&) = delete; 23 | 24 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 25 | 26 | 27 | private: 28 | std::map &FileToReplacements; 29 | std::map FileToNumberValueTrackers; 30 | printer::FunctionPrinter Printer; 31 | }; 32 | } // namespace extractor -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/FunctionPrinter.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "FunctionPrinter.hpp" 11 | 12 | using namespace clang; 13 | using namespace ast_matchers; 14 | 15 | namespace printer { 16 | 17 | std::string getFunctionAsText(const Decl *F, 18 | const SourceManager &SM, const LangOptions &lp) { 19 | auto SR = CharSourceRange::getTokenRange(F->getSourceRange()); 20 | return Lexer::getSourceText(SR, SM, lp).str(); 21 | } 22 | 23 | FunctionPrinter::FunctionPrinter(){} 24 | 25 | void FunctionPrinter::run(const clang::ast_matchers::MatchFinder::MatchResult &Result) { 26 | nlohmann::json J; 27 | if (const auto *F = Result.Nodes.getNodeAs("function")) { 28 | std::vector ParameterTypes; 29 | if (F->param_size() == 0) 30 | ParameterTypes.push_back("void"); 31 | const auto &SM = *Result.SourceManager; 32 | auto *FEntry = SM.getFileEntryForID( 33 | SM.getDecomposedLoc(F->getLocation()).first); 34 | // J["original_file"] = FEntry->getName(); // may crash 35 | J["function"] = getFunctionAsText(F, *Result.SourceManager, Result.Context->getLangOpts()); 36 | std::transform(F->param_begin(), F->param_end(), 37 | std::back_inserter(ParameterTypes), 38 | [](const auto &Param) -> std::string { 39 | return Param->getType().getAsString(); 40 | }); 41 | J["parameter_types"] = ParameterTypes; 42 | J["return_type"] = F->getReturnType().getAsString(); 43 | J["function_name"] = F->getName(); 44 | std::cout << J << '\n'; 45 | } 46 | if (const auto *F = Result.Nodes.getNodeAs("typedef")) { 47 | const auto &SM = *Result.SourceManager; 48 | J["typedef"] = getFunctionAsText(F, *Result.SourceManager, Result.Context->getLangOpts()); 49 | std::cout << J << '\n'; 50 | } 51 | if (const auto *F = Result.Nodes.getNodeAs("globalDecl")) { 52 | const auto &SM = *Result.SourceManager; 53 | J["global"] = getFunctionAsText(F, *Result.SourceManager, Result.Context->getLangOpts()); 54 | std::cout << J << '\n'; 55 | } 56 | } 57 | 58 | } // namespace printer 59 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/FunctionPrinter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | using namespace clang; 5 | using namespace ast_matchers; 6 | 7 | namespace printer { 8 | 9 | class FunctionPrinter : public clang::ast_matchers::MatchFinder::MatchCallback { 10 | public: 11 | FunctionPrinter(); 12 | FunctionPrinter(const FunctionPrinter &) = delete; 13 | FunctionPrinter(FunctionPrinter &&) = delete; 14 | void 15 | run(const clang::ast_matchers::MatchFinder::MatchResult &Result) override; 16 | }; 17 | 18 | std::string getFunctionAsText(const Decl *F, const SourceManager &SM, const LangOptions &lp); 19 | } // namespace printer -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/FunctionProcess.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "FunctionProcess.hpp" 8 | 9 | process::FunctionProcess::FunctionProcess( 10 | std::map &FileToReplacements) 11 | : FileToReplacements{FileToReplacements} { 12 | ruleCallbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 13 | process::processCallRule(), FileToReplacements, FileToNumberValueTrackers}); 14 | ruleCallbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 15 | process::processExternRule(), FileToReplacements, FileToNumberValueTrackers}); 16 | } 17 | 18 | void process::FunctionProcess::registerMatchers(clang::ast_matchers::MatchFinder &Finder) { 19 | for (auto &Callback : ruleCallbacks){ 20 | Callback.registerMatchers(Finder); 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/FunctionProcess.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "RuleActionCallback.hpp" 4 | #include "ProcessCall.hpp" 5 | #include "RenameFunction.hpp" 6 | 7 | namespace process { 8 | 9 | class FunctionProcess { 10 | public: 11 | FunctionProcess(std::map &FileToReplacements); 12 | FunctionProcess(const FunctionProcess &) = delete; 13 | FunctionProcess(FunctionProcess &&) = delete; 14 | 15 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 16 | 17 | 18 | private: 19 | std::map &FileToReplacements; 20 | std::vector ruleCallbacks; 21 | std::map FileToNumberValueTrackers; 22 | }; 23 | 24 | } // namespace process -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/ProcessCall.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "ProcessCall.hpp" 3 | 4 | namespace process { 5 | 6 | std::string getTypeValue(std::string typeStr) { 7 | std::string value = ""; 8 | /* char * */ 9 | if (typeStr.find("char*") != -1 || typeStr.find("char *") != -1) { 10 | value.append("\"0\""); 11 | return value; 12 | } 13 | /*ignore types*/ 14 | if ( 15 | typeStr.find("struct") != -1 || 16 | typeStr.find("union") != -1 || 17 | std::count(typeStr.begin(), typeStr.end(), '*') > 0 || 18 | std::count(typeStr.begin(), typeStr.end(), '[') > 0 19 | ) { 20 | return value; 21 | } 22 | /*integer*/ 23 | if ( 24 | typeStr.find("int") != -1 || 25 | typeStr.find("long") != -1 || 26 | typeStr.find("signed") != -1 27 | ) { 28 | value.append("1"); 29 | } 30 | /*char*/ 31 | else if ( 32 | typeStr.find("char") != -1 || 33 | typeStr.find("long") != -1 || 34 | typeStr.find("signed") != -1 35 | ) { 36 | value.append("\'a\'"); 37 | } 38 | return value; 39 | 40 | } 41 | 42 | class ProcessCallAction : public MatchComputation { 43 | public: 44 | ProcessCallAction() = default; 45 | llvm::Error eval(const ast_matchers::MatchFinder::MatchResult &mResult, 46 | std::string *Result) const override { 47 | const Expr *expr = mResult.Nodes.getNodeAs("call"); 48 | std::string typeStr = expr->getType().getDesugaredType(*mResult.Context).getAsString(); 49 | std::string replaceStr = getTypeValue(typeStr); 50 | if (replaceStr == "") { 51 | replaceStr = getExprAsText(expr, *mResult.SourceManager, mResult.Context->getLangOpts()); 52 | } 53 | Result->append("(" + replaceStr + ")"); 54 | return llvm::Error::success(); 55 | } 56 | std::string toString() const override { return "{}"; } 57 | static std::string getExprAsText(const Expr *E, const SourceManager &SM, const LangOptions &lp) { 58 | auto SR = CharSourceRange::getTokenRange(E->getSourceRange()); 59 | return Lexer::getSourceText(SR, SM, lp).str(); 60 | } 61 | }; 62 | 63 | 64 | struct clang::transformer::RewriteRule processCallRule() { 65 | auto callMatcher = functionDecl( 66 | isExpansionInMainFile(), 67 | isDefinition(), 68 | forEachDescendant( 69 | invocation(unless(hasAncestor(invocation()))).bind("call") 70 | ) 71 | ); 72 | 73 | return makeRule(callMatcher, { 74 | /*we don't use changeTo but two inserts is becase using changeTo will change locations of ast and thus causing some offset 75 | */ 76 | changeTo(node("call"), std::make_unique()), 77 | }); 78 | } 79 | 80 | /* Remove extern function declarations */ 81 | struct clang::transformer::RewriteRule processExternRule() { 82 | auto functionDeclMatcher = functionDecl( 83 | isExpansionInMainFile(), 84 | isDefinition(), 85 | forEachDescendant( 86 | functionDecl().bind("functionDecl") 87 | ) 88 | ); 89 | 90 | return makeRule(functionDeclMatcher, { 91 | /*we don't use changeTo but two inserts is becase using changeTo will change locations of ast and thus causing some offset 92 | */ 93 | changeTo(node("functionDecl"), cat("")), 94 | }); 95 | } 96 | 97 | 98 | } 99 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/ProcessCall.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "RuleActionCallback.hpp" 4 | #include "FunctionProcess.hpp" 5 | 6 | namespace process { 7 | 8 | struct clang::transformer::RewriteRule processCallRule(); 9 | struct clang::transformer::RewriteRule processExternRule(); 10 | 11 | } // namespace process -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/RenameFunction.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "RenameFunction.hpp" 4 | 5 | namespace process { 6 | 7 | class ProcessRenameFunctionAction : public MatchComputation { 8 | public: 9 | ProcessRenameFunctionAction() = default; 10 | llvm::Error eval(const ast_matchers::MatchFinder::MatchResult &mResult, 11 | std::string *Result) const override { 12 | const FunctionDecl *FD = mResult.Nodes.getNodeAs("function"); 13 | std::string func_name = FD->getNameAsString(); 14 | std::string new_func_name = "realsmith_" + extractor_utils::generate_random_string(5); 15 | // Result->append("realsmith_"+generate_random_string(5)); 16 | std::string func_str = getFunctionAsText(FD, *mResult.SourceManager, mResult.Context->getLangOpts()); 17 | size_t index = func_str.find(func_name); 18 | func_str.replace(index, func_name.length(), new_func_name); 19 | Result->append(func_str); 20 | return llvm::Error::success(); 21 | } 22 | 23 | std::string toString() const override { return "{}"; } 24 | static std::string getFunctionAsText(const FunctionDecl *F, 25 | const SourceManager &SM, const LangOptions &lp) { 26 | auto SR = CharSourceRange::getTokenRange(F->getSourceRange()); 27 | return Lexer::getSourceText(SR, SM, lp).str(); 28 | } 29 | }; 30 | 31 | 32 | struct clang::transformer::RewriteRule processRenameFunctionRule() { 33 | auto functionMatcher = functionDecl( 34 | isExpansionInMainFile(), 35 | isDefinition() 36 | ).bind("function"); 37 | 38 | return makeRule(functionMatcher, { 39 | changeTo(node("function"), std::make_unique()) 40 | }); 41 | } 42 | } 43 | 44 | process::RenameFunction::RenameFunction( 45 | std::map &FileToReplacements) 46 | : FileToReplacements{FileToReplacements} { 47 | ruleCallbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 48 | process::processRenameFunctionRule(), FileToReplacements, FileToNumberValueTrackers}); 49 | } 50 | 51 | void process::RenameFunction::registerMatchers(clang::ast_matchers::MatchFinder &Finder) { 52 | for (auto &Callback : ruleCallbacks){ 53 | Callback.registerMatchers(Finder); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/RenameFunction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "RuleActionCallback.hpp" 4 | 5 | namespace process { 6 | 7 | class RenameFunction { 8 | public: 9 | RenameFunction(std::map &FileToReplacements); 10 | RenameFunction(const RenameFunction &) = delete; 11 | RenameFunction(RenameFunction &&) = delete; 12 | 13 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 14 | 15 | 16 | private: 17 | std::map &FileToReplacements; 18 | std::vector ruleCallbacks; 19 | std::map FileToNumberValueTrackers; 20 | }; 21 | 22 | } // namespace process -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/RenameGlobal.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "RenameGlobal.hpp" 4 | 5 | namespace process { 6 | 7 | /* Rename global variable declaration */ 8 | struct clang::transformer::RewriteRule processRenameGlobalRule() { 9 | auto globalDeclMatcher = varDecl( 10 | isExpansionInMainFile(), 11 | hasGlobalStorage(), 12 | hasAncestor(translationUnitDecl( 13 | hasDescendant( 14 | functionDecl( 15 | isExpansionInMainFile(), 16 | isDefinition() 17 | ).bind("function") 18 | ) 19 | )) 20 | ).bind("globalDecl"); 21 | 22 | return makeRule(globalDeclMatcher, { 23 | insertAfter(name("globalDecl"), cat("_", name("function"))), 24 | }); 25 | } 26 | 27 | /* Rename global variable reference */ 28 | struct clang::transformer::RewriteRule processRenameGlobalRefRule() { 29 | auto globalDefRefMatcher = declRefExpr( 30 | isExpansionInMainFile(), 31 | to(varDecl( 32 | hasGlobalStorage() 33 | )), 34 | hasAncestor(functionDecl( 35 | isExpansionInMainFile(), 36 | isDefinition() 37 | ).bind("function") 38 | ) 39 | ).bind("globalDeclRef"); 40 | 41 | return makeRule(globalDefRefMatcher, { 42 | insertAfter(node("globalDeclRef"), cat("_", name("function"))), 43 | }); 44 | } 45 | 46 | } // namespace process 47 | 48 | process::RenameGlobal::RenameGlobal( 49 | std::map &FileToReplacements) 50 | : FileToReplacements{FileToReplacements} { 51 | ruleCallbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 52 | process::processRenameGlobalRule(), FileToReplacements, FileToNumberValueTrackers}); 53 | ruleCallbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 54 | process::processRenameGlobalRefRule(), FileToReplacements, FileToNumberValueTrackers}); 55 | } 56 | 57 | void process::RenameGlobal::registerMatchers(clang::ast_matchers::MatchFinder &Finder) { 58 | for (auto &Callback : ruleCallbacks){ 59 | Callback.registerMatchers(Finder); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/RenameGlobal.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "RuleActionCallback.hpp" 4 | 5 | namespace process { 6 | 7 | class RenameGlobal { 8 | public: 9 | RenameGlobal(std::map &FileToReplacements); 10 | RenameGlobal(const RenameGlobal &) = delete; 11 | RenameGlobal(RenameGlobal &&) = delete; 12 | 13 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 14 | 15 | 16 | private: 17 | std::map &FileToReplacements; 18 | std::vector ruleCallbacks; 19 | std::map FileToNumberValueTrackers; 20 | }; 21 | 22 | } // namespace process -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/RuleActionCallback.cpp: -------------------------------------------------------------------------------- 1 | #include "RuleActionCallback.hpp" 2 | 3 | using namespace clang; 4 | using namespace ast_matchers; 5 | using namespace transformer; 6 | 7 | namespace ruleactioncallback { 8 | std::string GetFilenameFromRange(const CharSourceRange &R, 9 | const SourceManager &SM) { 10 | const std::pair DecomposedLocation = 11 | SM.getDecomposedLoc(SM.getSpellingLoc(R.getBegin())); 12 | const FileEntry *Entry = SM.getFileEntryForID(DecomposedLocation.first); 13 | return std::string(Entry ? Entry->getName() : ""); 14 | 15 | } 16 | 17 | Expected getNode(const ast_matchers::BoundNodes &Nodes, 18 | StringRef ID) { 19 | auto &NodesMap = Nodes.getMap(); 20 | auto It = NodesMap.find(ID); 21 | if (It == NodesMap.end()) 22 | return llvm::make_error(llvm::errc::invalid_argument, 23 | ID + "not bound"); 24 | return It->second; 25 | } 26 | 27 | RangeSelector startOfFile(std::string ID) { 28 | return [ID](const clang::ast_matchers::MatchFinder::MatchResult &Result) 29 | -> Expected { 30 | auto Node = getNode(Result.Nodes, ID); 31 | if (!Node) 32 | return Node.takeError(); 33 | const auto &SM = Result.Context->getSourceManager(); 34 | auto Start = SM.getLocForStartOfFile( 35 | SM.getFileID(Node->getSourceRange().getBegin())); 36 | return CharSourceRange(SourceRange(Start), false); 37 | }; 38 | } 39 | 40 | }//namespace: ruleactioncallback 41 | 42 | ruleactioncallback::RuleActionCallback::RuleActionCallback( 43 | RewriteRule Rule, 44 | std::map &FileToReplacements, 45 | std::map &FileToNumberValueTrackers) 46 | : Rule{Rule}, FileToReplacements{FileToReplacements}, 47 | FileToNumberValueTrackers{FileToNumberValueTrackers} {} 48 | 49 | void ruleactioncallback::RuleActionCallback::run( 50 | const clang::ast_matchers::MatchFinder::MatchResult &Result) { 51 | 52 | 53 | if (Result.Context->getDiagnostics().hasErrorOccurred()) { 54 | llvm::errs() << "An error has occured.\n"; 55 | return; 56 | } 57 | Expected> Edits = 58 | transformer::detail::findSelectedCase(Result, Rule).Edits(Result); 59 | if (!Edits) { 60 | llvm::errs() << "Rewrite failed: " << llvm::toString(Edits.takeError()) 61 | << "\n"; 62 | return; 63 | } 64 | auto SM = Result.SourceManager; 65 | for (const auto &T : *Edits) { 66 | assert(T.Kind == transformer::EditKind::Range); 67 | auto FilePath = GetFilenameFromRange(T.Range, *SM); 68 | auto N = FileToNumberValueTrackers[FilePath]++; 69 | auto R = tooling::Replacement( 70 | *SM, T.Range, T.Replacement); 71 | auto &Replacements = FileToReplacements[FilePath]; 72 | auto Err = Replacements.add(R); 73 | if (Err) { 74 | auto NewOffset = Replacements.getShiftedCodePosition(R.getOffset()); 75 | auto NewLength = Replacements.getShiftedCodePosition( 76 | R.getOffset() + R.getLength()) - 77 | NewOffset; 78 | if (NewLength == R.getLength()) { 79 | R = clang::tooling::Replacement(R.getFilePath(), NewOffset, 80 | NewLength, 81 | R.getReplacementText()); 82 | Replacements = Replacements.merge(tooling::Replacements(R)); 83 | } else { 84 | llvm_unreachable(llvm::toString(std::move(Err)).c_str()); 85 | } 86 | } 87 | } 88 | 89 | } 90 | 91 | void ruleactioncallback::RuleActionCallback::registerMatchers( 92 | clang::ast_matchers::MatchFinder &Finder) { 93 | for (auto &Matcher : transformer::detail::buildMatchers(Rule)) 94 | Finder.addDynamicMatcher( 95 | Matcher.withTraversalKind(clang::TK_IgnoreUnlessSpelledInSource), 96 | this); 97 | } 98 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/RuleActionCallback.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "Utils.hpp" 21 | 22 | using namespace clang; 23 | using namespace ast_matchers; 24 | using namespace transformer; 25 | 26 | namespace ruleactioncallback { 27 | 28 | class RuleActionCallback 29 | : public clang::ast_matchers::MatchFinder::MatchCallback { 30 | public: 31 | RuleActionCallback( 32 | clang::transformer::RewriteRule Rule, 33 | std::map &FileToReplacements, 34 | std::map &FileToNumberValueTrackers); 35 | void 36 | run(const clang::ast_matchers::MatchFinder::MatchResult &Result) override; 37 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 38 | std::string getFunctionAsText(const clang::Decl *F, 39 | const clang::SourceManager &SM, const clang::LangOptions &lp); 40 | 41 | private: 42 | clang::transformer::RewriteRule Rule; 43 | std::map &FileToReplacements; 44 | std::map &FileToNumberValueTrackers; 45 | }; 46 | 47 | std::string GetFilenameFromRange(const CharSourceRange &R, const SourceManager &SM); 48 | RangeSelector startOfFile(std::string ID); 49 | 50 | } 51 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/Utils.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Utils.hpp" 4 | 5 | namespace extractor_utils{ 6 | 7 | std::string generate_random_string(int length) { 8 | static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 9 | static const int charset_size = sizeof(charset) - 1; 10 | std::random_device rd; 11 | std::mt19937 gen(rd()); 12 | std::uniform_int_distribution<> dis(0, charset_size - 1); 13 | std::string result(length, ' '); 14 | for (int i = 0; i < length; ++i) { 15 | result[i] = charset[dis(gen)]; 16 | } 17 | return result; 18 | } 19 | } -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/Utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "RuleActionCallback.hpp" 4 | 5 | namespace extractor_utils{ 6 | 7 | std::string generate_random_string(int length); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/tool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(functionextractor FunctionExtract.cpp) 2 | target_link_libraries(functionextractor PUBLIC FunctionExtractorLib) 3 | install(TARGETS functionextractor DESTINATION bin) 4 | 5 | -------------------------------------------------------------------------------- /databaseconstructor/functionextractor/src/tool/FunctionExtract.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "FunctionExtractor.hpp" 12 | #include "FunctionProcess.hpp" 13 | #include "RenameFunction.hpp" 14 | #include "RenameGlobal.hpp" 15 | 16 | using namespace llvm; 17 | using namespace clang; 18 | using namespace clang::tooling; 19 | using namespace clang::ast_matchers; 20 | 21 | namespace { 22 | 23 | enum class ToolMode { Extract, Process, Rename, RenameGlobal}; 24 | 25 | cl::OptionCategory ToolOptions("options"); 26 | 27 | cl::opt 28 | Mode("mode", cl::desc("Target functions to be extracted."), 29 | cl::values(clEnumValN(ToolMode::Extract, "extract", 30 | "Extract functions with nemeric input args, return type, no external function calls, and no global accesses." 31 | "The default mode."), 32 | clEnumValN(ToolMode::Process, "process", 33 | "Process functions to remove externel function calls."), 34 | clEnumValN(ToolMode::Rename, "rename", 35 | "Rename function name."), 36 | clEnumValN(ToolMode::RenameGlobal, "rename-global", 37 | "Rename global variables.") 38 | ), 39 | cl::init(ToolMode::Extract), 40 | cl::cat(ToolOptions)); 41 | 42 | bool applyReplacements(RefactoringTool &Tool) { 43 | LangOptions DefaultLangOptions; 44 | IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); 45 | clang::TextDiagnosticPrinter DiagnosticPrinter(errs(), &*DiagOpts); 46 | DiagnosticsEngine Diagnostics( 47 | IntrusiveRefCntPtr(new DiagnosticIDs()), &*DiagOpts, 48 | &DiagnosticPrinter, false); 49 | auto &FileMgr = Tool.getFiles(); 50 | SourceManager Sources(Diagnostics, FileMgr); 51 | 52 | Rewriter Rewrite(Sources, DefaultLangOptions); 53 | 54 | bool Result = true; 55 | for (const auto &FileAndReplaces : groupReplacementsByFile( 56 | Rewrite.getSourceMgr().getFileManager(), Tool.getReplacements())) { 57 | auto &CurReplaces = FileAndReplaces.second; 58 | 59 | Result = applyAllReplacements(CurReplaces, Rewrite) && Result; 60 | } 61 | if (!Result) { 62 | llvm::errs() << "Failed applying all replacements.\n"; 63 | return false; 64 | } 65 | 66 | return !Rewrite.overwriteChangedFiles(); 67 | } 68 | 69 | template int runToolOnCode(RefactoringTool &Tool) { 70 | InstrTool Instr(Tool.getReplacements()); 71 | ast_matchers::MatchFinder Finder; 72 | Instr.registerMatchers(Finder); 73 | std::unique_ptr Factory = 74 | tooling::newFrontendActionFactory(&Finder); 75 | 76 | auto Ret = Tool.run(Factory.get()); 77 | if (std::is_same::value) { 78 | return Ret; 79 | } 80 | if (!Ret) 81 | if (!applyReplacements(Tool)) { 82 | llvm::errs() << "Failed to overwrite the input files.\n"; 83 | return 1; 84 | } 85 | 86 | return Ret; 87 | } 88 | 89 | 90 | } // namespace 91 | 92 | int main(int argc, const char **argv) { 93 | auto ExpectedParser = 94 | CommonOptionsParser::create(argc, argv, ToolOptions); 95 | if (!ExpectedParser) { 96 | llvm::errs() << ExpectedParser.takeError(); 97 | return 1; 98 | } 99 | CommonOptionsParser &OptionsParser = ExpectedParser.get(); 100 | 101 | const auto &Compilations = OptionsParser.getCompilations(); 102 | const auto &Files = OptionsParser.getSourcePathList(); 103 | RefactoringTool Tool(Compilations, Files); 104 | int Result = 0; 105 | 106 | if (Mode == ToolMode::Extract) { 107 | Result = runToolOnCode(Tool); 108 | if (Result) { 109 | llvm::errs() << "Something went wrong...\n"; 110 | return Result; 111 | } 112 | } else if (Mode == ToolMode::Process) { 113 | Result = runToolOnCode(Tool); 114 | if (Result) { 115 | llvm::errs() << "Something went wrong...\n"; 116 | return Result; 117 | } 118 | } else if (Mode == ToolMode::Rename) { 119 | Result = runToolOnCode(Tool); 120 | if (Result) { 121 | llvm::errs() << "Something went wrong...\n"; 122 | return Result; 123 | } 124 | } else if (Mode == ToolMode::RenameGlobal) { 125 | Result = runToolOnCode(Tool); 126 | if (Result) { 127 | llvm::errs() << "Something went wrong...\n"; 128 | return Result; 129 | } 130 | } 131 | 132 | return 0; 133 | } -------------------------------------------------------------------------------- /databaseconstructor/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, argparse, json, warnings 3 | from pathlib import Path 4 | from tqdm import tqdm 5 | import multiprocessing as mp 6 | from IOGenerator import * 7 | from functioner import * 8 | 9 | DEBUG = False 10 | 11 | NUM_IO=5 12 | 13 | def generate_io(input_func: Function)->Function: 14 | """Generate IO pairs 15 | Args: 16 | input_func (Function) : the input function 17 | """ 18 | # remove inline keyword in input_func to avoid undefined reference to non-static inlined function 19 | if 'inline ' in input_func.function_body and 'static' not in input_func.function_body: 20 | input_func.function_body = input_func.function_body.replace('inline ', ' ') 21 | 22 | iogenerator = IOGenerator() 23 | io_list = [] 24 | num_generated = 0 25 | new_func = None 26 | while num_generated < NUM_IO: 27 | try: 28 | io, generated_new_func = iogenerator.generate(input_func, debug=DEBUG) 29 | except InconsistentOutputError: 30 | # we probably meet a violation of strict aliasing 31 | new_func = None 32 | io_list = [] 33 | break 34 | if io is not None and io not in io_list: 35 | io_list.append(io) 36 | if generated_new_func is not None: 37 | new_func = generated_new_func 38 | num_generated += 1 39 | if new_func is not None and 'realsmith_proxy' in new_func.function_body: # we do not support more than 1 NUM_IO for functions with proxy because each time we call iogenerator.generate(input_func), the proxy function would change. 40 | break 41 | if len(io_list) != 0: 42 | new_func.set_io(io_list) 43 | return new_func 44 | else: 45 | return None 46 | 47 | 48 | if __name__=='__main__': 49 | 50 | parser = argparse.ArgumentParser(description='Generate IO pairs for funtioncs.') 51 | parser.add_argument('--src', dest='SRC', required=True, help='path to the source function_db_file.') 52 | parser.add_argument('--dst', dest='DST', required=True, help='path to the destination function_db_file with io.') 53 | parser.add_argument('--num', dest='NUM', default=5, type=int, help='number of io pairs generated for each function. (default=5)') 54 | parser.add_argument('--cpu', dest='CPU', default=-1, type=int, help='number of io pairs generated for each function. (default=#ALL_CPUs)') 55 | args = parser.parse_args() 56 | if not os.path.exists(args.SRC): 57 | print(f"File {args.SRC} does not exist!") 58 | parser.print_help() 59 | exit(1) 60 | NUM_IO = args.NUM 61 | 62 | # construct function database 63 | functiondb = FunctionDB(args.SRC) 64 | new_functiondb = FunctionDB() 65 | 66 | if DEBUG: 67 | for func in functiondb: 68 | new_func = generate_io(func) 69 | if new_func is not None: 70 | new_functiondb.append(new_func) 71 | with open(args.DST, "w") as f: 72 | json.dump(new_functiondb.to_json(), f) 73 | exit(0) 74 | 75 | # typesanitizer is used in IOGenerator.py 76 | if which('typesanitizer') is None: 77 | warnings.warn("The compiler `typesanitizer` is not found in your path and thus possible misaligned types could happen in programs.") 78 | 79 | cpu_count = mp.cpu_count() 80 | cpu_use = cpu_count if args.CPU == -1 else min(cpu_count, args.CPU) 81 | with tqdm(total=len(functiondb)) as pbar, mp.Pool(cpu_use) as pool: 82 | for idx, new_func in enumerate(pool.imap(generate_io, functiondb)): 83 | pbar.update() 84 | if new_func is not None: 85 | new_functiondb.append(new_func) 86 | 87 | with open(args.DST, "w") as f: 88 | json.dump(new_functiondb.to_json(), f) 89 | 90 | -------------------------------------------------------------------------------- /databaseconstructor/proxy.py: -------------------------------------------------------------------------------- 1 | import os, string 2 | from copy import deepcopy 3 | from variable import * 4 | from functioner import * 5 | 6 | 7 | def generate_random_string(len:int=5) -> str: 8 | """Generate a random string of length len""" 9 | return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(len)) 10 | 11 | def generate_proxy_function(input_func:Function, synthesized_input:list[str], expose_pointer:bool=False) -> str: 12 | """ 13 | Synthesize a proxy function for a function. 14 | When there are unsupported types in func_arg_tpyes such as pointers and structs, 15 | proxy function can make these types transparent to invokers. 16 | Args: 17 | input_func: the input function of type Function 18 | synthesized_input: the input list 19 | expose_pointer: whether to keep pointers in the parameters of proxy function 20 | """ 21 | # no need for a proxy function if all input args and return are base type. 22 | need_proxy = False 23 | for input_arg_type in input_func.args_type: 24 | if input_arg_type != VarType.get_base_type(input_arg_type): 25 | need_proxy = True 26 | break 27 | if input_func.return_type != VarType.get_base_type(input_func.return_type): 28 | need_proxy = True 29 | if not need_proxy: 30 | return Function({}) 31 | 32 | if expose_pointer: 33 | return generate_proxy_function_expose_pointer(input_func, synthesized_input) 34 | 35 | return generate_proxy_function_hide_pointer(input_func, synthesized_input) 36 | 37 | def generate_proxy_function_hide_pointer(input_func:Function, synthesized_input:list[str]) -> str: 38 | # proxy function name 39 | proxy_function_name = f"realsmith_proxy_{generate_random_string(5)}" 40 | proxy_args_var = [f"p_{idx}_{generate_random_string(5)}" for idx in range(len(input_func.args_type))] 41 | 42 | proxy_args_type = [] 43 | for input_arg_type in input_func.args_type: 44 | # use base type of the original type if it is a base type or a pointer of base type 45 | if not VarType.is_unsupport_type(input_arg_type): 46 | proxy_args_type.append(VarType.get_base_type(input_arg_type)) 47 | # use a random type otherwise 48 | else: 49 | raise ValueError(f"Not supported yet {VarType.to_str(input_arg_type)}") 50 | proxy_args_str = ", ".join([f"{VarType.to_str(proxy_args_type[idx])} {proxy_args_var[idx]}" if proxy_args_type[idx] != VarType.VOID else "" for idx in range(len(proxy_args_var))]) 51 | 52 | pre_call_to_input_func = [] 53 | call_args_to_input_func = [] 54 | post_call_to_input_func = [] 55 | 56 | for idx in range(len(input_func.args_type)): 57 | # we do not need to call to function with void parameter. 58 | if proxy_args_type[idx] == VarType.VOID: 59 | continue 60 | # base type or a pointer of base type 61 | if proxy_args_type[idx] == input_func.args_type[idx]: 62 | call_args_to_input_func.append(proxy_args_var[idx]) 63 | # pointer 64 | elif proxy_args_type[idx] == VarType.get_base_type(input_func.args_type[idx]): 65 | proxy_args_idx_base_type = VarType.get_base_type(proxy_args_type[idx]) 66 | match random.choice(['pointer', 'array']): 67 | case 'pointer': 68 | # call arg 69 | call_args_to_input_func.append(f"&({proxy_args_var[idx]})") 70 | case 'array': 71 | proxy_var_arr = f"proxy_{generate_random_string(5)}" 72 | # randomly decide arrat length, use [10, 20) 73 | arr_len = random.randint(10, 20) 74 | # initialize array 75 | arr_values = [] 76 | for _ in range(arr_len): 77 | arr_values.append(random.choice([f"{proxy_args_var[idx]}", str(VarType.get_random_value(proxy_args_idx_base_type))])) 78 | arr_init = ', '.join(arr_values) 79 | pre_call_to_input_func.append(f"{VarType.to_str(proxy_args_idx_base_type)} {proxy_var_arr}[{arr_len}] = {{ {arr_init} }};") 80 | # call arg 81 | call_args_to_input_func.append(proxy_var_arr) 82 | # unusual types such as pointer 83 | else: 84 | raise VarType(f"(TODO) Unsupported type, maybe a struct.") 85 | 86 | proxy_function_body = f"{VarType.to_str(VarType.get_base_type(input_func.return_type))} {proxy_function_name}({proxy_args_str}) {{\n" 87 | 88 | # pre call 89 | for pre_call in pre_call_to_input_func: 90 | proxy_function_body += f"{pre_call}\n" 91 | # call 92 | proxy_ret_var = f"proxy_ret_{generate_random_string(5)}" 93 | proxy_ret_type = input_func.return_type 94 | call_args_str = ', '.join(call_args_to_input_func) 95 | proxy_function_body += f"{VarType.to_str(proxy_ret_type)} {proxy_ret_var} = {input_func.call_name}({call_args_str});\n" 96 | # post call 97 | for post_call in post_call_to_input_func: 98 | proxy_function_body += f"{post_call}\n" 99 | # return 100 | if proxy_ret_type == VarType.get_base_type(proxy_ret_type): 101 | proxy_function_body += f"return {proxy_ret_var};\n" 102 | else: 103 | proxy_function_body += f"return *{proxy_ret_var};\n" 104 | proxy_ret_type = VarType.get_base_type(proxy_ret_type) 105 | 106 | proxy_function_body += "}\n" 107 | 108 | proxy_function = { 109 | "function_name": proxy_function_name, 110 | "parameter_types": proxy_args_type, 111 | "return_type": proxy_ret_type, 112 | "function": proxy_function_body 113 | } 114 | 115 | return Function(proxy_function) 116 | 117 | 118 | def generate_proxy_function_expose_pointer(input_func:Function, synthesized_input:list[str]) -> str: 119 | # proxy function name 120 | proxy_function_name = f"realsmith_proxy_{generate_random_string(5)}" 121 | proxy_args_var = [f"p_{idx}_{generate_random_string(5)}" for idx in range(len(input_func.args_type))] 122 | 123 | proxy_args_type = [] 124 | for input_arg_type in input_func.args_type: 125 | # use original type if it is a base type or a pointer of base type 126 | if not VarType.is_unsupport_type(input_arg_type): 127 | proxy_args_type.append(input_arg_type) 128 | # use a random type otherwise 129 | else: 130 | proxy_args_type.append(VarType.get_random_type()) 131 | proxy_args_str = ", ".join([f"{VarType.to_str(proxy_args_type[idx])} {proxy_args_var[idx]}" for idx in range(len(proxy_args_var))]) 132 | 133 | pre_call_to_input_func = [] 134 | call_args_to_input_func = [] 135 | post_call_to_input_func = [] 136 | 137 | for idx in range(len(input_func.args_type)): 138 | # base type or a pointer of base type 139 | if proxy_args_type[idx] == input_func.args_type[idx]: 140 | # base type 141 | if proxy_args_type[idx] == VarType.get_base_type(proxy_args_type[idx]): 142 | call_args_to_input_func.append(proxy_args_var[idx]) 143 | # pointer 144 | else: 145 | proxy_args_idx_base_type = VarType.get_base_type(proxy_args_type[idx]) 146 | proxy_var = f"proxy_{generate_random_string(5)}" 147 | # save original pointed-to value 148 | pre_call_to_input_func.append(f"{VarType.to_str(proxy_args_idx_base_type)} {proxy_var} = *{proxy_args_var[idx]};") 149 | # mutate original value 150 | pre_call_to_input_func.append(f"*{proxy_args_var[idx]} = {synthesized_input[idx]};") 151 | match random.choice(['pointer', 'array']): 152 | case 'pointer': 153 | # call arg 154 | call_args_to_input_func.append(proxy_args_var[idx]) 155 | case 'array': 156 | proxy_var_arr = f"proxy_{generate_random_string(5)}" 157 | # randomly decide arrat length, use [10, 20) 158 | arr_len = random.randint(10, 20) 159 | # initialize array 160 | arr_values = [] 161 | for _ in range(arr_len): 162 | arr_values.append(random.choice([f"*{proxy_args_var[idx]}", str(VarType.get_random_value(proxy_args_idx_base_type))])) 163 | arr_init = ', '.join(arr_values) 164 | pre_call_to_input_func.append(f"{VarType.to_str(proxy_args_idx_base_type)} {proxy_var_arr}[{arr_len}] = {{ {arr_init} }};") 165 | # call arg 166 | call_args_to_input_func.append(proxy_var_arr) 167 | # recover original pointed-to value 168 | post_call_to_input_func.append(f"*{proxy_args_var[idx]} = {proxy_var};") 169 | # unusual types such as pointer 170 | else: 171 | raise VarType(f"(TODO) Unsupported type, maybe a struct.") 172 | 173 | proxy_function_body = f"{VarType.to_str(VarType.get_base_type(input_func.return_type))} {proxy_function_name}({proxy_args_str}) {{\n" 174 | 175 | # pre call 176 | for pre_call in pre_call_to_input_func: 177 | proxy_function_body += f"{pre_call}\n" 178 | # call 179 | proxy_ret_var = f"proxy_ret_{generate_random_string(5)}" 180 | proxy_ret_type = input_func.return_type 181 | call_args_str = ', '.join(call_args_to_input_func) 182 | proxy_function_body += f"{VarType.to_str(proxy_ret_type)} {proxy_ret_var} = {input_func.call_name}({call_args_str});\n" 183 | # post call 184 | for post_call in post_call_to_input_func: 185 | proxy_function_body += f"{post_call}\n" 186 | # return 187 | if proxy_ret_type == VarType.get_base_type(proxy_ret_type): 188 | proxy_function_body += f"return {proxy_ret_var};\n" 189 | else: 190 | proxy_function_body += f"return *{proxy_ret_var};\n" 191 | 192 | proxy_function_body += "}\n" 193 | 194 | proxy_function = { 195 | "function_name": proxy_function_name, 196 | "parameter_types": proxy_args_type, 197 | "return_type": proxy_ret_type, 198 | "function": proxy_function_body 199 | } 200 | 201 | return Function(proxy_function) 202 | 203 | DRIVER_FUNC = """ 204 | #include 205 | 206 | RealSmith_MISC_PLACEHOLDER 207 | 208 | RealSmith_FUNCTION_PLACEHOLDER 209 | 210 | PROXY_FUNCTION_PLACEHOLDER 211 | 212 | int main() { 213 | 214 | PRE_CALL_PLACEHOLDER 215 | 216 | long long ret = FUNCTION_CALL_PLACEHOLDER(CALL_ARGS); 217 | 218 | POST_CALL_PLACEHOLDER 219 | 220 | printf(\"ret=%lld\", ret); 221 | return 0; 222 | } 223 | """ 224 | 225 | def generate_closure_program(proxy_function:Function, input_function:Function, synthesized_intput:list[str]) -> tuple[str, Function]: 226 | synthesized_intput = deepcopy(synthesized_intput) 227 | RealSmith_MISC_PLACEHOLDER = '\n'.join(input_function.misc) 228 | RealSmith_FUNCTION_PLACEHOLDER = input_function.function_body 229 | if proxy_function.function_body == '': 230 | PROXY_FUNCTION_PLACEHOLDER = "" 231 | FUNCTION_CALL_PLACEHOLDER = input_function.call_name 232 | else: 233 | PROXY_FUNCTION_PLACEHOLDER = proxy_function.function_body 234 | FUNCTION_CALL_PLACEHOLDER = proxy_function.call_name 235 | 236 | PRE_CALL_PLACEHOLDER = "" 237 | pre_call_list = [] 238 | for idx in range(len(proxy_function.args_type)): 239 | arg_type = proxy_function.args_type[idx] 240 | if arg_type != VarType.get_base_type(arg_type): 241 | arg_var_base = f"arg_{generate_random_string(5)}" 242 | arg_var = f"arg_{generate_random_string(5)}" 243 | arg_var_value = VarType.get_random_value(VarType.get_base_type(arg_type)) 244 | pre_call_list.append(f"{VarType.to_str(VarType.get_base_type(arg_type))} {arg_var_base} = {arg_var_value}; {VarType.to_str(arg_type)} {arg_var} = &{arg_var_base};") 245 | synthesized_intput[idx] = arg_var 246 | PRE_CALL_PLACEHOLDER += "\n".join(pre_call_list) 247 | 248 | CALL_ARGS = ", ".join(list(map(str, synthesized_intput))) 249 | 250 | POST_CALL_PLACEHOLDER = "" 251 | # construct the program 252 | closure_program = DRIVER_FUNC\ 253 | .replace( 254 | "RealSmith_MISC_PLACEHOLDER", RealSmith_MISC_PLACEHOLDER)\ 255 | .replace( 256 | "RealSmith_FUNCTION_PLACEHOLDER", RealSmith_FUNCTION_PLACEHOLDER)\ 257 | .replace( 258 | "PROXY_FUNCTION_PLACEHOLDER", PROXY_FUNCTION_PLACEHOLDER)\ 259 | .replace( 260 | "PRE_CALL_PLACEHOLDER", PRE_CALL_PLACEHOLDER)\ 261 | .replace( 262 | "FUNCTION_CALL_PLACEHOLDER", FUNCTION_CALL_PLACEHOLDER)\ 263 | .replace( 264 | "CALL_ARGS", CALL_ARGS)\ 265 | .replace( 266 | "POST_CALL_PLACEHOLDER", POST_CALL_PLACEHOLDER) 267 | 268 | if proxy_function.function_body != '': 269 | proxy_function.function_body = f"{input_function.function_body}\n{proxy_function.function_body}" 270 | proxy_function.misc = input_function.misc 271 | new_function = proxy_function 272 | else: 273 | new_function = input_function 274 | return closure_program, new_function 275 | -------------------------------------------------------------------------------- /databaseconstructor/variable.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, auto 2 | import random 3 | import ctypes 4 | 5 | 6 | class VarType(Enum): 7 | INT8 = auto() 8 | UINT8 = auto() 9 | INT16 = auto() 10 | UINT16 = auto() 11 | INT32 = auto() 12 | UINT32 = auto() 13 | INT64 = auto() 14 | UINT64 = auto() 15 | CHAR = auto() 16 | UCHAR = auto() 17 | VOID = auto() 18 | STRUCT = auto() 19 | '''pointers''' 20 | PTR_INT8 = auto() 21 | PTR_UINT8 = auto() 22 | PTR_INT16 = auto() 23 | PTR_UINT16 = auto() 24 | PTR_INT32 = auto() 25 | PTR_UINT32 = auto() 26 | PTR_INT64 = auto() 27 | PTR_UINT64 = auto() 28 | PTR_CHAR = auto() 29 | PTR_UCHAR = auto() 30 | PTR_VOID = auto() 31 | 32 | def __eq__(self, other_type): 33 | return self.value == other_type.value 34 | 35 | @staticmethod 36 | def from_str(type_str:str): 37 | type_str = type_str.strip() 38 | if "const" in type_str: 39 | Warning(f"{type_str} found. We now only ignore the \"const\".") 40 | type_str = type_str.replace("const", "").strip() 41 | for map_type in VAR_MAP: 42 | for map_type_str in map_type.type_str_list: 43 | if map_type_str == type_str: 44 | return VarType(map_type.vartype.value) 45 | if type_str.count("*") == 1: 46 | base_type = VarType.from_str(type_str.replace("*", "").strip()) 47 | for map_type in VAR_MAP: 48 | if map_type.vartype == base_type: 49 | return VarType(map_type.pointertype.value) 50 | # we now treat all other types as int 51 | return VarType.INT32 52 | # raise ValueError(f"{type_str} is not a valid variable type") 53 | 54 | @staticmethod 55 | def from_list(type_list): 56 | return [VarType.from_str(x) if not isinstance(x, VarType) else x for x in type_list] 57 | 58 | @staticmethod 59 | def get_base_type(var_type): 60 | for map_type in VAR_MAP: 61 | if map_type.pointertype == var_type: 62 | return VarType(map_type.vartype.value) 63 | if map_type.vartype == var_type: 64 | return VarType(map_type.vartype.value) 65 | raise ValueError("Unknown pointer type from VarType.get_pointer_base_type") 66 | 67 | @staticmethod 68 | def to_str(vartype): 69 | for map_type in VAR_MAP: 70 | if vartype == map_type.vartype: 71 | return map_type.type_str_list[0] 72 | if vartype == map_type.pointertype: 73 | return f"{map_type.type_str_list[0]} *" 74 | raise ValueError 75 | 76 | @staticmethod 77 | def get_range(var_type): 78 | for type_info in VAR_MAP: 79 | if type_info.vartype == var_type: 80 | return type_info.range_list[0], type_info.range_list[1] 81 | return -1, -1 82 | 83 | @staticmethod 84 | def get_random_value(var_type, given_min=None, given_max=None): 85 | """ 86 | In theory, we could generate any value sampled from VarType.get_range(var_type) 87 | But this would generate large values that can not be used by other smaller types. 88 | So we limit the range to [-100, 100] 89 | """ 90 | range_min, range_min = VarType.get_range(var_type) 91 | _min = -100 if not given_min else given_min 92 | _max = 100 if not given_min else given_max 93 | if range_min >= 0: 94 | _min = 0 95 | return random.randint(_min, _max) 96 | 97 | @staticmethod 98 | def is_unsupport_type(var_type) -> bool: 99 | """Now only struct is unsupported""" 100 | return var_type == VarType.STRUCT# or var_type == VarType.VOID 101 | 102 | @staticmethod 103 | def get_random_type(): 104 | """get a random VarType that is not VOID or STRUCT""" 105 | while True: 106 | rand_type = random.choice(list(VarType)) 107 | if rand_type != VarType.VOID and rand_type != VarType.STRUCT: 108 | return rand_type 109 | 110 | @staticmethod 111 | def get_ctypes(var_type, var_value=None): 112 | """Get a ctype object""" 113 | for map_type in VAR_MAP: 114 | if var_type == map_type.vartype: 115 | if var_value == None: 116 | return map_type.ctypes_conver_func 117 | else: 118 | return map_type.ctypes_conver_func(var_value) 119 | raise(f"Cannot get ctypes of {VarType.to_str(var_type)}.") 120 | 121 | @staticmethod 122 | def get_format(var_type): 123 | """Get the printf format for the type""" 124 | for map_type in VAR_MAP: 125 | if var_type == map_type.vartype: 126 | return map_type.fmt 127 | raise(f"Cannot get format of {VarType.to_str(var_type)}.") 128 | 129 | class TypeInfo: 130 | """ 131 | [base type, pointer type, list[type str], range, ctype convert function, printf format] 132 | """ 133 | def __init__(self, vartype:VarType, pointertype:VarType, type_str_list:list[str], range_list:list[int], ctypes_conver_func, fmt): 134 | self.vartype = vartype 135 | self.pointertype = pointertype 136 | self.type_str_list = type_str_list 137 | self.range_list = range_list 138 | self.ctypes_conver_func = ctypes_conver_func 139 | self.fmt = fmt 140 | 141 | VAR_MAP = [] 142 | VAR_MAP.append(TypeInfo(VarType.INT8, VarType.PTR_INT8, ["int8_t"], [-128, 127], ctypes.c_int8, "PRId8")) 143 | VAR_MAP.append(TypeInfo(VarType.UINT8, VarType.PTR_UINT8, ["uint8_t"], [0, 255], ctypes.c_uint8, "PRIu8")) 144 | VAR_MAP.append(TypeInfo(VarType.INT16, VarType.PTR_INT16, ["int16_t"], [-32768, 32767], ctypes.c_int16, "PRId16")) 145 | VAR_MAP.append(TypeInfo(VarType.UINT16, VarType.PTR_UINT16, ["uint16_t"], [0, 65535], ctypes.c_uint16, "PRIu16")) 146 | VAR_MAP.append(TypeInfo(VarType.INT32, VarType.PTR_INT32, ["int", "int32_t"], [-2147483648, 2147483647], ctypes.c_int32, "PRId32")) 147 | VAR_MAP.append(TypeInfo(VarType.UINT32, VarType.PTR_UINT32, ["unsigned int", "uint32_t"], [0, 4294967295], ctypes.c_uint32, "PRIu32")) 148 | VAR_MAP.append(TypeInfo(VarType.INT64, VarType.PTR_INT64, ["long", "int64_t"], [-9223372036854775808, 9223372036854775807], ctypes.c_int64, "PRId64")) 149 | VAR_MAP.append(TypeInfo(VarType.UINT64, VarType.PTR_UINT64, ["unsigned long", "uint64_t"], [0, 18446744073709551615], ctypes.c_uint64, "PRIu64")) 150 | VAR_MAP.append(TypeInfo(VarType.CHAR, VarType.PTR_CHAR, ["char"], [-128, 127], ctypes.c_int8, "PRId8")) 151 | VAR_MAP.append(TypeInfo(VarType.UCHAR, VarType.PTR_UCHAR, ["unsigned char"], [0, 255], ctypes.c_uint8, "PRIu8")) 152 | VAR_MAP.append(TypeInfo(VarType.VOID, VarType.PTR_VOID, ["void"], [0, 0], None, None)) 153 | 154 | 155 | def CAST_VAR(value:int, from_type:VarType, to_type:VarType) -> int: 156 | """ 157 | Casting the value from type 'from_type' to type 'to_type' 158 | """ 159 | for var_map in VAR_MAP: 160 | if var_map.vartype == to_type: 161 | return var_map.ctypes_conver_func(value) 162 | -------------------------------------------------------------------------------- /generate_mutants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, sys, shutil, re, time, tempfile, signal, random, string, argparse 3 | from datetime import datetime 4 | from glob import glob 5 | from enum import Enum, auto 6 | from diopter.compiler import ( 7 | CompilationSetting, 8 | CompilerExe, 9 | OptLevel, 10 | SourceProgram, 11 | Language, 12 | ObjectCompilationOutput 13 | ) 14 | from diopter.sanitizer import Sanitizer 15 | from diopter.utils import TempDirEnv 16 | import subprocess as sp 17 | from synthesizer.synthesizer import Synthesizer, SynthesizerError 18 | from utils.compcert import CComp as this_CComp 19 | from pathlib import Path 20 | 21 | DEBUG = 0 22 | """CONFIG""" 23 | FUNCTION_DB_FILE = os.path.join(os.path.dirname(__file__), './databaseconstructor/functions_pointer_global_io.json') 24 | MIN_PROGRAM_SIZE = 8000 # programs shorter than this many bytes are too boring to test 25 | NUM_MUTANTS = 10 # number of mutants generated by the synthesizer per seed. 26 | COMPILER_TIMEOUT = 200 27 | PROG_TIMEOUT = 10 28 | CCOMP_TIMEOUT = 60 # compcert timeout 29 | CSMITH_USER_OPTIONS = "--no-volatiles --no-volatile-pointers --no-unions" 30 | CSMITH_TIMEOUT = 20 31 | CREDUCE_JOBS = 1 32 | """TOOL""" 33 | CSMITH_HOME = os.environ["CSMITH_HOME"] 34 | CC = CompilationSetting( 35 | compiler=CompilerExe.get_system_gcc(), 36 | opt_level=OptLevel.O3, 37 | flags=("-march=native",f"-I{CSMITH_HOME}/include"), 38 | ) 39 | SAN_SAN = Sanitizer(checked_warnings=False, use_ccomp_if_available=False) # sanitizers only 40 | SAN_CCOMP = this_CComp.get_system_ccomp() # CompCert only 41 | 42 | """Global vars""" 43 | 44 | class CompCode(Enum): 45 | """Compile status 46 | """ 47 | OK = auto() # ok 48 | Timeout = auto() # timeout during compilation 49 | Sanfail = auto() # sanitization failed 50 | Crash = auto() # compiler crash 51 | Error = auto() # compiler error 52 | WrongEval= auto() # inconsistent results across compilers but consistent within the same compiler 53 | Wrong = auto() # inconsistent results across compilers/opts 54 | 55 | def generate_random_string(len:int=5) -> str: 56 | """Generate a random string of length len""" 57 | return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(len)) 58 | 59 | def run_cmd(cmd, timeout): 60 | if type(cmd) is not list: 61 | cmd = cmd.split(' ') 62 | cmd = list(filter(lambda x: x!='', cmd)) 63 | # Start the subprocess 64 | process = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) 65 | # Wait for the subprocess to finish or timeout 66 | try: 67 | output, error = process.communicate(timeout=timeout) 68 | output = output.decode("utf-8") 69 | except sp.TimeoutExpired: 70 | # Timeout occurred, kill the process 71 | try: 72 | os.killpg(process.pid, signal.SIGTERM) 73 | except ProcessLookupError: 74 | pass 75 | finally: 76 | output = '' 77 | # A workaround to tmpxxx.exe as it sometimes escapes from os.killpg 78 | cmd_str = " ".join(cmd) 79 | time.sleep(2) 80 | if '.exe' in cmd_str: 81 | os.system(f"pkill -9 -f {cmd_str}") 82 | return 124, output 83 | 84 | # Return the exit code and stdout of the process 85 | return process.returncode, output 86 | 87 | def write_bug_desc_to_file(to_file, data): 88 | with open(to_file, "a") as f: 89 | f.write(f"/* {data} */\n") 90 | 91 | def read_checksum(data): 92 | res = re.findall(r'checksum = (.*)', data) 93 | if len(res) > 0: 94 | return res[0] 95 | return 'NO_CKSUM' 96 | 97 | def check_sanitizers(src): 98 | """Check validity with sanitizers""" 99 | with open(src, 'r') as f: 100 | code = f.read() 101 | prog = SourceProgram(code=code, language=Language.C) 102 | preprog = CC.preprocess_program(prog, make_compiler_agnostic=True) 103 | if DEBUG: 104 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), "SAN.sanitize", flush=True) 105 | if not SAN_SAN.sanitize(preprog): 106 | return False 107 | return True 108 | 109 | def check_ccomp(src, random_count=1): 110 | """ 111 | Check validity with CompCert. 112 | src:str -> source file 113 | random_count:int -> the number of times using ccomp -random for checking 114 | """ 115 | with open(src, 'r') as f: 116 | code = f.read() 117 | prog = SourceProgram(code=code, language=Language.C) 118 | preprog = CC.preprocess_program(prog, make_compiler_agnostic=True) 119 | if DEBUG: 120 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), "SAN.ccomp", flush=True) 121 | with TempDirEnv(): 122 | try: 123 | ccomp_result = SAN_CCOMP.check_program(preprog, timeout=CCOMP_TIMEOUT, debug=DEBUG) 124 | except sp.TimeoutExpired: 125 | return False 126 | if ccomp_result is False: 127 | return False 128 | with TempDirEnv(): 129 | for _ in range(random_count): 130 | try: 131 | ccomp_result_random = SAN_CCOMP.check_program(preprog, timeout=CCOMP_TIMEOUT, debug=DEBUG, additional_flags=["-random"]) 132 | except sp.TimeoutExpired: 133 | return False 134 | if ccomp_result_random is False: 135 | return False 136 | # check for unspecified behavior 137 | if ccomp_result.stdout != ccomp_result_random.stdout: 138 | return False 139 | return True 140 | 141 | def compile_and_run(compiler, src): 142 | cksum = '' 143 | tmp_f = tempfile.NamedTemporaryFile(suffix=".exe", delete=False) 144 | tmp_f.close() 145 | exe = tmp_f.name 146 | cmd = f"{compiler} {src} -I{CSMITH_HOME}/include -o {exe}" 147 | ret, out = run_cmd(cmd, COMPILER_TIMEOUT) 148 | if ret == 124: # another compile chance when timeout 149 | time.sleep(1) 150 | ret, out = run_cmd(cmd, COMPILER_TIMEOUT) 151 | if ret == 124: # we treat timeout as crash now. 152 | write_bug_desc_to_file(src, f"Compiler timeout! Can't compile with {compiler}") 153 | if os.path.exists(exe): os.remove(exe) 154 | return CompCode.Timeout, cksum 155 | if ret != 0: 156 | write_bug_desc_to_file(src, f"Compiler crash! Can't compile with {compiler}") 157 | if os.path.exists(exe): os.remove(exe) 158 | return CompCode.Crash, cksum 159 | ret, out = run_cmd(f"{exe}", PROG_TIMEOUT) 160 | cksum = read_checksum(out) 161 | write_bug_desc_to_file(src, f"EXITof {compiler}: {ret}") 162 | write_bug_desc_to_file(src, f"CKSMof {compiler}: {cksum}") 163 | if os.path.exists(exe): os.remove(exe) 164 | return CompCode.OK, cksum 165 | 166 | def check_compile(src:str, compilers:list) -> CompCode: 167 | """Compile the program with a list of compilers and check their status 168 | """ 169 | cksum_list = [] 170 | for comp in compilers: 171 | if DEBUG: 172 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), "compiler_and_run: ", comp, flush=True) 173 | ret, cksum = compile_and_run(comp, src) 174 | if ret == CompCode.Crash: 175 | return CompCode.Crash 176 | if ret == CompCode.Timeout: 177 | return CompCode.Timeout 178 | if ret != CompCode.OK: 179 | return CompCode.Error 180 | cksum_list.append(cksum) 181 | if len(cksum_list) != len(compilers) or len(set(cksum_list)) != 1: 182 | maybe_WrongEval = True 183 | for i in range(len(compilers)): 184 | for j in range(i+1, len(compilers)): 185 | if compilers[i].split(' ')[0] == compilers[j].split(' ')[0] and cksum_list[i] != cksum_list[j]: 186 | maybe_WrongEval = False 187 | if maybe_WrongEval: 188 | return CompCode.WrongEval 189 | return CompCode.Wrong 190 | return CompCode.OK 191 | 192 | def run_one(seed: str, compilers:list[str], dst_dir:Path, SYNER:Synthesizer, succ_file_id:str) -> Path | None: 193 | """Run compiler testing 194 | """ 195 | save_realsmith_dir = (dst_dir) 196 | 197 | src_file = tempfile.NamedTemporaryFile(suffix='.c', delete=False) 198 | src_file.close() 199 | src = src_file.name 200 | shutil.copy(seed, src) 201 | 202 | # synthesize 203 | try: 204 | syn_files = SYNER.synthesizer(src_filename=src, num_mutant=NUM_MUTANTS, DEBUG=DEBUG) 205 | except: 206 | print('SynthesizerError') 207 | os.remove(src) 208 | return 0 209 | for syn_i, syn_f in enumerate(syn_files): 210 | shutil.copy(syn_f, save_realsmith_dir / f"{succ_file_id}_syn{syn_i}.c") 211 | 212 | for syn_f in syn_files: 213 | os.remove(syn_f) 214 | os.remove(src) 215 | return 1 216 | 217 | 218 | if __name__=='__main__': 219 | parser = argparse.ArgumentParser(description="Generate a number of realsmith mutants for evaluation.") 220 | parser.add_argument("--seed", required=True, type=Path, help="the seed.") 221 | parser.add_argument("--dst", required=True, type=Path, help="Destination directory for generated seeds.") 222 | parser.add_argument("--syn-prob", required=True, type=int, help="Synthesis probability") 223 | parser.add_argument("--num-mutants", required=True, type=int, help="The number of mutants per seed by realsmith") 224 | args = parser.parse_args() 225 | 226 | dst_dir = Path(args.dst) 227 | dst_dir.mkdir(parents=True, exist_ok=True) 228 | 229 | NUM_MUTANTS = args.num_mutants 230 | 231 | compilers = [ 232 | "gcc -O0", 233 | "clang -O0" 234 | ] 235 | SYNER = Synthesizer(func_database=FUNCTION_DB_FILE, prob=args.syn_prob) 236 | with TempDirEnv() as tmp_dir: 237 | os.environ['TMPDIR'] = tmp_dir.absolute().as_posix() 238 | total = 0 239 | ret = run_one(str(args.seed), compilers, dst_dir, SYNER, args.seed.stem) 240 | -------------------------------------------------------------------------------- /profiler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | project(Profiler LANGUAGES CXX C) 3 | 4 | find_package(LLVM REQUIRED CONFIG) 5 | 6 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 7 | message(STATUS "Using LLVMConfig.cmake in ${LLVM_DIR}") 8 | 9 | list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) 10 | 11 | find_package(Clang REQUIRED CONFIG) 12 | message(STATUS "Using ClangConfig.cmake in ${Clang_DIR}") 13 | list(APPEND CMAKE_MODULE_PATH ${CLANG_CMAKE_DIR}) 14 | include(${CLANG_CMAKE_DIR}/AddClang.cmake) 15 | 16 | if("${LLVM_VERSION_MAJOR}" VERSION_LESS 13) 17 | message(FATAL_ERROR "Found LLVM ${LLVM_VERSION_MAJOR}, but need LLVM >= 13") 18 | endif() 19 | 20 | 21 | if (NOT CMAKE_BUILD_TYPE) 22 | set(CMAKE_BUILD_TYPE Release) 23 | endif () 24 | 25 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic -march=native") 26 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -w") 27 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -w") 28 | set(CMAKE_CXX_STANDARD 17) 29 | 30 | if(${LLVM_REQUIRES_RTTI}) 31 | else() 32 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") 33 | endif(${LLVM_REQUIRES_RTTI}) 34 | 35 | separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) 36 | add_definitions(${LLVM_DEFINITIONS_LIST}) 37 | 38 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 39 | add_compile_options (-fdiagnostics-color=always) 40 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 41 | add_compile_options (-fcolor-diagnostics) 42 | endif () 43 | 44 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 45 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 46 | 47 | add_subdirectory(src) 48 | -------------------------------------------------------------------------------- /profiler/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(ProfilerLib 2 | RuleActionCallback.cpp 3 | ProfilerEntry.cpp 4 | GlobalMacro.cpp 5 | TagExpression.cpp 6 | ) 7 | target_include_directories(ProfilerLib PUBLIC ${CLANG_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}) 8 | 9 | if(CLANG_LINK_CLANG_DYLIB) 10 | target_link_libraries(ProfilerLib PUBLIC LLVM) 11 | clang_target_link_libraries(ProfilerLib PUBLIC) 12 | else() 13 | llvm_map_components_to_libnames(llvm_libs support core) 14 | target_link_libraries(ProfilerLib PUBLIC ${llvm_libs} 15 | clangASTMatchers 16 | clangTransformer 17 | clangTooling) 18 | endif(CLANG_LINK_CLANG_DYLIB) 19 | 20 | add_subdirectory(tool) 21 | -------------------------------------------------------------------------------- /profiler/src/GlobalMacro.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "TagExpression.hpp" 4 | #include "GlobalMacro.hpp" 5 | 6 | using namespace std; 7 | 8 | namespace globalmacro { 9 | 10 | std::map TypeToFormat = { 11 | {"char", "PRId8"}, {"unsignedchar", "PRIu8"}, 12 | {"short", "PRId16"}, {"unsignedshort", "PRIu16"}, 13 | {"shortint", "PRId16"}, {"unsignedshortint", "PRIu16"}, 14 | {"int", "PRId32"}, {"unsignedint", "PRIu32"}, 15 | {"int8_t", "PRId8"}, {"uint8_t", "PRIu8"}, 16 | {"int16_t", "PRId16"}, {"uint16_t", "PRIu16"}, 17 | {"int32_t", "PRId32"}, {"uint32_t", "PRIu32"}, 18 | {"int64_t", "PRId64"}, {"uint64_t", "PRIu64"}, 19 | }; 20 | 21 | class AddGlobalMacro : public MatchComputation { 22 | public: 23 | AddGlobalMacro() = default; 24 | llvm::Error eval(const ast_matchers::MatchFinder::MatchResult &, 25 | std::string *Result) const override { 26 | Result->append("#include\n"); 27 | Result->append("#include\n"); 28 | /*Add macros for TagExpression*/ 29 | for (auto tag = tagexpression::Tags.begin(); tag != tagexpression::Tags.end(); ++tag) { 30 | auto tag_id = tag->first; 31 | auto tag_type = tag->second; 32 | if (tag_id == 0) { 33 | continue; 34 | } 35 | std:string tag_id_str = std::to_string(tag_id); 36 | Result->append( 37 | "#define Tag" + tag_id_str + "(x) (x)\n" 38 | ); 39 | } 40 | return llvm::Error::success(); 41 | } 42 | 43 | std::string toString() const override { 44 | return "AddGlobalMacroError\n"; 45 | } 46 | }; 47 | 48 | struct clang::transformer::RewriteRule AddGlobalMacroRule() { 49 | return makeRule(functionDecl( 50 | isExpansionInMainFile(), 51 | isMain() 52 | ).bind("main"), 53 | insertAfter(ruleactioncallback::startOfFile("main"), 54 | std::make_unique())); 55 | } 56 | 57 | 58 | } //namespace globalmacro -------------------------------------------------------------------------------- /profiler/src/GlobalMacro.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ProfilerEntry.hpp" 4 | 5 | namespace globalmacro { 6 | 7 | struct clang::transformer::RewriteRule AddGlobalMacroRule(void); 8 | 9 | } -------------------------------------------------------------------------------- /profiler/src/ProfilerEntry.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ProfilerEntry.hpp" 8 | 9 | using namespace tagexpression; 10 | using namespace globalmacro; 11 | 12 | namespace profiler { 13 | 14 | ProfilerEntry::ProfilerEntry( 15 | std::map &FileToReplacements, ToolMode mode) 16 | : FileToReplacements{FileToReplacements} { 17 | switch (mode) 18 | { 19 | case ToolMode::Expression: 20 | Callbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 21 | TagExpressionRule(), FileToReplacements, FileToNumberValueTrackers}); 22 | break; 23 | case ToolMode::Statement: 24 | Callbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 25 | TagStatementRule(), FileToReplacements, FileToNumberValueTrackers}); 26 | break; 27 | case ToolMode::All: 28 | Callbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 29 | TagExpressionRule(), FileToReplacements, FileToNumberValueTrackers}); 30 | Callbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 31 | TagStatementRule(), FileToReplacements, FileToNumberValueTrackers}); 32 | break; 33 | default: 34 | break; 35 | } 36 | Callbacks.emplace_back(ruleactioncallback::RuleActionCallback{ 37 | AddGlobalMacroRule(), FileToReplacements, FileToNumberValueTrackers}); 38 | 39 | } 40 | 41 | void ProfilerEntry::registerMatchers(clang::ast_matchers::MatchFinder &Finder) { 42 | for (auto &Callback : Callbacks) 43 | Callback.registerMatchers(Finder); 44 | } 45 | 46 | } //namespace profiler 47 | -------------------------------------------------------------------------------- /profiler/src/ProfilerEntry.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "RuleActionCallback.hpp" 15 | #include "TagExpression.hpp" 16 | #include "GlobalMacro.hpp" 17 | 18 | namespace profiler { 19 | 20 | enum class ToolMode { 21 | Expression, 22 | Statement, 23 | All 24 | }; 25 | 26 | class ProfilerEntry { 27 | public: 28 | ProfilerEntry(std::map &FileToReplacements, ToolMode mode); 29 | ProfilerEntry(const ProfilerEntry &) = delete; 30 | ProfilerEntry(ProfilerEntry &&) = delete; 31 | 32 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 33 | 34 | 35 | private: 36 | std::map &FileToReplacements; 37 | std::vector Callbacks; 38 | std::map FileToNumberValueTrackers; 39 | }; 40 | } // namespace profiler -------------------------------------------------------------------------------- /profiler/src/RuleActionCallback.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "RuleActionCallback.hpp" 18 | 19 | using namespace clang; 20 | using namespace ast_matchers; 21 | using namespace transformer; 22 | 23 | namespace ruleactioncallback { 24 | std::string GetFilenameFromRange(const CharSourceRange &R, 25 | const SourceManager &SM) { 26 | const std::pair DecomposedLocation = 27 | SM.getDecomposedLoc(SM.getSpellingLoc(R.getBegin())); 28 | const FileEntry *Entry = SM.getFileEntryForID(DecomposedLocation.first); 29 | return std::string(Entry ? Entry->getName() : ""); 30 | 31 | } 32 | 33 | Expected getNode(const ast_matchers::BoundNodes &Nodes, 34 | StringRef ID) { 35 | auto &NodesMap = Nodes.getMap(); 36 | auto It = NodesMap.find(ID); 37 | if (It == NodesMap.end()) 38 | return llvm::make_error(llvm::errc::invalid_argument, 39 | ID + "not bound"); 40 | return It->second; 41 | } 42 | 43 | RangeSelector startOfFile(std::string ID) { 44 | return [ID](const clang::ast_matchers::MatchFinder::MatchResult &Result) 45 | -> Expected { 46 | auto Node = getNode(Result.Nodes, ID); 47 | if (!Node) 48 | return Node.takeError(); 49 | const auto &SM = Result.Context->getSourceManager(); 50 | auto Start = SM.getLocForStartOfFile( 51 | SM.getFileID(Node->getSourceRange().getBegin())); 52 | return CharSourceRange(SourceRange(Start), false); 53 | }; 54 | } 55 | 56 | }//namespace: ruleactioncallback 57 | 58 | ruleactioncallback::RuleActionCallback::RuleActionCallback( 59 | RewriteRule Rule, 60 | std::map &FileToReplacements, 61 | std::map &FileToNumberValueTrackers) 62 | : Rule{Rule}, FileToReplacements{FileToReplacements}, 63 | FileToNumberValueTrackers{FileToNumberValueTrackers} {} 64 | 65 | void ruleactioncallback::RuleActionCallback::run( 66 | const clang::ast_matchers::MatchFinder::MatchResult &Result) { 67 | 68 | 69 | if (Result.Context->getDiagnostics().hasErrorOccurred()) { 70 | llvm::errs() << "An error has occured.\n"; 71 | return; 72 | } 73 | Expected> Edits = 74 | transformer::detail::findSelectedCase(Result, Rule).Edits(Result); 75 | if (!Edits) { 76 | llvm::errs() << "Rewrite failed: " << llvm::toString(Edits.takeError()) 77 | << "\n"; 78 | return; 79 | } 80 | auto SM = Result.SourceManager; 81 | for (const auto &T : *Edits) { 82 | assert(T.Kind == transformer::EditKind::Range); 83 | auto FilePath = GetFilenameFromRange(T.Range, *SM); 84 | auto N = FileToNumberValueTrackers[FilePath]++; 85 | auto R = tooling::Replacement( 86 | *SM, T.Range, T.Replacement); 87 | auto &Replacements = FileToReplacements[FilePath]; 88 | auto Err = Replacements.add(R); 89 | if (Err) { 90 | auto NewOffset = Replacements.getShiftedCodePosition(R.getOffset()); 91 | auto NewLength = Replacements.getShiftedCodePosition( 92 | R.getOffset() + R.getLength()) - 93 | NewOffset; 94 | if (NewLength == R.getLength()) { 95 | R = clang::tooling::Replacement(R.getFilePath(), NewOffset, 96 | NewLength, 97 | R.getReplacementText()); 98 | Replacements = Replacements.merge(tooling::Replacements(R)); 99 | } else { 100 | llvm_unreachable(llvm::toString(std::move(Err)).c_str()); 101 | } 102 | } 103 | } 104 | 105 | } 106 | 107 | void ruleactioncallback::RuleActionCallback::registerMatchers( 108 | clang::ast_matchers::MatchFinder &Finder) { 109 | for (auto &Matcher : transformer::detail::buildMatchers(Rule)) 110 | Finder.addDynamicMatcher( 111 | Matcher.withTraversalKind(clang::TK_IgnoreUnlessSpelledInSource), 112 | this); 113 | } 114 | -------------------------------------------------------------------------------- /profiler/src/RuleActionCallback.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace clang; 18 | using namespace ast_matchers; 19 | using namespace transformer; 20 | 21 | namespace ruleactioncallback { 22 | 23 | class RuleActionCallback 24 | : public clang::ast_matchers::MatchFinder::MatchCallback { 25 | public: 26 | RuleActionCallback( 27 | clang::transformer::RewriteRule Rule, 28 | std::map &FileToReplacements, 29 | std::map &FileToNumberValueTrackers); 30 | void 31 | run(const clang::ast_matchers::MatchFinder::MatchResult &Result) override; 32 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 33 | std::string getFunctionAsText(const clang::Decl *F, 34 | const clang::SourceManager &SM, const clang::LangOptions &lp); 35 | 36 | private: 37 | clang::transformer::RewriteRule Rule; 38 | std::map &FileToReplacements; 39 | std::map &FileToNumberValueTrackers; 40 | }; 41 | 42 | std::string GetFilenameFromRange(const CharSourceRange &R, const SourceManager &SM); 43 | RangeSelector startOfFile(std::string ID); 44 | 45 | } 46 | -------------------------------------------------------------------------------- /profiler/src/TagExpression.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "TagExpression.hpp" 4 | 5 | namespace tagexpression { 6 | 7 | std::map Tags{{0, ""}}; 8 | 9 | std::list> StmtVars; 10 | 11 | class TagExpressionAction : public MatchComputation { 12 | public: 13 | TagExpressionAction() = default; 14 | llvm::Error eval(const ast_matchers::MatchFinder::MatchResult &mResult, 15 | std::string *Result) const override { 16 | // get declaration of expr 17 | const VarDecl *decl = mResult.Nodes.getNodeAs("decl"); 18 | std::string var_storage = "1"; //local 19 | if (decl->hasGlobalStorage()) { 20 | var_storage = "0";//global 21 | } 22 | 23 | const Expr *expr = mResult.Nodes.getNodeAs("expr"); 24 | std::string typeStr = expr->getType().getAsString(); 25 | if (typeStr.find("struct") != -1 || typeStr.find("union") != -1) { 26 | return llvm::Error::success(); 27 | } 28 | 29 | std::string exprStr = getExprAsText(expr, *mResult.SourceManager, mResult.Context->getLangOpts()); 30 | 31 | // if this is a statement tag 32 | const Stmt *stmt = mResult.Nodes.getNodeAs("stmt"); 33 | std::string stmt_curr = "0"; 34 | if (stmt) { 35 | int64_t stmt_id = stmt->getID(*mResult.Context); 36 | stmt_curr = std::to_string(stmt_id); 37 | } 38 | 39 | std::string tag_style = "e"; 40 | 41 | //debug 42 | std::string scope_current = var_storage; 43 | const Stmt *scopeCurrent = mResult.Nodes.getNodeAs("scope_curr"); 44 | if (scopeCurrent) { 45 | scope_current = std::to_string(scopeCurrent->getID(*mResult.Context)); 46 | } 47 | std::string scope_parent = scope_current; 48 | const Stmt *scopeParent = mResult.Nodes.getNodeAs("scope_parent"); 49 | if (scopeParent) { 50 | scope_parent = std::to_string(scopeParent->getID(*mResult.Context)); 51 | } 52 | 53 | // generate tag 54 | int tag_id = Tags.rbegin()->first; 55 | tag_id++; 56 | std::string tagStr = "Tag" + std::to_string(tag_id) + 57 | "(" + "/*" + typeStr + ":" + scope_current + ":" + scope_parent + ":" + stmt_curr + ":" + tag_style + "*/"; 58 | Result->append(tagStr); 59 | // for statemtent tag, append more 60 | // if (stmt) { 61 | // Result->append(exprStr + ")/*s*/;\n"); 62 | // } 63 | 64 | //replace const keyword in typeStr 65 | size_t pos_const = typeStr.find("const"); 66 | if (pos_const != std::string::npos) { 67 | typeStr.replace(pos_const, std::strlen("const"), ""); 68 | } 69 | Tags.insert({tag_id, typeStr}); 70 | return llvm::Error::success(); 71 | } 72 | std::string toString() const override { return "{}"; } 73 | static std::string getExprAsText(const Expr *E, const SourceManager &SM, const LangOptions &lp) { 74 | auto SR = CharSourceRange::getTokenRange(E->getSourceRange()); 75 | return Lexer::getSourceText(SR, SM, lp).str(); 76 | } 77 | }; 78 | 79 | 80 | class TagStatementAction : public MatchComputation { 81 | public: 82 | TagStatementAction() = default; 83 | llvm::Error eval(const ast_matchers::MatchFinder::MatchResult &mResult, 84 | std::string *Result) const override { 85 | const Stmt *stmt = mResult.Nodes.getNodeAs("stmt"); 86 | std::string stmt_curr = "0"; 87 | if (stmt) { 88 | int64_t stmt_id = stmt->getID(*mResult.Context); 89 | stmt_curr = std::to_string(stmt_id); 90 | } 91 | 92 | Result->append(stmt_curr); 93 | 94 | return llvm::Error::success(); 95 | } 96 | std::string toString() const override { return "{}"; } 97 | }; 98 | 99 | auto scopeMatcher = anyOf( 100 | hasAncestor( 101 | stmt(hasParent(compoundStmt( 102 | anyOf( 103 | hasAncestor(stmt(hasParent(compoundStmt().bind("scope_parent")))), 104 | hasParent(functionDecl()) 105 | ) 106 | ).bind("scope_curr"))) 107 | ), 108 | hasAncestor(functionDecl(hasDescendant(compoundStmt().bind("scope_curr")))), // function args 109 | hasAncestor(translationUnitDecl()) // global vars 110 | ); 111 | 112 | auto matcher = expr( 113 | expr().bind("expr"), 114 | anyOf( 115 | declRefExpr( 116 | hasDeclaration(decl(scopeMatcher).bind("decl")), 117 | unless(hasAncestor(memberExpr())) 118 | ), 119 | unaryOperator(hasOperatorName("*"), hasDescendant(declRefExpr(hasDeclaration(decl(scopeMatcher).bind("decl"))))), 120 | arraySubscriptExpr( 121 | hasBase(hasDescendant(declRefExpr(hasDeclaration(decl(scopeMatcher).bind("decl"))).bind("arrbase"))), 122 | unless(hasDescendant(declRefExpr(unless(equalsBoundNode("arrbase"))))), 123 | unless(hasAncestor(memberExpr())) 124 | ), 125 | memberExpr((hasDescendant(declRefExpr(hasDeclaration(decl(scopeMatcher).bind("decl")))))) 126 | ), 127 | isExpansionInMainFile(), 128 | hasType(isInteger()), 129 | hasAncestor(compoundStmt()), 130 | unless(hasAncestor(functionDecl(isMain()))), 131 | // unless(hasParent(memberExpr())), // hack to avoid member expressions 132 | unless(hasAncestor(binaryOperator(isAssignmentOperator(), 133 | hasLHS(ignoringParenImpCasts(equalsBoundNode("expr")))))), 134 | unless(hasAncestor(unaryOperator(hasOperatorName("&")))), 135 | unless(hasAncestor( 136 | unaryOperator(hasAnyOperatorName("++", "--")))), 137 | unless(hasDescendant(unaryOperator(hasAnyOperatorName("++", "--")))), 138 | hasAncestor(stmt(hasParent(compoundStmt())).bind("stmt")) 139 | ); 140 | 141 | auto statementMatcher = expr( 142 | expr().bind("expr"), 143 | // declRefExpr(), //anyOf(declRefExpr(), integerLiteral(), characterLiteral(), floatLiteral()), 144 | anyOf( 145 | declRefExpr(hasDeclaration(decl().bind("decl"))), 146 | unaryOperator(hasOperatorName("*"), hasDescendant(declRefExpr(hasDeclaration(decl().bind("decl"))))), 147 | arraySubscriptExpr( 148 | hasBase(hasDescendant(declRefExpr(hasDeclaration(decl().bind("decl"))).bind("arrbase"))) 149 | ) 150 | ), 151 | isExpansionInMainFile(), 152 | anyOf( 153 | hasType(asString("int")), hasType(asString("const int")), 154 | hasType(asString("unsigned int")), hasType(asString("const unsigned int")), 155 | hasType(asString("long")), hasType(asString("const long")), 156 | hasType(asString("char")), hasType(asString("const char")), 157 | hasType(asString("int8_t")), hasType(asString("const int8_t")), 158 | hasType(asString("uint8_t")), hasType(asString("const uint8_t")), 159 | hasType(asString("int16_t")), hasType(asString("const int16_t")), 160 | hasType(asString("uint16_t")), hasType(asString("const uint16_t")), 161 | hasType(asString("int32_t")), hasType(asString("const int32_t")), 162 | hasType(asString("uint32_t")), hasType(asString("const uint32_t")), 163 | hasType(asString("int64_t")), hasType(asString("const int64_t")), 164 | hasType(asString("uint64_t")), hasType(asString("const uint64_t")) 165 | ), 166 | hasAncestor(compoundStmt()), 167 | unless(hasAncestor(functionDecl(isMain()))), 168 | unless(hasParent(memberExpr())), // hack to avoid member expressions 169 | unless(hasAncestor(binaryOperator(isAssignmentOperator(), 170 | hasLHS(ignoringParenImpCasts(equalsBoundNode("expr")))))), 171 | unless(hasAncestor(unaryOperator(hasOperatorName("&")))), 172 | unless(hasAncestor( 173 | unaryOperator(hasAnyOperatorName("++", "--")))), 174 | unless(hasDescendant(unaryOperator(hasAnyOperatorName("++", "--")))), 175 | hasAncestor(stmt(hasParent(compoundStmt( 176 | anyOf( 177 | hasParent(stmt(hasParent(compoundStmt().bind("scope1")))), 178 | hasParent(functionDecl()) 179 | ) 180 | ).bind("scope0"))).bind("stmt")), 181 | unless(hasAncestor(forStmt(hasLoopInit(hasDescendant(declRefExpr(hasDeclaration(equalsBoundNode("decl")))))))), // int i; for(i=0;i<1;i++) 182 | unless(hasAncestor(forStmt(hasLoopInit(hasDescendant(varDecl((equalsBoundNode("decl")))))))) // for(int i=0;i<1;i++) 183 | ); 184 | 185 | 186 | struct clang::transformer::RewriteRule TagExpressionRule() { 187 | return makeRule(matcher, { 188 | insertBefore(node("expr"), std::make_unique()), 189 | insertAfter(node("expr"), cat(")")), 190 | insertBefore(statement("stmt"), cat("/*bef_stmt:", std::make_unique(), "*/\n")), 191 | insertAfter(statement("stmt"), cat("\n/*aft_stmt:", std::make_unique(), "*/")) 192 | }); 193 | } 194 | 195 | struct clang::transformer::RewriteRule TagStatementRule() { 196 | return makeRule(statementMatcher, { 197 | insertBefore(node("stmt"), std::make_unique()), 198 | insertAfter(node("stmt"), cat(")")) 199 | }); 200 | } 201 | 202 | } //namespace tag_expression -------------------------------------------------------------------------------- /profiler/src/TagExpression.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ProfilerEntry.hpp" 4 | 5 | namespace tagexpression { 6 | 7 | extern std::map Tags; // 8 | 9 | struct clang::transformer::RewriteRule TagExpressionRule(); 10 | struct clang::transformer::RewriteRule TagStatementRule(); 11 | 12 | } -------------------------------------------------------------------------------- /profiler/src/tool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(profiler Profiler.cpp) 2 | target_link_libraries(profiler PUBLIC ProfilerLib) 3 | install(TARGETS profiler DESTINATION bin) 4 | 5 | -------------------------------------------------------------------------------- /profiler/src/tool/Profiler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "ProfilerEntry.hpp" 12 | 13 | using namespace llvm; 14 | using namespace clang; 15 | using namespace clang::tooling; 16 | using namespace clang::ast_matchers; 17 | 18 | namespace { 19 | 20 | cl::OptionCategory ToolOptions("options"); 21 | 22 | cl::opt 23 | Mode("mode", cl::desc("Profiling mode."), 24 | cl::values(clEnumValN(profiler::ToolMode::Expression, "expr", 25 | "Expression."), 26 | clEnumValN(profiler::ToolMode::Statement, "stmt", 27 | "Statement."), 28 | clEnumValN(profiler::ToolMode::All, "all", 29 | "Both expression and statement.") 30 | ), 31 | cl::init(profiler::ToolMode::Expression), 32 | cl::cat(ToolOptions)); 33 | 34 | bool applyReplacements(RefactoringTool &Tool) { 35 | LangOptions DefaultLangOptions; 36 | IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); 37 | clang::TextDiagnosticPrinter DiagnosticPrinter(errs(), &*DiagOpts); 38 | DiagnosticsEngine Diagnostics( 39 | IntrusiveRefCntPtr(new DiagnosticIDs()), &*DiagOpts, 40 | &DiagnosticPrinter, false); 41 | auto &FileMgr = Tool.getFiles(); 42 | SourceManager Sources(Diagnostics, FileMgr); 43 | 44 | Rewriter Rewrite(Sources, DefaultLangOptions); 45 | 46 | bool Result = true; 47 | for (const auto &FileAndReplaces : groupReplacementsByFile( 48 | Rewrite.getSourceMgr().getFileManager(), Tool.getReplacements())) { 49 | auto &CurReplaces = FileAndReplaces.second; 50 | 51 | Result = applyAllReplacements(CurReplaces, Rewrite) && Result; 52 | } 53 | if (!Result) { 54 | llvm::errs() << "Failed applying all replacements.\n"; 55 | return false; 56 | } 57 | 58 | return !Rewrite.overwriteChangedFiles(); 59 | } 60 | 61 | template int runToolOnCode(RefactoringTool &Tool) { 62 | InstrTool Instr(Tool.getReplacements(), Mode); 63 | ast_matchers::MatchFinder Finder; 64 | Instr.registerMatchers(Finder); 65 | std::unique_ptr Factory = 66 | tooling::newFrontendActionFactory(&Finder); 67 | 68 | auto Ret = Tool.run(Factory.get()); 69 | if (!Ret) 70 | if (!applyReplacements(Tool)) { 71 | llvm::errs() << "Failed to overwrite the input files.\n"; 72 | return 1; 73 | } 74 | 75 | return Ret; 76 | } 77 | 78 | 79 | } // namespace 80 | 81 | int main(int argc, const char **argv) { 82 | auto ExpectedParser = 83 | CommonOptionsParser::create(argc, argv, ToolOptions); 84 | if (!ExpectedParser) { 85 | llvm::errs() << ExpectedParser.takeError(); 86 | return 1; 87 | } 88 | CommonOptionsParser &OptionsParser = ExpectedParser.get(); 89 | 90 | const auto &Compilations = OptionsParser.getCompilations(); 91 | const auto &Files = OptionsParser.getSourcePathList(); 92 | RefactoringTool Tool(Compilations, Files); 93 | int Result = 0; 94 | Result = runToolOnCode(Tool); 95 | if (Result) { 96 | llvm::errs() << "Something went wrong...\n"; 97 | return Result; 98 | } 99 | return 0; 100 | } -------------------------------------------------------------------------------- /synthesizer/synthesizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os, re, tempfile, sys, argparse, shutil 3 | from datetime import datetime 4 | from copy import deepcopy, copy 5 | import random 6 | import subprocess as sp 7 | import ctypes 8 | from enum import Enum, auto 9 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 10 | from databaseconstructor.functioner import * 11 | from databaseconstructor.variable import * 12 | 13 | CC1 = "gcc" # use two compilers to avoid unspecified behavior 14 | CC2 = "clang" 15 | NUM_ENV = 5 # number of env variables used for each tag, "1" means one env_val, e.g., Tag1:tag_val:env_val 16 | PROFILER = f"{os.path.dirname(__file__)}/../profiler/build/bin/profiler --mode=expr" 17 | CSMITH_HOME = os.environ["CSMITH_HOME"] 18 | 19 | INVALID_TAG_VALUE = 111 # we use this value to indicate invalid tag values 20 | 21 | class CMD(Enum): 22 | OK = auto() 23 | Timeout = auto() 24 | Error = auto() 25 | 26 | class VarValue(Enum): 27 | STABLE = auto() # value has been the same 28 | UNSTABLE = auto() # value has been changing 29 | 30 | class Var: 31 | """Variable""" 32 | var_name:str # variable name 33 | var_type:str # variable type as string 34 | var_value:int # value 35 | is_stable:bool = True # if the variable values is stable, i.e., never changed or len(set(values))<=1. 36 | is_constant:bool=False # variable has "const" keyword 37 | is_global:bool=False # if the vairable has global storage 38 | scope_id:int=-1 # scope id of the variable 39 | 40 | class Tag: 41 | """Tag""" 42 | tag_id:int 43 | tag_str:str # the original tag string showsn in the source file 44 | tag_check_strs:list[str]=[] # inserted tag and tagcheck strings 45 | tag_var:Var # tagged variable 46 | tag_envs:list[Var] # env vairales 47 | statement_id:int # id of the statement that the Tag belongs to 48 | is_statement:bool = False # if this tag is a stand-alone statement 49 | 50 | class ScopeTree: 51 | def __init__(self, id:int) -> None: 52 | self.parent = None 53 | self.children = [] 54 | self.id = id 55 | 56 | def run_cmd(cmd, timeout=10, DEBUG=False): 57 | if type(cmd) is not list: 58 | cmd = cmd.split(' ') 59 | if DEBUG: 60 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">>run_cmd: \n", ' '.join(cmd), flush=True) 61 | ret, out = CMD.OK, '' 62 | try: 63 | process = sp.run(cmd, timeout=timeout, capture_output=True) 64 | if DEBUG: 65 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">>run_cmd: exit.", flush=True) 66 | out = process.stdout.decode("utf-8") 67 | if process.returncode != 0: 68 | ret = CMD.Error 69 | except sp.TimeoutExpired: 70 | if DEBUG: 71 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">>run_cmd: time out.", flush=True) 72 | ret = CMD.Timeout 73 | if DEBUG: 74 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">>run_cmd: done.", flush=True) 75 | return ret, out 76 | 77 | def strip_type_str(ori_type_str:str)->str: 78 | '''Strip type str to remove keywords like const, static, volatile, etc.''' 79 | return ori_type_str\ 80 | .replace('static', '')\ 81 | .replace('const', '')\ 82 | .replace('volatile', '')\ 83 | .strip() 84 | 85 | MAX_CONST_CCOMP = 4611686018427387904 # 2**62, CompCert cannot handle constant values larger than this 86 | 87 | class Synthesizer: 88 | def __init__(self, func_database:str, prob:int) -> None: 89 | assert 0 < prob <= 100 90 | self.prob = prob 91 | self.functionDB = FunctionDB(func_database) 92 | 93 | def static_analysis(self, src_file): 94 | """ 95 | Statically analyze the source file to (1) get tag_var_name for each tag 96 | and (2) get all variables' values and stabibility information. 97 | """ 98 | # get global/local information of each tag 99 | with open(src_file, "r") as f: 100 | code = f.read() 101 | static_tags = re.findall(r'(Tag(\d+)\(\/\*(.*?):(\w+):(\w+):(\w+):(\w+)\*\/(.*?)\))', code) 102 | self.tags = {} 103 | self.scope_up = {} # key:val ==> child_scope:parent_scope 104 | self.scope_down = {} # key:[val] ==> parent_scope:[child_scope(s)] 105 | for tag_info in static_tags: 106 | tag_str, tag_id, tag_type_str, scope_curr_id, scope_parent_id, stmt_id, tag_style, tag_var_name = tag_info[:] 107 | tag_id = int(tag_id) 108 | scope_curr_id = int(scope_curr_id) 109 | scope_parent_id = int(scope_parent_id) 110 | assert tag_id not in self.tags 111 | new_var = Var() 112 | new_var.scope_id = scope_curr_id 113 | new_var.is_constant = "const" in tag_type_str 114 | new_var.var_name = tag_var_name 115 | new_var.var_type = strip_type_str(tag_type_str) 116 | new_var.is_global = scope_curr_id == 0 117 | 118 | new_tag = Tag() 119 | new_tag.tag_str = tag_str 120 | new_tag.is_statement = tag_style == 's' 121 | new_tag.tag_var = new_var 122 | new_tag.tag_envs = [] 123 | new_tag.statement_id = int(stmt_id) 124 | self.tags[tag_id] = new_tag 125 | #construct scope_up tree 126 | if scope_curr_id not in self.scope_up: 127 | self.scope_up[scope_curr_id] = scope_parent_id 128 | else: 129 | assert self.scope_up[scope_curr_id] == scope_parent_id 130 | 131 | def valid_scope(self, from_scope:int, to_scope:int) -> bool: 132 | """Identify if we can access something in to_scope from from_scope""" 133 | if to_scope == 0: # global 134 | return True 135 | if from_scope == to_scope: 136 | return True 137 | child_scope = from_scope 138 | while True: 139 | if self.scope_up[child_scope] == to_scope: 140 | return True 141 | child_scope = self.scope_up[child_scope] 142 | if child_scope not in self.scope_up or child_scope == self.scope_up[child_scope]: 143 | break 144 | return False 145 | 146 | def get_envs(self, tag_id, env_num=1): 147 | """ 148 | Get env vars for the given tag 149 | """ 150 | curr_scope_id = self.tags[tag_id].tag_var.scope_id 151 | curr_tag_var_name = self.tags[tag_id].tag_var.var_name 152 | tag_id_list = list(self.tags.keys()) 153 | tag_index = tag_id_list.index(tag_id) 154 | MAX_STEP = 20 # search backward or forward for MAX_STEP tags 155 | envs = [] 156 | env_vars = [] 157 | # for k in range(max(0, tag_index-MAX_STEP), min(len(tag_id_list), tag_index+MAX_STEP)): # search both upward and downward 158 | for k in range(max(0, tag_index-MAX_STEP), tag_index): # search upward only to avoid use uninitialized variable 159 | env_tag_id = tag_id_list[k] 160 | if self.tags[env_tag_id].tag_var.var_name == curr_tag_var_name: 161 | continue 162 | #FIXME: this is a work around to avoid using uninitialized i,j,k in csmith generated prgrams 163 | if self.tags[env_tag_id].tag_var.var_name in ['i', 'j', 'k']: 164 | continue 165 | if self.valid_scope(from_scope=curr_scope_id, to_scope=self.tags[env_tag_id].tag_var.scope_id): 166 | if self.tags[env_tag_id].tag_var.var_name not in env_vars: 167 | envs.append(env_tag_id) 168 | env_vars.append(self.tags[env_tag_id].tag_var.var_name) 169 | random.shuffle(envs) 170 | return envs[:env_num] 171 | 172 | def construct_tag_def(self, tag_id:int, var_types:list[str]) -> str: 173 | """Construct Tag definition""" 174 | return_type = var_types[0] 175 | fmt_strs = "" 176 | for var_ty in var_types: 177 | fmt_strs += f':%"{VarType.get_format(VarType.from_str(var_ty))}"' 178 | v_para_strs = ",".join([f'v{var_i}' for var_i in range(len(var_types))]) 179 | print_tag = f'printf("Tag{tag_id}{fmt_strs}\\n", {v_para_strs});' 180 | var_defs = [] 181 | count_defs = [] 182 | last_defs = [] 183 | all_i_ones = [] 184 | last_assigns = [] 185 | else_ifs = [] 186 | for var_i, var_ty in enumerate(var_types): 187 | var_defs.append(f'{var_ty} v{var_i}') 188 | count_defs.append(f'static char i{var_i}=0;') 189 | last_defs.append(f'static {var_ty} last_v{var_i}=0;') 190 | all_i_ones.append(f'i{var_i}=1;') 191 | last_assigns.append(f'last_v{var_i}=v{var_i};') 192 | else_ifs.append(f'else if(i{var_i}==1&&v{var_i}!=last_v{var_i}){{{print_tag}i{var_i}=2;}}') 193 | 194 | tag_def = \ 195 | f'{return_type} Tag{tag_id}({",".join(var_defs)}){{ \ 196 | {" ".join(count_defs)} \ 197 | {" ".join(last_defs)} \ 198 | if (i0 == 0) {{ \ 199 | {"".join(all_i_ones)} \ 200 | {print_tag} \ 201 | {"".join(last_assigns)}\ 202 | }} \ 203 | {"".join(else_ifs)}\ 204 | return v0; \ 205 | }}' 206 | return tag_def 207 | 208 | def add_tags(self, src_file): 209 | """ 210 | Add Tags for later profiling 211 | """ 212 | with open(src_file, 'r') as f: 213 | src = f.read() 214 | for tag_id in self.tags: 215 | envs = self.get_envs(tag_id, env_num=NUM_ENV) 216 | for env_id in envs: 217 | self.tags[tag_id].tag_envs.append(deepcopy(self.tags[env_id].tag_var)) 218 | envs = [tag_id] + envs # add self as the first env 219 | envs_str = ','.join([self.tags[env_id].tag_var.var_name if '*' not in self.tags[env_id].tag_var.var_name else '&({var})==0?{invalid}:{var}'.format(var=self.tags[env_id].tag_var.var_name, invalid=INVALID_TAG_VALUE) for env_id in envs]) 220 | # place TagBefore check Call 221 | bef_tag_call = f'/*bef*/Tag{tag_id}({envs_str});' 222 | src = src.replace(f'/*bef_stmt:{self.tags[tag_id].statement_id}*/', bef_tag_call, 1) 223 | self.tags[tag_id].tag_check_strs.append(bef_tag_call+"\n") 224 | # place Tag call 225 | tag_call = f'/*tag*/Tag{tag_id}({envs_str})' 226 | src = src.replace(self.tags[tag_id].tag_str, tag_call, 1) 227 | # self.tags[tag_id].tag_str = tag_call 228 | # place TagAfter check Call 229 | aft_tag_call = f'/*aft*/Tag{tag_id}({envs_str});' 230 | src = src.replace(f'/*aft_stmt:{self.tags[tag_id].statement_id}*/', aft_tag_call, 1) 231 | self.tags[tag_id].tag_check_strs.append(aft_tag_call+"\n") 232 | # replace Tag declaration 233 | var_types = [] 234 | for env in envs: 235 | var_types.append(self.tags[env].tag_var.var_type) 236 | tag_defs = "\n" + self.construct_tag_def(tag_id, var_types) 237 | src = src.replace(f"#define Tag{tag_id}(x) (x)", tag_defs, 1) 238 | self.tags[tag_id].tag_check_strs.append(tag_defs + "\n") 239 | with open(src_file, 'w') as f: 240 | f.write(src) 241 | 242 | 243 | def profiling(self, filename): 244 | """ 245 | Instrument file with profiler; 246 | Run and collect values. 247 | """ 248 | # profiling 249 | ret, _ = run_cmd(f"{PROFILER} {filename} -- -I{CSMITH_HOME}/include", DEBUG=self.DEBUG) 250 | if ret != CMD.OK: 251 | raise SynthesizerError 252 | 253 | # further synthesis will be based on self.src_syn instead of self.src to avoid heavy removal of useless tags after synthesis. 254 | with open(filename, 'r') as f: 255 | self.src_syn_orig = f.read() 256 | 257 | self.static_analysis(filename) 258 | self.add_tags(filename) 259 | 260 | with tempfile.NamedTemporaryFile(suffix=".out", delete=True) as tmp_f: 261 | tmp_f.close() 262 | exe_out = tmp_f.name 263 | # run with CC1 264 | ret, _ = run_cmd(f"{CC1} -I{CSMITH_HOME}/include -w -O0 {filename} -o {exe_out}", DEBUG=self.DEBUG) 265 | if ret != CMD.OK: 266 | if os.path.exists(exe_out): 267 | os.remove(exe_out) 268 | raise SynthesizerError 269 | ret, profile_out_1 = run_cmd(exe_out, timeout=3, DEBUG=self.DEBUG) 270 | if ret != CMD.OK: 271 | os.remove(exe_out) 272 | raise SynthesizerError 273 | os.remove(exe_out) 274 | env_re_str = ":".join([':?([-|\d]+)?']*(NUM_ENV)) #@FIXME: no need to have exact NUM_ENV env vars here, now a temp fix is shown below and thus env_re_str is useless. 275 | 276 | raw_values_1 = [[item.split(':')[0].replace('Tag', '')]+[x for x in item.split(':')[1:] if x != ''] for item in profile_out_1.split() if 'Tag' in item] 277 | 278 | if self.DEBUG: 279 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), f">>length of raw_values: {len(raw_values_1)}", flush=True) 280 | # construct tags 281 | self.alive_tags = [] 282 | # get values and check stability with raw_values_1 283 | checked_tag_id = [] # all tag_id that have been checked. A tag's env is not stable if it has never been checked. 284 | for i in range(len(raw_values_1)): 285 | tag_info = raw_values_1[i] 286 | curr_tag_id = int(tag_info[0]) 287 | curr_num_env = len(tag_info) - 2 288 | curr_tag_var_value = int(tag_info[1]) 289 | curr_tag_env_value_list = [] if curr_num_env == 0 else list(map(int, tag_info[2:])) 290 | # Test the stability of the tag_var 291 | if hasattr(self.tags[curr_tag_id].tag_var, "var_value"): 292 | if curr_tag_var_value != self.tags[curr_tag_id].tag_var.var_value: 293 | self.tags[curr_tag_id].tag_var.is_stable = False 294 | else: 295 | self.tags[curr_tag_id].tag_var.var_value = curr_tag_var_value 296 | if curr_tag_var_value == INVALID_TAG_VALUE: # invalid tag value because of null pointer. should only in env vars 297 | self.tags[curr_tag_id].tag_var.is_stable = False 298 | # Test the stability of each env var 299 | for env_i in range(curr_num_env): 300 | if hasattr(self.tags[curr_tag_id].tag_envs[env_i], "var_value"): 301 | if curr_tag_env_value_list[env_i] != self.tags[curr_tag_id].tag_envs[env_i].var_value: 302 | self.tags[curr_tag_id].tag_envs[env_i].is_stable = False 303 | checked_tag_id.append(curr_tag_id) # if we are not assigning the value for the first time, the value is now checked. 304 | else: 305 | self.tags[curr_tag_id].tag_envs[env_i].var_value =curr_tag_env_value_list[env_i] 306 | if curr_tag_env_value_list[env_i] == INVALID_TAG_VALUE: # invalid tag value because of null pointer. should only in env vars 307 | self.tags[curr_tag_id].tag_envs[env_i].is_stable = False 308 | if curr_tag_id not in self.alive_tags: 309 | self.alive_tags.append(curr_tag_id) 310 | # all tag_id that have been checked. A tag's env is not stable if it has never been checked. 311 | for tag_id in self.alive_tags: 312 | if tag_id not in checked_tag_id: 313 | for env_i in range(len(self.tags[tag_id].tag_envs)): 314 | self.tags[tag_id].tag_envs[env_i].is_stable = False 315 | 316 | 317 | def remove_valuetag(self): 318 | """ 319 | Remove a ValueTag from source file 320 | """ 321 | for tag_id in self.tags: 322 | self.src = self.src.replace(f"#define Tag{tag_id}(x) (x)\n", "") 323 | if self.tags[tag_id].is_statement: 324 | self.src = self.src.replace(self.tags[tag_id].tag_str, '') 325 | else: 326 | self.src =self.src.replace(self.tags[tag_id].tag_str, self.tags[tag_id].tag_var.var_name) 327 | for tag_check_str in self.tags[tag_id].tag_check_strs: 328 | self.src = self.src.replace(tag_check_str, '') 329 | self.src = re.sub(r'[\w|_|\s|*]+ Tag\d+\(.*\)\{.*\}\n', '', self.src) 330 | 331 | def ignore_typedef(self, _typedef:str) -> bool: 332 | ignored_typedef = [ 333 | "int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "char" 334 | ] 335 | for ignored in ignored_typedef: 336 | if f'{ignored};' in _typedef: 337 | return True 338 | return False 339 | 340 | def insert_func_decl(self, func_id_list): 341 | # locate the last header include 342 | headers = re.findall(r'(#include.*)', self.src_syn) 343 | if len(headers) == 0: 344 | header_end_loc = 0 345 | else: 346 | header_end_loc = self.src_syn.index(headers[-1]) + len(headers[-1]) 347 | # insert the function declaration 348 | for func_id in list(set(func_id_list)): 349 | for misc in self.functionDB[func_id].misc: 350 | if not self.ignore_typedef(misc): 351 | misc = "\n" + misc + "\n" 352 | self.src_syn = self.src_syn[:header_end_loc] + misc + self.src_syn[header_end_loc:] 353 | header_end_loc += len(misc) 354 | 355 | function_body = self.functionDB[func_id].function_body 356 | #FIXME: the added attribute may be incompatible with existing function attributes from the database. Can use this feature again if attributes are removed from the database. 357 | # prob_attr = random.randint(0, 100-1) 358 | # if prob_attr > 50: 359 | # function_body = "inline __attribute__((always_inline))\n" + function_body 360 | function_body = "\n" + function_body + "\n" 361 | self.src_syn = self.src_syn[:header_end_loc] + function_body + self.src_syn[header_end_loc:] 362 | header_end_loc += len(function_body) 363 | 364 | def synthesize_input(self, env_vars:list[Var], func_inp_list:list[str], func_inp_types:list[VarType]): 365 | """Synthesize input to the target function call with environmental variables""" 366 | new_inp_list = [] 367 | for inp_i in range(len(func_inp_list)): 368 | inp_value = int(func_inp_list[inp_i]) 369 | if len(env_vars) > 0: 370 | env = random.choice(env_vars) 371 | env_value_cast = VarType.get_ctypes(func_inp_types[inp_i], env.var_value).value 372 | if abs(env_value_cast) > MAX_CONST_CCOMP: 373 | new_inp_list.append(f"{inp_value}") 374 | else: 375 | new_inp_list.append(f"({VarType.to_str(func_inp_types[inp_i])})({env.var_name})+({inp_value - env_value_cast})") 376 | else: 377 | new_inp_list.append(f"{inp_value}") 378 | return new_inp_list 379 | 380 | def synthesize_output(self, env_vars:list[Var], func_out, func_return_type:VarType): 381 | """Synthesize output to make sure the function return a value in a reasonable range""" 382 | ret_val_min, ret_val_max = VarType.get_range(func_return_type) # the range of the return value 383 | func_return_type = VarType.get_base_type(func_return_type) #FIXME: This is a bug in generating proxy functions where we did not convert the proxy return type. This has been fixed in proxy.py but need to regenerate function datebase to get rid of this work-around 384 | func_out = int(func_out) 385 | output_str = "" 386 | output = func_out 387 | # if func_out already exceeds the range limit 388 | if not (ret_val_min <= func_out <= ret_val_max): 389 | output_str += f'-({func_out})' 390 | output = 0 391 | for env in env_vars: 392 | env_value_cast = VarType.get_ctypes(func_return_type, env.var_value).value 393 | if abs(env_value_cast) > MAX_CONST_CCOMP or abs(env_value_cast+output) > MAX_CONST_CCOMP: 394 | continue 395 | if ret_val_min <= env_value_cast+output <= ret_val_max: 396 | output_str += f'+({VarType.to_str(func_return_type)})({env.var_name})' 397 | output += env_value_cast 398 | else: 399 | output_str += f'+(({VarType.to_str(func_return_type)})({env.var_name})-({env_value_cast}))' 400 | return output_str, output 401 | 402 | def replace_valuetag_with_func(self, tag_id:int, tgt_func_idx:int): 403 | """ 404 | Replace a ValueTag with the selected function call 405 | """ 406 | # use stable tag_var and env_vars for synthesis 407 | stable_env_vars = [] 408 | if self.tags[tag_id].tag_var.is_stable: 409 | stable_env_vars.append(self.tags[tag_id].tag_var) 410 | for env in self.tags[tag_id].tag_envs: 411 | if env.is_stable: 412 | stable_env_vars.append(env) 413 | 414 | # randomly select an io pair of the tgt_func 415 | func_inp_list, func_out = random.choice(self.functionDB[tgt_func_idx].io_list) 416 | new_input_str = self.synthesize_input(stable_env_vars, func_inp_list, self.functionDB[tgt_func_idx].args_type) 417 | new_output_str, new_output = self.synthesize_output(stable_env_vars, func_out, self.functionDB[tgt_func_idx].return_type) 418 | 419 | # synthesize func_call for expr, make sure to restore the value of the expr 420 | if not self.tags[tag_id].is_statement: 421 | func_call = "(({tag_type})({call_name}({input}){output})+{tag_var_value})".format( 422 | tag_type=self.tags[tag_id].tag_var.var_type, 423 | call_name=self.functionDB[tgt_func_idx].call_name, 424 | input=", ".join(new_input_str), 425 | output=f"{new_output_str}-({new_output})", 426 | tag_var_value=self.tags[tag_id].tag_var.var_name, 427 | ) 428 | # for statement tag, we also want to assign the function call to a stable env variable 429 | else: 430 | func_call = "({call_name}({input}){output})".format( 431 | call_name=self.functionDB[tgt_func_idx].call_name, 432 | input=",".join(new_input_str), 433 | output=new_output_str 434 | ) 435 | restore_env = None 436 | if not self.tags[tag_id].tag_var.is_constant: 437 | restore_env = self.tags[tag_id].tag_var 438 | elif len(stable_env_vars) > 0: 439 | restore_env = random.choice(stable_env_vars) 440 | if restore_env is not None and not restore_env.is_constant: 441 | func_call = f'{restore_env.var_name} = ({restore_env.var_type})({func_call}-({new_output}))+({restore_env.var_value});' 442 | 443 | # insert the function call 444 | self.src_syn = self.src_syn.replace(self.tags[tag_id].tag_str, f'/*TAG{tag_id}:STA*/' + func_call + f'/*TAG{tag_id}:END:{self.tags[tag_id].tag_var.var_name}*/') 445 | 446 | 447 | def synthesizer(self, src_filename:str, num_mutant:int=1, DEBUG:bool=False): 448 | """ 449 | Synthesize a source file by replacing variables/constants with function calls. 450 | """ 451 | self.tags = {} # all tags information 452 | self.vars = {} # all variale information 453 | self.tag_id_list = [] # tag_id in a sequential order as appeared in the execution 454 | self.scope_up = {} # key:val ==> child_scope:parent_scope 455 | self.scope_down = {} # key:[val] ==> parent_scope:[child_scope(s)] 456 | self.alive_tags = [] # all alive tag id 457 | 458 | self.DEBUG = DEBUG 459 | assert num_mutant >= 1 460 | # backup src file 461 | tmp_f = tempfile.NamedTemporaryFile(suffix=".c", delete=False) 462 | tmp_f.close 463 | shutil.copy(src_filename, tmp_f.name) 464 | # insert ValueTag 465 | if self.DEBUG: 466 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">profiling start", flush=True) 467 | self.profiling(tmp_f.name) 468 | if self.DEBUG: 469 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">profiling end", flush=True) 470 | with open(tmp_f.name, "r") as f: 471 | self.src_orig = f.read() 472 | os.remove(tmp_f.name) 473 | # sythesis 474 | all_syn_files = [] 475 | if len(self.alive_tags) == 0: 476 | return all_syn_files 477 | for num_i in range(num_mutant): 478 | if self.DEBUG: 479 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">synthesize mutatant start", num_i, flush=True) 480 | self.src = copy(self.src_orig) 481 | self.src_syn = copy(self.src_syn_orig) 482 | replaced_valuetag = [] 483 | inserted_func_ids = [] 484 | for tag_id in self.alive_tags: 485 | # randomly decide if we want to replace this value 486 | if tag_id in replaced_valuetag or random.randint(0, 100) > self.prob: 487 | continue #skip this value 488 | # randomly select a function from database 489 | while True: 490 | tgt_func_idx = random.randint(0, len(self.functionDB)-1) 491 | if self.functionDB[tgt_func_idx].has_io: 492 | break 493 | # replace the ValueTag with the selected function 494 | self.replace_valuetag_with_func(tag_id, tgt_func_idx) 495 | replaced_valuetag.append(tag_id) 496 | inserted_func_ids.append(tgt_func_idx) 497 | # self.remove_valuetag() # we don't do this now because this removal is too costly and it has no impact on the semantics of the synthesized program. 498 | self.insert_func_decl(inserted_func_ids) 499 | dst_filename = f'{os.path.splitext(src_filename)[0]}_syn{num_i}.c' 500 | with open(dst_filename, "w") as f: 501 | f.write(self.src_syn) 502 | all_syn_files.append(dst_filename) 503 | if self.DEBUG: 504 | print(datetime.now().strftime("%d/%m/%Y %H:%M:%S"), ">synthesize mutatant end", num_i, flush=True) 505 | 506 | return all_syn_files 507 | 508 | 509 | class SynthesizerError(Exception): 510 | pass 511 | 512 | 513 | if __name__=='__main__': 514 | 515 | parser = argparse.ArgumentParser(description='Synthesize a new program based on a seed program and a function database.') 516 | parser.add_argument('--src', dest='SRC', required=True, help='path to the seed program.') 517 | parser.add_argument('--dst', dest='DST', required=True, help='path to the destination program.') 518 | parser.add_argument('--db', dest='DB', required=True, help='path to the function database json file.') 519 | args = parser.parse_args() 520 | if not os.path.exists(args.SRC): 521 | print(f"File {args.SRC} does not exist!") 522 | parser.print_help() 523 | exit(1) 524 | if not os.path.exists(args.DB): 525 | print(f"File {args.DB} does not exist!") 526 | parser.print_help() 527 | exit(1) 528 | 529 | syner = Synthesizer(args.DB, prob=100) 530 | try: 531 | all_syn_files = syner.synthesizer(args.SRC, num_mutant=1) 532 | except SynthesizerError: 533 | print("SynthesizerError (OK).") 534 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UniCodeSphere/Creal/6851a38e605ab2db77c5190817d72c97c694b900/utils/__init__.py -------------------------------------------------------------------------------- /utils/compcert.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dataclasses import dataclass 3 | import re 4 | import subprocess 5 | import tempfile 6 | from itertools import chain 7 | from pathlib import Path 8 | from shutil import which 9 | 10 | from diopter.utils import CommandOutput, run_cmd, run_cmd_async, temporary_file 11 | from diopter.compiler import SourceProgram, Language, CompileError 12 | 13 | 14 | @dataclass(frozen=True, kw_only=True) 15 | class CComp: 16 | """A ccomp(compcert) instance. 17 | 18 | Attributes: 19 | exe (Path): path to compcert/ccomp 20 | """ 21 | 22 | exe: Path 23 | # TODO: timeout_s: int = 8 24 | 25 | @staticmethod 26 | def get_system_ccomp(): 27 | """Returns: 28 | CComp: 29 | the system's ccomp 30 | """ 31 | 32 | ccomp = which("ccomp") 33 | if not ccomp: 34 | return None 35 | return CComp(exe=Path(ccomp).resolve(strict=True)) 36 | 37 | def check_program( 38 | self, program: SourceProgram, timeout: int | None = None, debug: bool = False, 39 | additional_flags: list[str] = [], 40 | ) -> bool: 41 | """Checks the input program for errors using ccomp's interpreter mode. 42 | 43 | Args: 44 | program (SourceProgram): the input program 45 | timeout (int | None): timeout in seconds for the checking 46 | debug (bool): if true ccomp's output will be printed on failure 47 | 48 | Returns: 49 | bool: 50 | was the check successful? 51 | """ 52 | assert program.language == Language.C 53 | 54 | # ccomp doesn't like these 55 | code = re.sub(r"__asm__ [^\)]*\)", r"", program.get_modified_code()) 56 | 57 | tf = temporary_file(contents=code, suffix=".c") 58 | cmd = ( 59 | [ 60 | str(self.exe), 61 | str(tf.name), 62 | "-interp", 63 | "-fall", 64 | ] 65 | + [ 66 | f"-I{ipath}" 67 | for ipath in chain(program.include_paths, program.system_include_paths) 68 | ] 69 | + [f"-D{macro}" for macro in program.defined_macros] 70 | + additional_flags 71 | ) 72 | try: 73 | result = run_cmd( 74 | cmd, 75 | additional_env={"TMPDIR": str(tempfile.gettempdir())}, 76 | timeout=timeout, 77 | ) 78 | except subprocess.CalledProcessError as e: 79 | if debug: 80 | print(CompileError.from_called_process_exception(" ".join(cmd), e)) 81 | return False 82 | return result --------------------------------------------------------------------------------