├── backup └── empty ├── prepare.sh ├── knowledges ├── README.md ├── class.py ├── function.py ├── class.php ├── dataflow.php ├── function.php └── seed-preprocessing.py ├── docker └── Dockerfile ├── prepare.py ├── README.md ├── dataflow.py ├── mutator.py ├── bot.py ├── reduce.py ├── LICENSE ├── main.py └── fuse.py /backup/empty: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir phpt_seeds; 3 | mkdir phpt_deps; 4 | git clone https://github.com/php/php-src.git; 5 | cd php-src; 6 | git clone https://github.com/php/php-langspec.git; # this also contains many phpt files 7 | find ./ -name "*.phpt" > /tmp/flowfusion-prepare.log; 8 | cd ..; python3 prepare.py; -------------------------------------------------------------------------------- /knowledges/README.md: -------------------------------------------------------------------------------- 1 | FlowFusion uses pre-collected knowledges about PHP functions, classes and test cases to avoid parsing them during the runtime. 
2 | 3 | 4 | you need the following knowledge base: 5 | 6 | apis.db: run `php function.php` first, then run `python3 function.py` 7 | 8 | class.db: run `php class.php` first, then run `python3 class.py` 9 | 10 | seeds.db: run `python3 seed-preprocessing.py` -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Ubuntu 22.04 as a base image 2 | FROM ubuntu:22.04 3 | 4 | # Set the maintainer label 5 | LABEL maintainer="yuancheng@comp.nus.edu.sg" 6 | 7 | # Prevent interactive prompts from appearing during package installation 8 | ARG DEBIAN_FRONTEND=noninteractive 9 | 10 | # Update the package list and install essential packages 11 | RUN apt-get update && \ 12 | apt-get upgrade -y && \ 13 | apt-get install -y \ 14 | build-essential \ 15 | curl \ 16 | wget \ 17 | git \ 18 | vim \ 19 | clang-12 \ 20 | autoconf \ 21 | bison \ 22 | re2c \ 23 | libxml2-dev \ 24 | libsqlite3-dev \ 25 | tmux \ 26 | pkg-config \ 27 | sudo \ 28 | gcovr \ 29 | lcov \ 30 | zip \ 31 | tmux \ 32 | sqlite3 \ 33 | valgrind \ 34 | php8.1-dev \ 35 | php-pear \ 36 | python3 \ 37 | python3-pip \ 38 | libpng-dev \ 39 | libonig-dev \ 40 | libssl-dev \ 41 | libbz2-dev \ 42 | libcurl4-openssl-dev \ 43 | libenchant-2-dev \ 44 | libgmp-dev \ 45 | libldap-dev \ 46 | libedit-dev \ 47 | libmm-dev \ 48 | libsnmp-dev \ 49 | libsodium-dev \ 50 | libzip-dev \ 51 | libxslt-dev \ 52 | libwebp-dev \ 53 | libjpeg-dev \ 54 | libfreetype-dev \ 55 | libpq-dev \ 56 | && apt-get clean \ 57 | && rm -rf /var/lib/apt/lists/* 58 | 59 | # Add a default user "phpfuzz" with password "phpfuzz" and add to sudo group 60 | RUN useradd -m phpfuzz && \ 61 | echo "phpfuzz:phpfuzz" | chpasswd && \ 62 | usermod -aG sudo phpfuzz 63 | 64 | RUN pecl install ast 65 | 66 | RUN echo "extension=ast.so" >> /etc/php/8.1/cli/php.ini 67 | 68 | WORKDIR /home/phpfuzz/WorkSpace 69 | 70 | RUN chown -R phpfuzz 
/home/phpfuzz/WorkSpace 71 | 72 | USER phpfuzz 73 | 74 | # Define the default command 75 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /knowledges/class.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sqlite3 3 | 4 | # Read the JSON data from class.json 5 | with open('class.json', 'r') as f: 6 | data = json.load(f) 7 | 8 | # Connect to the SQLite database (it will be created if it doesn't exist) 9 | conn = sqlite3.connect('class.db') 10 | cursor = conn.cursor() 11 | 12 | # Create tables 13 | cursor.execute(''' 14 | CREATE TABLE IF NOT EXISTS classes ( 15 | id INTEGER PRIMARY KEY AUTOINCREMENT, 16 | class_name TEXT UNIQUE 17 | ) 18 | ''') 19 | 20 | cursor.execute(''' 21 | CREATE TABLE IF NOT EXISTS attributes ( 22 | id INTEGER PRIMARY KEY AUTOINCREMENT, 23 | class_id INTEGER, 24 | name TEXT, 25 | FOREIGN KEY (class_id) REFERENCES classes (id) 26 | ) 27 | ''') 28 | 29 | cursor.execute(''' 30 | CREATE TABLE IF NOT EXISTS methods ( 31 | id INTEGER PRIMARY KEY AUTOINCREMENT, 32 | class_id INTEGER, 33 | name TEXT, 34 | params_count INTEGER, 35 | FOREIGN KEY (class_id) REFERENCES classes (id) 36 | ) 37 | ''') 38 | 39 | # Insert data into the tables 40 | for class_info in data: 41 | class_name = class_info['class_name'] 42 | 43 | # Insert the class name into the classes table 44 | cursor.execute('INSERT OR IGNORE INTO classes (class_name) VALUES (?)', (class_name,)) 45 | conn.commit() 46 | 47 | # Get the class_id of the inserted or existing class 48 | cursor.execute('SELECT id FROM classes WHERE class_name = ?', (class_name,)) 49 | class_id = cursor.fetchone()[0] 50 | 51 | # Insert attributes 52 | for attr_name in class_info.get('attributes', []): 53 | cursor.execute('INSERT INTO attributes (class_id, name) VALUES (?, ?)', (class_id, attr_name)) 54 | 55 | # Insert methods 56 | for method_info in class_info.get('methods', []): 57 | method_name = 
def insert_data(conn, data):
    """Insert function descriptions and their parameters into the database.

    The whole import runs inside a single transaction: it is committed when
    every row was written and rolled back when any row fails, so a broken
    record can never leave a half-imported data set behind on the connection.

    Args:
        conn: An open ``sqlite3`` connection whose schema already contains the
            ``functions`` and ``parameters`` tables.
        data: An iterable of dicts. Each dict is read through the keys
            ``name``, ``num_params`` (0 when absent) and ``params`` (empty
            list when absent). Every entry of ``params`` is a dict read
            through ``name``, ``type``, ``is_optional`` and ``default_value``.

    Raises:
        sqlite3.Error: If a row is rejected by the database (for example a
            missing ``name`` violating ``NOT NULL``). Nothing from this call
            is persisted in that case.
    """
    # Using the connection as a context manager commits on success and rolls
    # back if the body raises; the exception itself still propagates.
    with conn:
        cursor = conn.cursor()

        for function in data:
            cursor.execute(
                'INSERT INTO functions (name, num_params) VALUES (?, ?)',
                (function.get('name'), function.get('num_params', 0)),
            )
            # Read the generated key before any further statement runs.
            function_id = cursor.lastrowid

            parameter_rows = []
            for param in function.get('params', []):
                default_value = param.get('default_value')
                # Defaults are stored as text; None stays NULL in the table.
                if default_value is not None:
                    default_value = str(default_value)
                parameter_rows.append((
                    function_id,
                    param.get('name'),
                    param.get('type'),
                    1 if param.get('is_optional') else 0,
                    default_value,
                ))

            cursor.executemany(
                'INSERT INTO parameters '
                '(function_id, name, type, is_optional, default_value) '
                'VALUES (?, ?, ?, ?, ?)',
                parameter_rows,
            )
PHPT file from its current location to the 'phpt_seeds' directory 20 | os.system(f"mv {each_phpt} ../phpt_seeds/") 21 | 22 | print("PHPT seeds are ready") 23 | 24 | # List to store the unique folders that contain PHPT files 25 | folders = [] 26 | 27 | # Extract folder paths from the list of PHPT files 28 | for eachline in phpts: 29 | folder = "/".join(eachline.split("/")[:-1]) + "/" 30 | if folder not in folders: 31 | folders.append(folder) 32 | 33 | # Begin preparing dependencies by copying required files from each folder 34 | print("Preparing dependencies") 35 | for each_folder in folders: 36 | if each_folder=='/': 37 | continue 38 | # Copy all files from each folder to 'phpt_deps' directory 39 | os.system(f"cp -r {each_folder}* ../phpt_deps 2>/dev/null") 40 | 41 | print("Dependencies are ready") 42 | 43 | print("===start configuring===") 44 | os.system("./buildconf") 45 | os.system('CC="clang-12" CXX="clang++-12" CFLAGS="-DZEND_VERIFY_TYPE_INFERENCE" CXXFLAGS="-DZEND_VERIFY_TYPE_INFERENCE" ./configure --enable-debug --enable-address-sanitizer --enable-undefined-sanitizer --enable-re2c-cgoto --enable-fpm --enable-litespeed --enable-phpdbg-debug --enable-zts --enable-bcmath --enable-calendar --enable-dba --enable-dl-test --enable-exif --enable-ftp --enable-gd --enable-gd-jis-conv --enable-mbstring --enable-pcntl --enable-shmop --enable-soap --enable-sockets --enable-sysvmsg --enable-zend-test --with-zlib --with-bz2 --with-curl --with-enchant --with-gettext --with-gmp --with-mhash --with-ldap --with-libedit --with-readline --with-snmp --with-sodium --with-xsl --with-zip --with-mysqli --with-pdo-mysql --with-pdo-pgsql --with-pgsql --with-sqlite3 --with-pdo-sqlite --with-webp --with-jpeg --with-freetype --enable-sigchild --with-readline --with-pcre-jit --with-iconv') 46 | print("configuring finished") 47 | print("start compiling") 48 | os.system("make -j16 --silent") 49 | print("compiling finished") 50 | if os.path.exists("./sapi/cli/php"): 51 | print("compile 
finished!") 52 | else: 53 | print("compile failed!") 54 | exit(-1) 55 | 56 | os.chdir("../knowledges/") 57 | 58 | print("preparing knowledges") 59 | 60 | os.system("../php-src/sapi/cli/php ./function.php") 61 | 62 | os.system("python3 function.py") 63 | 64 | os.system("../php-src/sapi/cli/php ./class.php") 65 | 66 | os.system("python3 class.py") 67 | 68 | os.system("python3 seed-preprocessing.py") 69 | 70 | print("all ready!") 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## FlowFusion — A Dataflow-Driven Fuzzer 2 | 3 | ### What is FlowFusion? 4 | 5 | FlowFusion is a fully automated, dataflow-driven fuzzing tool that detects various bugs (e.g., memory errors, undefined behaviors, assertion failures) in the PHP interpreter. 6 | 7 | ### How Does FlowFusion Work? 8 | 9 | The core idea behind FlowFusion is to leverage **dataflow** as an efficient representation of the official `.phpt` test files maintained by PHP developers. FlowFusion merges two (or more) test cases to produce fused test cases with more complex code semantics. It interleaves the dataflows of multiple test cases, thereby combining their code contexts. This approach enables interactions among existing unit tests (which typically verify a single functionality) to create more intricate code paths—leading to more effective bug-finding. 10 | 11 | **Why dataflow?** 12 | Around 96.1% of `.phpt` files exhibit sequential control flow (i.e., they execute without branching), which means control flow alone contributes little to the overall code semantics. By focusing on dataflow, FlowFusion captures the essential semantics of these test programs. 13 | 14 | **Why effective?** 15 | 1. With ~20K test cases, pairwise combinations already exceed 400M fused test cases; combining more than two grows this number exponentially. 16 | 2. 
The interleaving process itself has randomness, offering multiple ways to connect two test cases. 17 | 3. FlowFusion applies additional mutations and also fuzzes runtime configurations (e.g., JIT settings). 18 | 19 | FlowFusion additionally fuzzes all defined functions and class methods in the context of the fused test cases. A SQLite3 database stores information on available functions, classes, methods, and their parameters to guide fuzzing. 20 | 21 | Because FlowFusion relies on the official `.phpt` files, as soon as new tests are added, thousands of new fused tests can be generated. **This ensures FlowFusion remains current and continues to reveal new bugs over time**. 22 | 23 | --- 24 | 25 | ### Instructions 26 | 27 | Below are the steps to fuzz the latest commit of `php-src` inside a Docker container. 28 | 29 | 1. **Start Docker** 30 | ```bash 31 | docker run --name phpfuzz -dit 0599jiangyc/flowfusion:latest bash 32 | ``` 33 | - Username: `phpfuzz` 34 | - Password: `phpfuzz` 35 | 36 | Then enter the container: 37 | ```bash 38 | docker exec -it phpfuzz bash 39 | ``` 40 | 41 | 2. **Clone FlowFusion & Prepare** 42 | Inside the container, clone the FlowFusion repository into `/home/phpfuzz/WorkSpace`: 43 | ```bash 44 | git clone https://github.com/php/flowfusion.git 45 | cd flowfusion 46 | ./prepare.sh 47 | ``` 48 | *Note:* The preparation step can take several minutes. 49 | 50 | 3. **Start Fuzzing** 51 | Use `tmux` to keep the session running in the background: 52 | ```bash 53 | tmux new-session -s fuzz 'bash' 54 | ``` 55 | Then run FlowFusion: 56 | ```bash 57 | python3 main.py 58 | ``` 59 | 60 | 4. **View Found Bugs** 61 | To check for bugs: 62 | ```bash 63 | find ./bugs -name "*.out" | xargs grep -E "Sanitizer|Assertion " 64 | ``` 65 | 66 | --- 67 | 68 | ### Bugs 69 | 70 | FlowFusion has already discovered [hundreds of bugs](https://github.com/php/php-src/issues?q=author%3AYuanchengJiang%20) in the PHP interpreter. 
71 | 72 | ### Research Paper 73 | 74 | For a more detailed explanation, see the research paper: 75 | [Fuzzing the PHP Interpreter via Dataflow Fusion](https://yuanchengjiang.github.io/docs/flowfusion.pdf). 76 | 77 | --- -------------------------------------------------------------------------------- /knowledges/class.php: -------------------------------------------------------------------------------- 1 | isAbstract() || $rc->isInterface() || ($rc->isInternal() && $rc->isFinal())) { 63 | continue; 64 | } 65 | 66 | $classInfo = []; 67 | $classInfo['class_name'] = $className; 68 | 69 | // Collect the class attributes (properties) names 70 | $properties = $rc->getProperties(); 71 | $propertyNames = []; 72 | foreach ($properties as $property) { 73 | $propertyNames[] = $property->getName(); 74 | } 75 | $classInfo['attributes'] = $propertyNames; 76 | 77 | // Get all methods of the class 78 | $methods = $rc->getMethods(); 79 | $classInfo['methods'] = []; 80 | 81 | if (!empty($methods)) { 82 | // Collect method information 83 | foreach ($methods as $method) { 84 | $methodName = $method->getName(); 85 | 86 | // Skip methods that should be skipped 87 | if (skipFunction([$className, $methodName])) { 88 | continue; 89 | } 90 | 91 | if ($method->isAbstract() || !$method->isPublic()) { 92 | // Skip abstract or non-public methods 93 | continue; 94 | } 95 | 96 | // Collect method info 97 | $methodInfo = []; 98 | $methodInfo['name'] = $methodName; 99 | 100 | // Collect parameter count 101 | $parameters = $method->getParameters(); 102 | $methodInfo['params_count'] = count($parameters); 103 | 104 | $classInfo['methods'][] = $methodInfo; 105 | } 106 | } 107 | 108 | $allClassesInfo[] = $classInfo; 109 | } 110 | 111 | // Dump all class info into class.json 112 | file_put_contents('class.json', json_encode($allClassesInfo, JSON_PRETTY_PRINT)); 113 | 114 | -------------------------------------------------------------------------------- /knowledges/dataflow.php: 
-------------------------------------------------------------------------------- 1 | children as $child) { 27 | if ($child instanceof Node) { 28 | switch ($child->kind) { 29 | case ast\AST_ASSIGN: 30 | handleAssign($child, $parent, $allVars); 31 | break; 32 | default: 33 | analyze($child, $parent, $allVars); 34 | } 35 | } elseif ($child !== null) { 36 | // Handle simple variable usage (e.g., variable declarations without assignments) 37 | $vars = getVars($child); 38 | foreach ($vars as $var) { 39 | $allVars[$var] = true; 40 | } 41 | } 42 | } 43 | } 44 | 45 | function handleAssign(Node $assignNode, array &$parent, array &$allVars) 46 | { 47 | $varNode = $assignNode->children['var']; 48 | $exprNode = $assignNode->children['expr']; 49 | 50 | $lhsVars = getVars($varNode); 51 | $rhsVars = []; 52 | 53 | // Check if RHS is a function call 54 | if ($exprNode instanceof Node && $exprNode->kind === ast\AST_CALL) { 55 | $funcVars = getFuncCallVars($exprNode); 56 | $rhsVars = array_merge($rhsVars, $funcVars); 57 | } else { 58 | $rhsVars = getVars($exprNode); 59 | } 60 | 61 | // Collect all variables 62 | foreach ($lhsVars as $var) { 63 | $allVars[$var] = true; 64 | } 65 | foreach ($rhsVars as $var) { 66 | $allVars[$var] = true; 67 | } 68 | 69 | // Union LHS and RHS variables 70 | foreach ($lhsVars as $lhsVar) { 71 | foreach ($rhsVars as $rhsVar) { 72 | union($lhsVar, $rhsVar, $parent); 73 | } 74 | } 75 | } 76 | 77 | function getVars($node) 78 | { 79 | $vars = []; 80 | if ($node instanceof Node) { 81 | if ($node->kind === ast\AST_VAR) { 82 | $vars[] = '$' . 
$node->children['name']; 83 | } else { 84 | foreach ($node->children as $child) { 85 | $vars = array_merge($vars, getVars($child)); 86 | } 87 | } 88 | } 89 | return $vars; 90 | } 91 | 92 | function getFuncCallVars(Node $callNode) 93 | { 94 | $vars = []; 95 | $argsNode = $callNode->children['args']; 96 | foreach ($argsNode->children as $arg) { 97 | $vars = array_merge($vars, getVars($arg)); 98 | } 99 | return $vars; 100 | } 101 | 102 | // Union-Find Functions 103 | function find($item, &$parent) 104 | { 105 | if (!isset($parent[$item])) { 106 | $parent[$item] = $item; 107 | } 108 | if ($parent[$item] !== $item) { 109 | $parent[$item] = find($parent[$item], $parent); 110 | } 111 | return $parent[$item]; 112 | } 113 | 114 | function union($item1, $item2, &$parent) 115 | { 116 | $root1 = find($item1, $parent); 117 | $root2 = find($item2, $parent); 118 | if ($root1 !== $root2) { 119 | $parent[$root2] = $root1; 120 | } 121 | } 122 | 123 | analyze($ast, $parent, $allVars); 124 | 125 | // Group variables by their root parent 126 | $groups = []; 127 | foreach ($allVars as $var => $_) { 128 | $root = find($var, $parent); 129 | $groups[$root][] = $var; 130 | } 131 | 132 | // Remove duplicates and sort the groups 133 | foreach ($groups as &$group) { 134 | $group = array_unique($group); 135 | sort($group); 136 | } 137 | unset($group); 138 | 139 | // Output the dataflow groups in a format that can be eval'd by Python 140 | $group_strings = []; 141 | foreach ($groups as $group) { 142 | $escaped_vars = array_map(function($var) { 143 | return "'" . addslashes($var) . "'"; 144 | }, $group); 145 | $group_strings[] = "[" . implode(", ", $escaped_vars) . "]"; 146 | } 147 | echo "[" . implode(", ", $group_strings) . 
"]"; 148 | -------------------------------------------------------------------------------- /dataflow.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | """ 4 | dataflow.py is a module for conducting lightweight source code analysis on PHP code. 5 | It performs a very coarse-grained dataflow extraction, which is different from traditional 6 | taint analysis. The focus is on tolerance to false positives (FP) and false negatives (FN) 7 | in the context of fuzz testing. 8 | """ 9 | 10 | class PHPFastDataflow: 11 | """ 12 | A class that performs fast, coarse-grained dataflow analysis on PHP code. 13 | It does not guarantee completeness but aims for soundness. 14 | """ 15 | 16 | def __init__(self): 17 | """ 18 | Initializes the PHPFastDataflow object with empty variables and dataflows. 19 | """ 20 | self.variables = [] # List to store extracted variables from PHP code 21 | self.dataflows = [] # List of lists to store dataflows between variables 22 | 23 | def clean(self): 24 | """ 25 | Resets the variables and dataflows to empty lists. 26 | """ 27 | self.variables = [] 28 | self.dataflows = [] 29 | 30 | def extract_variables(self): 31 | """ 32 | Extracts all PHP variables from the PHP code using a regular expression. 33 | It ensures that each variable is unique by converting the list to a set. 34 | """ 35 | # Regular expression to match valid PHP variables 36 | regex = r"\$[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*" 37 | # Find all PHP variables in the provided code 38 | self.variables = re.findall(regex, self.phpcode) 39 | # Ensure unique variables by converting the list to a set 40 | self.variables = list(set(self.variables)) 41 | 42 | def analyze_php_line(self, php_line): 43 | """ 44 | Analyzes a single line of PHP code to find variables and check 45 | if multiple variables are interacting in the same line. 46 | 47 | Returns: 48 | - A tuple (True, [variables]) if multiple variables are found. 
49 | - A tuple (False, None) if no interaction between variables is detected. 50 | """ 51 | regex = r"\$[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*" 52 | variables = list(set(re.findall(regex, php_line))) 53 | if len(variables) > 1: 54 | return (True, variables) 55 | else: 56 | return (False, None) 57 | 58 | def merge_dataflows(self): 59 | """ 60 | Merges the dataflows by grouping variables that interact with each other. 61 | Variables that appear together in dataflow analysis are merged into a single 62 | list to represent their relationship. 63 | """ 64 | list_of_lists = self.variables 65 | 66 | # Convert any single variables to lists to ensure consistent structure 67 | for i in range(len(list_of_lists)): 68 | if type(list_of_lists[i]) != list: 69 | list_of_lists[i] = [list_of_lists[i]] 70 | 71 | # Initialize an empty list to store merged sublists 72 | merged_lists = [] 73 | 74 | # Iterate through each sublist (representing variables that interact) 75 | for sublist in list_of_lists: 76 | merged_with_existing = False 77 | for merged_sublist in merged_lists: 78 | # If any variable in the current sublist exists in a merged sublist, 79 | # merge them by extending the merged list 80 | if any(var in merged_sublist for var in sublist): 81 | merged_sublist.extend(var for var in sublist if var not in merged_sublist) 82 | merged_with_existing = True 83 | break 84 | 85 | # If no merge occurred, add the current sublist as a new group 86 | if not merged_with_existing: 87 | merged_lists.append(sublist) 88 | 89 | # Update the variables with the merged dataflows 90 | self.variables = merged_lists 91 | 92 | def extract_dataflow(self): 93 | """ 94 | Extracts dataflows from the PHP code by analyzing each line of code. 95 | It identifies variables and their interactions, grouping them into dataflows. 
96 | """ 97 | for eachline in self.phpcode.split('\n'): 98 | result, variables = self.analyze_php_line(eachline) 99 | if result: 100 | for each_var in variables: 101 | # If the variable is already in the list, remove and replace it 102 | if each_var in self.variables: 103 | self.variables.remove(each_var) 104 | # Add the new set of interacting variables 105 | self.variables.append(variables) 106 | 107 | # Merge dataflows to group interacting variables 108 | self.merge_dataflows() 109 | 110 | def analyze(self, phpcode): 111 | """ 112 | The main function to analyze a given PHP code for dataflows. 113 | It extracts variables and their interactions to produce a list 114 | of dataflows. 115 | 116 | Args: 117 | - phpcode: The PHP source code to analyze. 118 | 119 | Returns: 120 | - A list of merged dataflows, each representing groups of interacting variables. 121 | """ 122 | self.phpcode = phpcode 123 | self.clean() # Reset variables and dataflows 124 | self.extract_variables() # Extract variables from the code 125 | self.extract_dataflow() # Extract dataflows between variables 126 | return self.variables 127 | -------------------------------------------------------------------------------- /knowledges/function.php: -------------------------------------------------------------------------------- 1 | getNumberOfParameters(); 101 | $params = $reflection->getParameters(); 102 | 103 | // Prepare parameter info 104 | $paramInfos = []; 105 | foreach ($params as $param) { 106 | $paramDetails = [ 107 | 'name' => $param->getName(), 108 | 'type' => $param->hasType() ? 
(string)$param->getType() : null, 109 | 'is_optional' => $param->isOptional(), 110 | 'default_value' => null, 111 | ]; 112 | 113 | // Suppress deprecation warnings when getting default value 114 | if ($param->isDefaultValueAvailable()) { 115 | $originalErrorReporting = error_reporting(); 116 | error_reporting($originalErrorReporting & ~E_DEPRECATED); 117 | $defaultValue = $param->getDefaultValue(); 118 | error_reporting($originalErrorReporting); 119 | 120 | // Convert default value to a JSON-serializable format 121 | if (is_scalar($defaultValue) || is_null($defaultValue)) { 122 | $paramDetails['default_value'] = $defaultValue; 123 | } else { 124 | // Convert non-scalar values to their string representation 125 | $paramDetails['default_value'] = var_export($defaultValue, true); 126 | } 127 | } 128 | 129 | $paramInfos[] = $paramDetails; 130 | } 131 | 132 | // Collect function info 133 | $functionInfo = [ 134 | 'name' => $functionName, 135 | 'num_params' => $numParams, 136 | 'params' => $paramInfos, 137 | ]; 138 | 139 | $functionInfoList[] = $functionInfo; 140 | 141 | } catch (\Throwable $e) { 142 | // Handle any exceptions or errors 143 | // You can log the error if needed 144 | } 145 | } 146 | 147 | // Write the function info list to JSON file 148 | $json = json_encode($functionInfoList, JSON_PRETTY_PRINT); 149 | 150 | // Check if json_encode failed 151 | if ($json === false) { 152 | echo "json_encode error: " . json_last_error_msg() . 
"\n"; 153 | // Optionally, you can handle the error further here 154 | } else { 155 | file_put_contents('./apis.json', $json); 156 | } 157 | } 158 | 159 | // Call the function 160 | collect_functions([]); 161 | -------------------------------------------------------------------------------- /mutator.py: -------------------------------------------------------------------------------- 1 | from random import randint, choice, shuffle, random 2 | import re 3 | import subprocess 4 | import os 5 | 6 | 7 | class Mutator: 8 | """ 9 | This class aims to mutate the PHPT (PHP Test) file, specifically targeting the --FILE-- section. 10 | The goal is to introduce mutations in various parts of the code: 11 | - Special integers: -1, 0, PHP_INT_MAX, PHP_INT_MIN 12 | - Special characters: random byte, special encoding 13 | - Special class variables: random magic class variables 14 | - Special values: null values, etc. 15 | """ 16 | 17 | def __init__(self): 18 | pass 19 | 20 | def extract_sec(self, test, section): 21 | """ 22 | Extract a specific section from the PHPT file, identified by the section header. 23 | Args: 24 | test: The full PHPT file content. 25 | section: The section to extract (e.g., --FILE--). 26 | 27 | Returns: 28 | The content of the specified section or an empty string if not found. 29 | """ 30 | if section not in test: 31 | return "" 32 | start_idx = test.find(section) + len(section) 33 | x = re.search("--([_A-Z]+)--", test[start_idx:]) 34 | end_idx = x.start() if x != None else len(test) - 1 35 | ret = test[start_idx:start_idx + end_idx].strip("\n") 36 | return ret 37 | 38 | """ 39 | `mr` means `mutation rule` 40 | Below are various mutation rules applied to the PHP code. 41 | """ 42 | 43 | def _mr_arith_operators(self, phpcode): 44 | """ 45 | Randomly mutate arithmetic operators such as +, -, *, /, %, **. 46 | 99.9% of the time, this function will return the original PHP code without changes. 
47 | """ 48 | if random() > 0.001: 49 | return phpcode 50 | 51 | # Regular expression to match arithmetic operators 52 | target_regex = r'\+\+|[-*/%]|\*\*' 53 | replacements = ['+', '-', '*', '/', '%', '**'] 54 | victims = re.findall(target_regex, phpcode) 55 | 56 | if len(victims) == 0: 57 | return phpcode 58 | 59 | # Randomly replace one arithmetic operator 60 | phpcode = phpcode.replace(choice(victims), choice(replacements)) 61 | return phpcode 62 | 63 | def _mr_assign_operators(self, phpcode): 64 | """ 65 | Randomly mutate assignment operators such as +=, -=, *=, /=, %=. 66 | 99.9% of the time, this function will return the original PHP code without changes. 67 | """ 68 | if random() > 0.001: 69 | return phpcode 70 | 71 | # Regular expression to match assignment operators 72 | target_regex = r'\+=|-=|\*=|/=|%=' 73 | replacements = ['+=', '-=', '*=', '/=', '%='] 74 | 75 | # Find all assignment operators in the PHP code 76 | victims = re.findall(target_regex, phpcode) 77 | if len(victims) == 0: 78 | return phpcode 79 | 80 | # Randomly select a victim and a replacement operator 81 | victim = choice(victims) 82 | replace = choice([op for op in replacements if op != victim]) 83 | 84 | # Replace a randomly chosen occurrence of the victim operator 85 | phpcode = re.sub(re.escape(victim), replace, phpcode, 1) 86 | return phpcode 87 | 88 | def _mr_logical_operators(self, phpcode): 89 | """ 90 | Randomly mutate logical operators such as 'and', 'or', 'xor', '&&', '||'. 91 | 99.9% of the time, this function will return the original PHP code without changes. 
92 | """ 93 | if random() > 0.001: 94 | return phpcode 95 | 96 | # Regular expression to match logical operators 97 | target_regex = r'\band\b|\bor\b|\bxor\b|&&|\|\|' 98 | replacements = ['and', 'or', 'xor', '&&', '||'] 99 | 100 | # Find all logical operators in the PHP code 101 | victims = re.findall(target_regex, phpcode) 102 | if len(victims) == 0: 103 | return phpcode 104 | 105 | # Randomly select a victim and a replacement operator 106 | victim = choice(victims) 107 | replace = choice([op for op in replacements if op != victim]) 108 | 109 | # Replace a randomly chosen occurrence of the logical operator 110 | phpcode = re.sub(re.escape(victim), replace, phpcode, 1) 111 | return phpcode 112 | 113 | def _mr_integer(self, phpcode): 114 | """ 115 | Randomly mutate integer expressions to special boundary values like -1, 0, PHP_INT_MAX, etc. 116 | 99.9% of the time, this function will return the original PHP code without changes. 117 | """ 118 | if random() > 0.001: 119 | return phpcode 120 | 121 | # Regular expression to match integers (in decimal, octal, or hexadecimal) 122 | target_regex = r'(? 
def _mr_variable(self, phpcode):
    """Replace occurrences of one variable with another variable of the code.

    The mutation fires for roughly 0.5% of the calls; in all other cases
    ``phpcode`` is returned unchanged. When it fires, a victim and a
    replacement are drawn from the variables of the code and a random,
    non-empty selection of the victim's occurrences is rewritten.

    Args:
        phpcode: PHP source text to mutate.

    Returns:
        The (possibly mutated) PHP source text.
    """
    if random() > 0.005:
        return phpcode

    variables = re.findall(r'\$\w+', phpcode)
    if not variables:
        return phpcode

    victim = choice(variables)
    replace = choice(variables)

    # Match whole variable names only: without the lookahead '$a' would also
    # be rewritten inside '$ab', producing names that never existed.
    victim_regex = re.compile(re.escape(victim) + r'(?!\w)')
    occurrences = [m.start() for m in victim_regex.finditer(phpcode)]
    if not occurrences:
        return phpcode

    # Draw with replacement, so between 1 and num_replacements distinct
    # positions end up selected.
    num_replacements = choice(range(1, len(occurrences) + 1))
    selected = {choice(occurrences) for _ in range(num_replacements)}

    pieces = []
    last_index = 0
    for start in sorted(selected):
        pieces.append(phpcode[last_index:start])
        pieces.append(replace)
        last_index = start + len(victim)
    pieces.append(phpcode[last_index:])
    return ''.join(pieces)
| xargs grep -E 'Sanitizer|Assertion ' | grep -v 'leak' > /tmp/flowfusion_bug.log") 63 | 64 | os.chdir(f"{test_root}") 65 | 66 | print("Filtering finished") 67 | 68 | # Initialize lists to store unique bug identifiers and bug information 69 | identifiers = [] 70 | bugs_info = [] 71 | 72 | if not os.path.exists(f'{test_root}/bug_reports/'): 73 | os.mkdir(f'{test_root}/bug_reports/') 74 | 75 | if os.path.exists(f'{test_root}/bug_reports/bugs.json'): 76 | with open(f'{test_root}/bug_reports/bugs.json', 'r') as file: 77 | bugs_info = json.load(file) 78 | identifiers = [bug['identifier'] for bug in bugs_info] 79 | 80 | for each_existing_bug in bugs_info: 81 | each_existing_bug['new'] = 0 82 | 83 | # Read the contents of the bug log file 84 | with open('/tmp/flowfusion_bug.log', 'r') as f: 85 | bugs = f.read().strip('\n').split('\n') 86 | 87 | # Regular expression to extract identifier patterns from the log 88 | identifier_pattern = r"(\/php-src\/[^:]+:\d+)" 89 | 90 | 91 | # last_modified_time = os.path.getmtime(file_path) 92 | 93 | # Loop through each bug entry in the log 94 | for eachbug in bugs: 95 | # Search for the identifier using the regular expression 96 | identifier = re.search(identifier_pattern, eachbug) 97 | if identifier: 98 | identifier = identifier.group() 99 | # If the identifier is new, add it to the identifiers list and create a bug entry 100 | if identifier not in identifiers: 101 | identifiers.append(identifier) 102 | bug_folder = eachbug.split('/')[1] 103 | last_modified_time = os.path.getmtime(f"{test_root}/bugs/{bug_folder}") 104 | readable_time = time.ctime(last_modified_time) 105 | bugs_info.append({ 106 | "bugID": len(bugs_info) + 1, # Assign a unique ID to each bug 107 | "identifier": identifier, # Store the identifier (file path and line number) 108 | "details": [eachbug.split('/')[1]], 109 | "mtime": readable_time, 110 | "new": 1 111 | }) 112 | else: 113 | # If the identifier already exists, update the existing bug entry 114 | bug_idx = 
identifiers.index(identifier) 115 | bug_folder = eachbug.split('/')[1] 116 | mtime = bugs_info[bug_idx]["mtime"] 117 | parsed_time = time.strptime(mtime, "%a %b %d %H:%M:%S %Y") 118 | # Convert struct_time to a timestamp (seconds since epoch) 119 | timestamp = time.mktime(parsed_time) 120 | last_modified_time = os.path.getmtime(f"{test_root}/bugs/{bug_folder}") 121 | if last_modified_time > timestamp: 122 | readable_time = time.ctime(last_modified_time) 123 | bugs_info[bug_idx]["mtime"] = readable_time 124 | 125 | # Convert the bug information into a JSON format for further processing 126 | # Load the list of bug information into a JSON-compatible Python dictionary 127 | data = json.loads(str(bugs_info).replace("'", '"')) 128 | 129 | # Pretty-print the JSON data to a file for easy readability 130 | with open(f'{test_root}/bug_reports/bugs.json', 'w') as file: 131 | json.dump(data, file, indent=4) 132 | 133 | #with open("/tmp/flowfusion-php-commit","r") as file: 134 | # php_commit = file.read() 135 | 136 | php_commit = "test" 137 | 138 | #with open(f"{test_root}/php-src/config.log","r") as file: 139 | # while True: 140 | # line = file.readline() 141 | # if "./configure" in line: 142 | # php_config = line.strip(' ').strip('$') 143 | # break 144 | 145 | php_config = "test" 146 | 147 | with open(f"{test_root}/bug_reports/bugs.json", 'r') as file: 148 | data = json.load(file) 149 | 150 | if os.path.exists(f"{test_root}/bugs")==False: 151 | print("Please run in flowfusion folder") 152 | exit() 153 | 154 | errors = ["stack-overflow","stack-underflow","heap-buffer-overflow","null pointer","integer overflow","heap-use-after-free","SEGV","core dumped"] 155 | 156 | # Accessing the parsed data 157 | for bug in data: 158 | upload_bug_folder_name = bug['identifier'].split('/php-src/')[1].replace('/','_').replace('.','_').replace(':','_') 159 | # if bug['new']==0 and os.path.exists(f"{test_root}/../flowfusion-php.github.io/{upload_bug_folder_name}"): 160 | # # sed -i -E 's/this 
bug has been detected for [0-9]+ times/this bug has been detected for 2 times/g' ./sapi_phpdbg_phpdbg_bp_c_132/index.html 161 | # continue 162 | print(f"analyzing and uploading {upload_bug_folder_name}") 163 | bug_folder = f"./bugs/{bug['details'][0]}/" 164 | 165 | # get bugout 166 | f = open(f"{bug_folder}/test.out", "r", encoding="iso_8859_1") 167 | bugout = f.read() 168 | f.close() 169 | 170 | # get keywords 171 | keywords = [] 172 | for error in errors: 173 | if error in bugout: 174 | keywords.append(error) 175 | 176 | dangerous = 0 177 | # if "heap-buffer-overflow" in keywords or "heap-use-after-free" in keywords: 178 | # dangerous = 1 179 | 180 | # get bugphp 181 | f = open(f"{bug_folder}/test.php", "r") 182 | bugphp = f.read() 183 | f.close() 184 | 185 | # get bugphpt 186 | f = open(f"{bug_folder}/test.phpt", "r") 187 | bugphpt = f.read() 188 | f.close() 189 | 190 | # get bugsh 191 | f = open(f"{bug_folder}/test.sh", "r") 192 | bugsh = f.read() 193 | f.close() 194 | 195 | bug_outputs = ["UndefinedBehaviorSanitizer: undefined-behavior", "AddressSanitizer", "core dumped"] 196 | # get reducedphp 197 | os.system(f"cp {bug_folder}/test.php /tmp/flowfusion_reproducing/") 198 | bug_output = "" 199 | for each in bug_outputs: 200 | if each in bugout: 201 | bug_output = each 202 | break 203 | bug_config = "" 204 | for eachline in bugsh.split('\n'): 205 | if "gdb --args" in eachline: 206 | bug_config = eachline.split(' -d ')[1:] 207 | bug_config[-1] = bug_config[-1].split(' -f ')[0] 208 | bug_config = ' -d '+' -d '.join(bug_config) 209 | break 210 | 211 | signal.signal(signal.SIGALRM, handler) 212 | # set 5 mins for reducing one bug 213 | signal.alarm(300) 214 | try: 215 | reducedphp, reduced_config = reduce_php( 216 | testpath = "/tmp/flowfusion_reproducing/test.php", 217 | phppath = f"{test_root}/php-src/sapi/cli/php", 218 | config = bug_config, 219 | bug_output = bug_output 220 | ) 221 | except: 222 | reducedphp = 'reducing timeout ..' 
import os
import subprocess

# stderr of the first run that reproduced the bug; the report generated by the
# __main__ section of this file reads it.
stdouterr = None


def run_test(cmd, bug_output):
    """
    Executes the provided command to run the PHP test and checks
    if the expected bug output appears.

    Args:
        cmd: Shell command line to execute. It is run through the shell
            because the configuration part is a pre-formatted string of
            ` -d key=value` options.
        bug_output: Substring that identifies the bug in stdout/stderr.

    Returns:
        True if `bug_output` occurs in the command's stdout or stderr,
        False otherwise (including when the command times out after 10
        seconds or cannot be started).
    """
    global stdouterr

    # Run the command and capture the output. Only failures of the run itself
    # are treated as "not reproduced". Anything else must propagate: bot.py
    # enforces its per-bug time budget by raising from a SIGALRM handler, and
    # a blanket `except` here would swallow that one-shot signal and let the
    # reduction run unbounded.
    try:
        result = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            encoding='iso-8859-1',
            timeout=10,
        )
    except (subprocess.SubprocessError, OSError, ValueError):
        return False

    streams = (result.stdout, result.stderr)
    reproduced = any(bug_output in stream for stream in streams)

    # If a different sanitizer message (other than a leak report) shows up,
    # print it so that it is not lost while reducing.
    if (not reproduced
            and not any("LeakSanitizer" in stream for stream in streams)
            and any("Sanitizer" in stream for stream in streams)):
        print("Other error messages found:")
        print(result.stdout)
        print(result.stderr)

    # Remember the stderr of the first reproducing run for the bug report.
    if reproduced and stdouterr is None:
        stdouterr = result.stderr

    return reproduced


def _reproduces(cmd, bug_output, attempts=3):
    """Return True as soon as one of `attempts` runs of `cmd` shows the bug."""
    return any(run_test(cmd, bug_output) for _ in range(attempts))


def _write_lines(testpath, lines):
    """Overwrite `testpath` with `lines` joined by newlines."""
    with open(testpath, "w") as f:
        f.write("\n".join(lines))


# Function to minimize the test case by removing lines
def minimize_testcase(lines, bug_output, testpath, reproduce_cmd):
    """
    Minimizes the test case by iteratively removing lines and checking
    if the bug still reproduces. Uses a stepwise approach for efficiency.

    Chunks of `step` consecutive lines are removed, starting with half of
    the file; whenever no chunk of the current size can be removed the step
    is halved, until it reaches zero.

    Args:
        lines: Lines of the test case.
        bug_output: Substring that identifies the bug.
        testpath: File that `reproduce_cmd` executes; it is overwritten with
            every candidate.
        reproduce_cmd: Shell command that runs the test case.

    Returns:
        Tuple `(remaining_lines, init_step)` where `init_step` is the chunk
        size the search started with.
    """
    print("reducing .. it may cost some times")
    n = len(lines)
    step = max(n // 2, 1)  # Start with removing half of the lines at a time
    init_step = step

    # Reduce the number of lines step by step
    while step > 0:
        print(f"Current step: {step}")

        # Try removing 'step' lines at a time
        for i in range(0, n, step):
            candidate = lines[:i] + lines[i + step:]
            _write_lines(testpath, candidate)

            # If the bug reproduces, accept the candidate and rescan with the
            # same step size.
            if _reproduces(reproduce_cmd, bug_output):
                lines = candidate
                n = len(lines)
                break
        else:
            step //= 2  # If no further reduction is found, reduce step size

    return lines, init_step


# Function for further minimizing by removing multiple lines at a time
def further_minimize_testcase(lines, bug_output, testpath, reproduce_cmd):
    """
    Further minimizes the test case by removing 2 to 5 lines at a time
    and checking if the bug still reproduces.

    For each window size at most one window is removed (the first one whose
    removal keeps the bug), then the next size is tried.

    Args:
        lines: Lines of the test case.
        bug_output: Substring that identifies the bug.
        testpath: File that `reproduce_cmd` executes; it is overwritten with
            every candidate.
        reproduce_cmd: Shell command that runs the test case.

    Returns:
        The remaining lines.
    """
    n = len(lines)

    # Try removing 2 to 5 lines at a time
    for count in range(2, 6):
        # Slide a window of 'count' lines over the test case
        for i in range(n - count + 1):
            candidate = lines[:i] + lines[i + count:]
            _write_lines(testpath, candidate)

            # If the bug reproduces, accept this as the minimized version
            if _reproduces(reproduce_cmd, bug_output):
                lines = candidate
                n = len(lines)
                break

    return lines


def reduce_php(testpath, phppath, config, bug_output):
    """
    Reduce a crashing PHP script and its `-d` options to a smaller reproducer.

    The script at `testpath` is rewritten in place: lines are stripped of
    surrounding whitespace and removed as long as the bug keeps reproducing.
    Afterwards the ` -d ` options in `config` are dropped one at a time.

    Args:
        testpath: Path of the PHP script; overwritten with the reduced script.
        phppath: Command that runs the PHP interpreter.
        config: Option string made of ` -d key=value` items (may be empty).
        bug_output: Substring that identifies the bug in the output.

    Returns:
        Tuple `(reduced_php_source, reduced_config)`. When the bug does not
        reproduce with the unreduced input, both elements are the string
        "bug not reproduced when reducing".
    """
    reproduce_cmd = f'{phppath} {config} {testpath}'

    # Initial test to verify if the reproduce command triggers the bug
    if not _reproduces(reproduce_cmd, bug_output):
        return "bug not reproduced when reducing", "bug not reproduced when reducing"

    while True:
        # Read the current test file, dropping surrounding whitespace
        with open(testpath, "r") as f:
            lines = [line.strip() for line in f]

        # Begin minimizing the test case by removing lines
        minimized_lines, init_step = minimize_testcase(lines, bug_output, testpath, reproduce_cmd)

        # Further minimize by removing multiple lines at once
        further_minimized_lines = further_minimize_testcase(minimized_lines, bug_output, testpath, reproduce_cmd)

        # The helpers leave their last candidate in the file; store the
        # accepted version before the next pass reads it back.
        _write_lines(testpath, further_minimized_lines)

        # Stop once a pass no longer changes the initial step size, i.e. half
        # of the line count is the same before and after the pass.
        if max(len(further_minimized_lines) // 2, 1) == init_step:
            print("reducing php finished")
            break

    reducedphp = "\n".join(further_minimized_lines)

    # Start from the full configuration and drop one option at a time
    reduced_config = config
    while True:
        # Split the configuration into individual options, ignoring the empty
        # piece in front of the leading ' -d '
        options = [c for c in reduced_config.split(' -d ') if c != '']
        found_shorter_config = False

        for i in range(len(options)):
            # Configuration without option i
            remaining = options[:i] + options[i + 1:]
            configstr = ' -d ' + ' -d '.join(remaining) if remaining else ''
            test_cmd = f'{phppath} {configstr} {testpath}'
            if _reproduces(test_cmd, bug_output):
                # Keep the shorter configuration and restart the scan
                reduced_config = configstr
                found_shorter_config = True
                break

        # If no shorter configuration is found, exit the loop
        if not found_shorter_config:
            break

    return reducedphp, reduced_config.strip('\n')
179 | # The expected bug output that we are trying to reproduce 180 | # if sanitizers' alerts 181 | bug_output = 'Sanitizer' 182 | # if assertion failure 183 | # bug_output = 'core dumped' 184 | 185 | reducedphp, reduced_config = reduce_php(testpath, phppath, config, bug_output) 186 | 187 | reduced_config = f'./php-src/sapi/cli/php {reduced_config} ./test.php' 188 | 189 | # auto generate bug report 190 | report_template = "\nThe following code:\n\n```php\n{poc}\n```\n\nResulted in this output:\n```\n{stdouterr}\n```\n\nTo reproduce:\n```\n{config}\n```\n\nCommit:\n```\n{commit}\n```\n\nConfigurations:\n```\n{php_config}\n```\n\nOperating System:\n```\n{os}\n```\n\n*This report is automatically generated by [FlowFusion](https://github.com/php/flowfusion)*\n" 191 | 192 | os.system("cd /home/phpfuzz/WorkSpace/flowfusion/php-src; git rev-parse origin/master > /tmp/php_commit") 193 | f = open("/tmp/php_commit","r") 194 | commit = f.read() 195 | f.close() 196 | 197 | php_config = 'CC="clang-12" CXX="clang++-12" CFLAGS="-DZEND_VERIFY_TYPE_INFERENCE" CXXFLAGS="-DZEND_VERIFY_TYPE_INFERENCE" ./configure --enable-debug --enable-address-sanitizer --enable-undefined-sanitizer --enable-re2c-cgoto --enable-fpm --enable-litespeed --enable-phpdbg-debug --enable-zts --enable-bcmath --enable-calendar --enable-dba --enable-dl-test --enable-exif --enable-ftp --enable-gd --enable-gd-jis-conv --enable-mbstring --enable-pcntl --enable-shmop --enable-soap --enable-sockets --enable-sysvmsg --enable-zend-test --with-zlib --with-bz2 --with-curl --with-enchant --with-gettext --with-gmp --with-mhash --with-ldap --with-libedit --with-readline --with-snmp --with-sodium --with-xsl --with-zip --with-mysqli --with-pdo-mysql --with-pdo-pgsql --with-pgsql --with-sqlite3 --with-pdo-sqlite --with-webp --with-jpeg --with-freetype --enable-sigchild --with-readline --with-pcre-jit --with-iconv' 198 | 199 | os = "Ubuntu 20.04 Host, Docker 0599jiangyc/flowfusion:latest" 200 | 201 | bug_report = 
import re
import os
import sqlite3
import subprocess


def get_php_dataflow_groups(php_script_path, dataflow_script_path='dataflow.php'):
    """
    Invokes the PHP dataflow analysis script and collects the dataflow list.

    Parameters:
        php_script_path (str): The path to the PHP file to analyze.
        dataflow_script_path (str): The path to the PHP dataflow analysis script.

    Returns:
        List[List[str]]: A list of dataflow groups, each group is a list of variable names.
        An empty list is returned when the script cannot be run, exits with
        an error, or prints something that cannot be evaluated.
    """
    try:
        # Execute the PHP dataflow analysis script
        result = subprocess.run(
            ['php', dataflow_script_path, php_script_path],
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error executing PHP script: {e.stderr}")
        return []
    except OSError as e:
        # e.g. no `php` executable on PATH
        print(f"Error executing PHP script: {e}")
        return []

    output = result.stdout.strip()
    try:
        # SECURITY NOTE(review): eval() executes whatever the analysis script
        # prints. If the output is always a plain list literal,
        # ast.literal_eval would be the safe drop-in replacement; confirm
        # against dataflow.php before switching.
        return eval(output)
    except Exception as e:
        print(f"Error parsing output: {e}")
        return []


class PHPFastDataflow:
    """
    A class that performs fast, coarse-grained dataflow analysis on PHP code.
    It does not guarantee completeness but aims for soundness.

    Two variables are considered connected when they occur on the same source
    line; connected variables are collected into groups.
    """

    # A PHP variable: `$` followed by a valid identifier.
    _VARIABLE_RE = re.compile(r"\$[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*")

    def __init__(self):
        """
        Initializes the PHPFastDataflow object with empty variables and dataflows.
        """
        self.variables = []  # List to store extracted variables from PHP code
        self.dataflows = []  # List of lists to store dataflows between variables

    def clean(self):
        """
        Resets the variables and dataflows to empty lists.
        """
        self.variables = []
        self.dataflows = []

    def extract_variables(self):
        """
        Extracts all PHP variables from `self.phpcode`.

        Each variable is kept once, in order of first appearance, so that the
        result does not depend on string hash randomisation.
        """
        found = self._VARIABLE_RE.findall(self.phpcode)
        self.variables = list(dict.fromkeys(found))

    def analyze_php_line(self, php_line):
        """
        Analyzes a single line of PHP code to find variables and check
        if multiple variables are interacting in the same line.

        Returns:
        - A tuple (True, [variables]) if multiple variables are found.
        - A tuple (False, None) if no interaction between variables is detected.
        """
        variables = list(dict.fromkeys(self._VARIABLE_RE.findall(php_line)))
        if len(variables) > 1:
            return (True, variables)
        return (False, None)

    def merge_dataflows(self):
        """
        Merges the dataflows by grouping variables that interact with each other.

        `self.variables` holds single variable names and lists of variables
        seen together on one line. Entries sharing a variable are merged, and
        an entry that touches several existing groups joins all of them, so
        every variable ends up in exactly one group.
        """
        merged_lists = []

        for entry in self.variables:
            # Convert any single variable to a list for a consistent structure
            group = entry if isinstance(entry, list) else [entry]

            # Every existing group that shares a variable with this entry
            hits = [m for m in merged_lists if any(var in m for var in group)]
            if not hits:
                merged_lists.append(list(group))
                continue

            target = hits[0]
            # The entry links several groups: fold them into the first one.
            # Existing groups are pairwise disjoint, so a plain extend is safe.
            for other in hits[1:]:
                target.extend(other)
                merged_lists = [m for m in merged_lists if m is not other]

            for var in group:
                if var not in target:
                    target.append(var)

        # Update the variables with the merged dataflows
        self.variables = merged_lists

    def extract_dataflow(self):
        """
        Extracts dataflows from the PHP code by analyzing each line of code.
        It identifies variables and their interactions, grouping them into dataflows.
        """
        for eachline in self.phpcode.split('\n'):
            result, variables = self.analyze_php_line(eachline)
            if result:
                for each_var in variables:
                    # The variable is now represented by its group, so drop
                    # the stand-alone entry
                    if each_var in self.variables:
                        self.variables.remove(each_var)
                # Add the new set of interacting variables
                self.variables.append(variables)

        # Merge dataflows to group interacting variables
        self.merge_dataflows()

    def analyze(self, phpcode):
        """
        The main function to analyze a given PHP code for dataflows.
        It extracts variables and their interactions to produce a list
        of dataflows.

        Args:
        - phpcode: The PHP source code to analyze.

        Returns:
        - A tuple (variables, dataflows): the distinct variables of the code,
          and the list of merged dataflows, each representing a group of
          interacting variables (a variable that interacts with nothing forms
          a group of its own).
        """
        self.phpcode = phpcode
        self.clean()              # Reset variables and dataflows
        self.extract_variables()  # Extract variables from the code
        self.vars = list(self.variables)
        self.extract_dataflow()   # Extract dataflows between variables
        return self.vars, self.variables


def remove_php_comments(code):
    """
    Return `code` with PHP comments removed.

    `//` and `#` comments are removed up to, but not including, the end of
    the line or a closing `?>` tag, whichever comes first. `/* ... */`
    comments are removed entirely. Single- and double-quoted strings are
    copied verbatim, and `#[` is kept because it opens a PHP 8 attribute.

    Heredoc/nowdoc bodies and inline HTML are not recognised; comment markers
    inside them are treated like code.
    """
    out = []
    i = 0
    n = len(code)

    while i < n:
        c = code[i]
        nxt = code[i + 1] if i + 1 < n else ''

        # String literal: copy through the matching quote; a backslash always
        # protects the character that follows it.
        if c == "'" or c == '"':
            j = i + 1
            while j < n and code[j] != c:
                j += 2 if code[j] == '\\' else 1
            j = min(j + 1, n)
            out.append(code[i:j])
            i = j
            continue

        # One-line comment: skip to the newline or to `?>`, both of which are
        # kept because they are still part of the program.
        if (c == '/' and nxt == '/') or (c == '#' and nxt != '['):
            j = i + 1
            while j < n and code[j] != '\n' and not code.startswith('?>', j):
                j += 1
            i = j
            continue

        # Block comment: skip through the closing `*/`, or to the end of the
        # input when it is never closed.
        if c == '/' and nxt == '*':
            end = code.find('*/', i + 2)
            i = n if end == -1 else end + 2
            continue

        # Copy other characters
        out.append(c)
        i += 1

    return ''.join(out)


# A phpt section header: `--NAME--` at the start of a line.
_SECTION_HEADER = re.compile(r"^--([_A-Z]+)--", re.MULTILINE)


# Extract a section from a test case
def extract_sec(test, section):
    """
    Return the body of `section` (e.g. "--FILE--") of a phpt test case.

    The body runs from the section header to the next header or to the end
    of the text. Headers are only recognised at the start of a line, so
    `--WORD--` inside a line of code or output does not cut the section short.

    Returns:
        The section body without leading/trailing newlines, or "" when the
        section is absent.
    """
    header = re.search("^" + re.escape(section), test, re.MULTILINE)
    if header is None:
        return ""
    body = test[header.end():]
    next_header = _SECTION_HEADER.search(body)
    if next_header is not None:
        body = body[:next_header.start()]
    return body.strip("\n")
"--EXTENSION--") 294 | phpcode = remove_php_comments(phpcode) 295 | f = open(f"/tmp/tmp.php", "w", encoding="iso_8859_1") 296 | f.write(phpcode) 297 | f.close() 298 | dataflow = PHPFastDataflow() 299 | variables, dataflows = dataflow.analyze(phpcode) 300 | # this is for PHP-AST dataflow analysis 301 | # dataflows = get_php_dataflow_groups("/tmp/tmp.php") 302 | # variables = set() 303 | # for i in dataflows: 304 | # for j in i: 305 | # variables.add(j) 306 | # variables = list(variables) 307 | cursor.execute(''' 308 | INSERT INTO seeds (phpcode, variable, dataflow, description, configuration, skipif, extension, secondary) 309 | VALUES (?, ?, ?, ?, ?, ?, ?, ?) 310 | ''', (phpcode, str(variables), str(dataflows), description, configuration, skipif, extension, secondary)) 311 | 312 | # Commit the changes and close the connection 313 | conn.commit() 314 | conn.close() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | import datetime 5 | import shutil 6 | import threading 7 | from fuse import Fusion 8 | from mutator import Mutator 9 | 10 | # Class for handling PHP fuzzing process 11 | class PHPFuzz: 12 | 13 | def __init__(self): 14 | """ 15 | Initialize the PHPFuzz class with various configurations and settings. 
16 | """ 17 | # Configurations for different fuzzing features 18 | self.mutation = True 19 | self.apifuzz = True 20 | self.ini = True 21 | self.fusion = True 22 | 23 | # Coverage feedback (off by default due to overhead) 24 | self.coverage = False 25 | self.test_root = "/home/phpfuzz/WorkSpace/flowfusion" 26 | self.php_root = f"{self.test_root}/php-src" 27 | self.fused = f"{self.php_root}/tests/fused" 28 | self.mutated = f"{self.php_root}/tests/mutated" 29 | self.bug_folder = f"{self.test_root}/bugs/" 30 | self.fixme_folder = f"{self.test_root}/fixme/" 31 | self.log_path = "/tmp/test.log" # Log path for test execution 32 | 33 | # Initialize necessary folders and files 34 | self.patch_run_test() 35 | self.backup_initials() 36 | self.check_target_exist() 37 | self.init_fused_folder() 38 | self.init_bug_folder() 39 | self.init_phpt_path() 40 | self.moveout_builtin_phpts() 41 | 42 | self.total_count = 1 43 | self.syntax_error_count = 0 44 | self.stopping_test_num = -1 # stop the fuzzer after executing this number of test cases, -1 means infinite 45 | 46 | # PHP may mess up folders 47 | def backup_initials(self): 48 | # TODO: we need a robust run-tests.php for fuzzing 49 | # update 07/01/2025: we just save one working version of run-tests.php 50 | # under the backup folder, and restore it everytime before fuzzloop 51 | # we dont backup the latest run-tests.php, it may have various updates 52 | # os.system(f"cp {self.php_root}/run-tests.php {self.test_root}/backup/") 53 | os.system(f"cp {self.php_root}/Makefile {self.test_root}/backup/") 54 | os.system(f"cp {self.php_root}/libtool {self.test_root}/backup/") 55 | 56 | # Patch the run-tests.php script to avoid conflicts 57 | def patch_run_test(self): 58 | os.chdir(self.php_root) 59 | os.system("sed -i 's/foreach (\$fileConflictsWith\[\$file\] as \$conflictKey) {/foreach (\$fileConflictsWith\[\$file\] as \$conflictKey) { continue;/g' ./run-tests.php") 60 | os.system("sed -i 
's/proc_terminate(\$workerProcs\[\$i\]);/\/\/proc_terminate(\$workerProcs\[\$i\]);/' ./run-tests.php") 61 | os.system("sed -i 's/unset(\$workerProcs\[\$i\], \$workerSocks\[\$i\]);/\/\/unset(\$workerProcs\[\$i\], \$workerSocks\[\$i\]);/' ./run-tests.php") 62 | os.system("sed -i 's/foreach (\$test_files as \$i => \$file) {/foreach (\$test_files as \$i => \$file) { continue;/' ./run-tests.php") 63 | os.chdir(self.test_root) 64 | 65 | # Remove built-in PHPT files to avoid conflicts 66 | def moveout_builtin_phpts(self): 67 | os.system(f"find {self.php_root} -name '*.phpt' | xargs rm 2>/dev/null") 68 | 69 | # Initialize the path to PHPT files 70 | def init_phpt_path(self): 71 | os.system(f'find {self.test_root}/phpt_seeds/ -name "*.phpt" > {self.test_root}/testpaths') 72 | 73 | # Create the bug folder if it doesn't exist 74 | def init_bug_folder(self): 75 | if not os.path.exists(self.bug_folder): 76 | os.makedirs(self.bug_folder) 77 | if not os.path.exists(self.fixme_folder): 78 | os.makedirs(self.fixme_folder) 79 | 80 | # Check if the target PHP build exists 81 | def check_target_exist(self): 82 | if not os.path.exists(self.php_root): 83 | print(f"{self.php_root} not found..") 84 | exit(-1) 85 | 86 | # Clean and initialize the fused test folder 87 | def init_fused_folder(self): 88 | if not os.path.exists(self.fused): 89 | os.system(f"mkdir {self.fused}") 90 | 91 | # Check for dependencies in the phpt_deps folder 92 | dependency = f"{self.test_root}/phpt_deps" 93 | if not os.path.exists(dependency): 94 | print(f"{dependency} not found..") 95 | exit(-1) 96 | 97 | # Restore dependencies and initials 98 | os.system(f"cp -R {dependency}/* {self.fused}") 99 | os.system(f"cp {self.test_root}/backup/run-tests.php {self.php_root}/") 100 | os.system(f"cp {self.test_root}/backup/Makefile {self.php_root}/") 101 | os.system(f"cp {self.test_root}/backup/libtool {self.php_root}/") 102 | os.system(f"cd {self.php_root}/tests/fused/ && find . 
-type d -empty -exec touch {{}}/.gitkeep \;") 103 | os.system(f"cd {self.php_root} && git add ./tests/fused/ && git add -f ./tests/fused/* && git config --global user.email '0599jiangyc@gmail.com' && git config --global user.name 'fuzzsave' && git commit -m 'fuzzsave'") 104 | print("fused inited! git status saved!") 105 | 106 | # Check if the PHP build exists 107 | def check_build(self): 108 | return os.path.exists(f"{self.php_root}/sapi/cli/php") 109 | 110 | # Parse the test log for failed tests and possible bugs 111 | def parse_log(self): 112 | known_crash_sites = ["leak"] 113 | 114 | with open(self.log_path, "r") as f: 115 | logs = f.read().strip("\n").split("\n") 116 | 117 | next_log_id = len(os.listdir(self.bug_folder)) + 1 118 | fixme_log_id = len(os.listdir(self.fixme_folder)) + 1 119 | for eachlog in logs: 120 | # we only care failed fusion tests 121 | if "FAIL" not in eachlog or "tests/fused" not in eachlog: 122 | continue 123 | casepath = self.php_root + "/" + eachlog.split("[")[-1].split("]")[0].replace(".phpt", "") 124 | stdouterr = f"{casepath}.out" 125 | if not os.path.exists(stdouterr): 126 | continue 127 | with open(stdouterr, "r", encoding="iso_8859_1") as f: 128 | content = f.read() 129 | self.total_count += 1 130 | if "Parse error" in content: 131 | self.syntax_error_count += 1 132 | if "leaked in" in content: 133 | # be default, memory leak is ignored 134 | continue 135 | if "Sanitizer" in content or "(core dumped)" in content: 136 | os.makedirs(f"{self.bug_folder}/{next_log_id}") 137 | shutil.move(f"{casepath}.out", f"{self.bug_folder}/{next_log_id}/test.out") 138 | shutil.move(f"{casepath}.php", f"{self.bug_folder}/{next_log_id}/test.php") 139 | shutil.move(f"{casepath}.phpt", f"{self.bug_folder}/{next_log_id}/test.phpt") 140 | shutil.move(f"{casepath}.sh", f"{self.bug_folder}/{next_log_id}/test.sh") 141 | next_log_id += 1 142 | if "Parse error: syntax error" in content and False: # only for debugging 143 | 
os.makedirs(f"{self.fixme_folder}/{fixme_log_id}") 144 | shutil.move(f"{casepath}.out", f"{self.fixme_folder}/{fixme_log_id}/test.out") 145 | shutil.move(f"{casepath}.php", f"{self.fixme_folder}/{fixme_log_id}/test.php") 146 | shutil.move(f"{casepath}.phpt", f"{self.fixme_folder}/{fixme_log_id}/test.phpt") 147 | shutil.move(f"{casepath}.sh", f"{self.fixme_folder}/{fixme_log_id}/test.sh") 148 | fixme_log_id += 1 149 | 150 | # Clean up test artifacts like logs and output files 151 | def clean(self): 152 | os.system(f"find {self.fused} -type f -name '*.log' -o -name '*.out' -o -name '*.diff' -o -name '*.sh' -o -name '*.php' -o -name '*.phpt' | xargs rm 2>/dev/null") 153 | 154 | # Collect coverage information at regular intervals 155 | def collect_cov(self, fuzztime): 156 | def run_coverage_collection(): 157 | #os.system("python3 bot.py") 158 | os.chdir(self.php_root) 159 | cmd = f"gcovr -sr . -o /tmp/gcovr-{fuzztime}.xml --xml --exclude-directories 'ext/date/lib$$' -e 'ext/bcmath/libbcmath/.*' -e 'ext/date/lib/.*' -e 'ext/fileinfo/libmagic/.*' -e 'ext/gd/libgd/.*' -e 'ext/hash/sha3/.*' -e 'ext/mbstring/libmbfl/.*' -e 'ext/pcre/pcre2lib/.*' > /dev/null" 160 | os.system(cmd) 161 | os.chdir(self.test_root) 162 | with open(f"/tmp/gcovr-{fuzztime}.xml", "r") as f: 163 | x = f.read() 164 | self.coverage = float(x.split('line-rate="')[1].split('"')[0]) 165 | print(f"Coverage: {self.coverage:.2%}") 166 | 167 | # Create a new thread for running coverage collection 168 | coverage_thread = threading.Thread(target=run_coverage_collection) 169 | coverage_thread.start() 170 | 171 | # Display runtime logs with current progress 172 | def runtime_log(self, seconds, rounds): 173 | bugs_found = len(os.listdir(f"{self.test_root}/bugs/")) 174 | syntax_correct_rate = float((self.total_count-self.syntax_error_count)/self.total_count) 175 | print(f"\ntime: {int(seconds)} seconds | bugs found: {bugs_found} | tests executed : {self.total_count} | syntax correct rate: {syntax_correct_rate:.2%} 
| throughput: {self.total_count/seconds} tests per second\n") 176 | if self.coverage != 0: 177 | print(f"line code coverage : {self.coverage:.2%}") 178 | if self.stopping_test_num>0 and self.total_count > self.stopping_test_num: 179 | print("stopped") 180 | exit(0) 181 | 182 | # Main function to execute the fuzzing process 183 | def main(self): 184 | if not self.check_build(): 185 | print("php not build") 186 | exit() 187 | 188 | count = 0 189 | start = time.time() 190 | covtime = 60*60 # Interval for counting coverage (in seconds) 191 | fuzztime = 0 192 | self.coverage = 0 193 | 194 | fusion_thread = None 195 | 196 | print("Start flowfusion...") 197 | while True: 198 | count += 1 199 | # we often need to clean the folder... :( 200 | if count % 10 == 0: 201 | # clean the test folder 202 | os.system(f"cd {self.test_root} && git clean -fd -e php-src -e phpt_deps -e phpt_seeds -e knowledges -e backup -e fixme -e bugs -e testpaths") 203 | os.system(f"cp {self.test_root}/backup/run-tests.php {self.php_root}/") 204 | os.system(f"cp {self.test_root}/backup/Makefile {self.php_root}/") 205 | os.system(f"cp {self.test_root}/backup/libtool {self.php_root}/") 206 | self.clean() 207 | 208 | # Run the fusion process in a separate thread 209 | if self.fusion: 210 | if fusion_thread is None or not fusion_thread.is_alive(): 211 | phpFusion = Fusion(self.test_root, self.php_root, self.apifuzz, self.ini, self.mutation) 212 | fusion_thread = threading.Thread(target=phpFusion.main) 213 | fusion_thread.start() 214 | 215 | # Run tests and parse logs 216 | os.system(f"mv /tmp/fused*.phpt {self.php_root}/tests/fused/") # load fused tests 217 | 218 | # TODO: 219 | # Note: by default 32 parallel fuzzing, however, it is not stable due to run-tests.php :( 220 | 221 | os.chdir(self.php_root) 222 | os.system('timeout 30 make test TEST_PHP_ARGS="-j32 --set-timeout 5" 2>/dev/null | grep "FAIL" > /tmp/test.log') 223 | os.chdir(self.test_root) 224 | os.system(f"chmod -R 777 {self.test_root} 
2>/dev/null") 225 | os.system("kill -9 `ps aux | grep \"/home/phpfuzz/WorkSpace/flowfusion/php-src/sapi/cli/php\" | grep -v grep | awk '{print $2}'` > /dev/null 2>&1") 226 | os.system("kill -9 `ps aux | grep \"/home/phpfuzz/WorkSpace/flowfusion/php-src/sapi/phpdbg/phpdbg\" | grep -v grep | awk '{print $2}'` > /dev/null 2>&1") 227 | self.parse_log() 228 | 229 | # clean 230 | os.system(f"cd {self.php_root} && git clean -fd > /dev/null") 231 | 232 | # Collect coverage periodically 233 | end = time.time() 234 | timelen = end - start 235 | if timelen > covtime + fuzztime: 236 | fuzztime += covtime 237 | self.collect_cov(fuzztime) 238 | 239 | # Log runtime information 240 | self.runtime_log(timelen, count) 241 | 242 | # Initialize and run the fuzzing process 243 | fuzz = PHPFuzz() 244 | fuzz.main() 245 | -------------------------------------------------------------------------------- /fuse.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries and modules 2 | import sqlite3 3 | from random import randint, choice, random 4 | import re 5 | import subprocess 6 | import os 7 | from dataflow import PHPFastDataflow 8 | from mutator import Mutator 9 | 10 | # Set to True to use simple concatenation as a baseline 11 | ConcatBaseline = False 12 | 13 | # Replace a random occurrence of a substring in a string 14 | def replace_random_occurrence(s, old, new): 15 | # Find all positions of the substring 'old' 16 | positions = [] 17 | start = 0 18 | while True: 19 | start = s.find(old, start) 20 | if start == -1: 21 | break 22 | positions.append(start) 23 | start += len(old) 24 | 25 | # If no occurrences found, return the original string 26 | if not positions: 27 | return s 28 | 29 | # Select a random position to replace the substring 30 | random_pos = choice(positions) 31 | return s[:random_pos] + new + s[random_pos + len(old):] 32 | 33 | # Fusion class for handling test file fusion and mutation 34 | class Fusion(): 35 | # 
PHP code to fuse internal variables using random internal functions 36 | apifuzz_func = "" 37 | 38 | # Class attributes 39 | fuse_count = 0 # Number of fused test cases 40 | 41 | # Initialize with paths and configuration options 42 | def __init__(self, test_root, php_root, apifuzz, ini, mutation): 43 | self.test_root = test_root 44 | self.php_root = php_root 45 | self.apifuzz = apifuzz # Whether to fuzz internal interfaces 46 | self.ini = ini # Whether to fuzz execution environments (JIT, etc.) 47 | self.mutation = mutation # Whether to mutate the original test case 48 | self.mut = Mutator() 49 | 50 | # Randomly generate a JIT mode configuration for opcache 51 | def random_jit_mode(self): 52 | # TODO: shall we fuzz all these jit modes? 53 | # jit_mode = choice(['1111','1215','1211','1213','1254','1255','1201','1202','1205','1101','1103','1105','1231','1235','1011','1015']) 54 | jit_mode = choice(['1254','1205']) 55 | jit_ini = ''' 56 | opcache.enable=1 57 | opcache.enable_cli=1 58 | opcache.jit=''' + jit_mode + '\n' 59 | return jit_ini 60 | 61 | # TODO: fuzz the configurations 62 | def get_random_config(self): 63 | config_options = { 64 | "precision": choice([10, 12, 13, 14, 17]), 65 | "serialize_precision": choice([5, 10, 14, 15, 75, -1]), 66 | "memory_limit": choice(["2M", "33M", "16M", "20M", "32M", "100M", "256M", "512M", "5M", "8M", "128M", "6G", "-1"]), 67 | "post_max_size": choice(["1", "1M", "1024"]), 68 | "max_input_vars": choice([1, 4, 5, 10, 100, 1000]), 69 | "max_execution_time": choice([0, 1, 2, 10, 12, 60]), 70 | "default_charset": choice(["cp932", "big5", "ISO-8859-1", "UTF-8", "", "cp874", "cp936", "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256"]), 71 | "short_open_tag": choice(["on", "off", 1]), 72 | "auto_globals_jit": choice([0, 1]), 73 | "expose_php": choice([0, "On"]), 74 | "implicit_flush": choice([0, 1]), 75 | "allow_url_include": choice([0, 1]), 76 | 77 | # Timezone settings 78 | "date.timezone": choice([ 79 | "Europe/London", 
"UTC", "Atlantic/Azores", "GMT", "America/Los_Angeles", "Asia/Singapore", 80 | "Asia/Chongqing", "Europe/Amsterdam", "Europe/Berlin", "Europe/Paris", "America/New_York", 81 | "America/Montreal", "America/Sao_Paulo", "America/Vancouver", "America/Mendoza", "Europe/Rome", 82 | "GMT0", "Mars/Utopia_Planitia", "Incorrect/Zone" 83 | ]), 84 | 85 | # Opcache settings 86 | "opcache.enable": choice([0, 1]), 87 | "opcache.enable_cli": choice([0, 1]), 88 | "opcache.preload": "{PWD}/" + choice([ 89 | "preload_undef_const_2.inc", "preload_variance_ind.inc", "preload_inheritance_error_ind.inc", 90 | "preload_ind.inc", "preload_bug81256.inc", "preload_user.inc" 91 | ]), 92 | "opcache.jit": choice([0, 1205, 1235, 1255]), 93 | "opcache.jit_buffer_size": choice(["1M", "128M", "0"]), 94 | "opcache.jit_blacklist_root_trace": choice(["16", "255"]), 95 | "opcache.jit_blacklist_side_trace": choice(["8", "255"]), 96 | "opcache.jit_max_loop_unrolls": choice(["8", "10"]), 97 | "opcache.jit_max_recursive_calls": choice(["2", "10"]), 98 | "opcache.jit_max_recursive_returns": choice(["2", "4"]), 99 | "opcache.jit_max_polymorphic_calls": choice(["2", "1000"]), 100 | "opcache.file_update_protection": choice([0, 2]), 101 | "opcache.optimization_level": choice([-1, 0, 0x7fffffff, 0x4ff, 0x7FFFBFFF]), 102 | "opcache.memory_consumption": choice([7, 64]), 103 | "opcache.max_accelerated_files": choice([10, 1000000]), 104 | "opcache.revalidate_freq": choice([0, 60]), 105 | "opcache.validate_timestamps": choice([0, 1]), 106 | "opcache.interned_strings_buffer": choice([-1, 16, 131072]), 107 | 108 | # Session settings 109 | "session.save_handler": choice(["files", "non-existent", "qwerty"]), 110 | "session.auto_start": choice([0, 1]), 111 | "session.use_cookies": choice([0, 1]), 112 | "session.cookie_httponly": choice([0, "TRUE"]), 113 | "session.cookie_secure": choice([0, "TRUE"]), 114 | "session.use_strict_mode": choice([0, 1]), 115 | "session.use_trans_sid": choice([0, 1]), 116 | 
"session.gc_maxlifetime": choice([300, 0]), 117 | "session.upload_progress.enabled": choice([0, 1]), 118 | "session.gc_probability": choice([0, 1]), 119 | "session.sid_length": choice([32]), 120 | 121 | # Error reporting settings 122 | "error_reporting": choice([0, -1, 1, 8191, 14335, 2039, 2047, "E_ALL", "E_ALL^E_NOTICE", "E_ALL & ~E_DEPRECATED", "E_ALL & ~E_WARNING & ~E_NOTICE", "E_ALL & ~E_WARNING", "E_ALL & ~E_DEPRECATED", "E_ALL & E_NOTICE | E_PARSE ^ E_DEPRECATED & ~E_WARNING | !E_ERROR"]), 123 | 124 | # Mail settings 125 | "sendmail_path": "{MAIL:{PWD}/" + choice([ 126 | "mb_send_mail04.eml", "mailBasic7.out", "gh8086.eml", "mb_send_mail03.eml", "gh7902.eml" 127 | ]) + "}" 128 | } 129 | 130 | # Randomly select one key-value pair from the config options 131 | random_key = choice(list(config_options.keys())) 132 | return f"{random_key}={config_options[random_key]}" 133 | 134 | # Randomly generate INI settings with possible JIT configuration 135 | def random_inis(self): 136 | if self.ini==False: 137 | return "" 138 | inis = self.get_random_config() + '\n' 139 | if choice([True, False, False, False]): 140 | inis += self.random_jit_mode() 141 | return inis 142 | 143 | # Fuse two test cases by interleaving their dataflows 144 | def _fuse_dataflow_interleave(self, test1, test2, dataflow1, dataflow2): 145 | if not dataflow1 or not dataflow2: 146 | return test1, test2 147 | 148 | # we can mix our random class variables with the rest code context 149 | # NOTE: $clsAttr can be non-exist 150 | dataflow1 += [["$cls","$clsAttr"]]; 151 | 152 | if choice([True, False]): 153 | test1_flow = choice(choice(dataflow1)) 154 | test2_flow = choice(choice(dataflow2)) 155 | test1 += f"\n$fusion = {test1_flow};\n" 156 | test2 = replace_random_occurrence(test2, test2_flow, "$fusion") 157 | return test1, test2 158 | 159 | # Identify the longest dataflows for interleaving 160 | max_dataflow_1 = 0 161 | max_dataflow_1_len = 0 162 | max_dataflow_2 = 0 163 | max_dataflow_2_len = 0 164 | 165 
| for each_dataflow in dataflow1: 166 | if len(each_dataflow)>max_dataflow_1_len: 167 | max_dataflow_1_len = len(each_dataflow) 168 | max_dataflow_1 = each_dataflow 169 | 170 | for each_dataflow in dataflow2: 171 | if len(each_dataflow)>max_dataflow_2_len: 172 | max_dataflow_2_len = len(each_dataflow) 173 | max_dataflow_2 = each_dataflow 174 | 175 | test1_flow = choice(max_dataflow_1) 176 | test2_flow = choice(max_dataflow_2) 177 | 178 | # step 1: keeping the max dataflow from test 1 179 | test1 += f"\n$fusion = {test1_flow};\n" 180 | 181 | # step 2: interleave the max dataflow in test2 182 | test2 = replace_random_occurrence(test2, test2_flow, "$fusion") 183 | 184 | return test1, test2 185 | 186 | # return function_name, params 187 | def select_random_function(self): 188 | function_name, param_num = choice(self.apis) 189 | return function_name, param_num 190 | 191 | # fuzz the api 192 | # defined_vars: the defined vars in the php program to be the arguments of apis 193 | def _instrumentation_apifuzz(self, defined_vars): 194 | _instruments = [] 195 | func, param_num = self.select_random_function() 196 | # we try 10 times to randomly fuzz the arguments 197 | for i in range(10): 198 | args = [] 199 | for x in range(param_num): 200 | args.append(choice(defined_vars)) 201 | _instrument = f"{func}({','.join(args)});" 202 | _instrument = "try {"+_instrument+"} catch (Exception $e) { echo($e); }" 203 | _instruments.append(_instrument) 204 | return '\n'+'\n'.join(_instruments)+'\n' 205 | 206 | def _instrumentation_classfuzz(self, defined_vars): 207 | _pre_instrument = [] 208 | _after_instrument = [] 209 | 210 | # Connect to the SQLite database 211 | conn = sqlite3.connect(f'{self.test_root}/knowledges/class.db') 212 | cursor = conn.cursor() 213 | 214 | # Select a random class 215 | cursor.execute('SELECT id, class_name FROM classes ORDER BY RANDOM() LIMIT 1') 216 | class_row = cursor.fetchone() 217 | 218 | if class_row: 219 | class_id, class_name = class_row 220 | 
_pre_instrument.append(f"\n$cls = new {class_name}();\n") 221 | 222 | # Select a random attribute for the selected class 223 | cursor.execute('SELECT name FROM attributes WHERE class_id = ? ORDER BY RANDOM() LIMIT 1', (class_id,)) 224 | attr_row = cursor.fetchone() 225 | if attr_row: 226 | attr_name = attr_row[0] 227 | _pre_instrument.append(f"\n$clsAttr=$cls.{attr_name};\n") 228 | 229 | # Select a random method for the selected class 230 | cursor.execute('SELECT name, params_count FROM methods WHERE class_id = ? ORDER BY RANDOM() LIMIT 1', (class_id,)) 231 | method_row = cursor.fetchone() 232 | if method_row: 233 | method_name, params_count = method_row 234 | # we try 10 times to randomly fuzz the arguments 235 | for i in range(10): 236 | args = [] 237 | for x in range(params_count): 238 | args.append(choice(defined_vars)) 239 | _instrument = f"$cls->{method_name}({','.join(args)});" 240 | _instrument = "try {"+_instrument+"} catch (Exception $e) { echo($e); }" 241 | _after_instrument.append(_instrument) 242 | _after_instrument = '\n'+'\n'.join(_after_instrument)+'\n' 243 | else: 244 | _after_instrument = "" 245 | else: 246 | print('No classes found in the database.') 247 | exit() 248 | 249 | # Close the database connection 250 | conn.close() 251 | 252 | _pre_instrument = '\n'+'\n'.join(_pre_instrument)+'\n' 253 | 254 | return _pre_instrument, _after_instrument 255 | 256 | # Clean up the PHP code by removing unnecessary headers and footers 257 | def clean_php_header_tail(self, phpcode): 258 | phpcode = phpcode.strip().strip('\n').strip("===DONE===").strip("==DONE==").strip("Done") 259 | if phpcode.startswith(''): 262 | phpcode = phpcode[:-len('?>')].rstrip() 263 | phpcode = phpcode.strip("") 264 | return '\n' + phpcode + '\n' 265 | 266 | # Read file content 267 | def read_file(self, filepath): 268 | with open(filepath, "r", encoding="iso-8859-1") as f: 269 | return f.read() 270 | 271 | # Write content to a file 272 | def write_file(self, filepath, content): 273 | 
with open(filepath, "w") as f: 274 | f.write(content) 275 | 276 | # Ad-hoc patches for known syntax errors after fusion 277 | def adhoc_syntax_patch(self, phpt): 278 | phpt = phpt.replace('echo "Done"\n', 'echo "Done";\n') 279 | return phpt 280 | 281 | # Fuse two test cases while handling different sections 282 | def fuse(self): 283 | phpcode1, variable1, dataflow1, description1, configuration1, skipif1, extension1 = self.select_random_seed() 284 | phpcode2, variable2, dataflow2, description2, configuration2, skipif2, extension2 = self.select_random_seed() 285 | 286 | phpcode1 = self.mut.mutate(phpcode1) 287 | phpcode2 = self.mut.mutate(phpcode2) 288 | 289 | fused_description = f"--TEST--\n{description1} + {description2}\n" 290 | fused_configurations = f"\n--INI--\n{configuration1}\n{configuration2}\n{self.random_inis()}\n" 291 | 292 | fused_skipif = "" 293 | 294 | # if skipif1!="" or skipif2!="": 295 | # fused_skipif = f"\n--SKIPIF--\n{skipif1}\n{skipif2.strip('