├── mole ├── __init__.py ├── common │ ├── __init__.py │ ├── helper │ │ ├── __init__.py │ │ ├── variable.py │ │ └── symbol.py │ ├── task.py │ ├── parse.py │ └── log.py ├── core │ ├── __init__.py │ └── ai.py ├── views │ ├── __init__.py │ └── sidebar.py ├── controllers │ ├── __init__.py │ └── ai.py ├── services │ └── __init__.py ├── resources │ └── icon.png ├── grouping │ ├── source_sink.py │ ├── call_graph.py │ └── __init__.py ├── models │ ├── __init__.py │ └── config.py ├── conf │ └── 001-settings.yml └── cli │ └── main.py ├── tests ├── __init__.py ├── slicing │ ├── __init__.py │ ├── test_serialization.py │ ├── test_pointer_deref.py │ ├── test_multithreading.py │ ├── test_mangling.py │ ├── test_function_out_params.py │ ├── test_simple_server.py │ ├── test_various.py │ ├── test_pointer.py │ ├── test_function_calling.py │ ├── test_object_oriented.py │ └── conftest.py ├── data │ ├── src │ │ ├── load-01.c │ │ ├── load-02.c │ │ ├── gets-01.c │ │ ├── name_mangling-03.cpp │ │ ├── function_calling-09.c │ │ ├── load-04.c │ │ ├── memcpy-11.c │ │ ├── pointer_analysis-11.c │ │ ├── name_mangling-04.cpp │ │ ├── pointer_analysis-14.c │ │ ├── gets-02.c │ │ ├── pointer_analysis-16.c │ │ ├── load-03.c │ │ ├── load-05.c │ │ ├── sscanf-01.c │ │ ├── function_out_params-06.c │ │ ├── pointer_analysis-01.c │ │ ├── pointer_analysis-07.c │ │ ├── memcpy-02.c │ │ ├── memcpy-03.c │ │ ├── memcpy-01.c │ │ ├── function_out_params-02.c │ │ ├── function_out_params-04.c │ │ ├── memcpy-06.c │ │ ├── pointer_analysis-02.c │ │ ├── memcpy-09.c │ │ ├── function_calling-07.c │ │ ├── pointer_analysis-17.c │ │ ├── function_calling-05.c │ │ ├── function_out_params-07.c │ │ ├── pointer_analysis-06.c │ │ ├── name_mangling-02.cpp │ │ ├── function_calling-08.c │ │ ├── function_calling-06.c │ │ ├── memcpy-08.c │ │ ├── name_mangling-01.cpp │ │ ├── function_calling-13.c │ │ ├── pointer_analysis-08.c │ │ ├── pointer_analysis-05.c │ │ ├── function_out_params-03.c │ │ ├── function_out_params-01.c │ │ ├── 
pointer_analysis-03.c │ │ ├── memcpy-04.c │ │ ├── function_out_params-05.c │ │ ├── function_out_params-08.c │ │ ├── pointer_analysis-09.c │ │ ├── memcpy-10.c │ │ ├── name_mangling-05.cpp │ │ ├── pointer_analysis-10.c │ │ ├── function_calling-11.c │ │ ├── memcpy-07.c │ │ ├── memcpy-05.c │ │ ├── name_mangling-06.cpp │ │ ├── pointer_analysis-04.c │ │ ├── function_calling-10.c │ │ ├── function_calling-14.c │ │ ├── function_calling-01.c │ │ ├── function_calling-12.c │ │ ├── function_calling-15.c │ │ ├── function_calling-02.c │ │ ├── pointer_analysis-15.c │ │ ├── function_calling-03.c │ │ ├── pointer_analysis-12.c │ │ ├── function_calling-04.c │ │ ├── object_oriented-01.cpp │ │ ├── object_oriented-02.cpp │ │ ├── pointer_analysis-13.c │ │ ├── object_oriented-03.cpp │ │ ├── simple_http_server-01.c │ │ ├── simple_http_server-02.c │ │ ├── simple_http_server-04.c │ │ └── simple_http_server-03.c │ └── Makefile ├── README.md └── test_data.py ├── development ├── debug.py ├── update_dependencies.py └── update_description.py ├── requirements.txt ├── .pre-commit-config.yaml ├── .gitignore ├── docs ├── 03-Customization.md ├── 01-Installation.md └── 04-Pointers.md ├── .github └── workflows │ └── release.yml ├── __init__.py ├── pyproject.toml ├── plugin.json └── README.md /mole/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mole/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mole/core/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /mole/views/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mole/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mole/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/slicing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mole/common/helper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mole/resources/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cyber-defence-campus/mole/HEAD/mole/resources/icon.png -------------------------------------------------------------------------------- /development/debug.py: -------------------------------------------------------------------------------- 1 | from binaryninja import connect_vscode_debugger 2 | 3 | 4 | connect_vscode_debugger(port=5678) 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML==6.0.2 2 | ijson==3.3.0 3 | lark==1.2.2 4 | networkx[default]==3.4.2 5 | openai >= 1.78.1 , <2.0.0 6 | pydantic>=2.0.0 7 | termcolor==2.4.0 8 | -------------------------------------------------------------------------------- /tests/data/src/load-01.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - MLIL_LOAD with HLIL constant pointer dereferencing source 8 | */ 9 | 10 | char* cmd; 11 | 12 | __attribute__ ((noinline, optimize("O0"))) 13 | int main(int argc, char *argv[]) { 14 | cmd = getenv("CMD"); 15 | return system(cmd); 16 | } -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: check 5 | name: Check Code Issues 6 | entry: ruff check 7 | args: [ -q ] 8 | language: python 9 | files: .*\.py$ 10 | - repo: local 11 | hooks: 12 | - id: format 13 | name: Format Code 14 | entry: ruff format 15 | language: python 16 | files: .*\.py$ -------------------------------------------------------------------------------- /tests/data/src/load-02.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - MLIL_LOAD with HLIL variable dereferencing source 8 | */ 9 | 10 | __attribute__ ((noinline, optimize("O0"))) 11 | int main(int argc, char *argv[]) { 12 | char** my_array = (char**) malloc(1 * sizeof(char*)); 13 | my_array[0] = getenv("CMD"); 14 | return system(my_array[0]); 15 | } -------------------------------------------------------------------------------- /tests/data/src/gets-01.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define BUF_LEN 16 5 | 6 | /* 7 | Testcase Description: 8 | - gets 9 | */ 10 | 11 | char *gets(char *s); 12 | 13 | int main(int argc, char *argv[]) { 14 | char s[BUF_LEN]; 15 | 16 | if(gets(s) == NULL) { 17 | fprintf(stderr, "Could not read from STDIN.\n"); 18 | return EXIT_FAILURE; 19 | } 20 | 21 | fprintf(stdout, "s: '%s'\n", s); 
22 | return EXIT_SUCCESS; 23 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-Compiled / Optimized 2 | __pycache__/ 3 | 4 | # Distribution / Packaging 5 | build/ 6 | dist/ 7 | *.egg-info/ 8 | .gradle 9 | 10 | # Environments 11 | .venv/ 12 | venv/ 13 | 14 | # Linting / Formatting 15 | .ruff_cache 16 | 17 | # Visual Studio Code 18 | .vscode 19 | 20 | # Custom YML Files 21 | mole/conf/*.yml 22 | mole/conf/*.yml.bak 23 | !mole/conf/001-settings.yml 24 | !mole/conf/003-libc.yml 25 | 26 | # Test assets 27 | tests/data/bin/ 28 | .coverage 29 | -------------------------------------------------------------------------------- /tests/data/src/name_mangling-03.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | Testcase Description: 5 | - Function in a namespace (C++ name mangling) 6 | */ 7 | 8 | namespace ns 9 | { 10 | __attribute__ ((noinline, optimize("O0"))) 11 | int my_func(char *cmd) { 12 | return system(cmd); 13 | } 14 | } 15 | 16 | int main(int argc, char *argv[]) { 17 | char *cmd = getenv("CMD"); 18 | if(cmd != NULL) { 19 | ns::my_func(cmd); 20 | } 21 | return EXIT_SUCCESS; 22 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-09.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - Call function twice 8 | */ 9 | 10 | __attribute__ ((noinline)) 11 | char* func(char* env) { 12 | return env; 13 | } 14 | 15 | __attribute__((optimize("O0"))) 16 | int main(int argc, char *argv[]) { 17 | char *env_cmd, *cmd; 18 | env_cmd = getenv("ENV_CMD"); 19 | cmd = func(env_cmd); 20 | cmd = func(cmd); 21 | return system(cmd); 22 | } -------------------------------------------------------------------------------- 
/tests/data/src/load-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - MLIL_LOAD with HLIL array indexing source 8 | */ 9 | 10 | #define BUF_SIZE 32 11 | 12 | __attribute__ ((noinline, optimize("O1"))) 13 | int main(int argc, char *argv[]) { 14 | char cmd[BUF_SIZE]; 15 | argv[1] = getenv("FILE"); 16 | argv[2] = getenv("TERM"); 17 | snprintf(cmd, sizeof(cmd), "grep %s %s", argv[1], argv[2]); 18 | return system(cmd); 19 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-11.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - uncontrollable 8 | */ 9 | 10 | int main(int argc, char *argv[]) { 11 | size_t dest_size = 16; 12 | char dest[dest_size]; 13 | char* env_src = getenv("MEMCPY_SRC"); 14 | if(env_src != NULL) { 15 | memcpy(dest, "IF", 3); 16 | 17 | } else { 18 | memcpy(dest, "ELSE", 5); 19 | } 20 | fprintf(stdout, "dest: '%s'\n", dest); 21 | return EXIT_SUCCESS; 22 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-11.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define CMD_LEN 64 5 | 6 | __attribute__ ((noinline, optimize("O0"))) 7 | char* validate(char* cmd) { 8 | if(cmd == NULL) cmd = ""; 9 | return cmd; 10 | } 11 | 12 | __attribute__ ((noinline)) 13 | int execute(char* cmd) { 14 | cmd = validate(cmd); 15 | return system(cmd); 16 | } 17 | 18 | int main(int argc, char *argv[]) { 19 | char cmd[CMD_LEN]; 20 | char* env = getenv("CMD"); 21 | snprintf(cmd, CMD_LEN, "%s", env); 22 | return execute(cmd); 23 | } -------------------------------------------------------------------------------- /tests/data/src/name_mangling-04.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | Testcase Description: 5 | - Template instantiation (C++ name mangling) 6 | */ 7 | 8 | template 9 | __attribute__ ((noinline, optimize("O0"))) 10 | int my_func(char *cmd, T dummy) { 11 | if(dummy) { 12 | printf("[+] template function called with cmd='%s'\n", cmd); 13 | } 14 | return system(cmd); 15 | } 16 | 17 | int main(int argc, char *argv[]) { 18 | char *cmd = getenv("CMD"); 19 | if(cmd != NULL) { 20 | my_func(cmd, 1); 21 | } 22 | return EXIT_SUCCESS; 23 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-14.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - system with user-controllable command 11 | */ 12 | 13 | int main() { 14 | char cmd[CMD_LEN]; 15 | char *env_cmd = getenv("CMD"); 16 | if(env_cmd == NULL) { 17 | printf("CMD not set.\n"); 18 | return EXIT_FAILURE; 19 | } 20 | snprintf(cmd, CMD_LEN, "%s", env_cmd); 21 | int res = system(cmd); 22 | printf("CMD: %s\n", cmd); 23 | return res; 24 | } -------------------------------------------------------------------------------- /tests/data/src/gets-02.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_LEN 16 6 | 7 | /* 8 | Testcase Description: 9 | - gets with memcpy 10 | */ 11 | 12 | char *gets(char *s); 13 | 14 | int main(int argc, char *argv[]) { 15 | char dest[BUF_LEN]; 16 | char src[BUF_LEN]; 17 | 18 | if(gets(src) == NULL) { 19 | fprintf(stderr, "Could not read from STDIN.\n"); 20 | return EXIT_FAILURE; 21 | } 22 | 23 | memcpy(dest, src, BUF_LEN); 24 | fprintf(stdout, "dest: '%s'\n", dest); 25 | return EXIT_SUCCESS; 26 | } 
-------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-16.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | # define BUF_SIZE 64 5 | 6 | /* 7 | Testcase Description: 8 | - Array element 9 | */ 10 | 11 | __attribute__ ((noinline, optimize("O0"))) 12 | int main(int argc, char * argv[]) 13 | { 14 | char data_buf[BUF_SIZE] = "ls"; 15 | char* data = &data_buf[0]; 16 | char* env_cmd = getenv("CMD"); 17 | int data_len = strlen(data); 18 | if (env_cmd != NULL) 19 | { 20 | strncat(data+data_len, env_cmd, BUF_SIZE-data_len-1); 21 | } 22 | system(data); 23 | return 0; 24 | } -------------------------------------------------------------------------------- /tests/data/src/load-03.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - MLIL_LOAD with HLIL variable+offset dereferencing source 8 | */ 9 | 10 | #define BUF_SIZE 32 11 | 12 | __attribute__ ((noinline, optimize("O0"))) 13 | int main(int argc, char *argv[]) { 14 | char cmd[BUF_SIZE]; 15 | char** my_array = (char**) malloc(3 * sizeof(char*)); 16 | my_array[1] = getenv("FILE"); 17 | my_array[2] = getenv("TERM"); 18 | snprintf(cmd, sizeof(cmd), "grep %s %s", my_array[1], my_array[2]); 19 | return system(cmd); 20 | } -------------------------------------------------------------------------------- /tests/data/src/load-05.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - MLIL_LOAD_STRUCT with HLIL field dereferencing source 8 | */ 9 | 10 | #define BUF_SIZE 32 11 | 12 | char dest[BUF_SIZE]; 13 | 14 | typedef struct { 15 | char* src; 16 | int size; 17 | } MyStruct; 18 | 19 | __attribute__ ((noinline, optimize("O0"))) 20 | int main(int argc, char *argv[]) { 21 | MyStruct s, *p = &s; 
22 | p->src = getenv("CMD"); 23 | p->size = atoi(getenv("SIZE")); 24 | memcpy(dest, p->src, p->size); 25 | return 0; 26 | } -------------------------------------------------------------------------------- /tests/data/src/sscanf-01.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - sscanf 8 | */ 9 | 10 | int main(int argc, char *argv[]) { 11 | int result, integer; 12 | 13 | char *env_input = getenv("SSCANF_STR"); 14 | if(env_input == NULL) { 15 | fprintf(stderr, "SSCANF_STR environment variable not set.\n"); 16 | return EXIT_FAILURE; 17 | } 18 | 19 | result = sscanf(env_input, "%d", &integer); 20 | fprintf(stdout, "result: '%d', integer: '%d'\n", result, integer); 21 | 22 | return EXIT_SUCCESS; 23 | } -------------------------------------------------------------------------------- /tests/data/src/function_out_params-06.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Output parameter 1 (char**): not written and influence on the sink 7 | */ 8 | 9 | __attribute__ ((noinline, optimize("O0"))) 10 | int check_cmd(char** cmd){ 11 | if(*cmd != NULL){ 12 | return 0; 13 | } 14 | return -1; 15 | } 16 | 17 | int main() { 18 | char *cmd = getenv("CMD"); 19 | if (check_cmd(&cmd) == 0) { 20 | system(cmd); 21 | } else { 22 | fprintf(stderr, "CMD environment variable not set.\n"); 23 | } 24 | return 0; 25 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-01.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - system with user-controllable command 11 | */ 12 | 13 | int main(int argc, char *argv[]) { 14 | char cmd[CMD_LEN]; 15 | 16 | char 
*env_cmd = getenv("SYSTEM_COMMAND"); 17 | if(env_cmd == NULL) { 18 | fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 19 | return EXIT_FAILURE; 20 | } 21 | snprintf(cmd, CMD_LEN, "%s", env_cmd); 22 | return system(cmd); 23 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-07.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - memcpy with user-controllable source 11 | */ 12 | 13 | char* src; 14 | 15 | __attribute__((noinline)) 16 | void my_getenv(char **env_src) { 17 | *env_src = getenv("MEMCPY_SRC"); 18 | } 19 | 20 | int main(int argc, char *argv[]) { 21 | char dest[BUF_LEN]; 22 | 23 | my_getenv(&src); 24 | memcpy(dest, src, BUF_LEN); 25 | fprintf(stdout, "dest: %s\n", dest); 26 | 27 | return EXIT_SUCCESS; 28 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-02.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable source and size 8 | */ 9 | 10 | int main(int argc, char *argv[]) { 11 | char dest[16]; 12 | 13 | char* env_src = getenv("MEMCPY_SRC"); 14 | if(env_src == NULL) { 15 | fprintf(stderr, "MEMCPY_SRC environment variable not set.\n"); 16 | return EXIT_FAILURE; 17 | } 18 | size_t n = strlen(env_src); 19 | 20 | memcpy(dest, env_src, n); 21 | dest[n] = '\0'; 22 | fprintf(stdout, "dest: '%s'\n", dest); 23 | 24 | return EXIT_SUCCESS; 25 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-03.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable destination 8 | */ 
9 | 10 | int main(int argc, char *argv[]) { 11 | char src[] = "03-memcpy"; 12 | 13 | char* env_dest = getenv("MEMCPY_DEST"); 14 | if(env_dest == NULL) { 15 | fprintf(stderr, "MEMCPY_DEST environment variable not set.\n"); 16 | return EXIT_FAILURE; 17 | } 18 | size_t n = strlen(src); 19 | 20 | memcpy(env_dest, src, n); 21 | env_dest[n] = '\0'; 22 | fprintf(stdout, "dest: '%s'\n", env_dest); 23 | 24 | return EXIT_SUCCESS; 25 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-01.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable size 8 | */ 9 | 10 | int main(int argc, char *argv[]) { 11 | char dest[16]; 12 | char src[] = "memcpy-01"; 13 | 14 | char* env_size = getenv("MEMCPY_SIZE"); 15 | if(env_size == NULL) { 16 | fprintf(stderr, "MEMCPY_SIZE environment variable not set.\n"); 17 | return EXIT_FAILURE; 18 | } 19 | int n = atoi(env_size); 20 | 21 | memcpy(dest, src, n); 22 | dest[n] = '\0'; 23 | fprintf(stdout, "dest: '%s'\n", dest); 24 | 25 | return EXIT_SUCCESS; 26 | } -------------------------------------------------------------------------------- /tests/data/src/function_out_params-02.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Output parameter 1 (char**): written and influence on the sink 7 | */ 8 | 9 | __attribute__ ((noinline)) 10 | int get_cmd(char **out_cmd){ 11 | char *env_cmd = getenv("CMD"); 12 | if (env_cmd != NULL) { 13 | *out_cmd = env_cmd; 14 | return 0; 15 | } 16 | return -1; 17 | } 18 | 19 | int main() { 20 | char *cmd = NULL; 21 | if (get_cmd(&cmd) == 0) { 22 | system(cmd); 23 | } else { 24 | fprintf(stderr, "CMD environment variable not set.\n"); 25 | } 26 | return 0; 27 | } -------------------------------------------------------------------------------- 
/tests/data/src/function_out_params-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Function with output parameter (char**) 7 | */ 8 | 9 | typedef struct { 10 | char* cmd; 11 | } MyStruct; 12 | 13 | __attribute__ ((noinline)) 14 | int get_cmd(MyStruct *s){ 15 | char *env_cmd = getenv("CMD"); 16 | if (env_cmd != NULL) { 17 | s->cmd = env_cmd; 18 | return 0; 19 | } 20 | return -1; 21 | } 22 | 23 | int main() { 24 | MyStruct s; 25 | if (get_cmd(&s) == 0) { 26 | system(s.cmd); 27 | } else { 28 | fprintf(stderr, "CMD environment variable not set.\n"); 29 | } 30 | return 0; 31 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-06.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - uncontrollable source 8 | */ 9 | 10 | int main(int argc, char *argv[]) { 11 | char dest[16]; 12 | char src[] = "01-memcpy"; 13 | size_t n = strlen(src); 14 | 15 | char *env_select = getenv("MEMCPY_SELECT"); 16 | if(env_select == NULL) { 17 | fprintf(stderr, "MEMCPY_SELECT environment variable not set.\n"); 18 | return EXIT_FAILURE; 19 | } 20 | env_select = src; 21 | 22 | memcpy(dest, env_select, n); 23 | dest[n] = '\0'; 24 | fprintf(stdout, "dest: '%s'\n", dest); 25 | 26 | return EXIT_SUCCESS; 27 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-02.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - system with user-controllable command 11 | */ 12 | 13 | int main(int argc, char *argv[]) { 14 | char cmd[CMD_LEN]; 15 | 16 | char *env_cmd = getenv("SYSTEM_COMMAND"); 17 | if(env_cmd == NULL) { 18 
| fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 19 | return EXIT_FAILURE; 20 | } 21 | snprintf(cmd, CMD_LEN, "%s", env_cmd); 22 | snprintf(cmd, CMD_LEN, "%d", 1337); 23 | snprintf(cmd, CMD_LEN, "%d", 31337); 24 | return system(cmd); 25 | } -------------------------------------------------------------------------------- /mole/common/helper/variable.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import binaryninja as bn 3 | 4 | 5 | class VariableHelper: 6 | """ 7 | This class provides static helper functions that format Binary Ninja variables as strings. 8 | """ 9 | 10 | @staticmethod 11 | def get_var_info(var: bn.Variable) -> str: 12 | """ 13 | This method returns the name of the variable `var` as a string. 14 | """ 15 | return f"{var.name}" 16 | 17 | @staticmethod 18 | def get_ssavar_info(var: bn.SSAVariable) -> str: 19 | """ 20 | This method returns the SSA variable `var` as a string of the form `<name>#<version>`. 
21 | """ 22 | return f"{var.name}#{var.version}" 23 | -------------------------------------------------------------------------------- /tests/data/src/memcpy-09.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - uncontrollable 8 | */ 9 | 10 | int main(int argc, char *argv[]) { 11 | char dest[16]; 12 | char src[] = "01-memcpy"; 13 | 14 | char* env_unused = getenv("MEMCPY_UNUSED"); 15 | if(env_unused == NULL) { 16 | fprintf(stderr, "MEMCPY_UNUSED environment variable not set.\n"); 17 | return EXIT_FAILURE; 18 | } 19 | fprintf(stdout, "unused: '%s'\n", env_unused); 20 | 21 | size_t n = strlen(src); 22 | memcpy(dest, src, n); 23 | dest[n] = '\0'; 24 | fprintf(stdout, "dest: '%s'\n", dest); 25 | 26 | return EXIT_SUCCESS; 27 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-07.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - allow function inlining 8 | - do not follow all function parameters blindly (negative) 9 | */ 10 | 11 | char* func(char* env) { 12 | char *cmd = (char *) malloc(4); 13 | cmd[0] = 'l'; 14 | cmd[1] = 's'; 15 | cmd[2] = '\0'; 16 | cmd[3] = '\0'; 17 | fprintf(stdout, "--- FUN ---\n"); 18 | fprintf(stdout, "env: '%s'\ncmd: '%s'\n", env, cmd); 19 | return cmd; 20 | } 21 | 22 | int main(int argc, char *argv[]) { 23 | char *env, *cmd; 24 | env = getenv("CMD"); 25 | cmd = func(env); 26 | system(cmd); 27 | return EXIT_SUCCESS; 28 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-17.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_SIZE 1024 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | */ 
11 | 12 | struct MyStruct { 13 | int sock_fd; 14 | char buf[BUF_SIZE]; 15 | }; 16 | 17 | __attribute__ ((noinline, optimize("O0"))) 18 | int main(int argc, char * argv[]) { 19 | int n; 20 | char buf[BUF_SIZE]; 21 | struct MyStruct* s; 22 | 23 | s = malloc(sizeof(struct MyStruct)); 24 | s->sock_fd = socket(AF_INET, SOCK_STREAM, 0); 25 | 26 | n = recv(s->sock_fd, &buf, BUF_SIZE, 0); 27 | memcpy(&s->buf, &buf, n); 28 | system((const char*) &s->buf); 29 | 30 | return 0; 31 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-05.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - allow function inlining 8 | - do not follow all function parameters blindly (positive) 9 | */ 10 | 11 | char* func(char* env) { 12 | size_t len = strlen(env); 13 | char *cmd = (char *) malloc(len+1); 14 | for(int i = 0; i 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Output parameter 1 (char*) : written but no influence on the sink 7 | - Output parameter 2 (char**): not written and influence on the sink 8 | */ 9 | 10 | __attribute__ ((noinline, optimize("O0"))) 11 | int check_cmd(char* msg, char** cmd){ 12 | if(*cmd != NULL){ 13 | return 0; 14 | } 15 | msg = getenv("MSG"); 16 | if(msg != NULL){ 17 | printf("%s!\n", msg); 18 | } 19 | return -1; 20 | } 21 | 22 | int main() { 23 | char *msg = NULL; 24 | char *cmd = getenv("CMD"); 25 | if (check_cmd(msg, &cmd) == 0) { 26 | system(cmd); 27 | } 28 | return 0; 29 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-06.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - memcpy size is user-controllable 11 | */ 12 | 13 | 
__attribute__((noinline)) 14 | void modify_n(int *n) { 15 | char* env_n = getenv("MEMCPY_SIZE"); 16 | if(env_n != NULL) { 17 | *n = atoi(env_n); 18 | } 19 | } 20 | 21 | int main(int argc, char *argv[]) { 22 | char dest[BUF_LEN], src[] = "pointer_analysis"; 23 | int n, *n_ptr; 24 | 25 | n = BUF_LEN; 26 | n_ptr = &n; 27 | 28 | modify_n(n_ptr); 29 | memcpy(dest, src, n); 30 | fprintf(stdout, "n: '%d'\n", n); 31 | 32 | return EXIT_SUCCESS; 33 | } -------------------------------------------------------------------------------- /tests/data/src/name_mangling-02.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | Testcase Description: 5 | - Member function of a class (C++ name mangling) 6 | */ 7 | 8 | struct MyStruct { 9 | __attribute__ ((noinline, optimize("O0"))) 10 | int my_func(char *cmd) { 11 | return system(cmd); 12 | } 13 | }; 14 | 15 | class MyClass { 16 | public: 17 | __attribute__ ((noinline, optimize("O0"))) 18 | int my_func(char *cmd) { 19 | return system(cmd); 20 | } 21 | }; 22 | 23 | int main(int argc, char *argv[]) { 24 | char *cmd = getenv("CMD"); 25 | if(cmd != NULL) { 26 | MyStruct s; 27 | s.my_func(cmd); 28 | MyClass c; 29 | c.my_func(cmd); 30 | } 31 | return EXIT_SUCCESS; 32 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-08.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - do not follow all function parameters blindly (negative) 9 | */ 10 | 11 | __attribute__ ((noinline)) 12 | char* func(char* env) { 13 | char *cmd = (char *) malloc(4); 14 | cmd[0] = 'l'; 15 | cmd[1] = 's'; 16 | cmd[2] = '\0'; 17 | cmd[3] = '\0'; 18 | fprintf(stdout, "--- FUN ---\n"); 19 | fprintf(stdout, "env: '%s'\ncmd: '%s'\n", env, cmd); 20 | return cmd; 21 | } 22 | 23 | int main(int argc, char *argv[]) { 
24 | char *env, *cmd; 25 | env = getenv("CMD"); 26 | cmd = func(env); 27 | system(cmd); 28 | return EXIT_SUCCESS; 29 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-06.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - do not follow all function parameters blindly (positive) 9 | */ 10 | 11 | __attribute__ ((noinline)) 12 | char* func(char* env) { 13 | size_t len = strlen(env); 14 | char *cmd = (char *) malloc(len+1); 15 | for(int i = 0; i 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable size 8 | - non-reachable 9 | */ 10 | 11 | volatile int always_false = 0; 12 | 13 | int main(int argc, char *argv[]) { 14 | char dest[16]; 15 | char src[] = "01-memcpy"; 16 | 17 | char* env_size = getenv("MEMCPY_SIZE"); 18 | if(env_size == NULL) { 19 | fprintf(stderr, "MEMCPY_SIZE environment variable not set.\n"); 20 | return EXIT_FAILURE; 21 | } 22 | int n = atoi(env_size); 23 | 24 | if(always_false) { 25 | memcpy(dest, src, n); 26 | dest[n] = '\0'; 27 | fprintf(stdout, "dest: '%s'\n", dest); 28 | } 29 | 30 | return EXIT_SUCCESS; 31 | } -------------------------------------------------------------------------------- /tests/data/src/name_mangling-01.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Function with overloading (C++ name mangling) 7 | */ 8 | 9 | __attribute__ ((noinline, optimize("O0"))) 10 | int overloaded_func(char *cmd) { 11 | return system(cmd); 12 | } 13 | 14 | __attribute__ ((noinline, optimize("O0"))) 15 | int overloaded_func(char *cmd, int debug) { 16 | if(debug) { 17 | printf("[+] overloaded_func called with cmd='%s'\n", cmd); 18 | } 19 | return system(cmd); 20 | } 21 | 22 | 23 | int main(int argc, char *argv[]) 
{ 24 | char *cmd = getenv("CMD"); 25 | if(cmd != NULL) { 26 | overloaded_func(cmd); 27 | overloaded_func(cmd, 1); 28 | } 29 | return EXIT_SUCCESS; 30 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-13.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - with tail calls 9 | - direct recursion 10 | */ 11 | 12 | __attribute__ ((noinline, optimize("O0"))) 13 | int system_1(char *cmd, int* cnt) { 14 | if(*cnt <= 0) { 15 | return system(cmd); 16 | } else { 17 | (*cnt)--; 18 | return system_1(cmd, cnt); 19 | } 20 | } 21 | 22 | int main(int argc, char *argv[]) { 23 | int cnt = argc; 24 | char *env_cmd = getenv("CMD"); 25 | if(env_cmd == NULL) { 26 | fprintf(stderr, "CMD environment variable not set.\n"); 27 | return EXIT_FAILURE; 28 | } 29 | return system_1(env_cmd, &cnt); 30 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-08.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - memcpy with user-controllabel source 11 | */ 12 | 13 | char* src; 14 | 15 | __attribute__((noinline)) 16 | void dummy(int *value) { 17 | *value = 0; 18 | } 19 | 20 | __attribute__((noinline)) 21 | void my_getenv(char **env_src) { 22 | *env_src = getenv("MEMCPY_SRC"); 23 | } 24 | 25 | int main(int argc, char *argv[]) { 26 | int value = 1; 27 | char dest[BUF_LEN]; 28 | 29 | my_getenv(&src); 30 | dummy(&value); 31 | memcpy(dest, src, BUF_LEN); 32 | fprintf(stdout, "dest: %s, value: %d\n", dest, value); 33 | 34 | return EXIT_SUCCESS; 35 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-05.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - memcpy size is not user-controllable 11 | */ 12 | 13 | __attribute__((noinline)) 14 | void modify_n(int *n) { 15 | *n = BUF_LEN; 16 | } 17 | 18 | int main(int argc, char *argv[]) { 19 | char dest[BUF_LEN], src[] = "pointer_analysis"; 20 | int n, *n_ptr; 21 | 22 | n = BUF_LEN; 23 | n_ptr = &n; 24 | 25 | char* env_n = getenv("MEMCPY_SIZE"); 26 | if(env_n != NULL) { 27 | n = atoi(env_n); 28 | modify_n(n_ptr); 29 | memcpy(dest, src, n); 30 | fprintf(stdout, "n: '%d'\n", n); 31 | } 32 | 33 | return EXIT_SUCCESS; 34 | } -------------------------------------------------------------------------------- /tests/data/src/function_out_params-03.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Output parameter 1 (char**): written and influence on the sink 7 | */ 8 | 9 | __attribute__ ((noinline, optimize("O0"))) 10 | int get_cmd(char **out_cmd){ 11 | char* env_cmd = getenv("CMD"); 12 | char** out_cmd_cpy = out_cmd; 13 | if (env_cmd != NULL) { 14 | *out_cmd_cpy = env_cmd; 15 | return 0; 16 | } 17 | *out_cmd_cpy = "Test"; 18 | printf("%s\n", *out_cmd_cpy); 19 | return -1; 20 | } 21 | 22 | int main() { 23 | char *cmd = NULL; 24 | if (get_cmd(&cmd) == 0) { 25 | system(cmd); 26 | } else { 27 | fprintf(stderr, "CMD environment variable not set.\n"); 28 | } 29 | return 0; 30 | } -------------------------------------------------------------------------------- /tests/data/src/function_out_params-01.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BUF_SIZE 8 6 | 7 | /* 8 | Testcase Description: 9 | - Output parameter 1 (int*): written and influence on the sink 10 | */ 11 | 12 | char 
dest[BUF_SIZE]; 13 | char src[] = "Hello, World!"; 14 | 15 | __attribute__ ((noinline)) 16 | int get_size(int* size){ 17 | char* env_size = getenv("SIZE"); 18 | if(env_size != NULL) { 19 | *size = atoi(env_size); 20 | return 0; 21 | } 22 | return -1; 23 | } 24 | 25 | int main() { 26 | int size = 0; 27 | if (get_size(&size) == 0) { 28 | memcpy(dest, src, size); 29 | } else { 30 | fprintf(stderr, "SIZE environment variable not set.\n"); 31 | } 32 | return 0; 33 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-03.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - system with user-controllabel command 11 | */ 12 | 13 | int main(int argc, char *argv[]) { 14 | char cmd[CMD_LEN]; 15 | 16 | char *env_cmd = getenv("SYSTEM_COMMAND"); 17 | if(env_cmd == NULL) { 18 | fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 19 | return EXIT_FAILURE; 20 | } 21 | snprintf(cmd, CMD_LEN, "%s", env_cmd); 22 | snprintf(cmd, CMD_LEN, "%s %d", cmd, 31337); 23 | snprintf(cmd, CMD_LEN, "%s %s %d", cmd, cmd, 31337); 24 | snprintf(cmd, CMD_LEN, "%s %s %d", cmd, cmd, cmd, 31337); 25 | return system(cmd); 26 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable size 8 | - source in function without inlining 9 | */ 10 | 11 | __attribute__ ((noinline)) char* my_getenv(const char* name) { 12 | return getenv(name); 13 | } 14 | 15 | int main(int argc, char *argv[]) { 16 | char dest[16]; 17 | char src[] = "01-memcpy"; 18 | 19 | char* env_size = my_getenv("MEMCPY_SIZE"); 20 | if(env_size == NULL) { 21 | fprintf(stderr, 
"MEMCPY_SIZE environment variable not set.\n"); 22 | return EXIT_FAILURE; 23 | } 24 | int n = atoi(env_size); 25 | 26 | memcpy(dest, src, n); 27 | dest[n] = '\0'; 28 | fprintf(stdout, "dest: '%s'\n", dest); 29 | 30 | return EXIT_SUCCESS; 31 | } -------------------------------------------------------------------------------- /tests/data/src/function_out_params-05.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Output parameter 1 (char**): written but no influence on the sink 7 | - Output parameter 2 (char**): written and influence on the sink 8 | */ 9 | 10 | __attribute__ ((noinline)) 11 | int get_cmd(char **out_msg, char **out_cmd){ 12 | char *env_cmd = getenv("CMD"); 13 | if (env_cmd != NULL) { 14 | *out_cmd = env_cmd; 15 | return 0; 16 | } 17 | *out_msg = getenv("MSG"); 18 | return -1; 19 | } 20 | 21 | int main() { 22 | char *msg = NULL; 23 | char *cmd = NULL; 24 | if (get_cmd(&msg, &cmd) == 0) { 25 | system(cmd); 26 | } else { 27 | fprintf(stderr, "CMD environment variable not set.\n"); 28 | } 29 | return 0; 30 | } -------------------------------------------------------------------------------- /tests/data/src/function_out_params-08.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | Testcase Description: 6 | - Output parameter 1 (char**): written in a callee 7 | */ 8 | 9 | __attribute__ ((noinline, optimize("O0"))) 10 | void get_cmd(int debug, char** cmd){ 11 | *cmd = getenv("CMD"); 12 | if(debug) { 13 | printf("[DEBUG] cmd='%s'\n", *cmd); 14 | } 15 | return; 16 | } 17 | 18 | __attribute__ ((noinline, optimize("O0"))) 19 | int check_cmd(char** cmd){ 20 | get_cmd(1, cmd); 21 | if(*cmd != NULL){ 22 | return 0; 23 | } 24 | return -1; 25 | } 26 | 27 | int main() { 28 | char *cmd = NULL; 29 | if (check_cmd(&cmd) == 0) { 30 | system(cmd); 31 | } else { 32 | fprintf(stderr, "CMD environment 
variable not set.\n"); 33 | } 34 | return 0; 35 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-09.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - system with user-controllable command 11 | */ 12 | 13 | __attribute__((noinline)) 14 | void dummy(int *value) { 15 | *value = 0; 16 | } 17 | 18 | int main(int argc, char *argv[]) { 19 | int value = 1; 20 | char cmd[CMD_LEN]; 21 | 22 | char *env_cmd = getenv("SYSTEM_COMMAND"); 23 | if(env_cmd == NULL) { 24 | fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 25 | return EXIT_FAILURE; 26 | } 27 | snprintf(cmd, CMD_LEN, "%s", env_cmd); 28 | dummy(&value); 29 | system(cmd); 30 | fprintf(stdout, "cmd: %s, value: %d\n", cmd, value); 31 | 32 | return EXIT_SUCCESS; 33 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-10.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable source and size 8 | - unexploitable due to validation 9 | */ 10 | 11 | int main(int argc, char *argv[]) { 12 | size_t dest_size = 16; 13 | char dest[dest_size]; 14 | char* env_src = getenv("MEMCPY_SRC"); 15 | if(env_src == NULL) { 16 | fprintf(stderr, "MEMCPY_SRC environment variable not set.\n"); 17 | return EXIT_FAILURE; 18 | } 19 | size_t src_size = strlen(env_src); 20 | if(src_size >= dest_size) { 21 | fprintf(stderr, "MEMCPY_SRC size >= %zu\n.", dest_size); 22 | return EXIT_FAILURE; 23 | } 24 | memcpy(dest, env_src, src_size); 25 | dest[src_size] = '\0'; 26 | fprintf(stdout, "dest: '%s'\n", dest); 27 | return EXIT_SUCCESS; 28 | } --------------------------------------------------------------------------------
class TestSerialization(TestSlicing):
    """
    This class tests that paths survive a serialization round trip.
    """

    def test_serialization_01(
        self, filenames: List[str] = ["function_calling-02"]
    ) -> None:
        """
        This method checks that every path returned by `get_paths` for the given test
        binaries is equal to the path rebuilt from its own dictionary representation
        (`Path.from_dict(bv, path.to_dict())`).

        The default `filenames` list is only read, never mutated, so sharing it between
        calls is harmless.
        """
        for file in self.load_files(filenames):
            # Load test binary with Binary Ninja
            bv = bn.load(file)
            try:
                # Analyze test binary
                bv.update_analysis_and_wait()
                paths = self.get_paths(bv)
                # Assert results
                for path in paths:
                    assert path == Path.from_dict(bv, path.to_dict()), "serialization"
            finally:
                # Always release the binary, even when the analysis or an assertion
                # fails, so that a failing test does not leak the opened file
                bv.file.close()
        return
2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - system with user-controllabel command 11 | */ 12 | 13 | __attribute__((noinline)) 14 | void dummy(int *value) { 15 | *value = 0; 16 | } 17 | 18 | int main(int argc, char *argv[]) { 19 | int value = 1; 20 | char cmd[CMD_LEN]; 21 | 22 | char *env_cmd = getenv("SYSTEM_COMMAND"); 23 | if(env_cmd == NULL) { 24 | fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 25 | return EXIT_FAILURE; 26 | } 27 | for(int i=0; i<10; i++) { 28 | snprintf(cmd, CMD_LEN, "%s %i", env_cmd, i); 29 | dummy(&value); 30 | } 31 | system(cmd); 32 | fprintf(stdout, "cmd: %s, value: %d\n", cmd, value); 33 | 34 | return EXIT_SUCCESS; 35 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-11.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - with tail calls 9 | - direct recursion 10 | */ 11 | 12 | __attribute__ ((noinline, optimize("O0"))) 13 | char* getenv_2() { 14 | return getenv("CMD"); 15 | } 16 | 17 | __attribute__ ((noinline, optimize("O0"))) 18 | char* getenv_1(int* cnt) { 19 | char* cmd = NULL; 20 | if(*cnt > 0) { 21 | (*cnt)--; 22 | cmd = getenv_1(cnt); 23 | } else{ 24 | cmd = getenv_2(); 25 | } 26 | return cmd; 27 | } 28 | 29 | int main(int argc, char *argv[]) { 30 | int cnt = argc; 31 | char *env_cmd = getenv_1(&cnt); 32 | if(env_cmd == NULL) { 33 | fprintf(stderr, "CMD environment variable not set.\n"); 34 | return EXIT_FAILURE; 35 | } 36 | return system(env_cmd); 37 | } -------------------------------------------------------------------------------- /tests/data/src/memcpy-07.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - 
controllable source and size 8 | - copying source 9 | */ 10 | 11 | int main() { 12 | char dest[16]; 13 | 14 | char* env_src = getenv("MEMCPY_SRC"); 15 | if(env_src == NULL) { 16 | fprintf(stderr, "MEMCPY_SRC environment variable not set.\n"); 17 | return EXIT_FAILURE; 18 | } 19 | 20 | size_t size = strlen(env_src); 21 | char* buf = (char*) malloc(size + 1); 22 | if(buf == NULL) { 23 | fprintf(stderr, "Failed to allocate memory.\n"); 24 | return EXIT_FAILURE; 25 | } 26 | 27 | for(size_t i=0; i 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - controllable source and size 8 | - source in function without inlining 9 | */ 10 | 11 | __attribute__ ((noinline, optimize("O0"))) 12 | char* my_getenv(const char* name) { 13 | char *env = getenv(name); 14 | if(env == NULL) { 15 | return NULL; 16 | } 17 | for(size_t i=0; i 2 | 3 | /* 4 | Testcase Description: 5 | - Operator overloading (C++ name mangling) 6 | */ 7 | 8 | struct MyStruct { 9 | std::string cmd; 10 | 11 | MyStruct(const std::string& cmd) : cmd(cmd) {} 12 | 13 | MyStruct operator+(const MyStruct &other) { 14 | char *env_cmd = getenv("CMD"); 15 | if(env_cmd != NULL) { 16 | this->cmd = std::string(this->cmd + ";" + std::string(env_cmd) + ";" + other.cmd); 17 | } else { 18 | this->cmd = std::string(this->cmd + ";" + other.cmd); 19 | } 20 | return *this; 21 | } 22 | 23 | __attribute__ ((noinline, optimize("O0"))) 24 | int my_func() { 25 | return system(this->cmd.c_str()); 26 | } 27 | }; 28 | 29 | int main(int argc, char *argv[]) { 30 | MyStruct s1("echo '>>'"); 31 | MyStruct s2("echo '<<'"); 32 | MyStruct s3 = s1 + s2; 33 | s3.my_func(); 34 | return EXIT_SUCCESS; 35 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CMD_LEN 64 6 | 7 | /* 8 | Testcase Description: 9 | - pointer analysis 10 | - 
system with user-controllabel command 11 | */ 12 | 13 | int main(int argc, char *argv[]) { 14 | char cmd[CMD_LEN]; 15 | 16 | char *env_cmd_str = getenv("SYSTEM_COMMAND_STR"); 17 | if(env_cmd_str == NULL) { 18 | fprintf(stderr, "SYSTEM_COMMAND_STR environment variable not set.\n"); 19 | return EXIT_FAILURE; 20 | } 21 | char *env_cmd_int = getenv("SYSTEM_COMMAND_INT"); 22 | if(env_cmd_int == NULL) { 23 | fprintf(stderr, "SYSTEM_COMMAND_INT environment variable not set.\n"); 24 | return EXIT_FAILURE; 25 | } 26 | size_t value = atoi(env_cmd_int); 27 | 28 | snprintf(cmd, CMD_LEN, "%s", env_cmd_str); 29 | size_t *ptr_value = &value; 30 | snprintf(cmd, CMD_LEN, "%s %d", cmd, *ptr_value); 31 | return system(cmd); 32 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-10.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - with tail calls 9 | */ 10 | 11 | __attribute__ ((noinline)) 12 | char* getenv_2(char *cmd) { 13 | return getenv(cmd); 14 | } 15 | 16 | __attribute__ ((noinline)) 17 | char* getenv_1() { 18 | return getenv_2("CMD"); 19 | } 20 | 21 | __attribute__ ((noinline)) 22 | char* system_3(char *cmd) { 23 | return cmd; 24 | } 25 | 26 | __attribute__ ((noinline)) 27 | char* system_2(char *cmd) { 28 | return system_3(cmd); 29 | } 30 | 31 | __attribute__ ((noinline)) 32 | int system_1(char *cmd) { 33 | cmd = system_2(cmd); 34 | return system(cmd); 35 | } 36 | 37 | int main(int argc, char *argv[]) { 38 | char *env_cmd = getenv_1(); 39 | if(env_cmd == NULL) { 40 | fprintf(stderr, "CMD environment variable not set.\n"); 41 | return EXIT_FAILURE; 42 | } 43 | system_1(env_cmd); 44 | return EXIT_SUCCESS; 45 | } -------------------------------------------------------------------------------- /docs/03-Customization.md: 
class SourceSinkPathGrouper(PathGrouper):
    """
    This class implements a grouping strategy with two levels: paths are first grouped by
    their source symbol and then by their sink symbol.
    """

    def get_group_keys(self, path: Path, *args, **kwargs) -> List[Tuple[str, str, int]]:
        """
        This method returns the group keys of the given path, one tuple per tree level. Each
        tuple holds the text shown to the user, the identifier of the group and the level of
        the group (0 for the source, 1 for the sink).
        """
        # Level 0: all paths sharing the same source symbol
        src_name = path.src_sym_name
        source_key = (f"Source: {src_name:s}", src_name, 0)
        # Level 1: the sink identifier is prefixed with the source symbol, so equal sinks
        # below different sources end up in different groups
        snk_name = path.snk_sym_name
        sink_key = (f"Sink: {snk_name:s}", f"{src_name:s}:{snk_name:s}", 1)
        return [source_key, sink_key]

    def get_strategy_name(self) -> str:
        """
        This method returns the name under which this grouping strategy is known.
        """
        strategy_name = "Source / Sink"
        return strategy_name
"""
Plugin entry point: wires up the services, models, views and controllers of Mole and
registers its sidebar. Nothing is loaded when `pytest` has been imported.
"""

from __future__ import annotations
import sys


# Load components only when not being run by `pytest`
# NOTE(review): presumably because these modules depend on the Binary Ninja UI, which is
# not available in a test run - confirm
if "pytest" not in sys.modules:
    from mole.controllers.ai import AiController
    from mole.controllers.config import ConfigController
    from mole.controllers.path import PathController
    from mole.models.config import ConfigModel
    from mole.services.config import ConfigService
    from mole.views.ai import AiView
    from mole.views.config import ConfigView
    from mole.views.path import PathView
    from mole.views.sidebar import MoleSidebar

    # Services
    config_service = ConfigService()

    # Models (the configuration model is built from the configuration loaded by the service)
    config_model = ConfigModel(config_service.load_config())

    # Views
    config_view = ConfigView()
    ai_view = AiView()
    path_view = PathView()

    # Controllers (created in dependency order: the AI controller receives the configuration
    # controller, the path controller receives both)
    config_ctr = ConfigController(config_service, config_model, config_view)
    ai_ctr = AiController(ai_view, config_ctr)
    path_ctr = PathController(path_view, config_ctr, ai_ctr)

    # Initialize sidebar in Binary Ninja
    sidebar = MoleSidebar(path_view)
    sidebar.init()
-------------------------------------------------------------------------------- /tests/data/src/function_calling-01.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - allow function inlining 8 | - with tail calls 9 | */ 10 | 11 | char* getenv_2(char *cmd) { 12 | return getenv(cmd); 13 | } 14 | 15 | char* getenv_1a() { 16 | return getenv_2("SYSTEM_COMMAND_1a"); 17 | } 18 | 19 | char* getenv_1b() { 20 | return getenv_2("SYSTEM_COMMAND_1b"); 21 | } 22 | 23 | char* getenv_1c() { 24 | return getenv("NOT_DANGEROUS"); 25 | } 26 | 27 | int system_2(char *cmd) { 28 | return system(cmd); 29 | } 30 | 31 | int system_1a(char *cmd) { 32 | return system_2(cmd); 33 | } 34 | 35 | int system_1b(char *cmd) { 36 | return system_2(cmd); 37 | } 38 | 39 | int main(int argc, char *argv[]) { 40 | char *env_cmd; 41 | 42 | if(argc <= 1) { 43 | env_cmd = getenv_1a(); 44 | } else { 45 | env_cmd = getenv_1b(); 46 | } 47 | fprintf(stdout, "main: '%s'\n", env_cmd); 48 | if(env_cmd == NULL) { 49 | fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 50 | return EXIT_FAILURE; 51 | } 52 | system_1a(env_cmd); 53 | system_1b("whoami"); 54 | getenv_1c(); 55 | return EXIT_SUCCESS; 56 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-12.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - with tail calls 9 | - indirect recursion 10 | */ 11 | 12 | char* getenv_4(); 13 | char* getenv_3(int* cnt); 14 | char* getenv_2(int* cnt); 15 | char* getenv_1(int* cnt); 16 | 17 | __attribute__ ((noinline, optimize("O0"))) 18 | char* getenv_4() { 19 | return getenv("CMD"); 20 | } 21 | 22 | __attribute__ ((noinline, optimize("O0"))) 23 | char* getenv_3(int* cnt) { 24 | char* cmd = 
NULL; 25 | if(*cnt > 0) { 26 | (*cnt)--; 27 | return getenv_1(cnt); 28 | } else { 29 | cmd = getenv_4(); 30 | } 31 | return cmd; 32 | } 33 | 34 | __attribute__ ((noinline, optimize("O0"))) 35 | char* getenv_2(int* cnt) { 36 | return getenv_3(cnt); 37 | } 38 | 39 | __attribute__ ((noinline, optimize("O0"))) 40 | char* getenv_1(int* cnt) { 41 | return getenv_2(cnt); 42 | } 43 | 44 | int main(int argc, char *argv[]) { 45 | int cnt = argc; 46 | char *env_cmd = getenv_1(&cnt); 47 | if(env_cmd == NULL) { 48 | fprintf(stderr, "CMD environment variable not set.\n"); 49 | return EXIT_FAILURE; 50 | } 51 | return system(env_cmd); 52 | } -------------------------------------------------------------------------------- /tests/data/Makefile: -------------------------------------------------------------------------------- 1 | # Compilation settings (cross-compile with `CC=arm-linux-gcc CXX=arm-linux-g++ EXT=.linux-armv7 make`) 2 | CC ?= gcc 3 | CXX ?= g++ 4 | CFLAGS ?= -Wall -O3 -g -fno-builtin -U_FORTIFY_SOURCE 5 | CXXFLAGS ?= -Wall -O3 -g -fno-builtin -U_FORTIFY_SOURCE 6 | EXT ?= 7 | 8 | # Directories 9 | SRC_DIR := src 10 | BIN_DIR := bin 11 | 12 | # Files 13 | C_SRC_FILES := $(wildcard $(SRC_DIR)/*.c) 14 | C_BIN_FILES := $(patsubst $(SRC_DIR)/%.c, $(BIN_DIR)/%$(EXT), $(C_SRC_FILES)) 15 | CPP_SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp) 16 | CPP_BIN_FILES := $(patsubst $(SRC_DIR)/%.cpp, $(BIN_DIR)/%$(EXT), $(CPP_SRC_FILES)) 17 | 18 | # Ensure directory bin/ exists 19 | $(shell mkdir -p bin) 20 | 21 | # Default target 22 | all: $(C_BIN_FILES) $(CPP_BIN_FILES) 23 | 24 | # Compile .c files 25 | $(BIN_DIR)/%$(EXT): $(SRC_DIR)/%.c 26 | $(CC) $(CFLAGS) -o $@ $< 27 | 28 | # Compile .cpp files 29 | $(BIN_DIR)/%$(EXT): $(SRC_DIR)/%.cpp 30 | $(CXX) $(CXXFLAGS) -o $@ $< 31 | 32 | # Remove binaries 33 | clean: 34 | rm -rf $(BIN_DIR) 35 | 36 | # Compile a single program 37 | $(notdir $(basename $(C_SRC_FILES) $(CPP_SRC_FILES))): %: $(BIN_DIR)/%$(EXT) 38 | 39 | # Prevent make from confusing 
targets with file names 40 | .PHONY: all $(notdir $(basename $(C_SRC_FILES) $(CPP_SRC_FILES))) clean -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools.packages.find] 6 | where = ["."] 7 | include = ["mole*"] 8 | 9 | [tool.setuptools.package-data] 10 | "mole" = ["conf/*", "resources/*"] 11 | 12 | [project] 13 | name = "mole" 14 | version = "0.5.1" 15 | description = "A Binary Ninja plugin to identify interesting paths using static backward slicing" 16 | authors = [ 17 | {name = "Damian Pfammatter"}, 18 | {name = "Sergio Paganoni"} 19 | ] 20 | requires-python = ">=3.10" 21 | dependencies = [ 22 | "ijson==3.3.0", 23 | "lark==1.2.2", 24 | "networkx[default]==3.4.2", 25 | "PyYAML==6.0.2", 26 | "termcolor==2.4.0", 27 | "pydantic>=2.0.0", 28 | "openai >= 1.78.1 , <2.0.0" 29 | ] 30 | 31 | [project.optional-dependencies] 32 | develop = [ 33 | "debugpy==1.8.1", 34 | "pre_commit==4.2.0", 35 | "pytest==8.3.4", 36 | "pytest-cov==7.0.0", 37 | "ruff==0.9.9", 38 | "tomli==2.2.1" 39 | ] 40 | 41 | [project.scripts] 42 | mole = "mole.cli.main:main" 43 | 44 | [tool.ruff.lint] 45 | extend-select = ["TID"] 46 | 47 | [tool.pytest.ini_options] 48 | testpaths = ["tests"] 49 | python_files = ["test_*.py"] 50 | python_classes = ["Test*"] 51 | python_functions = ["test_*"] 52 | pythonpath = ["."] -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | This directory contains the test suite for the *Mole* project. 3 | ## Build Test Binaries 4 | Before running the tests, you must first build the test binaries.
Navigate to the tests data directory: 5 | ```bash 6 | cd tests/data/ 7 | ``` 8 | Then, compile all test binaries: 9 | ```bash 10 | make all 11 | ``` 12 | To cross-compile the test binaries for a specific target architecture, specify the appropriate compilers. You may also define a custom file extension to distinguish the resulting binaries. For example: 13 | ```bash 14 | CC=arm-linux-gcc CXX=arm-linux-g++ EXT=.linux-armv7 make all 15 | ``` 16 | ## Run Tests 17 | In the following commands, we assume you are in the *Mole* project’s root directory: 18 | #### Run All Tests 19 | ```bash 20 | pytest 21 | ``` 22 | #### Run Specific Test File 23 | ```bash 24 | pytest tests/test_data.py 25 | pytest tests/slicing/test_pointer.py 26 | ``` 27 | #### Run Specific Test Class or Method 28 | ```bash 29 | pytest tests/slicing/test_pointer.py::TestPointerAnalysis 30 | pytest tests/slicing/test_pointer.py::TestPointerAnalysis::test_pointer_analysis_01 31 | ``` 32 | #### Run Tests for a Specific Architecture 33 | To run tests only on binaries built for a specific architecture, set the `EXT` parameter to match the extension used during compilation: 34 | ```bash 35 | EXT=".linux-armv7" pytest 36 | ``` -------------------------------------------------------------------------------- /tests/data/src/function_calling-15.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - marking function parameters 8 | */ 9 | 10 | __attribute__ ((noinline, optimize("O0"))) 11 | char* getenv_2(char *cmd, int debug) { 12 | if (debug) { 13 | printf("[DEBUG] getenv_2 called with cmd='%s'\n", cmd); 14 | } 15 | return getenv(cmd); 16 | } 17 | 18 | __attribute__ ((noinline, optimize("O0"))) 19 | char* getenv_1(char* cmd, int debug) { 20 | if (debug) { 21 | printf("[DEBUG] getenv_1"); 22 | } 23 | return getenv_2(cmd, debug); 24 | } 25 | 26 | __attribute__ ((noinline, optimize("O0"))) 27 | int 
system_2(char *cmd, int debug) { 28 | if(debug) { 29 | printf("[DEBUG] system_2"); 30 | } 31 | return system(cmd); 32 | } 33 | 34 | __attribute__ ((noinline, optimize("O0"))) 35 | int system_1(char *cmd, int debug) { 36 | if(debug) { 37 | printf("[DEBUG] system_1"); 38 | } 39 | return system_2(cmd, debug); 40 | } 41 | 42 | int main(int argc, char *argv[]) { 43 | char *env_cmd; 44 | int debug = 1; 45 | 46 | env_cmd = getenv_1("CMD", debug); 47 | if(env_cmd == NULL) { 48 | fprintf(stderr, "CMD environment variable not set.\n"); 49 | return EXIT_FAILURE; 50 | } 51 | 52 | system_1(env_cmd, debug); 53 | return EXIT_SUCCESS; 54 | } -------------------------------------------------------------------------------- /mole/grouping/call_graph.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a call graph grouping strategy. 3 | """ 4 | 5 | from __future__ import annotations 6 | from mole.core.data import Path 7 | from mole.grouping.source_sink import SourceSinkPathGrouper 8 | 9 | 10 | class CallgraphPathGrouper(SourceSinkPathGrouper): 11 | """ 12 | This class implements a strategy that groups by source and sink symbols, as well as call graphs. 13 | """ 14 | 15 | def get_group_keys(self, path: Path, *args, **kwargs): 16 | """ 17 | This method groups paths by source and sink symbols, as well as call graphs. 18 | """ 19 | max_calls = kwargs.get("max_calls", 6) 20 | calls = [call[0].source_function.symbol.short_name for call in path.calls] 21 | if len(calls) > max_calls: 22 | calls = calls[: int(max_calls / 2)] + ["..."] + calls[int(-max_calls / 2) :] 23 | calls = " - ".join(reversed(calls)) 24 | keys = super().get_group_keys(path, *args, **kwargs) 25 | keys.append( 26 | ( 27 | f"Calls: {calls:s}", 28 | f"{path.src_sym_name:s}:{path.snk_sym_name}:{calls:s}", 29 | 2, 30 | ) 31 | ) 32 | return keys 33 | 34 | def get_strategy_name(self) -> str: 35 | """ 36 | This method returns the name of this grouping strategy. 
class BackgroundTask(bn.BackgroundTaskThread):
    """
    This class wraps an arbitrary callable so that it can be executed as a Binary Ninja
    background task thread.
    """

    def __init__(
        self,
        initial_progress_text: str = "",
        can_cancel: bool = False,
        run: Optional[Callable[..., Any]] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """
        This method stores the callable `run` together with the positional and keyword
        arguments it should later be invoked with.
        """
        super().__init__(initial_progress_text, can_cancel)
        # No result is available until `run` has been executed
        self._results: Any = None
        self._kwargs: dict[str, Any] = kwargs
        self._args: Tuple[Any, ...] = args
        self._run = run
        return

    def run(self) -> None:
        """
        This method invokes the stored callable (if any) and keeps its return value.
        """
        log.info(tag, "Starting background task")
        target = self._run
        if target:
            self._results = target(*self._args, **self._kwargs)
        log.info(tag, "Background task completed")
        return

    def results(self) -> Any:
        """
        This method blocks until the task has finished and then hands back whatever the
        callable returned (`None` if no callable was given).
        """
        self.join()
        outcome = self._results
        return outcome
class IndexedLabeledEnum(Enum):
    """
    This class implements an enum whose members are declared as `(index, label)` tuples.

    The index becomes the member's value (and is used for ordering, equality and hashing),
    while the label is the member's human-readable string representation.
    """

    def __new__(cls, index: int, label: str = "") -> IndexedLabeledEnum:
        """
        This method creates a member with value `index` and the given `label`.
        """
        obj = object.__new__(cls)
        obj._value_ = index
        obj._label = label
        return obj

    @property
    def index(self) -> int:
        """
        This property returns the member's index (i.e. its enum value).
        """
        return self._value_

    @property
    def label(self) -> str:
        """
        This property returns the member's label.
        """
        return self._label

    @classmethod
    def indexes(cls: type[IndexedLabeledEnum]) -> List[int]:
        """
        This method returns a list with the enum members' indexes.
        """
        return [member._value_ for member in cls]

    @classmethod
    def labels(cls: type[IndexedLabeledEnum]) -> List[str]:
        """
        This method returns a list with the enum members' labels.
        """
        return [member._label for member in cls]

    def __str__(self) -> str:
        return self._label

    def __lt__(self, other: object) -> bool:
        if isinstance(other, IndexedLabeledEnum):
            return self._value_ < other._value_
        return NotImplemented

    def __eq__(self, other: object) -> bool:
        if isinstance(other, IndexedLabeledEnum):
            return self._value_ == other._value_
        return NotImplemented

    def __hash__(self) -> int:
        # Hash the same attribute that `__eq__` compares, so that equal members hash equally.
        # The attribute is `_value_`; enum members have no `_value` attribute.
        return hash(self._value_)
/*
 * Test case entry point: the argument of the `-s` option is taken from the
 * global `optarg` and copied into a fixed-size stack buffer.
 * Returns EXIT_FAILURE on an unknown option, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[]) {
    int opt;
    char dest[BUF_SIZE];
    char *src = NULL;

    // Parse command-line options
    while ((opt = getopt(argc, argv, "s:")) != -1) {
        switch (opt) {
            case 's':
                // `optarg` is set by `getopt` as a side effect, it is not a return value
                src = optarg;
                break;
            default:
                return EXIT_FAILURE;
        }
    }
    // Copy src to dest
    if (src != NULL) {
        // NOTE(review): unbounded copy into `dest` - presumably the intended sink of this
        // test case, so it must not be replaced by a bounded copy
        strcpy(dest, src);
        printf("dest: '%s'\n", dest);
    }
    return EXIT_SUCCESS;
}
22 | 23 | # Development 24 | # WARNING: 25 | # When installed using the -e (editable) flag with pip, Binary Ninja must be launched from 26 | # within the activated virtual environment (mole) 27 | (mole)$ pip install -e .[develop] 28 | (mole)$ pre-commit install 29 | (mole)$ $BINJA_BIN/binaryninja & 30 | ``` 31 | - If you are using a virtual environment, consider configuring the corresponding `site-packages` directory in *Binary Ninja*'s settings. 32 | ---------------------------------------------------------------------------------------------------- 33 | [Back-To-README](../README.md#documentation) -------------------------------------------------------------------------------- /tests/data/src/function_calling-03.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - allow function inlining 8 | - without tail calls 9 | */ 10 | 11 | char* getenv_2(char *cmd) { 12 | char *env_cmd = getenv(cmd); 13 | fprintf(stdout, "getenv_2: '%s'\n", env_cmd); 14 | return env_cmd; 15 | } 16 | 17 | char* getenv_1a() { 18 | char *env_cmd = getenv_2("SYSTEM_COMMAND_1a"); 19 | fprintf(stdout, "getenv_1a: '%s'\n", env_cmd); 20 | return env_cmd; 21 | } 22 | 23 | char* getenv_1b() { 24 | char *env_cmd = getenv_2("SYSTEM_COMMAND_1b"); 25 | fprintf(stdout, "getenv_1b: '%s'\n", env_cmd); 26 | return env_cmd; 27 | } 28 | 29 | char* getenv_1c() { 30 | char *env_cmd = getenv("NOT_DANGEROUS"); 31 | fprintf(stdout, "getenv_1c: '%s'\n", env_cmd); 32 | return env_cmd; 33 | } 34 | 35 | int system_2(char *cmd) { 36 | int res = system(cmd); 37 | fprintf(stdout, "system_2: '%d'\n", res); 38 | return res; 39 | } 40 | 41 | int system_1a(char *cmd) { 42 | int res = system_2(cmd); 43 | fprintf(stdout, "system_1a: '%d'\n", res); 44 | return res; 45 | } 46 | 47 | int system_1b(char *cmd) { 48 | int res = system_2(cmd); 49 | fprintf(stdout, "system_1b: '%d'\n", res); 50 | return res; 51 | } 52 | 53 | 
/*
 * Builds the command string "echo <user_name>" in newly allocated memory.
 *
 * str_ptr:   receives the allocated string; set to NULL again if formatting fails
 *            after the allocation succeeded
 * user_name: value inserted into the command
 *
 * Returns the length written by `snprintf`, or -1 on any failure. On success the
 * caller owns `*str_ptr` and has to `free` it.
 */
__attribute__ ((noinline))
int create_cmd(char** str_ptr, const char* user_name) {
    const char* fmt = "echo %s";
    // Compute length of formatted string
    int len = snprintf(NULL, 0, fmt, user_name);
    if(len < 0) return -1;
    // Allocate memory for the string
    // (`len` excludes the terminating NUL byte, hence `len + 1`)
    *str_ptr = (char*) malloc(len + 1);
    if(*str_ptr == NULL) {
        return -1;
    }
    // Write formatted string to allocated memory
    int res = snprintf(*str_ptr, len + 1, fmt, user_name);
    if(res < 0) {
        // Do not hand a dangling pointer back to the caller
        free(*str_ptr);
        *str_ptr = NULL;
        return -1;
    }
    return res;
}
if(create_cmd(&cmd, env_user_name) < 0) { 48 | fprintf(stderr, "Failed to create command for user '%s'.\n", env_user_name); 49 | return EXIT_FAILURE; 50 | } 51 | } 52 | // Sink: Execute command 53 | if(system(cmd) == -1) { 54 | fprintf(stderr, "Failed to execute command.\n"); 55 | free(cmd); 56 | return EXIT_FAILURE; 57 | } 58 | free(cmd); 59 | return EXIT_SUCCESS; 60 | } -------------------------------------------------------------------------------- /tests/data/src/function_calling-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - disallow function inlining 8 | - without tail calls 9 | */ 10 | 11 | __attribute__ ((noinline)) 12 | char* getenv_2(char *cmd) { 13 | char *env_cmd = getenv(cmd); 14 | fprintf(stdout, "getenv_2: '%s'\n", env_cmd); 15 | return env_cmd; 16 | } 17 | 18 | __attribute__ ((noinline)) 19 | char* getenv_1a() { 20 | char *env_cmd = getenv_2("SYSTEM_COMMAND_1a"); 21 | fprintf(stdout, "getenv_1a: '%s'\n", env_cmd); 22 | return env_cmd; 23 | } 24 | 25 | __attribute__ ((noinline)) 26 | char* getenv_1b() { 27 | char *env_cmd = getenv_2("SYSTEM_COMMAND_1b"); 28 | fprintf(stdout, "getenv_1b: '%s'\n", env_cmd); 29 | return env_cmd; 30 | } 31 | 32 | __attribute__ ((noinline)) 33 | char* getenv_1c() { 34 | char *env_cmd = getenv("NOT_DANGEROUS"); 35 | fprintf(stdout, "getenv_1c: '%s'\n", env_cmd); 36 | return env_cmd; 37 | } 38 | 39 | __attribute__ ((noinline)) 40 | int system_2(char *cmd) { 41 | int res = system(cmd); 42 | fprintf(stdout, "system_2: '%d'\n", res); 43 | return res; 44 | } 45 | 46 | __attribute__ ((noinline)) 47 | int system_1a(char *cmd) { 48 | int res = system_2(cmd); 49 | fprintf(stdout, "system_1a: '%d'\n", res); 50 | return res; 51 | } 52 | 53 | __attribute__ ((noinline)) 54 | int system_1b(char *cmd) { 55 | int res = system_2(cmd); 56 | fprintf(stdout, "system_1b: '%d'\n", res); 57 | return res; 58 | } 59 | 60 | int 
main(int argc, char *argv[]) { 61 | char *env_cmd; 62 | 63 | if(argc <= 1) { 64 | env_cmd = getenv_1a(); 65 | } else { 66 | env_cmd = getenv_1b(); 67 | } 68 | fprintf(stdout, "main: '%s'\n", env_cmd); 69 | if(env_cmd == NULL) { 70 | fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n"); 71 | return EXIT_FAILURE; 72 | } 73 | system_1a(env_cmd); 74 | system_1b("whoami"); 75 | getenv_1c(); 76 | return EXIT_SUCCESS; 77 | } -------------------------------------------------------------------------------- /tests/data/src/object_oriented-01.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | 6 | /* 7 | Testcase Description: 8 | - Inheritance 9 | - Without virtual functions (no polymorphism) 10 | */ 11 | 12 | class MyParent { 13 | protected: 14 | const char* name; 15 | 16 | public: 17 | __attribute__ ((noinline, optimize("O0"))) 18 | MyParent(const char* name) { 19 | this->name = name; 20 | cout << "MyParent Constructor: Hello " << this->name << "!" << endl; 21 | } 22 | 23 | __attribute__ ((noinline, optimize("O0"))) 24 | ~MyParent() { 25 | cout << "MyParent Destructor: Goodbye " << this->name << "!" << endl; 26 | } 27 | 28 | __attribute__ ((noinline, optimize("O0"))) 29 | void my_func(const char* cmd) { 30 | cout << "MyParent::my_func: " << this->name << " calls `system('" << cmd << "')`!" << endl; 31 | system(cmd); 32 | } 33 | }; 34 | 35 | class MyChild : public MyParent { 36 | public: 37 | __attribute__ ((noinline, optimize("O0"))) 38 | MyChild(const char* name) : MyParent(name) { 39 | cout << "MyChild Constructor: Hello " << this->name << "!" << endl; 40 | } 41 | 42 | __attribute__ ((noinline, optimize("O0"))) 43 | ~MyChild() { 44 | cout << "MyChild Destructor: Goodbye " << this->name << "!" 
<< endl; 45 | } 46 | 47 | __attribute__ ((noinline, optimize("O0"))) 48 | void my_func(const char* cmd) { 49 | cout << "MyChild::my_func: " << this->name << " calls `popen('" << cmd << "', 'r')`!" << endl; 50 | FILE* fp = popen(cmd, "r"); 51 | if(fp != NULL) { 52 | pclose(fp); 53 | } 54 | } 55 | }; 56 | 57 | __attribute__ ((noinline, optimize("O3"))) 58 | int main(int argc, char *argv[]) { 59 | MyParent* p = new MyParent("Alice"); 60 | MyParent* c = new MyChild("Bob"); 61 | char* cmd = getenv("CMD"); 62 | if(cmd != NULL) { 63 | p->my_func(cmd); 64 | c->my_func(cmd); 65 | } 66 | delete p; 67 | delete c; 68 | return EXIT_SUCCESS; 69 | } -------------------------------------------------------------------------------- /mole/conf/001-settings.yml: -------------------------------------------------------------------------------- 1 | settings: 2 | max_workers: 3 | help: maximum number of worker threads that backward slicing uses 4 | value: 1 5 | min_value: -1 6 | max_value: 256 7 | fix_func_type: 8 | help: whether to fix types of source/sink functions before slicing 9 | value: false 10 | max_call_level: 11 | help: backward slicing visits called functions up to the given level 12 | value: 10 13 | min_value: -1 14 | max_value: 99 15 | max_slice_depth: 16 | help: maximum slice depth to stop the search 17 | value: 1000 18 | min_value: -1 19 | max_value: 9999 20 | max_memory_slice_depth: 21 | help: maximum memory slice depth to stop the search 22 | value: 10 23 | min_value: -1 24 | max_value: 9999 25 | src_highlight_color: 26 | help: color used to highlight instructions originating from slicing a source function 27 | value: Orange 28 | items: 29 | - Blue 30 | - Green 31 | - Cyan 32 | - Red 33 | - Magenta 34 | - Yellow 35 | - Orange 36 | snk_highlight_color: 37 | help: color used to highlight instructions originating from slicing a sink function 38 | value: Red 39 | items: 40 | - Blue 41 | - Green 42 | - Cyan 43 | - Red 44 | - Magenta 45 | - Yellow 46 | - Orange 47 | 
path_grouping: 48 | help: strategy used to group paths 49 | value: Call Graph 50 | openai_base_url: 51 | help: OpenAI API base URL 52 | value: "https://api.openai.com/v1" 53 | openai_api_key: 54 | help: OpenAI API key 55 | value: "" 56 | openai_model: 57 | help: OpenAI model 58 | value: "o4-mini" 59 | max_turns: 60 | help: maximum number of turns in a conversation with the AI 61 | value: 10 62 | min_value: 1 63 | max_value: 256 64 | max_completion_tokens: 65 | help: maximum number of tokens in a completion 66 | value: 4096 67 | min_value: 0 68 | max_value: 100000 69 | temperature: 70 | help: the sampling temperature to use 71 | value: 1.0 72 | min_value: 0.0 73 | max_value: 2.0 -------------------------------------------------------------------------------- /tests/data/src/object_oriented-02.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | 6 | /* 7 | Testcase Description: 8 | - Inheritance 9 | - With virtual functions (polymorphism) 10 | */ 11 | 12 | class MyParent { 13 | protected: 14 | const char* name; 15 | 16 | public: 17 | __attribute__ ((noinline, optimize("O0"))) 18 | MyParent(const char* name) { 19 | this->name = name; 20 | cout << "MyParent Constructor: Hello " << this->name << "!" << endl; 21 | } 22 | 23 | __attribute__ ((noinline, optimize("O0"))) 24 | virtual ~MyParent() { 25 | cout << "MyParent Destructor: Goodbye " << this->name << "!" << endl; 26 | } 27 | 28 | __attribute__ ((noinline, optimize("O0"))) 29 | virtual void my_func(const char* cmd) { 30 | cout << "MyParent::my_func: " << this->name << " calls `system('" << cmd << "')`!" << endl; 31 | system(cmd); 32 | } 33 | }; 34 | 35 | class MyChild : public MyParent { 36 | public: 37 | __attribute__ ((noinline, optimize("O0"))) 38 | MyChild(const char* name) : MyParent(name) { 39 | cout << "MyChild Constructor: Hello " << this->name << "!" 
<< endl; 40 | } 41 | 42 | __attribute__ ((noinline, optimize("O0"))) 43 | ~MyChild() override { 44 | cout << "MyChild Destructor: Goodbye " << this->name << "!" << endl; 45 | } 46 | 47 | __attribute__ ((noinline, optimize("O0"))) 48 | void my_func(const char* cmd) override { 49 | cout << "MyChild::my_func: " << this->name << " calls `popen('" << cmd << "', 'r')`!" << endl; 50 | FILE* fp = popen(cmd, "r"); 51 | if(fp != NULL) { 52 | pclose(fp); 53 | } 54 | } 55 | }; 56 | 57 | __attribute__ ((noinline, optimize("O3"))) 58 | int main(int argc, char *argv[]) { 59 | MyParent* p = new MyParent("Alice"); 60 | MyParent* c = new MyChild("Bob"); 61 | char* cmd = getenv("CMD"); 62 | if(cmd != NULL) { 63 | p->my_func(cmd); 64 | c->my_func(cmd); 65 | } 66 | delete p; 67 | delete c; 68 | return EXIT_SUCCESS; 69 | } -------------------------------------------------------------------------------- /tests/data/src/pointer_analysis-13.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* 6 | Testcase Description: 7 | - Multiple source memory definitions 8 | - Usage of va structs 9 | */ 10 | 11 | __attribute__ ((noinline)) 12 | int create_cmd(char** str_ptr, const char* fmt, ...) 
class TestMultiThreading(TestSlicing):
    """
    This class checks that slicing yields the same paths regardless of the number of worker
    threads being used.
    """

    def test_consistency_01(
        self, filenames: List[str] = ["function_calling-02"]
    ) -> None:
        """
        This method compares the paths found with a single worker against the paths found
        with several other `max_workers` values, for every binary matching `filenames`.
        """
        for file in self.load_files(filenames):
            # Load and analyze test binary with Binary Ninja
            bv = bn.load(file)
            try:
                bv.update_analysis_and_wait()
                # Assert results
                paths = self.get_paths(bv, max_workers=1)
                for max_workers in [2, 4, 8, -1]:
                    paths_mt = self.get_paths(bv, max_workers=max_workers)
                    # Every single-worker path must be matched by exactly one multi-worker
                    # path; matched paths are removed so that leftovers can be detected
                    for path in paths:
                        assert path in paths_mt, (
                            f"Inconsistent results with {max_workers:d} workers"
                        )
                        paths_mt.remove(path)
                    assert not paths_mt, (
                        f"Inconsistent results with {max_workers:d} workers"
                    )
            finally:
                # Close binary, also when an assertion above failed
                bv.file.close()
        return

    def test_consistency_02(self, filenames: List[str] = ["name_mangling-01"]) -> None:
        self.test_consistency_01(filenames)
        return

    def test_consistency_03(self, filenames: List[str] = ["load-05"]) -> None:
        self.test_consistency_01(filenames)
        return

    def test_consistency_04(
        self, filenames: List[str] = ["pointer_analysis-06"]
    ) -> None:
        self.test_consistency_01(filenames)
        return

    def test_consistency_05(
        self, filenames: List[str] = ["simple_http_server-03"]
    ) -> None:
        self.test_consistency_01(filenames)
        return

    def test_consistency_06(self, filenames: List[str] = ["memcpy-05"]) -> None:
        self.test_consistency_01(filenames)
        return
view for the plugin's sidebar. 13 | """ 14 | 15 | def __init__(self, sidebar_view: PathView) -> None: 16 | """ 17 | This method initializes a view (MVC pattern). 18 | """ 19 | super().__init__(self._init_icon(), "Mole") 20 | self._sidebar_view = sidebar_view 21 | return 22 | 23 | def _init_icon(self) -> qtui.QImage: 24 | """ 25 | This method initializes the sidebar's icon. 26 | """ 27 | icon = qtui.QImage( 28 | os.path.join( 29 | os.path.dirname(os.path.abspath(__file__)), "../resources/icon.png" 30 | ) 31 | ) 32 | if icon.isNull(): 33 | icon = qtui.QImage(56, 56, qtui.QImage.Format_RGB32) 34 | icon.fill(0) 35 | p = qtui.QPainter() 36 | p.begin(icon) 37 | p.setFont(qtui.QFont("Open Sans", 12)) 38 | p.setPen(qtui.QColor(255, 255, 255, 255)) 39 | p.drawText(qtc.QRectF(0, 0, 56, 56), qtc.Qt.AlignCenter, "MOLE") 40 | p.end() 41 | return icon 42 | 43 | def init(self) -> PathView: 44 | """ 45 | This method registers the sidebar with Binary Ninja. 46 | """ 47 | bnui.Sidebar.addSidebarWidgetType(self) 48 | return self 49 | 50 | def createWidget(self, frame: Any, data: Any) -> PathView: 51 | """ 52 | This method creates the sidebar's widget. 53 | """ 54 | return self._sidebar_view 55 | 56 | def defaultLocation(self) -> bnui.SidebarWidgetLocation: 57 | """ 58 | This method places the widget to the right sidebar. 59 | """ 60 | return bnui.SidebarWidgetLocation.RightContent 61 | 62 | def contextSensitivity(self) -> bnui.SidebarContextSensitivity: 63 | """ 64 | This method configures the widget to use a single instance that detects changes. 
65 | """ 66 | return bnui.SidebarContextSensitivity.SelfManagedSidebarContext 67 | -------------------------------------------------------------------------------- /tests/data/src/object_oriented-03.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | 6 | #define MAX_CMD_LENGTH 256 7 | 8 | /* 9 | Testcase Description: 10 | - Inheritance 11 | - With virtual functions (polymorphism) 12 | - Using member variable assigned in constructor 13 | */ 14 | 15 | class MyParent { 16 | protected: 17 | const char* name; 18 | 19 | public: 20 | __attribute__ ((noinline, optimize("O0"))) 21 | MyParent(const char* name) { 22 | this->name = name; 23 | cout << "MyParent Constructor: Hello " << this->name << "!" << endl; 24 | } 25 | 26 | __attribute__ ((noinline, optimize("O0"))) 27 | virtual ~MyParent() { 28 | cout << "MyParent Destructor: Goodbye " << this->name << "!" << endl; 29 | } 30 | 31 | __attribute__ ((noinline, optimize("O0"))) 32 | virtual void my_func() { 33 | char cmd[MAX_CMD_LENGTH]; 34 | snprintf(cmd, sizeof(cmd), "echo Hello %s!", this->name); 35 | cout << "MyParent::my_func: " << this->name << " calls `system('" << string(cmd) << "'`!" << endl; 36 | system(cmd); 37 | } 38 | }; 39 | 40 | class MyChild : public MyParent { 41 | public: 42 | __attribute__ ((noinline, optimize("O0"))) 43 | MyChild(const char* name) : MyParent(name) { 44 | cout << "MyChild Constructor: Hello " << this->name << "!" << endl; 45 | } 46 | 47 | __attribute__ ((noinline, optimize("O0"))) 48 | ~MyChild() override { 49 | cout << "MyChild Destructor: Goodbye " << this->name << "!" << endl; 50 | } 51 | 52 | __attribute__ ((noinline, optimize("O0"))) 53 | void my_func() override { 54 | char cmd[MAX_CMD_LENGTH]; 55 | snprintf(cmd, sizeof(cmd), "echo Hello %s!", this->name); 56 | cout << "MyChild::my_func: " << this->name << " calls `popen('" << string(cmd) << "', 'r')`!" 
<< endl; 57 | FILE* fp = popen(cmd, "r"); 58 | if(fp != NULL) { 59 | pclose(fp); 60 | } 61 | } 62 | }; 63 | 64 | __attribute__ ((noinline, optimize("O3"))) 65 | int main(int argc, char *argv[]) { 66 | char* p_name = getenv("PARENT_NAME"); 67 | if(p_name != NULL) { 68 | MyParent* p = new MyParent(p_name); 69 | p->my_func(); 70 | delete p; 71 | } 72 | char* c_name = getenv("CHILD_NAME"); 73 | if(c_name != NULL) { 74 | MyParent* c = new MyChild(c_name); 75 | c->my_func(); 76 | delete c; 77 | } 78 | return EXIT_SUCCESS; 79 | } -------------------------------------------------------------------------------- /tests/slicing/test_mangling.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from tests.slicing.conftest import TestSlicing 3 | from typing import List 4 | import pytest 5 | 6 | 7 | class TestNameMangling(TestSlicing): 8 | def test_name_mangling_01( 9 | self, filenames: List[str] = ["name_mangling-01"] 10 | ) -> None: 11 | self.assert_paths( 12 | srcs=[("getenv", None)], 13 | snks=[("system", 1)], 14 | call_chains=[["overloaded_func", "main"], ["overloaded_func", "main"]], 15 | filenames=filenames, 16 | ) 17 | return 18 | 19 | def test_name_mangling_02( 20 | self, filenames: List[str] = ["name_mangling-02"] 21 | ) -> None: 22 | self.assert_paths( 23 | srcs=[("getenv", None)], 24 | snks=[("system", 1)], 25 | call_chains=[["MyStruct::my_func", "main"], ["MyClass::my_func", "main"]], 26 | filenames=filenames, 27 | ) 28 | return 29 | 30 | def test_name_mangling_03( 31 | self, filenames: List[str] = ["name_mangling-03"] 32 | ) -> None: 33 | self.assert_paths( 34 | srcs=[("getenv", None)], 35 | snks=[("system", 1)], 36 | call_chains=[["ns::my_func", "main"]], 37 | filenames=filenames, 38 | ) 39 | return 40 | 41 | def test_name_mangling_04( 42 | self, filenames: List[str] = ["name_mangling-04"] 43 | ) -> None: 44 | self.assert_paths( 45 | srcs=[("getenv", None)], 46 | snks=[("system", 1)], 47 | 
call_chains=[["my_func", "main"]], 48 | filenames=filenames, 49 | ) 50 | return 51 | 52 | @pytest.mark.xfail 53 | def test_name_mangling_05( 54 | self, filenames: List[str] = ["name_mangling-05"] 55 | ) -> None: 56 | self.assert_paths( 57 | srcs=[("getenv", None)], 58 | snks=[("system", 1)], 59 | call_chains=[ 60 | ["MyStruct::my_func", "_GLOBAL__sub_I__ZN8MyStruct3cmdE"], 61 | ["MyClass::my_func", "_GLOBAL__sub_I__ZN8MyStruct3cmdE"], 62 | ], 63 | filenames=filenames, 64 | ) 65 | return 66 | 67 | @pytest.mark.xfail 68 | def test_name_mangling_06( 69 | self, filenames: List[str] = ["name_mangling-06"] 70 | ) -> None: 71 | self.assert_paths( 72 | srcs=[("getenv", None)], 73 | snks=[("system", 1)], 74 | call_chains=[["MyStruct::my_func", "main", "MyStruct::operator+"]], 75 | filenames=filenames, 76 | ) 77 | return 78 | -------------------------------------------------------------------------------- /tests/slicing/test_function_out_params.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from tests.slicing.conftest import TestSlicing 3 | from typing import List 4 | import binaryninja as bn 5 | 6 | 7 | class TestFunctionOutParams(TestSlicing): 8 | def test_function_out_params_01( 9 | self, filenames: List[str] = ["function_out_params-01"] 10 | ) -> None: 11 | self.assert_paths( 12 | srcs=[("getenv", None)], 13 | snks=[("memcpy", 3)], 14 | call_chains=[["main", "get_size"]], 15 | filenames=filenames, 16 | ) 17 | return 18 | 19 | def test_function_out_params_02( 20 | self, filenames: List[str] = ["function_out_params-02"] 21 | ) -> None: 22 | self.assert_paths( 23 | srcs=[("getenv", None)], 24 | snks=[("system", 1)], 25 | call_chains=[["main", "get_cmd"]], 26 | filenames=filenames, 27 | ) 28 | return 29 | 30 | def test_function_out_params_03( 31 | self, filenames: List[str] = ["function_out_params-03"] 32 | ) -> None: 33 | self.test_function_out_params_02(filenames) 34 | return 35 | 36 | def 
test_function_out_params_04( 37 | self, filenames: List[str] = ["function_out_params-04"] 38 | ) -> None: 39 | self.test_function_out_params_02(filenames) 40 | return 41 | 42 | def test_function_out_params_05( 43 | self, filenames: List[str] = ["function_out_params-05"] 44 | ) -> None: 45 | self.test_function_out_params_02(filenames) 46 | return 47 | 48 | def test_function_out_params_06( 49 | self, filenames: List[str] = ["function_out_params-06"] 50 | ) -> None: 51 | self.assert_paths( 52 | srcs=[("getenv", None)], 53 | snks=[("system", 1)], 54 | call_chains=[["main"]], 55 | filenames=filenames, 56 | ) 57 | return 58 | 59 | def test_function_out_params_07( 60 | self, filenames: List[str] = ["function_out_params-07"] 61 | ) -> None: 62 | self.test_function_out_params_06(filenames) 63 | return 64 | 65 | def test_function_out_params_08( 66 | self, filenames: List[str] = ["function_out_params-08"] 67 | ) -> None: 68 | def manually_set_types(bv: bn.BinaryView) -> None: 69 | get_cmd = bv.get_functions_by_name("get_cmd")[0] 70 | printf_call_site = get_cmd.call_sites[1] 71 | printf_type, _ = bv.parse_type_string( 72 | "int printf(const char* format, char* msg)" 73 | ) 74 | get_cmd.set_call_type_adjustment(printf_call_site.address, printf_type) 75 | bv.update_analysis_and_wait() 76 | return 77 | 78 | self.assert_paths( 79 | srcs=[("getenv", None)], 80 | snks=[("system", 1)], 81 | call_chains=[["main", "check_cmd", "get_cmd"]], 82 | filenames=filenames, 83 | bv_callback=manually_set_types, 84 | ) 85 | return 86 | -------------------------------------------------------------------------------- /mole/models/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from mole.core.data import Configuration, Function, Library, WidgetSetting 3 | from typing import Dict, List, Literal, Optional 4 | 5 | 6 | class ConfigModel: 7 | """ 8 | This class implements a model to handle Mole's configuration. 
9 | """ 10 | 11 | def __init__(self, config: Configuration) -> None: 12 | """ 13 | Initialize the configuration model with optional pre-loaded configuration. 14 | 15 | Args: 16 | config: A Configuration object to initialize the model with. 17 | If None, an empty configuration will be created. 18 | """ 19 | self._config = config 20 | return 21 | 22 | def get(self) -> Configuration: 23 | """ 24 | This method returns the configuration. 25 | """ 26 | return self._config 27 | 28 | def set(self, config: Configuration) -> None: 29 | """ 30 | This method sets the configuration. 31 | """ 32 | self._config = config 33 | return 34 | 35 | def get_libraries( 36 | self, fun_type: Optional[Literal["Sources", "Sinks"]] 37 | ) -> Dict[str, Library]: 38 | """ 39 | This method returns all libraries matching the given type. 40 | """ 41 | match fun_type: 42 | case "Sources": 43 | return self._config.sources 44 | case "Sinks": 45 | return self._config.sinks 46 | return {} 47 | 48 | def get_functions( 49 | self, 50 | lib_name: Optional[str] = None, 51 | cat_name: Optional[str] = None, 52 | fun_name: Optional[str] = None, 53 | fun_type: Optional[Literal["Sources", "Sinks"]] = None, 54 | fun_enabled: Optional[bool] = None, 55 | ) -> List[Function]: 56 | """ 57 | This method returns all functions matching the given attributes. An attribute of `None` 58 | indicates that the corresponding attribute is irrelevant. 
59 | """ 60 | funs: List[Function] = [] 61 | match fun_type: 62 | case "Sources": 63 | libs = self._config.sources.values() 64 | case "Sinks": 65 | libs = self._config.sinks.values() 66 | case _: 67 | libs = self._config.sources.values() + self._config.sinks.values() 68 | for lib in libs: 69 | if lib_name is None or lib.name == lib_name: 70 | for cat in lib.categories.values(): 71 | if cat_name is None or cat.name == cat_name: 72 | for fun in cat.functions.values(): 73 | if fun_name is None or fun.name == fun_name: 74 | if fun_enabled is None or fun.enabled == fun_enabled: 75 | funs.append(fun) 76 | return funs 77 | 78 | def get_setting(self, name: str) -> Optional[WidgetSetting]: 79 | """ 80 | This method returns the setting with name `name`. 81 | """ 82 | return self._config.settings.get(name, None) 83 | -------------------------------------------------------------------------------- /mole/common/helper/symbol.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import Dict, List, Optional, Set 3 | import binaryninja as bn 4 | 5 | 6 | class SymbolHelper: 7 | """ 8 | This class provides helper functions with respect to symbols. 9 | """ 10 | 11 | @staticmethod 12 | def get_symbol_by_section( 13 | bv: bn.BinaryView, symbol_name: str, section_name: str = ".plt" 14 | ) -> Optional[bn.CoreSymbol]: 15 | """ 16 | This method returns the symbol with name `symbol_name` belonging to section `section_name`. 17 | """ 18 | section = bv.get_section_by_name(section_name) 19 | if section is None: 20 | return None 21 | for symbol in bv.symbols.get(symbol_name, []): 22 | if section.start <= symbol.address < section.end: 23 | return symbol 24 | return None 25 | 26 | @staticmethod 27 | def get_code_refs( 28 | bv: bn.BinaryView, symbol_names: List[str] 29 | ) -> Dict[str, Set[bn.MediumLevelILInstruction]]: 30 | """ 31 | This method determines code references for the provided `symbol_names`. 
The returned 32 | dictionary contains individual `symbol_names` as keys, and the corresponding code references 33 | as values. Code references correspond to `bn.MediumLevelILInstruction`s in SSA form. 34 | """ 35 | mlil_ssa_code_refs = {} 36 | for symbol_name in symbol_names: 37 | for symbol in bv.symbols.get(symbol_name, []): 38 | # Check if the symbol is in the PE sections .idata 39 | idata = bv.sections.get(".idata") 40 | in_idata = idata.start <= symbol.address < idata.end if idata else False 41 | # Check if the symbol is in the PE sections .synthetic_builtins 42 | synthetic = bv.sections.get(".synthetic_builtins") 43 | in_synthetic_builtins = ( 44 | synthetic.start <= symbol.address < synthetic.end 45 | if synthetic 46 | else False 47 | ) 48 | # Check if there is code at the symbol address 49 | in_code = bv.get_function_at(symbol.address) is not None 50 | # Ignore symbols that are neither in code, the .idata or .synthetic_builtins sections 51 | if not (in_code or in_idata or in_synthetic_builtins): 52 | continue 53 | # Store code references 54 | mlil_insts: Set[bn.MediumLevelILInstruction] = mlil_ssa_code_refs.get( 55 | symbol_name, set() 56 | ) 57 | for code_ref in bv.get_code_refs(symbol.address): 58 | # Store all instructions at the code reference address 59 | funcs = bv.get_functions_containing(code_ref.address) 60 | if funcs is None: 61 | continue 62 | for func in funcs: 63 | if ( 64 | func is None 65 | or func.mlil is None 66 | or func.mlil.ssa_form is None 67 | ): 68 | continue 69 | func = func.mlil.ssa_form 70 | for inst in func.instructions: 71 | if inst.address == code_ref.address: 72 | mlil_insts.add(inst) 73 | if mlil_insts: 74 | mlil_ssa_code_refs[symbol_name] = mlil_insts 75 | return mlil_ssa_code_refs 76 | -------------------------------------------------------------------------------- /tests/slicing/test_simple_server.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 
class TestSimpleServer(TestSlicing):
    """
    Slicing tests for the `simple_http_server-*` binaries. All tests use `("recv", 2)` as source
    and `("system", 1)` as sink and only differ in the expected call chains.
    """

    def test_simple_http_server_01(
        self, filenames: List[str] = ["simple_http_server-01"]
    ) -> None:
        """
        This test expects one single-function chain per request handler.
        """
        handlers = ("handle_get_request", "handle_post_request")
        self.assert_paths(
            filenames=filenames,
            srcs=[("recv", 2)],
            snks=[("system", 1)],
            call_chains=[[handler] for handler in handlers],
        )

    def test_simple_http_server_02(
        self, filenames: List[str] = ["simple_http_server-02"]
    ) -> None:
        """
        This test expects one chain per request handler, from `execute_cgi_command` to
        `receive_data`.
        """
        handlers = ("handle_get_request", "handle_post_request")
        self.assert_paths(
            filenames=filenames,
            srcs=[("recv", 2)],
            snks=[("system", 1)],
            call_chains=[
                ["execute_cgi_command", handler, "receive_data"] for handler in handlers
            ],
        )

    def test_simple_http_server_03(
        self, filenames: List[str] = ["simple_http_server-03"]
    ) -> None:
        """
        This test expects one chain per request handler (GET, POST, PUT, DELETE), each passing
        through `wrap_and_execute` and `process_request`.
        """
        handlers = (
            "handle_get_request",
            "handle_post_request",
            "handle_put_request",
            "handle_delete_request",
        )
        self.assert_paths(
            filenames=filenames,
            srcs=[("recv", 2)],
            snks=[("system", 1)],
            call_chains=[
                [
                    "execute_cgi_command",
                    "wrap_and_execute",
                    "process_request",
                    handler,
                    "receive_data",
                ]
                for handler in handlers
            ],
        )

    def test_simple_http_server_04(
        self, filenames: List[str] = ["simple_http_server-04"]
    ) -> None:
        """
        This test expects the same POST chain four times.
        """
        post_chain = (
            "execute_cgi_command",
            "process_post_request",
            "handle_post_request",
            "receive_data",
        )
        self.assert_paths(
            filenames=filenames,
            srcs=[("recv", 2)],
            snks=[("system", 1)],
            # Build four independent list objects with identical content
            call_chains=[list(post_chain) for _ in range(4)],
        )
filenames=filenames, 50 | ) 51 | return 52 | 53 | def test_memcpy_03(self, filenames: List[str] = ["memcpy-03"]) -> None: 54 | self.assert_paths( 55 | srcs=[("getenv", None)], 56 | snks=[("memcpy", 1)], 57 | call_chains=[["main"]], 58 | filenames=filenames, 59 | ) 60 | return 61 | 62 | def test_memcpy_04(self, filenames: List[str] = ["memcpy-04"]) -> None: 63 | self.assert_paths( 64 | srcs=[("getenv", None)], 65 | snks=[("memcpy", 3)], 66 | call_chains=[["main", "my_getenv"]], 67 | filenames=filenames, 68 | ) 69 | return 70 | 71 | def test_memcpy_05(self, filenames: List[str] = ["memcpy-05"]) -> None: 72 | self.assert_paths( 73 | srcs=[("getenv", None)], 74 | snks=[("memcpy", 2), ("memcpy", 3)], 75 | call_chains=[["main", "my_getenv"], ["main", "my_getenv"]], 76 | filenames=filenames, 77 | ) 78 | return 79 | 80 | def test_memcpy_06(self, filenames: List[str] = ["memcpy-06"]) -> None: 81 | self.assert_paths( 82 | srcs=[], 83 | snks=[], 84 | call_chains=[], 85 | filenames=filenames, 86 | ) 87 | return 88 | 89 | def test_memcpy_07(self, filenames: List[str] = ["memcpy-07"]) -> None: 90 | self.test_memcpy_02(filenames) 91 | return 92 | 93 | @pytest.mark.xfail 94 | def test_memcpy_08(self, filenames: List[str] = ["memcpy-08"]) -> None: 95 | self.test_memcpy_06(filenames) 96 | return 97 | 98 | def test_memcpy_09(self, filenames: List[str] = ["memcpy-09"]) -> None: 99 | self.test_memcpy_06(filenames) 100 | return 101 | 102 | @pytest.mark.xfail 103 | def test_memcpy_10(self, filenames: List[str] = ["memcpy-10"]) -> None: 104 | self.test_memcpy_06(filenames) 105 | return 106 | 107 | def test_memcpy_11(self, filenames: List[str] = ["memcpy-11"]) -> None: 108 | self.test_memcpy_06(filenames) 109 | return 110 | -------------------------------------------------------------------------------- /mole/controllers/ai.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from mole.controllers.config import 
ConfigController 3 | from mole.core.data import Path 4 | from mole.models.ai import AiVulnerabilityReport 5 | from mole.views.ai import AiView 6 | from mole.services.ai import AiService 7 | from typing import Callable, List, Tuple 8 | import binaryninja as bn 9 | 10 | 11 | tag = "Mole.AI" 12 | 13 | 14 | class AiController: 15 | """ 16 | This class implements a controller to analyze paths using AI. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | ai_view: AiView, 22 | config_ctr: ConfigController, 23 | ) -> None: 24 | """ 25 | This method initializes the AI controller. 26 | """ 27 | # Initialization 28 | self.ai_view = ai_view.init(self) 29 | self.config_ctr = config_ctr 30 | return 31 | 32 | def analyze_paths( 33 | self, 34 | bv: bn.BinaryView, 35 | paths: List[Tuple[int, Path]], 36 | analyzed_path: Callable[[int, AiVulnerabilityReport], None], 37 | ) -> AiService: 38 | """ 39 | This method starts a service that analyzes each path using AI. 40 | """ 41 | # Get settings 42 | max_workers = None 43 | max_workers_setting = self.config_ctr.get_setting("max_workers") 44 | if max_workers_setting: 45 | max_workers = int(max_workers_setting.value) 46 | if max_workers <= 0: 47 | max_workers = None 48 | base_url = "" 49 | base_url_setting = self.config_ctr.get_setting("openai_base_url") 50 | if base_url_setting: 51 | base_url = str(base_url_setting.value) 52 | api_key = "" 53 | api_key_setting = self.config_ctr.get_setting("openai_api_key") 54 | if api_key_setting: 55 | api_key = str(api_key_setting.value) 56 | model = "" 57 | model_setting = self.config_ctr.get_setting("openai_model") 58 | if model_setting: 59 | model = str(model_setting.value) 60 | max_turns = 0 61 | max_turns_setting = self.config_ctr.get_setting("max_turns") 62 | if max_turns_setting: 63 | max_turns = int(max_turns_setting.value) 64 | max_completion_tokens = None 65 | max_completion_tokens_setting = self.config_ctr.get_setting( 66 | "max_completion_tokens" 67 | ) 68 | if max_completion_tokens_setting: 69 
| max_completion_tokens = int(max_completion_tokens_setting.value) 70 | if max_completion_tokens < 1: 71 | max_completion_tokens = None 72 | temperature = None 73 | temperature_setting = self.config_ctr.get_setting("temperature") 74 | if temperature_setting: 75 | temperature = float(temperature_setting.value) 76 | if temperature < 0.0 or temperature > 2.0: 77 | temperature = None 78 | # Initialize and start AI service 79 | ai_service = AiService( 80 | bv=bv, 81 | paths=paths, 82 | analyzed_path=analyzed_path, 83 | max_workers=max_workers, 84 | base_url=base_url, 85 | api_key=api_key, 86 | model=model, 87 | max_turns=max_turns, 88 | max_completion_tokens=max_completion_tokens, 89 | temperature=temperature, 90 | initial_progress_text="Mole analyzes paths...", 91 | can_cancel=True, 92 | ) 93 | ai_service.start() 94 | # Return AI service instance 95 | return ai_service 96 | 97 | def show_report(self, report: AiVulnerabilityReport) -> None: 98 | """ 99 | This method shows the AI-generated `report` in the AI view. 
def extract_dependencies(pyproject_path: pathlib.Path) -> List[str]:
    """
    Extract pip dependencies from a pyproject.toml file.

    The first matching layout wins: `project.dependencies` (PEP 621), then
    `tool.poetry.dependencies` (Poetry), then `tool.flit.metadata.requires` (Flit).

    Args:
        pyproject_path: Path of the pyproject.toml file to read.

    Returns:
        Sorted list of requirement strings, or an empty list if the file cannot be parsed or
        declares no dependencies.
    """
    # Prefer the standard-library parser (Python 3.11+); fall back to the API-compatible
    # `tomli` backport on older interpreters
    try:
        import tomllib as toml_parser
    except ModuleNotFoundError:
        import tomli as toml_parser

    with open(pyproject_path, "rb") as f:
        try:
            pyproject_data = toml_parser.load(f)
        except Exception as e:
            # Best effort: report the problem and behave as if nothing was declared
            print(f"Error parsing pyproject.toml: {str(e):s}")
            return []

    # Check for dependencies in different possible locations
    dependencies = []
    tool_data = pyproject_data.get("tool", {})

    # Check for project.dependencies (PEP 621 format)
    if "project" in pyproject_data and "dependencies" in pyproject_data["project"]:
        dependencies.extend(pyproject_data["project"]["dependencies"])

    # Check for tool.poetry.dependencies (Poetry format)
    elif "poetry" in tool_data:
        poetry_deps = tool_data["poetry"].get("dependencies", {})
        for pkg, version in poetry_deps.items():
            # The interpreter constraint is not a pip dependency
            if pkg == "python":
                continue
            # NOTE(review): the constraint is emitted verbatim after `==`, so Poetry-specific
            # constraints such as `^1.2` or `*` do not yield valid pip specifiers
            if isinstance(version, str):
                dependencies.append(f"{pkg:s}=={version:s}")
            elif isinstance(version, dict) and "version" in version:
                dependencies.append(f"{pkg:s}=={version['version']:s}")
            else:
                # No usable version information (e.g. a git or path dependency)
                dependencies.append(pkg)

    # Check for tool.flit.metadata.requires (Flit format)
    elif "flit" in tool_data:
        if "metadata" in tool_data["flit"]:
            dependencies.extend(tool_data["flit"]["metadata"].get("requires", []))

    return sorted(dependencies)
def create_requirements_txt(
    requirements_path: pathlib.Path, dependencies: List[str]
) -> None:
    """
    Write one dependency per line to `requirements_path`, replacing any existing file. Failures
    are reported on stdout instead of being raised.
    """
    try:
        with open(requirements_path, "w") as out_file:
            out_file.writelines(f"{requirement:s}\n" for requirement in dependencies)
        print(f"Created requirements.txt at '{str(requirements_path):s}'")
    except Exception as e:
        print(f"Error creating requirements.txt: {str(e):s}")
class PathGrouper(ABC):
    """
    This class is an abstract base class for path grouping strategies. Implementations should
    provide logic for how paths are organized in a tree structure.
    """

    @abstractmethod
    def get_group_keys(self, path: Path, *args, **kwargs) -> List[Tuple[str, str, int]]:
        """
        This method returns a list of hierarchy keys for organizing paths. Each key is a tuple of
        (display_name, internal_id, level). The level indicates the depth in the tree (0=root,
        1=first level, etc.).

        Args:
            path: `Path` object to be grouped
            args: Custom positional arguments
            kwargs: Custom keyword arguments

        Returns:
            List of tuples containing (display_name, internal_id, level)
        """
        raise NotImplementedError

    @abstractmethod
    def get_strategy_name(self) -> str:
        """
        This method returns the name of this grouping strategy. This should match the corresponding
        strategy constant.
        """
        raise NotImplementedError

    @staticmethod
    def get_all_subclasses() -> List[Type["PathGrouper"]]:
        """
        This method recursively returns all subclasses of `PathGrouper`, at any inheritance depth.
        Each class is listed once, directly followed by its own subclasses (depth-first order).

        Returns:
            List of `PathGrouper` subclass types
        """
        all_subclasses: List[Type["PathGrouper"]] = []
        seen = set()
        # Explicit stack instead of recursion; reversing keeps the definition order on pop
        pending = list(reversed(PathGrouper.__subclasses__()))
        while pending:
            cls = pending.pop()
            # With multiple inheritance, a class can be reachable via several parents
            if cls in seen:
                continue
            seen.add(cls)
            all_subclasses.append(cls)
            pending.extend(reversed(cls.__subclasses__()))
        return all_subclasses

    @staticmethod
    def get_strategy_map() -> Dict[str, PathGrouper | None]:
        """
        This method returns a mapping of all available strategy names to their implementations.
        Dynamically discovers all `PathGrouper` subclasses.

        Returns:
            Dictionary mapping strategy names to `PathGrouper` instances. The entry "None" maps
            to `None`.
        """
        strategy_map: Dict[str, PathGrouper | None] = {"None": None}
        # Find all PathGrouper subclasses and instantiate them
        for cls in PathGrouper.get_all_subclasses():
            # Skip the abstract base class itself and subclasses that are still abstract
            if cls == PathGrouper or inspect.isabstract(cls):
                continue
            try:
                instance = cls()
                strategy_map[instance.get_strategy_name()] = instance
            except Exception as e:
                # A broken strategy must not prevent the remaining ones from being registered
                print(
                    f"Error instantiating {cls.__name__:s}: {str(e):s}", file=sys.stderr
                )
        return strategy_map
class LogicalExpressionParser:
    """
    This class parses logical expressions over the single integer variable `i`.
    """

    grammar = """
        start: expr

        ?expr: term ("or" term)* -> or_expr
        ?term: factor ("and" factor)* -> and_expr
        ?factor: atom
            | "not" factor -> not_expr
            | "(" expr ")"
        ?atom: "True" -> true
            | "true" -> true
            | "False" -> false
            | "false" -> false
            | "i" "==" value -> eq
            | "i" "!=" value -> neq
            | "i" ">" value -> gt
            | "i" "<" value -> lt
            | "i" ">=" value -> ge
            | "i" "<=" value -> le
            | "i" -> var

        ?value: "-" NUMBER -> neg_number
            | NUMBER -> number

        %import common.NUMBER
        %import common.WS
        %ignore WS
    """

    def __init__(self) -> None:
        """
        This method initializes a parser for logical expressions.
        """
        self._parser = Lark(
            grammar=self.grammar,
            parser="lalr",
            transformer=LogicalExpressionTransformer(),
        )
        return

    def parse(self, expr: str) -> Optional[Callable[[int], bool]]:
        """
        This method parses a logical expression.

        Args:
            expr: Expression text following `grammar`

        Returns:
            A function evaluating the expression for a given `i`, or `None` if `expr` is empty
            or cannot be parsed
        """
        if not expr:
            return None
        try:
            # The transformer turns the parse tree into Python source text
            source = self._parser.parse(expr).children[0]
            # Compile once here so that the returned function does not re-parse on every call
            code = compile(source, "<logical-expression>", "eval")
        except Exception as e:
            log.warn(tag, f"Failed to parse expression '{expr}': {str(e):s}")
            return None

        def f(i):
            # The source only consists of what the transformer emits (`i`, numbers, `True`,
            # `False`, comparisons and boolean operators), so no builtins are needed
            return bool(eval(code, {"__builtins__": {}}, {"i": i}))

        return f
/*
 * Entry point: creates the listening socket and serves clients one at a time,
 * forever. Each accepted connection is handed to handle_client().
 */
int main() {
    int server_fd, client_socket;
    struct sockaddr_in address;
    int addrlen = sizeof(address);

    server_fd = create_server_socket(&address);

    while (1) {
        /* A failing accept() terminates the whole server */
        if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept");
            close(server_fd);
            exit(EXIT_FAILURE);
        }
        handle_client(client_socket);
    }

    /* Not reached: the loop above is only left via exit() */
    close(server_fd);
    return 0;
}
/*
 * Looks for "/cgi-bin/" in `buffer` and passes the text between that marker
 * and the next space character to system(). Nothing is executed when the
 * marker or the terminating space is missing.
 *
 * NOTE: the command text is taken from `buffer` without any validation. The
 * slicing tests use `system` as the sink for this testcase, so this looks
 * deliberate.
 */
void execute_cgi_command(const char *buffer) {
    char *cgi_start = strstr(buffer, "/cgi-bin/");
    if (cgi_start) {
        /* Skip the marker itself so that cgi_start points at the command */
        cgi_start += strlen("/cgi-bin/");
        char *cgi_end = strchr(cgi_start, ' ');
        if (cgi_end) {
            /* Terminate the command in place; this writes into `buffer` despite its const qualifier */
            *cgi_end = '\0';
            system(cgi_start);
        }
    }
}
/*
 * Reads one chunk of data from `client_socket` into a newly allocated,
 * NUL-terminated buffer of BUFFER_SIZE bytes.
 *
 * On success, the buffer is returned and the number of bytes read is stored
 * in `*size`. On allocation or recv() failure, NULL is returned and `*size`
 * is left untouched. The buffer is not freed here; the callers in this file
 * free() it.
 */
char* receive_data(int client_socket, int *size) {
    char *buffer = (char *)malloc(BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc");
        return NULL;
    }

    /* Read at most BUFFER_SIZE - 1 bytes so that the terminator below always fits */
    int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0);
    if (bytes_read < 0) {
        perror("recv");
        free(buffer);
        return NULL;
    }

    buffer[bytes_read] = '\0';
    *size = bytes_read;
    return buffer;
}
sizeof(address); 41 | 42 | server_fd = create_server_socket(&address); 43 | 44 | while (1) { 45 | if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) { 46 | perror("accept"); 47 | close(server_fd); 48 | exit(EXIT_FAILURE); 49 | } 50 | handle_client(client_socket); 51 | } 52 | 53 | close(server_fd); 54 | return 0; 55 | } 56 | 57 | void handle_client(int client_socket) { 58 | int size; 59 | char *method = receive_data(client_socket, &size); 60 | 61 | if (method == NULL) { 62 | close(client_socket); 63 | return; 64 | } 65 | 66 | if (strncmp(method, "GET ", 4) == 0) { 67 | handle_get_request(client_socket); 68 | } else if (strncmp(method, "POST", 4) == 0) { 69 | handle_post_request(client_socket); 70 | } else { 71 | send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n"); 72 | close(client_socket); 73 | } 74 | 75 | free(method); 76 | } 77 | 78 | void handle_get_request(int client_socket) { 79 | int size; 80 | char *buffer = receive_data(client_socket, &size); 81 | 82 | if (buffer == NULL) { 83 | close(client_socket); 84 | return; 85 | } 86 | 87 | execute_cgi_command(buffer); 88 | send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nGET request received.\n"); 89 | close(client_socket); 90 | free(buffer); 91 | } 92 | 93 | void handle_post_request(int client_socket) { 94 | int size; 95 | char *buffer = receive_data(client_socket, &size); 96 | 97 | if (buffer == NULL) { 98 | close(client_socket); 99 | return; 100 | } 101 | 102 | execute_cgi_command(buffer); 103 | send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nPOST request received.\n"); 104 | close(client_socket); 105 | free(buffer); 106 | } 107 | 108 | void execute_cgi_command(const char *buffer) { 109 | char *cgi_start = strstr(buffer, "/cgi-bin/"); 110 | if (cgi_start) { 111 | cgi_start += strlen("/cgi-bin/"); 112 | char *cgi_end = strchr(cgi_start, ' 
'); 113 | if (cgi_end) { 114 | *cgi_end = '\0'; 115 | system(cgi_start); 116 | } 117 | } 118 | } 119 | 120 | void send_response(int client_socket, const char *response) { 121 | write(client_socket, response, strlen(response)); 122 | } 123 | 124 | int create_server_socket(struct sockaddr_in *address) { 125 | int server_fd; 126 | 127 | if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) { 128 | perror("socket failed"); 129 | exit(EXIT_FAILURE); 130 | } 131 | 132 | address->sin_family = AF_INET; 133 | address->sin_addr.s_addr = INADDR_ANY; 134 | address->sin_port = htons(PORT); 135 | 136 | if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) { 137 | perror("bind failed"); 138 | close(server_fd); 139 | exit(EXIT_FAILURE); 140 | } 141 | 142 | if (listen(server_fd, 3) < 0) { 143 | perror("listen"); 144 | close(server_fd); 145 | exit(EXIT_FAILURE); 146 | } 147 | 148 | return server_fd; 149 | } 150 | 151 | char* receive_data(int client_socket, int *size) { 152 | char *buffer = (char *)malloc(BUFFER_SIZE); 153 | if (buffer == NULL) { 154 | perror("malloc"); 155 | return NULL; 156 | } 157 | 158 | int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0); 159 | if (bytes_read < 0) { 160 | perror("recv"); 161 | free(buffer); 162 | return NULL; 163 | } 164 | 165 | buffer[bytes_read] = '\0'; 166 | *size = bytes_read; 167 | return buffer; 168 | } -------------------------------------------------------------------------------- /docs/04-Pointers.md: -------------------------------------------------------------------------------- 1 | # Pointer Analysis 2 | ## Pointer and Variable Dereferencing 3 | See unit-tests `load-01.c`, `load-02.c` and `load-03.c`. 
4 | ## Array Indexing 5 | Consider the following C source code: 6 | ```c 7 | #include 8 | #include 9 | 10 | __attribute__ ((noinline, optimize("O0"))) 11 | int main(int argc, char *argv[]) { 12 | if(argc >= 2) { 13 | argv[1] = getenv("CMD"); // Source 14 | system(argv[1]); // Sink 15 | } 16 | return 0; 17 | } 18 | ``` 19 | 20 | The code is straightforward. It defines a sink function, `system`, which executes the command string stored in `argv[1]`. This argument is assigned a value read from an environment variable, serving as our source. 21 | 22 | **Note**: Yes, assigning to `argv[1]` is intentional. It's just a stand-in for arbitrary array indexing 😉. 23 | 24 | The `main` function's MLIL representation in SSA form is shown below: 25 | ``` 26 | 00401170 int32_t main(int argc, char** argv) 27 | 28 | 0 @ 0040117d var_1c#1 = argc#0 29 | 1 @ 00401180 var_28#1 = argv#0 30 | 2 @ 00401188 if (var_1c#1 s<= 1) then 3 else 4 @ 0x40118a 31 | 32 | 3 @ 00401188 goto 14 @ 0x4011b7 33 | 34 | 4 @ 0040118a rax_1#1 = var_28#1 35 | 5 @ 0040118e rbx_1#1 = rax_1#1 + 8 36 | 6 @ 0040119c rax_2#2, mem#1 = getenv(name: "CMD") @ mem#0 // Source 37 | 7 @ 004011a1 [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2 // MLIL_STORE: Write quadword to the memory address stored in variable rbx_1#1 38 | 8 @ 004011a4 rax_3#3 = var_28#1 39 | 9 @ 004011a8 rax_4#4 = rax_3#3 + 8 40 | 10 @ 004011ac rax_5#5 = [rax_4#4].q @ mem#2 // MLIL_LOAD : Read quadword from the memory address stored in variable rax_4#4 41 | 11 @ 004011af rdi#1 = rax_5#5 42 | 12 @ 004011b2 mem#3 = system(line: rdi#1) @ mem#2 // Sink 43 | 13 @ 004011b2 goto 14 @ 0x4011b7 44 | 45 | 14 @ 004011b7 rax_5#6 = ϕ(rax#0, rax_5#5) 46 | 15 @ 004011b7 rbx_1#2 = ϕ(rbx#0, rbx_1#1) 47 | 16 @ 004011b7 rdi#2 = ϕ(argc#0, rdi#1) 48 | 17 @ 004011b7 mem#4 = ϕ(mem#0, mem#3) 49 | 18 @ 004011b7 rax_6#7 = 0 50 | 19 @ 004011c1 return 0 51 | ``` 52 | 53 | If we begin backward slicing from the sink function's parameter `rdi#1`, we may eventually encounter the MLIL_LOAD 
instruction `[rax_4#4].q @ mem#2` (the use-site). This instruction reads a quadword from the memory address stored in the variable `rax_4#4`. To continue the slicing process, we need to locate the definition-site of `[rax_4#4].q @ mem#2`, that is, the instruction responsible for writing to the corresponding memory region. 54 | 55 | To achieve this, we perform backward slicing on memory versions. The MLIL_LOAD instruction where we stopped has memory version 2 (`@ mem#2`). The instruction defining this memory version is the MLIL_STORE `[rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2`, which writes a quadword to the memory address stored in the variable `rbx_1#1`. 56 | 57 | The slicer should therefore jump from the MLIL_LOAD to the corresponding MLIL_STORE if `rax_4#4` and `rbx_1#1` refer to the same memory location. By manually inspecting the instructions, we can confirm that this is indeed the case, i.e. both point to `argv[1]`. 58 | ``` 59 | rax_4#4 = rax_3#3 + 8 = var_28#1 + 8 = argv#0 + 8 --> argv[1] 60 | rbx_1#1 = rax_1#1 + 8 = var_28#1 + 8 = argv#0 + 8 --> argv[1] 61 | ``` 62 | 63 | The above relationship is, however, difficult to infer automatically at the MLIL level. Interestingly, if we look at the HLIL, it captures it. That's one of the many beauties of Binary Ninja's multi-level IL design! 
64 | ``` 65 | # Load `argv[1]` 66 | >>> mlil_load_inst 67 | 68 | 69 | >>> mlil_load_inst.hlil.ssa_form 70 | 71 | 72 | >>> mlil_load_inst.hlil.ssa_form.src.var, mlil_load_inst.hlil.ssa_form.index.constant 73 | (, 1) <-- argv[1] 74 | 75 | 76 | # Store `argv[1]` 77 | >>> mlil_store_inst 78 | mem#2> 79 | 80 | >>> mlil_store_inst.hlil.ssa_form 81 | mem#1 @ mem#1> 82 | 83 | >>> mlil_store_inst.hlil.ssa_form.dest 84 | 85 | 86 | >>> mlil_store_inst.hlil.ssa_form.dest.src.var, mlil_store_inst.hlil.ssa_form.dest.index.constant 87 | (, 1) <-- argv[1] 88 | ``` 89 | 90 | When reaching `rax_5#5 = [rax_4#4].q @ mem#2` (MLIL_LOAD - use-site), Mole therefore knows it should continue slicing at `[rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2` (MLIL_STORE - def-site): 91 | ``` 92 | 0x4011ac [rax_4#4].q @ mem#2 (MediumLevelILLoadSsa) 93 | 94 | Follow store instruction '0x4011a1 [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2' since it writes the same 95 | array element ('argv#0[1]') as load instruction '0x4011ac [rax_4#4].q @ mem#2 (MediumLevelILLoadSsa)' 96 | 97 | 0x4011a1 [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2 (MediumLevelILStoreSsa) 98 | ``` 99 | ## Struct Field Dereferencing 100 | See unit-test `load-05.c`. -------------------------------------------------------------------------------- /plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "pluginmetadataversion": 2, 3 | "name": "Mole", 4 | "type": [ 5 | "ui", 6 | "helper" 7 | ], 8 | "api": [ 9 | "python3" 10 | ], 11 | "description": "Uncover interesting paths using static backward slicing", 12 | "longdescription": "

\n \"Mole\n

\n\n**_Mole_** is a *Binary Ninja* plugin designed to identify **interesting paths** in binaries. It performs **static backward slicing** on variables using *Binary Ninja*'s *Medium Level Intermediate Language* (*MLIL*) in its *Static Single Assignment* (*SSA*) form.\n\nIn *Mole*, a **path** refers to the flow of data between a defined source and sink. What constitutes an \"interesting\" path depends on the analysis goals. For instance, when searching for **vulnerabilities**, one might look for paths where untrusted inputs (sources) influence sensitive operations (sinks) in potentially dangerous ways.\n\nThe following list highlights some of *Mole*'s current **features**:\n- **Operation Mode**: *Mole* can be run either within *Binary Ninja*'s UI or in headless mode. Headless mode is particularly useful for scripted analysis across a large number of binaries. Conversely, using *Mole* within the UI is ideal for closely investigating detected paths.\n- **Path Identification**:\n - **Configuration**: *Mole* allows users to define source and sink functions through Binary Ninja\u2019s UI or configuration files (see Usage). This provides flexibility in selecting sources and sinks based on the specific usage scenario.\n - **Exploration**: To better understand a path and examine its characteristics, all instructions along the path can be printed or visually highlighted within *Binary Ninja*. Additionally, a side-by-side comparison of two paths can be displayed to quickly identify differences. Similar to instructions, a path's sequence of function calls can be printed or even visualized as a graph.\n - **Grouping**: To facilitate the identification of similar paths, *Mole* supports multiple grouping strategies. Currently, paths can be grouped based on matching source and sink functions, or by identical call sequences. 
New custom grouping strategies can easily be added to extend and customize this functionality (see Customization).\n - **Persistence**: Discovered paths can be annotated for clarity or removed if deemed irrelevant. To preserve analysis progress, paths can be saved directly to the target binary's database (*Binary Ninja*'s `.bndb` format). Paths can also be exported - for example, when performing headless analysis across many binaries on a file system, allowing identified paths to be later imported for easier exploration within *Binary Ninja*.\n- **Path Analysis With AI**: *Mole* can interact with local or remote *Large Language Models* (*LLMs*) via the *OpenAI API* (see Usage). The models are used to analyze identified paths and reason whether a given path corresponds to an exploitable vulnerability or not. The *LLM* attempts to classify the vulnerability and assign a severity level, which can help prioritize which paths are worth further investigation by an analyst. *Mole* provides a basic set of tools that the *LLM* can use to request additional information about the binary under analysis. This feature is an initial prototype and has not yet undergone systematic evaluation (e.g., models, prompts, tools). If it proves useful, we plan to improve it in future releases.\n\n- **Inter-Procedural Variable Slicing**: *Mole* supports slicing *MLIL variables* across function boundaries - a task that presents several challenges. For instance, statically determining a function's effective caller(s) is often difficult or even impossible. As a result, the implemented approach is an approximation. While not perfect, it performs reasonably well across a wide range of practical scenarios.\n- **Basic Pointer Analysis**: *Mole* currently implements a simplified strategy for tracking pointer usage. Like inter-procedural slicing, this approach is a simplification with inherent limitations (e.g. it cannot track global variables). 
Nevertheless, it performs well in many practical cases and is planned to be improved in future versions.", 13 | "license": { 14 | "name": "Apache-2.0", 15 | "text": "Copyright (c) 2025 Damian Pfammatter and Sergio Paganoni\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\nhttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License." 16 | }, 17 | "platforms": [ 18 | "Darwin", 19 | "Linux", 20 | "Windows" 21 | ], 22 | "installinstructions": { 23 | "Darwin": "", 24 | "Linux": "", 25 | "Windows": "" 26 | }, 27 | "version": "0.5.1", 28 | "author": "Damian Pfammatter and Sergio Paganoni", 29 | "minimumbinaryninjaversion": 6455 30 | } -------------------------------------------------------------------------------- /tests/data/src/simple_http_server-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define PORT 8080 8 | #define BUFFER_SIZE 256 9 | 10 | /* 11 | Testcase Description: 12 | - server example 13 | - disallow function inlining 14 | - generate duplicates path (phis in process_post_request) 15 | */ 16 | 17 | 18 | __attribute__ ((noinline)) 19 | void process_post_request(int client_socket, const char *buffer); 20 | 21 | __attribute__ ((noinline)) 22 | void handle_client(int client_socket); 23 | 24 | __attribute__ ((noinline)) 25 | void execute_cgi_command(const char *buffer); 26 | 27 | __attribute__ ((noinline)) 28 | void send_response(int client_socket, const char *response); 29 | 30 | __attribute__ ((noinline)) 31 | int 
create_server_socket(struct sockaddr_in *address); 32 | 33 | __attribute__ ((noinline)) 34 | void handle_post_request(int client_socket); 35 | 36 | __attribute__ ((noinline)) 37 | char* receive_data(int client_socket, int *size); 38 | 39 | int main() { 40 | int server_fd, client_socket; 41 | struct sockaddr_in address; 42 | int addrlen = sizeof(address); 43 | 44 | server_fd = create_server_socket(&address); 45 | 46 | while (1) { 47 | if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) { 48 | perror("accept"); 49 | close(server_fd); 50 | exit(EXIT_FAILURE); 51 | } 52 | handle_client(client_socket); 53 | } 54 | 55 | close(server_fd); 56 | return 0; 57 | } 58 | 59 | void handle_client(int client_socket) { 60 | int size; 61 | char *method = receive_data(client_socket, &size); 62 | 63 | if (method == NULL) { 64 | close(client_socket); 65 | return; 66 | } 67 | 68 | if (strncmp(method, "POST", 4) == 0) { 69 | handle_post_request(client_socket); 70 | } else { 71 | send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n"); 72 | close(client_socket); 73 | } 74 | 75 | free(method); 76 | } 77 | 78 | void process_post_request(int client_socket, const char *buffer) { 79 | // extract body from buffer 80 | char *body = strstr(buffer, "\r\n\r\n"); 81 | if (body) { 82 | body += 4; // skip the "\r\n\r\n" 83 | } else { 84 | body = (char*)buffer; 85 | } 86 | while (1) { 87 | 88 | // for each line in body execute the command 89 | char *line = strstr(body, "\r\n"); 90 | if(line == NULL) { 91 | break; 92 | } 93 | char* cmd = strstr(line, "EXECUTE"); 94 | if (cmd) { 95 | cmd += 8; // skip the "EXECUTE " 96 | } else { 97 | cmd = line; 98 | } 99 | execute_cgi_command(cmd); 100 | 101 | } 102 | } 103 | 104 | 105 | void handle_post_request(int client_socket) { 106 | int size; 107 | char *buffer = receive_data(client_socket, &size); 108 | 109 | if (buffer == NULL) { 110 | 
close(client_socket); 111 | return; 112 | } 113 | 114 | process_post_request(client_socket, buffer); 115 | send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nPOST request received.\n"); 116 | close(client_socket); 117 | free(buffer); 118 | } 119 | 120 | void execute_cgi_command(const char *buffer) { 121 | char *cgi_start = strstr(buffer, "/cgi-bin/"); 122 | if (cgi_start) { 123 | cgi_start += strlen("/cgi-bin/"); 124 | char *cgi_end = strchr(cgi_start, ' '); 125 | if (cgi_end) { 126 | *cgi_end = '\0'; 127 | system(cgi_start); 128 | } 129 | } 130 | } 131 | 132 | void send_response(int client_socket, const char *response) { 133 | write(client_socket, response, strlen(response)); 134 | } 135 | 136 | int create_server_socket(struct sockaddr_in *address) { 137 | int server_fd; 138 | 139 | if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) { 140 | perror("socket failed"); 141 | exit(EXIT_FAILURE); 142 | } 143 | 144 | address->sin_family = AF_INET; 145 | address->sin_addr.s_addr = INADDR_ANY; 146 | address->sin_port = htons(PORT); 147 | 148 | if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) { 149 | perror("bind failed"); 150 | close(server_fd); 151 | exit(EXIT_FAILURE); 152 | } 153 | 154 | if (listen(server_fd, 3) < 0) { 155 | perror("listen"); 156 | close(server_fd); 157 | exit(EXIT_FAILURE); 158 | } 159 | 160 | return server_fd; 161 | } 162 | 163 | char* receive_data(int client_socket, int *size) { 164 | char *buffer = (char *)malloc(BUFFER_SIZE); 165 | if (buffer == NULL) { 166 | perror("malloc"); 167 | return NULL; 168 | } 169 | 170 | int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0); 171 | if (bytes_read < 0) { 172 | perror("recv"); 173 | free(buffer); 174 | return NULL; 175 | } 176 | 177 | buffer[bytes_read] = '\0'; 178 | *size = bytes_read; 179 | return buffer; 180 | } -------------------------------------------------------------------------------- /tests/slicing/test_pointer.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from tests.slicing.conftest import TestSlicing 3 | from typing import List 4 | import pytest 5 | 6 | 7 | class TestPointerAnalysis(TestSlicing): 8 | def test_pointer_analysis_01( 9 | self, filenames: List[str] = ["pointer_analysis-01"] 10 | ) -> None: 11 | self.assert_paths( 12 | srcs=[("getenv", None)], 13 | snks=[("system", 1)], 14 | call_chains=[["main"]], 15 | filenames=filenames, 16 | ) 17 | return 18 | 19 | def test_pointer_analysis_02( 20 | self, filenames: List[str] = ["pointer_analysis-02"] 21 | ) -> None: 22 | self.test_pointer_analysis_01(filenames) 23 | return 24 | 25 | def test_pointer_analysis_03( 26 | self, filenames: List[str] = ["pointer_analysis-03"] 27 | ) -> None: 28 | self.test_pointer_analysis_01(filenames) 29 | return 30 | 31 | def test_pointer_analysis_04( 32 | self, filenames: List[str] = ["pointer_analysis-04"] 33 | ) -> None: 34 | self.assert_paths( 35 | srcs=[("getenv", None)], 36 | snks=[("system", 1)], 37 | call_chains=[["main"], ["main"]], 38 | filenames=filenames, 39 | ) 40 | return 41 | 42 | def test_pointer_analysis_05( 43 | self, filenames: List[str] = ["pointer_analysis-05"] 44 | ) -> None: 45 | self.assert_paths( 46 | srcs=[], 47 | snks=[], 48 | call_chains=[], 49 | filenames=filenames, 50 | ) 51 | return 52 | 53 | def test_pointer_analysis_06( 54 | self, filenames: List[str] = ["pointer_analysis-06"] 55 | ) -> None: 56 | self.assert_paths( 57 | srcs=[("getenv", None)], 58 | snks=[("memcpy", 3)], 59 | call_chains=[["main", "modify_n"]], 60 | filenames=filenames, 61 | ) 62 | return 63 | 64 | def test_pointer_analysis_07( 65 | self, filenames: List[str] = ["pointer_analysis-07"] 66 | ) -> None: 67 | self.assert_paths( 68 | srcs=[("getenv", None)], 69 | snks=[("memcpy", 2)], 70 | call_chains=[["main", "my_getenv"]], 71 | filenames=filenames, 72 | ) 73 | return 74 | 75 | def test_pointer_analysis_08( 76 | self, 
filenames: List[str] = ["pointer_analysis-08"] 77 | ) -> None: 78 | self.test_pointer_analysis_07(filenames) 79 | return 80 | 81 | def test_pointer_analysis_09( 82 | self, filenames: List[str] = ["pointer_analysis-09"] 83 | ) -> None: 84 | self.test_pointer_analysis_01(filenames) 85 | return 86 | 87 | def test_pointer_analysis_10( 88 | self, filenames: List[str] = ["pointer_analysis-10"] 89 | ) -> None: 90 | self.test_pointer_analysis_01(filenames) 91 | return 92 | 93 | def test_pointer_analysis_11( 94 | self, filenames: List[str] = ["pointer_analysis-11"] 95 | ) -> None: 96 | self.assert_paths( 97 | srcs=[("getenv", None)], 98 | snks=[("system", 1)], 99 | call_chains=[["execute", "main"]], 100 | filenames=filenames, 101 | ) 102 | return 103 | 104 | def test_pointer_analysis_12( 105 | self, filenames: List[str] = ["pointer_analysis-12"] 106 | ) -> None: 107 | self.assert_paths( 108 | srcs=[("getenv", None)], 109 | snks=[("system", 1)], 110 | call_chains=[["main"], ["main"]], 111 | filenames=filenames, 112 | ) 113 | return 114 | 115 | @pytest.mark.xfail 116 | def test_pointer_analysis_13( 117 | self, filenames: List[str] = ["pointer_analysis-13"] 118 | ) -> None: 119 | self.test_pointer_analysis_12(filenames) 120 | return 121 | 122 | def test_pointer_analysis_14( 123 | self, filenames: List[str] = ["pointer_analysis-14"] 124 | ) -> None: 125 | self.test_pointer_analysis_01(filenames) 126 | return 127 | 128 | @pytest.mark.xfail 129 | def test_pointer_analysis_15( 130 | self, filenames: List[str] = ["pointer_analysis-15"] 131 | ) -> None: 132 | self.assert_paths( 133 | srcs=[("getopt", 2)], 134 | snks=[("strcpy", 2)], 135 | call_chains=[["main"]], 136 | filenames=filenames, 137 | ) 138 | return 139 | 140 | def test_pointer_analysis_16( 141 | self, filenames: List[str] = ["pointer_analysis-16"] 142 | ) -> None: 143 | self.assert_paths( 144 | srcs=[("getenv", None)], 145 | snks=[("strncat", 2), ("system", 1)], 146 | call_chains=[["main"], ["main"]], 147 | 
filenames=filenames, 148 | ) 149 | return 150 | 151 | def test_pointer_analysis_17( 152 | self, filenames: List[str] = ["pointer_analysis-17"] 153 | ) -> None: 154 | self.assert_paths( 155 | srcs=[("recv", None), ("recv", 2)], 156 | snks=[("memcpy", 2), ("memcpy", 3), ("system", 1)], 157 | call_chains=[["main"], ["main"], ["main"], ["main"]], 158 | filenames=filenames, 159 | ) 160 | return 161 | -------------------------------------------------------------------------------- /tests/slicing/test_function_calling.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from tests.slicing.conftest import TestSlicing 3 | from typing import List 4 | 5 | 6 | class TestFunctionCalling(TestSlicing): 7 | def test_function_calling_01( 8 | self, filenames: List[str] = ["function_calling-01"] 9 | ) -> None: 10 | self.assert_paths( 11 | srcs=[("getenv", None)], 12 | snks=[("system", 1)], 13 | call_chains=[["main"], ["main"]], 14 | filenames=filenames, 15 | ) 16 | return 17 | 18 | def test_function_calling_02( 19 | self, filenames: List[str] = ["function_calling-02"] 20 | ) -> None: 21 | self.assert_paths( 22 | srcs=[("getenv", None)], 23 | snks=[("system", 1)], 24 | call_chains=[ 25 | ["system_2", "system_1a", "main", "getenv_1a", "getenv_2"], 26 | ["system_2", "system_1a", "main", "getenv_1b", "getenv_2"], 27 | ], 28 | filenames=filenames, 29 | ) 30 | return 31 | 32 | def test_function_calling_03( 33 | self, filenames: List[str] = ["function_calling-03"] 34 | ) -> None: 35 | self.assert_paths( 36 | srcs=[("getenv", None)], 37 | snks=[("system", 1)], 38 | call_chains=[ 39 | ["system_1a", "main", "getenv_1a"], 40 | ["system_1a", "main", "getenv_1b"], 41 | ], 42 | filenames=filenames, 43 | ) 44 | return 45 | 46 | def test_function_calling_04( 47 | self, filenames: List[str] = ["function_calling-04"] 48 | ) -> None: 49 | self.test_function_calling_02(filenames) 50 | return 51 | 52 | def 
test_function_calling_05( 53 | self, filenames: List[str] = ["function_calling-05"] 54 | ) -> None: 55 | self.assert_paths( 56 | srcs=[("getenv", None)], 57 | snks=[("system", 1)], 58 | call_chains=[["main"]], 59 | filenames=filenames, 60 | ) 61 | return 62 | 63 | def test_function_calling_06( 64 | self, filenames: List[str] = ["function_calling-06"] 65 | ) -> None: 66 | self.test_function_calling_05(filenames) 67 | return 68 | 69 | def test_function_calling_07( 70 | self, filenames: List[str] = ["function_calling-07"] 71 | ) -> None: 72 | self.assert_paths( 73 | srcs=[], 74 | snks=[], 75 | call_chains=[], 76 | filenames=filenames, 77 | ) 78 | return 79 | 80 | def test_function_calling_08( 81 | self, filenames: List[str] = ["function_calling-08"] 82 | ) -> None: 83 | self.test_function_calling_07(filenames) 84 | return 85 | 86 | def test_function_calling_09( 87 | self, filenames: List[str] = ["function_calling-09"] 88 | ) -> None: 89 | self.test_function_calling_05(filenames) 90 | return 91 | 92 | def test_function_calling_10( 93 | self, filenames: List[str] = ["function_calling-10"] 94 | ) -> None: 95 | self.assert_paths( 96 | srcs=[("getenv", None)], 97 | snks=[("system", 1)], 98 | call_chains=[["system_1", "main", "getenv_1", "getenv_2"]], 99 | filenames=filenames, 100 | ) 101 | return 102 | 103 | def test_function_calling_11( 104 | self, filenames: List[str] = ["function_calling-11"] 105 | ) -> None: 106 | self.assert_paths( 107 | srcs=[("getenv", None)], 108 | snks=[("system", 1)], 109 | call_chains=[["main", "getenv_1", "getenv_2"]], 110 | filenames=filenames, 111 | ) 112 | return 113 | 114 | def test_function_calling_12( 115 | self, filenames: List[str] = ["function_calling-12"] 116 | ) -> None: 117 | self.assert_paths( 118 | srcs=[("getenv", None)], 119 | snks=[("system", 1)], 120 | call_chains=[["main", "getenv_1", "getenv_2", "getenv_3", "getenv_4"]], 121 | filenames=filenames, 122 | ) 123 | return 124 | 125 | def test_function_calling_13( 126 | self, 
filenames: List[str] = ["function_calling-13"] 127 | ) -> None: 128 | self.assert_paths( 129 | srcs=[("getenv", None)], 130 | snks=[("system", 1)], 131 | call_chains=[["system_1", "main"]], 132 | filenames=filenames, 133 | ) 134 | return 135 | 136 | def test_function_calling_14( 137 | self, filenames: List[str] = ["function_calling-14"] 138 | ) -> None: 139 | self.assert_paths( 140 | srcs=[("getenv", None)], 141 | snks=[("system", 1)], 142 | call_chains=[["system_3", "system_2", "system_1", "main"]], 143 | filenames=filenames, 144 | ) 145 | return 146 | 147 | def test_function_calling_15( 148 | self, filenames: List[str] = ["function_calling-15"] 149 | ) -> None: 150 | self.assert_paths( 151 | srcs=[("getenv", None)], 152 | snks=[("system", 1)], 153 | call_chains=[["system_2", "system_1", "main", "getenv_1", "getenv_2"]], 154 | filenames=filenames, 155 | ) 156 | return 157 | -------------------------------------------------------------------------------- /tests/slicing/test_object_oriented.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from tests.slicing.conftest import TestSlicing 3 | from typing import List 4 | import binaryninja as bn 5 | import pytest 6 | 7 | 8 | class TestObjectOriented(TestSlicing): 9 | def test_object_oriented_01( 10 | self, filenames: List[str] = ["object_oriented-01"] 11 | ) -> None: 12 | self.assert_paths( 13 | srcs=[("getenv", None)], 14 | snks=[("system", 1)], 15 | call_chains=[ 16 | ["MyParent::my_func", "main"], 17 | ["MyParent::my_func", "main"], 18 | ], 19 | filenames=filenames, 20 | ) 21 | return 22 | 23 | @pytest.mark.xfail( 24 | reason="Binja returns an invalid code x-ref for `MyParent::VTable::my_func`" 25 | ) 26 | def test_object_oriented_02( 27 | self, filenames: List[str] = ["object_oriented-02"] 28 | ) -> None: 29 | def manually_set_types(bv: bn.BinaryView) -> None: 30 | main_func = bv.get_functions_by_name("main")[0] 31 | # Manually define 
class MyParent 32 | p_class = """ 33 | class MyParent __packed 34 | { 35 | `MyParent::VTable`* vptr; 36 | char const* name; 37 | }; 38 | """ 39 | parsed_p_class = bv.parse_types_from_string(p_class) 40 | for name, type in parsed_p_class.types.items(): 41 | bv.define_user_type(name, type) 42 | # Manually set type and name of variable `p` (i.e. `MyParent* p = ...`) 43 | p_class_type = bv.get_type_by_name("MyParent") 44 | p_new_inst: bn.HighLevelILVarInit = main_func.call_sites[0].hlil 45 | p_new_inst.dest.type = bn.Type.pointer(bv.arch, p_class_type) 46 | # Manually define class MyChild 47 | c_class = """ 48 | class MyChild __packed 49 | { 50 | `MyParent::MyChild::VTable`* vptr; 51 | char const* name; 52 | }; 53 | """ 54 | parsed_c_class = bv.parse_types_from_string(c_class) 55 | for name, type in parsed_c_class.types.items(): 56 | bv.define_user_type(name, type) 57 | # Manually set type and name of variable `c` (i.e. `MyChild* c = ...`) 58 | c_class_type = bv.get_type_by_name("MyChild") 59 | c_new_inst: bn.HighLevelILVarInit = main_func.call_sites[2].hlil 60 | c_new_inst.dest.type = bn.Type.pointer(bv.arch, c_class_type) 61 | return 62 | 63 | self.assert_paths( 64 | srcs=[("getenv", None)], 65 | snks=[("system", 1), ("popen", 1)], 66 | call_chains=[ 67 | ["MyParent::my_func", "main"], 68 | ["MyChild::my_func", "main"], 69 | ], 70 | filenames=filenames, 71 | bv_callback=manually_set_types, 72 | ) 73 | return 74 | 75 | @pytest.mark.xfail( 76 | reason="Mole fails to track def-site MLIL_STORE_STRUCT (in constructor) of use-site MLIL_LOAD_STRUCT for member variable `this->name`." 
77 | ) 78 | def test_object_oriented_03( 79 | self, filenames: List[str] = ["object_oriented-03"] 80 | ) -> None: 81 | def manually_set_types(bv: bn.BinaryView) -> None: 82 | main_func = bv.get_functions_by_name("main")[0] 83 | # Manually define class MyParent 84 | p_class = """ 85 | class MyParent __packed 86 | { 87 | `MyParent::VTable`* vptr; 88 | char const* name; 89 | }; 90 | """ 91 | parsed_p_class = bv.parse_types_from_string(p_class) 92 | for name, type in parsed_p_class.types.items(): 93 | bv.define_user_type(name, type) 94 | # Manually set type and name of variable `p` (i.e. `MyParent* p = ...`) 95 | p_class_type = bv.get_type_by_name("MyParent") 96 | p_new_inst: bn.HighLevelILVarInit = main_func.call_sites[1].hlil 97 | p_new_inst.dest.type = bn.Type.pointer(bv.arch, p_class_type) 98 | # Manually define class MyChild 99 | c_class = """ 100 | class MyChild __packed 101 | { 102 | `MyParent::MyChild::VTable`* vptr; 103 | char const* name; 104 | }; 105 | """ 106 | parsed_c_class = bv.parse_types_from_string(c_class) 107 | for name, type in parsed_c_class.types.items(): 108 | bv.define_user_type(name, type) 109 | # Manually set type and name of variable `c` (i.e. 
def readme_to_json_string(
    readme_filename: str = "README.md", save_test_file: bool = True
) -> Optional[str]:
    """
    Reads the README file and returns its first section as a JSON-escaped string.

    The root heading (the first line starting with `#`) is dropped, everything
    from the next heading onwards is discarded, and markdown links
    `[text](url)` are reduced to their text.

    Args:
        readme_filename (str): The name of the README file, resolved relative
            to the parent directory of this script (an absolute path is used
            as is).
        save_test_file (bool): Whether to additionally save the processed
            markdown as `processed_readme.md` in the system's temporary
            directory.

    Returns:
        str: The JSON-escaped string content of the first README section
            (including quotes), or None if the file cannot be read or
            processed.
    """
    # Local import: only needed for the optional debug copy
    import tempfile

    script_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(script_dir)
    readme_path = os.path.join(parent_dir, readme_filename)

    if not os.path.exists(readme_path):
        print(f"Error: File '{readme_path:s}' not found")
        return None

    try:
        with open(readme_path, "r", encoding="utf-8") as f:
            content = f.read()

        # A heading is a line that *starts* with '#'. Searching for a bare '#'
        # would also hit URL fragments (e.g. in badge links above the title).
        heading = re.compile(r"^#", re.MULTILINE)

        # Skip the root heading and continue with the line after it
        section = content
        root_heading = heading.search(content)
        if root_heading is not None:
            end_of_root_heading = content.find("\n", root_heading.start())
            if end_of_root_heading != -1:
                section = content[end_of_root_heading + 1 :].lstrip()

        # Cut at the next heading. Matching at line start also covers a heading
        # that directly follows the root heading (position 0 after `lstrip`).
        next_heading = heading.search(section)
        if next_heading is not None:
            section = section[: next_heading.start()]
        processed_content = section.strip()

        # Replace markdown links [text](url) with just the text
        processed_content = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", processed_content)

        # Save the processed content to a test file if requested
        if save_test_file:
            test_file_path = os.path.join(tempfile.gettempdir(), "processed_readme.md")
            with open(test_file_path, "w", encoding="utf-8") as test_file:
                test_file.write(processed_content)
            print(f"Saved processed markdown to '{test_file_path:s}'")

        # Use json.dumps to correctly escape the string for JSON embedding
        return json.dumps(processed_content)
    except (OSError, UnicodeError) as e:
        # Only I/O and decoding can fail here; anything else is a programming
        # error and should surface instead of being swallowed.
        print(f"Error reading or processing file '{readme_path:s}': {str(e):s}")
        return None
def _build_arg_parser() -> ap.ArgumentParser:
    """
    This function creates the command line parser of Mole's headless mode.
    """
    description = """
    Mole is a Binary Ninja plugin designed to identify interesting paths in binaries. It performs
    static backward slicing on variables using BN's MLIL in its SSA form. The plugin can be run
    either in BN's UI or in headless mode.
    """
    parser = ap.ArgumentParser(
        description=description, formatter_class=ap.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("file", help="file to analyze")
    parser.add_argument("--config_file", help="custom configuration file to use")
    parser.add_argument(
        "--log_level",
        choices=["error", "warning", "info", "debug"],
        default="debug",
        help="log level",
    )
    parser.add_argument(
        "--max_workers",
        type=int,
        default=None,
        help="maximum number of worker threads that backward slicing uses",
    )
    parser.add_argument(
        "--fix_func_type",
        action="store_true",
        help="whether to fix function types during analysis",
    )
    parser.add_argument(
        "--max_call_level",
        type=int,
        default=None,
        help="backward slicing visits called functions up to the given level",
    )
    parser.add_argument(
        "--max_slice_depth",
        type=int,
        default=None,
        help="maximum slice depth to stop the search",
    )
    parser.add_argument(
        "--max_memory_slice_depth",
        type=int,
        default=None,
        help="maximum memory slice depth to stop the search",
    )
    parser.add_argument(
        "--export_paths_to_json_file", help="export identified paths in JSON format"
    )
    parser.add_argument(
        "--export_paths_to_yml_file", help="export identified paths in YAML format"
    )
    parser.add_argument(
        "--save_bndb", help="save BN database file with analysis results"
    )
    return parser


def _expand_path(fp: str) -> str:
    """
    This function returns `fp` as absolute path with `~` and environment variables expanded.
    """
    return os.path.abspath(os.path.expanduser(os.path.expandvars(fp)))


def main() -> None:
    """
    This function is used to process a given binary in headless mode.

    It parses the command line, loads and analyzes the binary with Binary Ninja, runs Mole's
    backward slicing and optionally exports the identified paths (JSON, YAML and/or a BN
    database). Errors are logged rather than raised; the binary view is closed in every case.
    """
    # Parse arguments
    args = vars(_build_arg_parser().parse_args())

    # Change properties of logger
    log.change_properties(level=args["log_level"], runs_headless=True)
    bv = None
    try:
        # Load and analyze binary with Binary Ninja
        bv = bn.load(args["file"])
        # NOTE(review): depending on the BN version `load` may return `None` instead of
        # raising when the file cannot be opened - guard against both.
        if bv is None:
            log.error(msg=f"Failed to load file '{args['file']:s}'")
            return
        bv.update_analysis_and_wait()
        # Analyze binary with Mole
        slicer = PathService(
            bv=bv,
            config_model=ConfigModel(ConfigService(args["config_file"]).load_config()),
            max_workers=args["max_workers"],
            fix_func_type=args["fix_func_type"],
            max_call_level=args["max_call_level"],
            max_slice_depth=args["max_slice_depth"],
            max_memory_slice_depth=args["max_memory_slice_depth"],
        )
        slicer.start()
        paths = slicer.paths()
        # Export identified paths
        if (
            args["export_paths_to_yml_file"]
            or args["export_paths_to_json_file"]
            or args["save_bndb"]
        ):
            # Calculate SHA1 hash of binary
            sha1_hash = hl.sha1(bv.file.raw.read(0, bv.file.raw.end)).hexdigest()
            # Serialize paths
            s_paths: List[Dict] = []
            for path in paths:
                s_path = path.to_dict()
                s_path["sha1"] = sha1_hash
                s_paths.append(s_path)
            # Write JSON data
            if args["export_paths_to_json_file"]:
                fp = _expand_path(args["export_paths_to_json_file"])
                with open(fp, "w", encoding="utf-8") as f:
                    json.dump(s_paths, f, indent=2)
            # Write YAML data
            if args["export_paths_to_yml_file"]:
                fp = _expand_path(args["export_paths_to_yml_file"])
                with open(fp, "w", encoding="utf-8") as f:
                    yaml.safe_dump(
                        s_paths,
                        f,
                        sort_keys=False,
                        default_style=None,
                        default_flow_style=False,
                        encoding="utf-8",
                    )
            # Write BN database
            if args["save_bndb"]:
                bv.store_metadata("mole_paths", json.dumps(s_paths))
                fp = _expand_path(args["save_bndb"])
                bv.create_database(fp)
    except KeyboardInterrupt:
        log.info(msg="Keyboard interrupt caught")
    except Exception as e:
        # Top-level boundary of the CLI: report the failure instead of a traceback
        log.error(msg=f"Exception caught: '{str(e):s}'")
    finally:
        # Close binary, also when analysis or export failed
        if bv is not None:
            bv.file.close()
    return
def get_code_for_functions_containing(
    bv: bn.BinaryView,
    addr: str,
    il_type: str,
    tag: str | None = None,
) -> str:
    """
    This method returns code of functions containing address `addr`, in the specified BNIL
    representation `il_type`.

    `addr` is parsed with `int(addr, 0)`, so decimal and prefixed (e.g. `0x...`) strings are
    accepted; an `int` is used as is. The result is always a string: either the code of every
    matching function (one fenced block per function), a note that no function contains the
    address, or a description of the failure.
    """
    # `!s` instead of `:s`: the arguments originate from a tool call and are not guaranteed to be
    # strings - formatting them must never raise.
    log.info(
        tag,
        f"Tool call 'get_code_for_functions_containing(addr={addr!s}, il_type={il_type!s})'",
    )
    try:
        _addr = addr if isinstance(addr, int) else int(addr, 0)
        il_type = il_type.upper()
        funcs = bv.get_functions_containing(_addr)
        # An empty list (or `None`) both mean that no function contains the address
        if not funcs:
            return f"No functions found containing address `0x{_addr:x}`"
        func_code = []
        for func in funcs:
            header = f"{il_type:s} code of function `0x{func.start:x}: {func!s}`, which contains address `0x{_addr:x}`:"
            code = get_il_code(func, il_type)
            func_code.append(header + "\n```\n" + code + "\n```\n")
            log.debug(
                tag,
                f"Return {il_type:s} code of function '0x{func.start:x}: {func!s}'",
            )
        return "\n".join(func_code)
    except Exception as e:
        # Deliberately broad: the failure is reported back to the caller as text
        msg = f"Failed to get {il_type!s} code of functions containing address '{addr!s}': {e!s}"
        log.warn(tag, msg)
        return msg
def get_callers_by_address(
    bv: bn.BinaryView,
    addr: str,
    tag: str | None = None,
) -> str:
    """
    This method returns the callers of functions containing address `addr`.

    `addr` is parsed with `int(addr, 0)`, so decimal and prefixed (e.g. `0x...`) strings are
    accepted; an `int` is used as is. The result is always a string: either one bullet list of
    callers per matching function, a note that no function contains the address, or a
    description of the failure.
    """
    # `!s` instead of `:s`: the argument originates from a tool call and is not guaranteed to be a
    # string - formatting it must never raise.
    log.info(tag, f"Tool call 'get_callers_by_address(addr={addr!s})'")
    try:
        _addr = addr if isinstance(addr, int) else int(addr, 0)
        funcs = bv.get_functions_containing(_addr)
        # An empty list (or `None`) both mean that no function contains the address
        if not funcs:
            return f"No functions found containing address `0x{_addr:x}`"
        callers = []
        for func in funcs:
            header = f"Callers of function `0x{func.start:x}: {func!s}`, which contains address `0x{_addr:x}`:"
            func_callers = "\n".join(
                f"- `0x{caller.start:x}`: `{caller.symbol.short_name:s}`"
                for caller in func.callers
            )
            callers.append(header + "\n" + func_callers + "\n")
        return "\n".join(callers)
    except Exception as e:
        # Deliberately broad: the failure is reported back to the caller as text
        msg = f"Failed to get callers of functions containing address '{addr!s}': {e!s}"
        log.warn(tag, msg)
        return msg
class TestSlicing:
    """
    This class implements general functionality for slicing tests.
    """

    @pytest.fixture(autouse=True)
    def setup(self) -> None:
        """
        This fixture runs before every test: it switches the logger to headless debug output,
        imports the libc configuration and reads the optional `EXT` environment variable that
        restricts the tested binaries to one file extension.
        """
        log.change_properties(level="debug", runs_headless=True)
        config_file = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "../../mole/conf/003-libc.yml"
        )
        config = ConfigService().import_config(config_file)
        self._model = ConfigModel(config)
        self._ext = os.environ.get("EXT", None)
        return

    def load_files(self, names: List[str]) -> List[str]:
        """
        This method returns all files below the `tests/data/bin` directory whose name without
        file extension is contained in `names`. If the environment variable `EXT` is set, only
        files with exactly that extension are returned. Every returned file name is recorded in
        the module-level set `tested_files`.
        """
        directory = os.path.join(os.path.dirname(__file__), "..", "data", "bin")
        files = []
        for dirpath, _, filenames in os.walk(directory):
            for filename in filenames:
                name, ext = os.path.splitext(filename)
                if name not in names:
                    continue
                if self._ext is None or self._ext == ext:
                    files.append(os.path.join(dirpath, filename))
                    tested_files.add(filename)
        return files

    def get_paths(
        self,
        bv: bn.BinaryView,
        max_workers: int | None = 1,
        fix_func_type: bool | None = False,
        max_call_level: int = 5,
        max_slice_depth: int = -1,
        max_memory_slice_depth: int = -1,
        enable_all_funs: bool = False,
    ) -> List[Path]:
        """
        This method is a helper to find paths: it runs a `PathService` on `bv` with the test
        configuration and returns the paths it identified.
        """
        slicer = PathService(
            bv=bv,
            config_model=self._model,
            max_workers=max_workers,
            fix_func_type=fix_func_type,
            max_call_level=max_call_level,
            max_slice_depth=max_slice_depth,
            max_memory_slice_depth=max_memory_slice_depth,
            enable_all_funs=enable_all_funs,
        )
        slicer.start()
        return slicer.paths()

    def assert_paths(
        self,
        srcs: List[Tuple[str, Optional[int]]],
        snks: List[Tuple[str, Optional[int]]],
        call_chains: List[List[str]],
        filenames: List[str],
        bv_callback: Optional[Callable[[bn.BinaryView], None]] = lambda bv: None,
    ) -> None:
        """
        This method slices every test binary matching `filenames` and asserts that the identified
        paths have the expected sources `srcs`, sinks `snks` (both given as tuples of symbol name
        and parameter index) and exactly the expected `call_chains`. The optional `bv_callback`
        is invoked after Binary Ninja's analysis and before slicing, e.g. to fix up types. The
        test is skipped if no matching binary exists, since nothing would be verified.
        """
        # Ensure relevant source functions are enabled
        src_names = [src[0] for src in srcs]
        for src_fun in self._model.get_functions("libc", fun_type="Sources"):
            if src_fun.name in src_names:
                src_fun.enabled = True
        # Ensure relevant sink functions are enabled
        snk_names = [snk[0] for snk in snks]
        for snk_fun in self._model.get_functions("libc", fun_type="Sinks"):
            if snk_fun.name in snk_names:
                snk_fun.enabled = True
        # Without any binary the assertions below would pass vacuously
        files = self.load_files(filenames)
        if not files:
            pytest.skip(f"no test binaries found for {filenames}")
        # Iterate over all test files
        for file in files:
            # Load and analyze test binary with Binary Ninja
            bv = bn.load(file)
            assert bv is not None, f"failed to load test binary '{file:s}'"
            try:
                bv.update_analysis_and_wait()
                if bv_callback is not None:
                    bv_callback(bv)
                # Find paths in test binary with backward slicing
                paths = self.get_paths(bv)
                # Determine call chains
                _call_chains = []
                for path in paths:
                    _call_chains.append(
                        [
                            call[0].source_function.symbol.short_name
                            for call in path.calls
                        ]
                    )
                    # Assert source
                    assert isinstance(
                        path.insts[-1],
                        bn.MediumLevelILCallSsa | bn.MediumLevelILTailcallSsa,
                    ), "invalid source instruction type"
                    assert (
                        path.src_sym_name,
                        path.src_par_idx,
                    ) in srcs, "invalid source"
                    # Assert sink
                    assert isinstance(
                        path.insts[0],
                        bn.MediumLevelILCallSsa | bn.MediumLevelILTailcallSsa,
                    ), "invalid sink instruction type"
                    assert (
                        path.snk_sym_name,
                        path.snk_par_idx,
                    ) in snks, "invalid sink"
                # Assert call chains: every expected chain must be found once and no unexpected
                # chain may remain
                for call_chain in call_chains:
                    assert call_chain in _call_chains, "invalid call chains"
                    _call_chains.remove(call_chain)
                assert not _call_chains, "invalid call chains"
            finally:
                # Close test binary, also when an assertion failed
                bv.file.close()
        return

6 | Mole Logo 7 |

8 | 9 | **_Mole_** is a *Binary Ninja* plugin designed to identify **interesting paths** in binaries. It performs **static backward slicing** on variables using *Binary Ninja*'s [*Medium Level Intermediate Language* (*MLIL*)](https://docs.binary.ninja/dev/bnil-mlil.html) in its *Static Single Assignment* (*SSA*) form. 10 | 11 | In *Mole*, a **path** refers to the flow of data between a defined source and sink. What constitutes an "interesting" path depends on the analysis goals. For instance, when searching for **vulnerabilities**, one might look for paths where untrusted inputs (sources) influence sensitive operations (sinks) in potentially dangerous ways. 12 | 13 | The following list highlights some of *Mole*'s current **features**: 14 | - **Operation Mode**: *Mole* can be run either within *Binary Ninja*'s UI or in headless mode. Headless mode is particularly useful for scripted analysis across a large number of binaries. Conversely, using *Mole* within the UI is ideal for closely investigating detected paths. 15 | - **Path Identification**: 16 | - **Configuration**: *Mole* allows users to define source and sink functions through Binary Ninja’s UI or configuration files (see [Usage](./docs/02-Usage.md#source-and-sink-functions)). This provides flexibility in selecting sources and sinks based on the specific usage scenario. 17 | - **Exploration**: To better understand a path and examine its characteristics, all instructions along the path can be printed or visually highlighted within *Binary Ninja*. Additionally, a side-by-side comparison of two paths can be displayed to quickly identify differences. Similar to instructions, a path's sequence of function calls can be printed or even visualized as a graph. 18 | - **Grouping**: To facilitate the identification of similar paths, *Mole* supports multiple grouping strategies. Currently, paths can be grouped based on matching source and sink functions, or by identical call sequences. 
New custom grouping strategies can easily be added to extend and customize this functionality (see [Customization](./docs/03-Customization.md#path-grouping-strategy)). 19 | - **Persistence**: Discovered paths can be annotated for clarity or removed if deemed irrelevant. To preserve analysis progress, paths can be saved directly to the target binary's database (*Binary Ninja*'s `.bndb` format). Paths can also be exported - for example, when performing headless analysis across many binaries on a file system, allowing identified paths to be later imported for easier exploration within *Binary Ninja*. 20 | - **Path Analysis With AI**: *Mole* can interact with local or remote *Large Language Models* (*LLMs*) via the *OpenAI API* (see [Usage](./docs/02-Usage.md#ai-analysis-mode)). The models are used to analyze identified paths and reason whether a given path corresponds to an exploitable vulnerability or not. The *LLM* attempts to classify the vulnerability and assign a severity level, which can help prioritize which paths are worth further investigation by an analyst. *Mole* provides a basic set of tools that the *LLM* can use to request additional information about the binary under analysis. This feature is an initial prototype and has not yet undergone systematic evaluation (e.g., models, prompts, tools). If it proves useful, we plan to improve it in future releases. 21 | 22 | - **Inter-Procedural Variable Slicing**: *Mole* supports slicing *MLIL variables* across function boundaries - a task that presents several challenges. For instance, statically determining a function's effective caller(s) is often difficult or even impossible. As a result, the implemented approach is an approximation. While not perfect, it performs reasonably well across a wide range of practical scenarios. 23 | - **Basic Pointer Analysis**: *Mole* currently implements a simplified strategy for tracking pointer usage. 
Like inter-procedural slicing, this approach is a simplification with inherent limitations (e.g., it cannot track global variables). Nevertheless, it performs well in many practical cases and is planned to be improved in future versions. 24 | 25 | ## Usage Example 26 | See *Mole* in action as it exposes [CVE-2021-4045](https://www.hacefresko.com/posts/tp-link-tapo-c200-unauthenticated-rce), an unauthenticated command injection flaw in the *TP-Link Tapo C200* IP camera (**click the image below to watch on YouTube**): 27 | 28 | [![Mole Usage Example](https://img.youtube.com/vi/jCn7ePhjYNk/maxresdefault.jpg)](https://www.youtube.com/watch?v=jCn7ePhjYNk) 29 | 30 | ## Documentation 31 | 1. [Installation](./docs/01-Installation.md) 32 | 2. [Usage](./docs/02-Usage.md) 33 | 3. [Customization](./docs/03-Customization.md) 34 | 35 | ## Contribute or Share Your Experience 36 | *Mole* is currently a **work in progress**. If you encounter a bug, have a useful new unit test that highlights a false positive or negative, or have a suggestion for a new feature, please consider opening an issue or contributing via a pull request. Also note that the current [unit tests](./tests/data/src/) have only been verified on `linux-x86_64` and `linux-armv7` binaries so far. 37 | 38 | If you have an interesting **success story** - such as finding a vulnerability with the help of *Mole* - we would love to hear about it! Feel free to share your experience with us. 
class Logger:
    """
    This class prints messages to the console or Binary Ninja's log.

    Messages go to Binary Ninja's log unless the logger runs headless or a debugger (`debugpy`)
    is attached; in those cases they are printed colored to stdout (debug, info) or stderr
    (warn, error).
    """

    _levels = ["debug", "info", "warning", "error"]

    # Per log method: (index in `_levels` above which the message is suppressed, name of the
    # `bn.Logger` method, tag printed on the console, whether the console output uses stderr)
    _channels = {
        "debug": (0, "log_debug", "DEBG", False),
        "info": (1, "log_info", "INFO", False),
        "warn": (2, "log_warn", "WARN", True),
        "error": (3, "log_error", "ERRO", True),
    }

    def __init__(
        self,
        level: Literal["debug", "info", "warning", "error"] = "debug",
        runs_headless: bool = False,
    ) -> None:
        """
        This method initializes a `Logger` that can be used to write messages of a given `level`
        (and above) to Binary Ninja's log and to stdout/stderr.
        """
        self._logger = bn.Logger(0, "Plugin: Mole")
        self.change_properties(level, runs_headless)
        self.find_attached_debugger()
        return

    def change_properties(
        self,
        level: Literal["debug", "info", "warning", "error"] = "debug",
        runs_headless: bool = False,
    ) -> None:
        """
        This method changes the properties of a `Logger`. It raises a `ValueError` if `level` is
        not one of the supported log levels.
        """
        try:
            self._level = self._levels.index(level)
        except ValueError:
            raise ValueError(
                f"Unknown log level '{level}', expected one of {self._levels}"
            ) from None
        self._runs_headless = runs_headless
        return

    def find_attached_debugger(self) -> None:
        """
        This method checks whether or not a debugger is attached, i.e. whether a `debugpy`
        module has been imported.
        """
        self._runs_debugger = any(
            module.startswith("debugpy") for module in sys.modules
        )
        return

    def get_level(self) -> str:
        """
        This method returns the configured log level.
        """
        return self._levels[self._level]

    def _tag_msg(
        self,
        tag: str | None = None,
        msg: str | None = None,
    ) -> str:
        """
        This method concatenates tag `tag` to the message `msg`. The result is stripped of
        leading and trailing whitespace; it is empty if neither `tag` nor `msg` is given.
        """
        m = ""
        if tag:
            m = f"[{tag:s}]"
        if msg:
            m = f"{m:s} {msg:s}"
        return m.strip()

    def _print(
        self,
        tag: str,
        msg: str,
        color: str,
        on_color: str | None = None,
        print_raw: bool = False,
        attrs: List[str] | None = None,
        file=None,
    ) -> None:
        """
        This method prints the message `msg` to the console. Unless `print_raw` is set, the
        message is prefixed with a timestamp and `tag`. `file` defaults to the current
        `sys.stdout`.
        """
        if not print_raw:
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            head = f"[{now:s}] [{tag:s}] "
        else:
            head = ""
        print(
            colored(f"{head:s}{msg:s}", color=color, on_color=on_color, attrs=attrs),
            # Resolved at call time so that a redirected stdout is honored
            file=sys.stdout if file is None else file,
            flush=True,
        )
        return

    def _emit(
        self,
        channel: str,
        tag: str | None,
        msg: str | None,
        color: str,
        on_color: str | None,
        print_raw: bool,
        attrs: List[str] | None,
    ) -> None:
        """
        This method implements the logic shared by `debug`, `info`, `warn` and `error`: drop the
        message if the configured level is above the channel's level, otherwise write it to
        Binary Ninja's log or to the console.
        """
        threshold, bn_method, head, use_stderr = self._channels[channel]
        if self._level > threshold:
            return
        text = self._tag_msg(tag, msg)
        if not self._runs_headless and not self._runs_debugger:
            getattr(self._logger, bn_method)(text)
        else:
            self._print(
                head,
                text,
                color=color,
                on_color=on_color,
                print_raw=print_raw,
                attrs=attrs,
                file=sys.stderr if use_stderr else sys.stdout,
            )
        return

    def debug(
        self,
        tag: str | None = None,
        msg: str | None = None,
        color: str = "magenta",
        on_color: str | None = None,
        print_raw: bool = False,
        attrs: List[str] | None = None,
    ) -> None:
        """
        This method prints a tagged message of log level debug to the console or Binary Ninja's log.
        """
        self._emit("debug", tag, msg, color, on_color, print_raw, attrs)
        return

    def info(
        self,
        tag: str | None = None,
        msg: str | None = None,
        color: str = "blue",
        on_color: str | None = None,
        print_raw: bool = False,
        attrs: List[str] | None = None,
    ) -> None:
        """
        This method prints a tagged message of log level info to the console or Binary Ninja's log.
        """
        self._emit("info", tag, msg, color, on_color, print_raw, attrs)
        return

    def warn(
        self,
        tag: str | None = None,
        msg: str | None = None,
        color: str = "yellow",
        on_color: str | None = None,
        print_raw: bool = False,
        attrs: List[str] | None = None,
    ) -> None:
        """
        This method prints a tagged message of log level warn to the console or Binary Ninja's log.
        """
        self._emit("warn", tag, msg, color, on_color, print_raw, attrs)
        return

    def error(
        self,
        tag: str | None = None,
        msg: str | None = None,
        color: str = "red",
        on_color: str | None = None,
        print_raw: bool = False,
        attrs: List[str] | None = None,
    ) -> None:
        """
        This method prints a tagged message of log level error to the console or Binary Ninja's log.
        """
        self._emit("error", tag, msg, color, on_color, print_raw, attrs)
        return
__attribute__ ((noinline))
void handle_post_request(int client_socket);

__attribute__ ((noinline))
char* receive_data(int client_socket, int *size);

__attribute__ ((noinline))
void log_request(const char *method, const char *buffer);

__attribute__ ((noinline))
void process_request(int client_socket, const char *method, const char *buffer);

__attribute__ ((noinline))
void handle_put_request(int client_socket);

__attribute__ ((noinline))
void handle_delete_request(int client_socket);

__attribute__ ((noinline))
void wrap_and_execute(const char *buffer);

/*
 * Entry point: creates the listening socket and hands every accepted
 * connection to handle_client(). Connections are served one at a time.
 */
int main() {
    int server_fd, client_socket;
    struct sockaddr_in address;
    int addrlen = sizeof(address);

    server_fd = create_server_socket(&address);

    while (1) {
        if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept");
            close(server_fd);
            exit(EXIT_FAILURE);
        }
        handle_client(client_socket);
    }

    // Not reached: the loop above has no break and only leaves via exit().
    close(server_fd);
    return 0;
}

/*
 * Reads one chunk from the client and dispatches on its leading bytes
 * (the HTTP method). Unknown methods are answered with 405 and the socket is
 * closed here; for known methods the socket is handed to the handler.
 * Note that every handler calls receive_data() again, i.e. performs a second
 * recv() on the same socket; the chunk read here is only used for dispatch.
 */
void handle_client(int client_socket) {
    int size;
    char *method = receive_data(client_socket, &size);

    if (method == NULL) {
        close(client_socket);
        return;
    }

    if (strncmp(method, "GET ", 4) == 0) {
        handle_get_request(client_socket);
    } else if (strncmp(method, "POST", 4) == 0) {
        handle_post_request(client_socket);
    } else if (strncmp(method, "PUT ", 4) == 0) {
        handle_put_request(client_socket);
    } else if (strncmp(method, "DELETE", 6) == 0) {
        handle_delete_request(client_socket);
    } else {
        send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n");
        close(client_socket);
    }

    free(method);
}

/*
 * GET handler: receives a chunk from the client, logs it and passes it to
 * process_request() with method "GET". Frees the received buffer.
 */
void handle_get_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("GET", buffer);
    process_request(client_socket, "GET", buffer);
    free(buffer);
}

/*
 * POST handler: same flow as handle_get_request(), with method "POST".
 */
void handle_post_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("POST", buffer);
    process_request(client_socket, "POST", buffer);
    free(buffer);
}

/*
 * PUT handler: same flow as handle_get_request(), with method "PUT".
 * process_request() answers this method with 501.
 */
void handle_put_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("PUT", buffer);
    process_request(client_socket, "PUT", buffer);
    free(buffer);
}

/*
 * DELETE handler: same flow as handle_get_request(), with method "DELETE".
 * process_request() answers this method with 501.
 */
void handle_delete_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("DELETE", buffer);
    process_request(client_socket, "DELETE", buffer);
    free(buffer);
}

/* Prints the request method and the received data to stdout. */
void log_request(const char *method, const char *buffer) {
    printf("Received %s request: %s\n", method, buffer);
}

/*
 * Handles a received request: for "GET" and "POST" the data is forwarded to
 * wrap_and_execute() and answered with 200; every other method is answered
 * with 501. The client socket is closed in both cases.
 */
void process_request(int client_socket, const char *method, const char *buffer) {
    if (strcmp(method, "GET") == 0 || strcmp(method, "POST") == 0) {
        wrap_and_execute(buffer);
        send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nRequest received.\n");
    } else {
        send_response(client_socket, "HTTP/1.1 501 Not Implemented\r\nContent-Type: text/plain\r\n\r\nRequest method not implemented.\n");
    }
    close(client_socket);
}

/*
 * Looks for "/cgi-bin/" in `buffer` and passes the text that follows it, up
 * to the next space, to system(). Nothing is executed if either the marker
 * or the terminating space is missing.
 *
 * NOTE(review): the text handed to system() comes straight from recv();
 * presumably this is the deliberate sink of this test case (the file lives
 * under tests/data/src) - confirm before "fixing" it.
 */
void execute_cgi_command(const char *buffer) {
    char *cgi_start = strstr(buffer, "/cgi-bin/");
    if (cgi_start) {
        cgi_start += strlen("/cgi-bin/");
        char *cgi_end = strchr(cgi_start, ' ');
        if (cgi_end) {
            // Terminates the command in place, i.e. writes into the caller's
            // buffer even though the parameter is declared const.
            *cgi_end = '\0';
            system(cgi_start);
        }
    }
}

/* Writes the NUL-terminated string `response` to the client socket. */
void send_response(int client_socket, const char *response) {
    write(client_socket, response, strlen(response));
}

/*
 * Creates a TCP socket, binds it to INADDR_ANY:PORT and starts listening
 * with a backlog of 3. `address` is filled in by this function. Any failure
 * terminates the process with EXIT_FAILURE. Returns the listening socket.
 */
int create_server_socket(struct sockaddr_in *address) {
    int server_fd;

    // NOTE(review): socket() reports failure with -1 per POSIX, so the `== 0`
    // comparison looks like it never detects an error - confirm whether this
    // is intentional for the test case before changing it.
    if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {
        perror("socket failed");
        exit(EXIT_FAILURE);
    }

    address->sin_family = AF_INET;
    address->sin_addr.s_addr = INADDR_ANY;
    address->sin_port = htons(PORT);

    if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) {
        perror("bind failed");
        close(server_fd);
        exit(EXIT_FAILURE);
    }

    if (listen(server_fd, 3) < 0) {
        perror("listen");
        close(server_fd);
        exit(EXIT_FAILURE);
    }

    return server_fd;
}

/*
 * Allocates a buffer of BUFFER_SIZE bytes and fills it with a single recv()
 * from `client_socket`. On success the data is NUL-terminated, the number of
 * received bytes is stored in `*size` and the buffer is returned; the caller
 * has to free() it. Returns NULL if malloc() or recv() fails (`*size` is
 * left untouched in that case).
 */
char* receive_data(int client_socket, int *size) {
    char *buffer = (char *)malloc(BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc");
        return NULL;
    }

    // Read at most BUFFER_SIZE - 1 bytes to keep room for the terminator.
    int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0);
    if (bytes_read < 0) {
        perror("recv");
        free(buffer);
        return NULL;
    }

    buffer[bytes_read] = '\0';
    *size = bytes_read;
    return buffer;
}

/*
 * Thin wrapper around execute_cgi_command(); it adds one more (non-inlined)
 * call level between process_request() and the command execution.
 */
void wrap_and_execute(const char *buffer) {
    execute_cgi_command(buffer);
}
@pytest.fixture
def temp_file() -> Generator[IO[str], None, None]:
    """Provides a temporary file for testing and removes it again on teardown."""
    import os  # local import: only needed for the cleanup below

    # delete=False keeps the file on disk after close(); presumably required so
    # that the code under test can reopen it by name - the fixture therefore has
    # to remove it explicitly.
    tf = tempfile.NamedTemporaryFile(mode="w+", delete=False)
    try:
        yield tf
    finally:
        # Runs even if the test fails, so no temporary file is left behind.
        tf.close()
        try:
            os.unlink(tf.name)
        except FileNotFoundError:
            pass


@pytest.fixture
def config_service() -> ConfigService:
    """Provides a ConfigService instance."""
    return ConfigService()


@pytest.fixture
def test_config() -> Configuration:
    """
    Provides a test Configuration object with one source function (`getenv`),
    one sink function (`memcpy`) and a set of settings of every setting type.
    """
    return Configuration(
        sources={
            "manual": Library(name="manual", categories={}),
            "libc": Library(
                name="libc",
                categories={
                    "Environment Accesses": Category(
                        name="Environment Accesses",
                        functions={
                            "getenv": SourceFunction(
                                name="getenv",
                                symbols=["getenv", "__builtin_getenv"],
                                synopsis="char* getenv(const char* name)",
                                enabled=True,
                                par_cnt="i == 1",
                                par_slice="False",
                            )
                        },
                    )
                },
            ),
        },
        sinks={
            "manual": Library(name="manual", categories={}),
            "libc": Library(
                name="libc",
                categories={
                    "Memory Copy": Category(
                        name="Memory Copy",
                        functions={
                            "memcpy": SinkFunction(
                                name="memcpy",
                                symbols=["memcpy", "__builtin_memcpy"],
                                synopsis="void* memcpy(void* dest, const void* src, size_t n)",
                                enabled=True,
                                par_cnt="i == 3",
                                par_slice="True",
                            )
                        },
                    )
                },
            ),
        },
        settings={
            "max_workers": SpinboxSetting(
                name="max_workers",
                value=-1,
                min_value=-1,
                max_value=256,
                help="maximum number of worker thread that backward slicing uses",
            ),
            "max_call_level": SpinboxSetting(
                name="max_call_level",
                value=5,
                min_value=-1,
                max_value=99,
                help="backward slicing visits called functions up to the given level",
            ),
            "max_slice_depth": SpinboxSetting(
                name="max_slice_depth",
                value=-1,
                min_value=-1,
                max_value=9999,
                help="maximum slice depth to stop the search",
            ),
            "max_memory_slice_depth": SpinboxSetting(
                name="max_memory_slice_depth",
                value=-1,
                min_value=-1,
                max_value=9999,
                help="maximum memory slice depth to stop the search",
            ),
            "src_highlight_color": ComboboxSetting(
                name="src_highlight_color",
                value="Orange",
                items=[
                    "Blue",
                    "Green",
                    "Cyan",
                    "Red",
                    "Magenta",
                    "Yellow",
                    "Orange",
                ],
                help="color used to highlight instructions originating from slicing a source function",
            ),
            "snk_highlight_color": ComboboxSetting(
                name="snk_highlight_color",
                value="Red",
                items=[
                    "Blue",
                    "Green",
                    "Cyan",
                    "Red",
                    "Magenta",
                    "Yellow",
                    "Orange",
                ],
                help="color used to highlight instructions originating from slicing a sink function",
            ),
            "path_grouping": ComboboxSetting(
                name="path_grouping",
                value="Call Graph",
                items=["Call Graph", "Source / Sink", "None"],
                help="strategy used to group paths",
            ),
            "openai_base_url": TextSetting(
                name="openai_base_url",
                value="https://api.openai.com/v1",
                help="OpenAI API base URL",
            ),
            "openai_api_key": TextSetting(
                name="openai_api_key",
                value="",
                help="OpenAI API key",
            ),
            "openai_model": TextSetting(
                name="openai_model",
                value="o4-mini",
                help="OpenAI model",
            ),
            "max_turns": SpinboxSetting(
                name="max_turns",
                value=10,
                min_value=2,
                max_value=256,
                help="maximum number of turns in a conversation with the AI",
            ),
            "max_completion_tokens": SpinboxSetting(
                name="max_completion_tokens",
                value=4096,
                min_value=-1,
                max_value=100000,
                help="maximum number of tokens in a completion",
            ),
            "temperature": DoubleSpinboxSetting(
                name="temperature",
                value=1.0,
                min_value=0.0,
                max_value=2.0,
                help="the sampling temperature to use",
            ),
        },
    )


class TestData:
    """
    This class implements unit tests for the data classes.
    """

    def test_serialize_configuration(
        self,
        temp_file: IO[str],
        config_service: ConfigService,
        test_config: Configuration,
    ) -> None:
        """
        Round-trip test: a configuration exported to a file and imported again
        must compare equal to the original.
        """
        ori_config: Configuration = test_config
        # Export configuration to temporary file (serialize)
        config_service.export_config(ori_config, temp_file.name)
        # Load configuration from temporary file (deserialize)
        temp_file.seek(0)
        des_config = config_service.import_config(temp_file.name)
        # Assert
        assert ori_config == des_config, "Serialization error of 'Configuration'"
        return