├── mole
    ├── __init__.py
    ├── common
    │   ├── __init__.py
    │   ├── helper
    │   │   ├── __init__.py
    │   │   ├── variable.py
    │   │   └── symbol.py
    │   ├── task.py
    │   ├── parse.py
    │   └── log.py
    ├── core
    │   ├── __init__.py
    │   └── ai.py
    ├── views
    │   ├── __init__.py
    │   └── sidebar.py
    ├── controllers
    │   ├── __init__.py
    │   └── ai.py
    ├── services
    │   └── __init__.py
    ├── resources
    │   └── icon.png
    ├── grouping
    │   ├── source_sink.py
    │   ├── call_graph.py
    │   └── __init__.py
    ├── models
    │   ├── __init__.py
    │   └── config.py
    ├── conf
    │   └── 001-settings.yml
    └── cli
    │   └── main.py
├── tests
    ├── __init__.py
    ├── slicing
    │   ├── __init__.py
    │   ├── test_serialization.py
    │   ├── test_pointer_deref.py
    │   ├── test_multithreading.py
    │   ├── test_mangling.py
    │   ├── test_function_out_params.py
    │   ├── test_simple_server.py
    │   ├── test_various.py
    │   ├── test_pointer.py
    │   ├── test_function_calling.py
    │   ├── test_object_oriented.py
    │   └── conftest.py
    ├── data
    │   ├── src
    │   │   ├── load-01.c
    │   │   ├── load-02.c
    │   │   ├── gets-01.c
    │   │   ├── name_mangling-03.cpp
    │   │   ├── function_calling-09.c
    │   │   ├── load-04.c
    │   │   ├── memcpy-11.c
    │   │   ├── pointer_analysis-11.c
    │   │   ├── name_mangling-04.cpp
    │   │   ├── pointer_analysis-14.c
    │   │   ├── gets-02.c
    │   │   ├── pointer_analysis-16.c
    │   │   ├── load-03.c
    │   │   ├── load-05.c
    │   │   ├── sscanf-01.c
    │   │   ├── function_out_params-06.c
    │   │   ├── pointer_analysis-01.c
    │   │   ├── pointer_analysis-07.c
    │   │   ├── memcpy-02.c
    │   │   ├── memcpy-03.c
    │   │   ├── memcpy-01.c
    │   │   ├── function_out_params-02.c
    │   │   ├── function_out_params-04.c
    │   │   ├── memcpy-06.c
    │   │   ├── pointer_analysis-02.c
    │   │   ├── memcpy-09.c
    │   │   ├── function_calling-07.c
    │   │   ├── pointer_analysis-17.c
    │   │   ├── function_calling-05.c
    │   │   ├── function_out_params-07.c
    │   │   ├── pointer_analysis-06.c
    │   │   ├── name_mangling-02.cpp
    │   │   ├── function_calling-08.c
    │   │   ├── function_calling-06.c
    │   │   ├── memcpy-08.c
    │   │   ├── name_mangling-01.cpp
    │   │   ├── function_calling-13.c
    │   │   ├── pointer_analysis-08.c
    │   │   ├── pointer_analysis-05.c
    │   │   ├── function_out_params-03.c
    │   │   ├── function_out_params-01.c
    │   │   ├── pointer_analysis-03.c
    │   │   ├── memcpy-04.c
    │   │   ├── function_out_params-05.c
    │   │   ├── function_out_params-08.c
    │   │   ├── pointer_analysis-09.c
    │   │   ├── memcpy-10.c
    │   │   ├── name_mangling-05.cpp
    │   │   ├── pointer_analysis-10.c
    │   │   ├── function_calling-11.c
    │   │   ├── memcpy-07.c
    │   │   ├── memcpy-05.c
    │   │   ├── name_mangling-06.cpp
    │   │   ├── pointer_analysis-04.c
    │   │   ├── function_calling-10.c
    │   │   ├── function_calling-14.c
    │   │   ├── function_calling-01.c
    │   │   ├── function_calling-12.c
    │   │   ├── function_calling-15.c
    │   │   ├── function_calling-02.c
    │   │   ├── pointer_analysis-15.c
    │   │   ├── function_calling-03.c
    │   │   ├── pointer_analysis-12.c
    │   │   ├── function_calling-04.c
    │   │   ├── object_oriented-01.cpp
    │   │   ├── object_oriented-02.cpp
    │   │   ├── pointer_analysis-13.c
    │   │   ├── object_oriented-03.cpp
    │   │   ├── simple_http_server-01.c
    │   │   ├── simple_http_server-02.c
    │   │   ├── simple_http_server-04.c
    │   │   └── simple_http_server-03.c
    │   └── Makefile
    ├── README.md
    └── test_data.py
├── development
    ├── debug.py
    ├── update_dependencies.py
    └── update_description.py
├── requirements.txt
├── .pre-commit-config.yaml
├── .gitignore
├── docs
    ├── 03-Customization.md
    ├── 01-Installation.md
    └── 04-Pointers.md
├── .github
    └── workflows
    │   └── release.yml
├── __init__.py
├── pyproject.toml
├── plugin.json
└── README.md


/mole/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/views/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/controllers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/services/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/slicing/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/common/helper/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/mole/resources/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cyber-defence-campus/mole/HEAD/mole/resources/icon.png


--------------------------------------------------------------------------------
/development/debug.py:
--------------------------------------------------------------------------------
1 | from binaryninja import connect_vscode_debugger
2 | 
3 | 
4 | connect_vscode_debugger(port=5678)  # hook this Binary Ninja Python session up to a VS Code debugger on port 5678 (NOTE(review): call may block until attached - confirm)
5 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | PyYAML==6.0.2
2 | ijson==3.3.0
3 | lark==1.2.2
4 | networkx[default]==3.4.2
5 | openai >= 1.78.1 , <2.0.0
6 | pydantic>=2.0.0
7 | termcolor==2.4.0
8 | 


--------------------------------------------------------------------------------
/tests/data/src/load-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - MLIL_LOAD with HLIL constant pointer dereferencing source
 8 | */
 9 | 
10 | char* cmd;
11 | 
12 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
13 | int main(int argc, char *argv[]) {  /* stores the CMD environment value in the global cmd, then executes it */
14 |     cmd = getenv("CMD");  /* environment value is written to the file-scope pointer */
15 |     return system(cmd);  /* the global is read back and passed to system(); no NULL check */
16 | }


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: local
 3 |     hooks:
 4 |       - id: check
 5 |         name: Check Code Issues
 6 |         entry: ruff check
 7 |         args: [ -q ]
 8 |         language: python
 9 |         files: .*\.py$
10 |   - repo: local
11 |     hooks:
12 |       - id: format
13 |         name: Format Code
14 |         entry: ruff format
15 |         language: python
16 |         files: .*\.py$


--------------------------------------------------------------------------------
/tests/data/src/load-02.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - MLIL_LOAD with HLIL variable dereferencing source
 8 | */
 9 | 
10 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
11 | int main(int argc, char *argv[]) {  /* routes the CMD environment value through a heap array slot into system() */
12 |     char** my_array = (char**) malloc(1 * sizeof(char*));  /* one-element pointer array; malloc result is not checked */
13 |     my_array[0] = getenv("CMD");  /* environment value stored through the heap pointer */
14 |     return system(my_array[0]);  /* slot 0 is loaded back and executed */
15 | }


--------------------------------------------------------------------------------
/tests/data/src/gets-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #define BUF_LEN 16
 5 | 
 6 | /*
 7 | Testcase Description:
 8 | - gets
 9 | */
10 | 
11 | char *gets(char *s);
12 | 
13 | int main(int argc, char *argv[]) {  /* reads one line from stdin with gets() and echoes it */
14 |     char s[BUF_LEN];  /* BUF_LEN-byte stack buffer */
15 | 
16 |     if(gets(s) == NULL) {  /* gets() takes no length argument, so the read is not bounded by BUF_LEN */
17 |         fprintf(stderr, "Could not read from STDIN.\n");
18 |         return EXIT_FAILURE;
19 |     }
20 | 
21 |     fprintf(stdout, "s: '%s'\n", s);
22 |     return EXIT_SUCCESS;
23 | }


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-Compiled / Optimized
 2 | __pycache__/
 3 | 
 4 | # Distribution / Packaging
 5 | build/
 6 | dist/
 7 | *.egg-info/
 8 | .gradle
 9 | 
10 | # Environments
11 | .venv/
12 | venv/
13 | 
14 | # Linting / Formatting
15 | .ruff_cache
16 | 
17 | # Visual Studio Code
18 | .vscode
19 | 
20 | # Custom YML Files
21 | mole/conf/*.yml
22 | mole/conf/*.yml.bak
23 | !mole/conf/001-settings.yml
24 | !mole/conf/003-libc.yml
25 | 
26 | # Test assets
27 | tests/data/bin/
28 | .coverage
29 | 


--------------------------------------------------------------------------------
/tests/data/src/name_mangling-03.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | 
 3 | /*
 4 | Testcase Description:
 5 | - Function in a namespace (C++ name mangling)
 6 | */
 7 | 
 8 | namespace ns  /* my_func lives in a namespace, so callers reach it as ns::my_func */
 9 | {
10 |     __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
11 |     int my_func(char *cmd) {  /* executes cmd and returns the status reported by system() */
12 |         return system(cmd);
13 |     }
14 | }
15 | 
16 | int main(int argc, char *argv[]) {  /* passes the CMD environment value to ns::my_func when it is set */
17 |     char *cmd = getenv("CMD");
18 |     if(cmd != NULL) {  /* only call the namespaced function for a non-NULL value */
19 |         ns::my_func(cmd);  /* return value of my_func is discarded */
20 |     }
21 |     return EXIT_SUCCESS;
22 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-09.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - Call function twice
 8 | */
 9 | 
10 | __attribute__ ((noinline))  /* keep this as a real call in the binary */
11 | char* func(char* env) {  /* identity function: returns its argument unchanged */
12 |     return env;
13 | }
14 | 
15 | __attribute__((optimize("O0")))  /* compiled without optimization */
16 | int main(int argc, char *argv[]) {  /* passes the ENV_CMD environment value through func() twice, then executes it */
17 |     char *env_cmd, *cmd;
18 |     env_cmd = getenv("ENV_CMD");
19 |     cmd = func(env_cmd);  /* first call: cmd aliases env_cmd */
20 |     cmd = func(cmd);  /* second call on the result of the first */
21 |     return system(cmd);  /* no NULL check before execution */
22 | }


--------------------------------------------------------------------------------
/tests/data/src/load-04.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - MLIL_LOAD with HLIL array indexing source
 8 | */
 9 | 
10 | #define BUF_SIZE 32
11 | 
12 | __attribute__ ((noinline, optimize("O1")))  /* never inlined; note this case is built at O1 */
13 | int main(int argc, char *argv[]) {  /* builds a grep command from two environment values stored in argv slots */
14 |     char cmd[BUF_SIZE];
15 |     argv[1] = getenv("FILE");  /* overwrites argv[1] regardless of argc */
16 |     argv[2] = getenv("TERM");  /* overwrites argv[2] regardless of argc */
17 |     snprintf(cmd, sizeof(cmd), "grep %s %s", argv[1], argv[2]);  /* both slots are read back by index; output truncated to BUF_SIZE */
18 |     return system(cmd);
19 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-11.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - uncontrollable
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* copies one of two constant strings into dest, chosen by whether MEMCPY_SRC is set */
11 |     size_t dest_size = 16;
12 |     char dest[dest_size];  /* variable-length array of 16 bytes */
13 |     char* env_src = getenv("MEMCPY_SRC");
14 |     if(env_src != NULL) {  /* the environment value only selects the branch; it is never copied */
15 |         memcpy(dest, "IF", 3);  /* constant source, constant size (includes the terminator) */
16 |         
17 |     } else {
18 |         memcpy(dest, "ELSE", 5);  /* constant source, constant size (includes the terminator) */
19 |     }
20 |     fprintf(stdout, "dest: '%s'\n", dest);
21 |     return EXIT_SUCCESS;
22 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-11.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <stdio.h>
 3 | 
 4 | #define CMD_LEN 64
 5 | 
 6 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
 7 | char* validate(char* cmd) {  /* returns cmd, or an empty string literal when cmd is NULL */
 8 |     if(cmd == NULL) cmd = "";  /* only NULL is replaced; the content is not inspected */
 9 |     return cmd;
10 | }
11 | 
12 | __attribute__ ((noinline))  /* keep this as a real call in the binary */
13 | int execute(char* cmd) {  /* runs cmd via system() after passing it through validate() */
14 |     cmd = validate(cmd);  /* parameter is reassigned with the validated pointer */
15 |     return system(cmd);
16 | }
17 | 
18 | int main(int argc, char *argv[]) {  /* copies the CMD environment value into a stack buffer and executes it */
19 |     char cmd[CMD_LEN];
20 |     char* env = getenv("CMD");
21 |     snprintf(cmd, CMD_LEN, "%s", env);  /* env is not NULL-checked before formatting; output truncated to CMD_LEN */
22 |     return execute(cmd);  /* the stack buffer, not env itself, is what reaches execute() */
23 | }


--------------------------------------------------------------------------------
/tests/data/src/name_mangling-04.cpp:
--------------------------------------------------------------------------------
 1 | #include <string>
 2 | 
 3 | /*
 4 | Testcase Description:
 5 | - Template instantiation (C++ name mangling)
 6 | */
 7 | 
 8 | template<typename T>  /* function template; T is the type of the dummy argument */
 9 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
10 | int my_func(char *cmd, T dummy) {  /* optionally logs cmd, then executes it and returns the system() status */
11 |     if(dummy) {  /* dummy only gates the log line below */
12 |         printf("[+] template function called with cmd='%s'\n", cmd);
13 |     }
14 |     return system(cmd);  /* executed regardless of dummy */
15 | }
16 | 
17 | int main(int argc, char *argv[]) {  /* passes the CMD environment value to the template function when it is set */
18 |     char *cmd = getenv("CMD");
19 |     if(cmd != NULL) {
20 |         my_func(cmd, 1);  /* T is deduced as int from the literal 1 */
21 |     }
22 |     return EXIT_SUCCESS;
23 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-14.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | int main() {  /* copies the CMD environment value into a stack buffer, executes it, then prints the buffer */
14 |     char cmd[CMD_LEN];
15 |     char *env_cmd = getenv("CMD");
16 |     if(env_cmd == NULL) {
17 |         printf("CMD not set.\n");
18 |         return EXIT_FAILURE;
19 |     }
20 |     snprintf(cmd, CMD_LEN, "%s", env_cmd);  /* output truncated to CMD_LEN */
21 |     int res = system(cmd);
22 |     printf("CMD: %s\n", cmd);  /* the buffer is used again after the system() call */
23 |     return res;  /* exit status is whatever system() returned */
24 | }


--------------------------------------------------------------------------------
/tests/data/src/gets-02.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define BUF_LEN 16
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - gets with memcpy
10 | */
11 | 
12 | char *gets(char *s);
13 | 
14 | int main(int argc, char *argv[]) {  /* reads a line from stdin with gets() and copies it into a second buffer */
15 |     char dest[BUF_LEN];
16 |     char src[BUF_LEN];
17 | 
18 |     if(gets(src) == NULL) {  /* gets() takes no length argument, so the read is not bounded by BUF_LEN */
19 |         fprintf(stderr, "Could not read from STDIN.\n");
20 |         return EXIT_FAILURE;
21 |     }
22 | 
23 |     memcpy(dest, src, BUF_LEN);  /* fixed-size copy of the whole source buffer */
24 |     fprintf(stdout, "dest: '%s'\n", dest);
25 |     return EXIT_SUCCESS;
26 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-16.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <string.h>
 3 | 
 4 | # define BUF_SIZE 64
 5 | 
 6 | /*
 7 | Testcase Description:
 8 | - Array element
 9 | */
10 | 
11 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
12 | int main(int argc, char * argv[])  /* appends the CMD environment value to the string ls and executes the result */
13 | {
14 |     char data_buf[BUF_SIZE] = "ls";
15 |     char* data = &data_buf[0];  /* pointer to the first array element */
16 |     char* env_cmd = getenv("CMD");
17 |     int data_len = strlen(data);
18 |     if (env_cmd != NULL)
19 |     {
20 |         strncat(data+data_len, env_cmd, BUF_SIZE-data_len-1);  /* appends at the current end; count leaves room for the terminator */
21 |     }
22 |     system(data);  /* runs even when CMD is unset (plain ls) */
23 |     return 0;
24 | }


--------------------------------------------------------------------------------
/tests/data/src/load-03.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - MLIL_LOAD with HLIL variable+offset dereferencing source
 8 | */
 9 | 
10 | #define BUF_SIZE 32
11 | 
12 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
13 | int main(int argc, char *argv[]) {  /* builds a grep command from two environment values kept in heap array slots 1 and 2 */
14 |     char cmd[BUF_SIZE];
15 |     char** my_array = (char**) malloc(3 * sizeof(char*));  /* malloc result is not checked; slot 0 is never used */
16 |     my_array[1] = getenv("FILE");
17 |     my_array[2] = getenv("TERM");
18 |     snprintf(cmd, sizeof(cmd), "grep %s %s", my_array[1], my_array[2]);  /* slots are loaded back at non-zero offsets; output truncated to BUF_SIZE */
19 |     return system(cmd);
20 | }


--------------------------------------------------------------------------------
/tests/data/src/load-05.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - MLIL_LOAD_STRUCT with HLIL field dereferencing source
 8 | */
 9 | 
10 | #define BUF_SIZE 32
11 | 
12 | char dest[BUF_SIZE];
13 | 
14 | typedef struct {  /* pairs a source pointer with a byte count; main() uses both as memcpy arguments */
15 |     char* src;  /* copy source */
16 |     int size;  /* copy length */
17 | } MyStruct;
18 | 
19 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
20 | int main(int argc, char *argv[]) {  /* fills a struct from the environment and uses its fields as memcpy source and size */
21 |     MyStruct s, *p = &s;  /* all accesses below go through the pointer p */
22 |     p->src = getenv("CMD");
23 |     p->size = atoi(getenv("SIZE"));  /* getenv result is passed to atoi without a NULL check */
24 |     memcpy(dest, p->src, p->size);  /* size is not checked against the BUF_SIZE-byte global dest */
25 |     return 0;
26 | }


--------------------------------------------------------------------------------
/tests/data/src/sscanf-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - sscanf
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* parses the SSCANF_STR environment value as a decimal integer and prints the outcome */
11 |     int result, integer;  /* integer is not initialized */
12 | 
13 |     char *env_input = getenv("SSCANF_STR");
14 |     if(env_input == NULL) {
15 |         fprintf(stderr, "SSCANF_STR environment variable not set.\n");
16 |         return EXIT_FAILURE;
17 |     }
18 | 
19 |     result = sscanf(env_input, "%d", &integer);  /* result holds the number of converted items */
20 |     fprintf(stdout, "result: '%d', integer: '%d'\n", result, integer);  /* integer is printed even if nothing was converted */
21 | 
22 |     return EXIT_SUCCESS;
23 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-06.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (char**): not written and influence on the sink
 7 | */
 8 | 
 9 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
10 | int check_cmd(char** cmd){  /* returns 0 when *cmd is non-NULL, -1 otherwise */
11 |     if(*cmd != NULL){  /* the pointee is only read, never written */
12 |         return 0;
13 |     }
14 |     return -1;
15 | }
16 | 
17 | int main() {  /* executes the CMD environment value after check_cmd() confirms it is set */
18 |     char *cmd = getenv("CMD");  /* value is obtained here, before the helper is called */
19 |     if (check_cmd(&cmd) == 0) {  /* address of cmd is passed, but the helper leaves it unchanged */
20 |         system(cmd);
21 |     } else {
22 |         fprintf(stderr, "CMD environment variable not set.\n");
23 |     }
24 |     return 0;
25 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | int main(int argc, char *argv[]) {  /* copies the SYSTEM_COMMAND environment value into a stack buffer and executes it */
14 |     char cmd[CMD_LEN];
15 | 
16 |     char *env_cmd = getenv("SYSTEM_COMMAND");
17 |     if(env_cmd == NULL) {
18 |         fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
19 |         return EXIT_FAILURE;
20 |     }
21 |     snprintf(cmd, CMD_LEN, "%s", env_cmd);  /* output truncated to CMD_LEN */
22 |     return system(cmd);  /* the buffer, not env_cmd itself, is passed to system() */
23 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-07.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define BUF_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - memcpy with user-controllable source
11 | */
12 | 
13 | char* src;
14 | 
15 | __attribute__((noinline))  /* keep this as a real call in the binary */
16 | void my_getenv(char **env_src) {  /* writes the MEMCPY_SRC environment value through the out-parameter */
17 |     *env_src = getenv("MEMCPY_SRC");  /* may store NULL when the variable is unset */
18 | }
19 | 
20 | int main(int argc, char *argv[]) {  /* fills the global src via my_getenv() and copies BUF_LEN bytes from it */
21 |     char dest[BUF_LEN];
22 | 
23 |     my_getenv(&src);  /* address of the file-scope pointer is passed */
24 |     memcpy(dest, src, BUF_LEN);  /* fixed-size copy; src is not NULL-checked and its length is not considered */
25 |     fprintf(stdout, "dest: %s\n", dest);
26 | 
27 |     return EXIT_SUCCESS;
28 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-02.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable source and size
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* copies the MEMCPY_SRC environment value into a 16-byte buffer using its own length */
11 |     char dest[16];
12 | 
13 |     char* env_src = getenv("MEMCPY_SRC");
14 |     if(env_src == NULL) {
15 |         fprintf(stderr, "MEMCPY_SRC environment variable not set.\n");
16 |         return EXIT_FAILURE;
17 |     }
18 |     size_t n = strlen(env_src);  /* size is derived from the environment value */
19 | 
20 |     memcpy(dest, env_src, n);  /* n is not checked against the 16 bytes of dest */
21 |     dest[n] = '\0';  /* index n is likewise unchecked */
22 |     fprintf(stdout, "dest: '%s'\n", dest);
23 | 
24 |     return EXIT_SUCCESS;
25 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-03.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable destination
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* copies a constant string into the memory returned by getenv for MEMCPY_DEST */
11 |     char src[] = "03-memcpy";
12 | 
13 |     char* env_dest = getenv("MEMCPY_DEST");
14 |     if(env_dest == NULL) {
15 |         fprintf(stderr, "MEMCPY_DEST environment variable not set.\n");
16 |         return EXIT_FAILURE;
17 |     }
18 |     size_t n = strlen(src);  /* size comes from the constant source */
19 | 
20 |     memcpy(env_dest, src, n);  /* destination is the environment string itself; its length is not checked */
21 |     env_dest[n] = '\0';
22 |     fprintf(stdout, "dest: '%s'\n", env_dest);
23 | 
24 |     return EXIT_SUCCESS;
25 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable size
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* copies from a constant string using a byte count parsed from MEMCPY_SIZE */
11 |     char dest[16];
12 |     char src[] = "memcpy-01";
13 | 
14 |     char* env_size = getenv("MEMCPY_SIZE");
15 |     if(env_size == NULL) {
16 |         fprintf(stderr, "MEMCPY_SIZE environment variable not set.\n");
17 |         return EXIT_FAILURE;
18 |     }
19 |     int n = atoi(env_size);  /* size is parsed from the environment value; may be negative */
20 | 
21 |     memcpy(dest, src, n);  /* n is not checked against the size of dest or src */
22 |     dest[n] = '\0';  /* index n is likewise unchecked */
23 |     fprintf(stdout, "dest: '%s'\n", dest);
24 | 
25 |     return EXIT_SUCCESS;
26 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-02.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (char**): written and influence on the sink
 7 | */
 8 | 
 9 | __attribute__ ((noinline))  /* keep this as a real call in the binary */
10 | int get_cmd(char **out_cmd){  /* stores the CMD environment value in *out_cmd; returns 0 on success, -1 if unset */
11 |     char *env_cmd = getenv("CMD");
12 |     if (env_cmd != NULL) {
13 |         *out_cmd = env_cmd;  /* out-parameter is written only on the success path */
14 |         return 0;
15 |     }
16 |     return -1;  /* *out_cmd is left untouched */
17 | }
18 | 
19 | int main() {  /* obtains a command through get_cmd()'s out-parameter and executes it */
20 |     char *cmd = NULL;
21 |     if (get_cmd(&cmd) == 0) {  /* on success cmd now points at the environment value */
22 |         system(cmd);
23 |     } else {
24 |         fprintf(stderr, "CMD environment variable not set.\n");
25 |     }
26 |     return 0;
27 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-04.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (MyStruct*): field cmd written and influence on the sink
 7 | */
 8 | 
 9 | typedef struct {  /* single-field wrapper used as an out-parameter by get_cmd() */
10 |     char* cmd;  /* command string; written by get_cmd(), read by main() */
11 | } MyStruct;
12 | 
13 | __attribute__ ((noinline))  /* keep this as a real call in the binary */
14 | int get_cmd(MyStruct *s){  /* stores the CMD environment value in s->cmd; returns 0 on success, -1 if unset */
15 |     char *env_cmd = getenv("CMD");
16 |     if (env_cmd != NULL) {
17 |         s->cmd = env_cmd;  /* struct field is written only on the success path */
18 |         return 0;
19 |     }
20 |     return -1;  /* s->cmd is left untouched */
21 | }
22 | 
23 | int main() {  /* obtains a command through a struct out-parameter and executes it */
24 |     MyStruct s;  /* not initialized; s.cmd is only read after get_cmd() reports success */
25 |     if (get_cmd(&s) == 0) {
26 |         system(s.cmd);
27 |     } else {
28 |         fprintf(stderr, "CMD environment variable not set.\n");
29 |     }
30 |     return 0;
31 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-06.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - uncontrollable source
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* reads MEMCPY_SELECT but overwrites the pointer with a constant before copying */
11 |     char dest[16];
12 |     char src[] = "01-memcpy";
13 |     size_t n = strlen(src);  /* size comes from the constant source */
14 | 
15 |     char *env_select = getenv("MEMCPY_SELECT");
16 |     if(env_select == NULL) {
17 |         fprintf(stderr, "MEMCPY_SELECT environment variable not set.\n");
18 |         return EXIT_FAILURE;
19 |     }
20 |     env_select = src;  /* pointer is redirected to the local constant; the environment value is dropped */
21 | 
22 |     memcpy(dest, env_select, n);  /* source is src here, not the environment value */
23 |     dest[n] = '\0';
24 |     fprintf(stdout, "dest: '%s'\n", dest);
25 | 
26 |     return EXIT_SUCCESS;
27 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-02.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | int main(int argc, char *argv[]) {  /* writes the environment value into cmd, then overwrites it twice with constants before executing */
14 |     char cmd[CMD_LEN];
15 |     
16 |     char *env_cmd = getenv("SYSTEM_COMMAND");
17 |     if(env_cmd == NULL) {
18 |         fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
19 |         return EXIT_FAILURE;
20 |     }
21 |     snprintf(cmd, CMD_LEN, "%s", env_cmd);  /* first write: environment value */
22 |     snprintf(cmd, CMD_LEN, "%d", 1337);  /* second write: constant, replaces the string from its start */
23 |     snprintf(cmd, CMD_LEN, "%d", 31337);  /* third write: constant */
24 |     return system(cmd);  /* at this point the string in cmd is the last constant */
25 | }


--------------------------------------------------------------------------------
/mole/common/helper/variable.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | import binaryninja as bn
 3 | 
 4 | 
 5 | class VariableHelper:
 6 |     """
 7 |     Static helpers that render Binary Ninja variables as short display strings.
 8 |     """
 9 | 
10 |     @staticmethod
11 |     def get_var_info(var: bn.Variable) -> str:
12 |         """
13 |         Return the name of the variable `var` as a string.
14 |         """
15 |         return f"{var.name}"  # the f-string coerces the name to `str`
16 | 
17 |     @staticmethod
18 |     def get_ssavar_info(var: bn.SSAVariable) -> str:
19 |         """
20 |         Return the SSA variable `var` formatted as `<name>#<version>`.
21 |         """
22 |         return f"{var.name}#{var.version}"  # name and SSA version joined by `#`
23 | 


--------------------------------------------------------------------------------
/tests/data/src/memcpy-09.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - uncontrollable
 8 | */
 9 | 
10 | int main(int argc, char *argv[]) {  /* prints MEMCPY_UNUSED, then performs a memcpy that uses only constant data */
11 |     char dest[16];
12 |     char src[] = "01-memcpy";
13 | 
14 |     char* env_unused = getenv("MEMCPY_UNUSED");
15 |     if(env_unused == NULL) {
16 |         fprintf(stderr, "MEMCPY_UNUSED environment variable not set.\n");
17 |         return EXIT_FAILURE;
18 |     }
19 |     fprintf(stdout, "unused: '%s'\n", env_unused);  /* the only use of the environment value */
20 | 
21 |     size_t n = strlen(src);  /* size comes from the constant source */
22 |     memcpy(dest, src, n);  /* neither source, destination nor size depends on the environment */
23 |     dest[n] = '\0';
24 |     fprintf(stdout, "dest: '%s'\n", dest);
25 | 
26 |     return EXIT_SUCCESS;
27 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-07.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - allow function inlining
 8 | - do not follow all function parameters blindly (negative)
 9 | */
10 | 
11 | char* func(char* env) {  /* returns a freshly allocated constant string ls; env is only printed */
12 |     char *cmd = (char *) malloc(4);  /* malloc result is not checked */
13 |     cmd[0] = 'l';
14 |     cmd[1] = 's';
15 |     cmd[2] = '\0';
16 |     cmd[3] = '\0';
17 |     fprintf(stdout, "--- FUN ---\n");
18 |     fprintf(stdout, "env: '%s'\ncmd: '%s'\n", env, cmd);  /* env appears here and nowhere else in the function */
19 |     return cmd;  /* returned buffer holds only the constants written above */
20 | }
21 | 
22 | int main(int argc, char *argv[]) {  /* passes the CMD environment value to func() and executes what func() returns */
23 |     char *env, *cmd;
24 |     env = getenv("CMD");
25 |     cmd = func(env);  /* env is not NULL-checked before the call */
26 |     system(cmd);  /* cmd is the buffer allocated inside func(); it is never freed */
27 |     return EXIT_SUCCESS;
28 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-17.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <string.h>
 3 | #include <sys/socket.h>
 4 | 
 5 | #define BUF_SIZE 1024
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | */
11 | 
12 | struct MyStruct {  /* socket descriptor plus a BUF_SIZE-byte buffer that main() later passes to system() */
13 |     int sock_fd;  /* descriptor returned by socket() */
14 |     char buf[BUF_SIZE];  /* receives a copy of the data read from the socket */
15 | };
16 | 
17 | __attribute__ ((noinline, optimize("O0")))  /* never inlined, compiled without optimization */
18 | int main(int argc, char * argv[]) {  /* receives bytes from a socket, copies them into a heap struct field and executes them */
19 |     int n;
20 |     char buf[BUF_SIZE];
21 |     struct MyStruct* s;
22 | 
23 |     s = malloc(sizeof(struct MyStruct));  /* malloc result is not checked */
24 |     s->sock_fd = socket(AF_INET, SOCK_STREAM, 0);  /* socket is created but never connected or bound in this function */
25 | 
26 |     n = recv(s->sock_fd, &buf, BUF_SIZE, 0);  /* the descriptor is read back from the struct field */
27 |     memcpy(&s->buf, &buf, n);  /* n is used unchecked, including a possible negative return from recv */
28 |     system((const char*) &s->buf);  /* no terminator is appended before the field is used as a string */
29 | 
30 |     return 0;
31 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-05.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - allow function inlining
 8 | - do not follow all function parameters blindly (positive)
 9 | */
10 | 
11 | char* func(char* env) {  /* returns a heap-allocated byte-for-byte copy of env */
12 |     size_t len = strlen(env);  /* env is dereferenced without a NULL check */
13 |     char *cmd = (char *) malloc(len+1);  /* room for the terminator; malloc result is not checked */
14 |     for(int i = 0; i<len; i++) {  /* manual copy loop, one character per iteration */
15 |         cmd[i] = env[i];
16 |     }
17 |     cmd[len] = '\0';
18 |     fprintf(stdout, "env: '%s'\ncmd: '%s'\n", env, cmd);
19 |     return cmd;  /* caller owns the buffer */
20 | }
21 | 
22 | int main(int argc, char *argv[]) {  /* passes the CMD environment value to func() and executes the copy it returns */
23 |     char *env, *cmd;
24 |     env = getenv("CMD");
25 |     cmd = func(env);  /* env is not NULL-checked before the call */
26 |     system(cmd);  /* cmd is the buffer allocated inside func(); it is never freed */
27 |     return EXIT_SUCCESS;
28 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-07.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (char*) : written but no influence on the sink
 7 | - Output parameter 2 (char**): not written and influence on the sink
 8 | */
 9 | 
10 | __attribute__ ((noinline, optimize("O0")))
11 | int check_cmd(char* msg, char** cmd){  /* Returns 0 when *cmd is non-NULL; otherwise prints the MSG environment variable (if set) and returns -1. */
12 |     if(*cmd != NULL){  /* *cmd is only read, never written */
13 |         return 0;
14 |     }
15 |     msg = getenv("MSG");  /* assigns the by-value parameter only; the caller's pointer is unchanged */
16 |     if(msg != NULL){
17 |         printf("%s!\n", msg);
18 |     }
19 |     return -1;
20 | }
21 | 
22 | int main() {  /* Executes the CMD environment variable with system() when check_cmd() returns 0. */
23 |     char *msg = NULL;
24 |     char *cmd = getenv("CMD");
25 |     if (check_cmd(msg, &cmd) == 0) {  /* msg is passed by value, cmd by address */
26 |         system(cmd);
27 |     }
28 |     return 0;
29 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-06.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define BUF_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - memcpy size is user-controllable
11 | */
12 | 
13 | __attribute__((noinline))
14 | void modify_n(int *n) {  /* Overwrites *n with atoi(MEMCPY_SIZE) when that environment variable is set; otherwise leaves *n untouched. */
15 |     char* env_n = getenv("MEMCPY_SIZE");
16 |     if(env_n != NULL) {
17 |         *n = atoi(env_n);  /* no range check on the parsed value */
18 |     }
19 | }
20 | 
21 | int main(int argc, char *argv[]) {  /* Copies src into dest with a length that modify_n() may replace through n_ptr. */
22 |     char dest[BUF_LEN], src[] = "pointer_analysis";
23 |     int n, *n_ptr;
24 | 
25 |     n = BUF_LEN;
26 |     n_ptr = &n;  /* alias of n; modify_n() writes through it */
27 |     
28 |     modify_n(n_ptr);
29 |     memcpy(dest, src, n);  /* n is used without validation against the sizes of dest and src */
30 |     fprintf(stdout, "n: '%d'\n", n);
31 | 
32 |     return EXIT_SUCCESS;
33 | }


--------------------------------------------------------------------------------
/tests/data/src/name_mangling-02.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | 
 3 | /*
 4 | Testcase Description:
 5 | - Member function of a class (C++ name mangling)
 6 | */
 7 | 
 8 | struct MyStruct {  /* Struct variant of the member-function test case. */
 9 |     __attribute__ ((noinline, optimize("O0")))
10 |     int my_func(char *cmd) {  /* forwards cmd unchanged to system() and returns its result */
11 |         return system(cmd);
12 |     }
13 | };
14 | 
15 | class MyClass {  /* Class variant of the member-function test case; same body as MyStruct::my_func. */
16 |     public:
17 |     __attribute__ ((noinline, optimize("O0")))
18 |     int my_func(char *cmd) {  /* forwards cmd unchanged to system() and returns its result */
19 |         return system(cmd);
20 |     }
21 | };
22 | 
23 | int main(int argc, char *argv[]) {  /* Passes the CMD environment variable to both member functions when it is set. */
24 |     char *cmd = getenv("CMD");
25 |     if(cmd != NULL) {
26 |         MyStruct s;
27 |         s.my_func(cmd);
28 |         MyClass c;
29 |         c.my_func(cmd);
30 |     }
31 |     return EXIT_SUCCESS;
32 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-08.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - do not follow all function parameters blindly (negative)
 9 | */
10 | 
11 | __attribute__ ((noinline))
12 | char* func(char* env) {  /* Returns a 4-byte heap buffer holding the constant string "ls"; env is only printed, never copied into the result. */
13 |     char *cmd = (char *) malloc(4);  /* malloc result is not checked */
14 |     cmd[0] = 'l';
15 |     cmd[1] = 's';
16 |     cmd[2] = '\0';
17 |     cmd[3] = '\0';
18 |     fprintf(stdout, "--- FUN ---\n");
19 |     fprintf(stdout, "env: '%s'\ncmd: '%s'\n", env, cmd);  /* env may be NULL here; main does not check getenv() */
20 |     return cmd;
21 | }
22 | 
23 | int main(int argc, char *argv[]) {  /* Reads CMD, calls func() with it and executes func()'s return value with system(). */
24 |     char *env, *cmd;
25 |     env = getenv("CMD");
26 |     cmd = func(env);  /* the returned buffer does not contain data from env */
27 |     system(cmd);  /* cmd is never freed */
28 |     return EXIT_SUCCESS;
29 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-06.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - do not follow all function parameters blindly (positive)
 9 | */
10 | 
11 | __attribute__ ((noinline))
12 | char* func(char* env) {  /* Returns a heap-allocated, NUL-terminated copy of env built one byte at a time; the caller owns the buffer. */
13 |     size_t len = strlen(env);  /* env is dereferenced without a NULL check */
14 |     char *cmd = (char *) malloc(len+1);  /* +1 for the terminator written below; malloc result is not checked */
15 |     for(int i = 0; i<len; i++) {  /* int index compared against size_t len */
16 |         cmd[i] = env[i];
17 |     }
18 |     cmd[len] = '\0';
19 |     fprintf(stdout, "env: '%s'\ncmd: '%s'\n", env, cmd);
20 |     return cmd;
21 | }
22 | 
23 | int main(int argc, char *argv[]) {  /* Reads the CMD environment variable, copies it via func() and executes the copy with system(). */
24 |     char *env, *cmd;
25 |     env = getenv("CMD");  /* NULL when CMD is unset; passed on unchecked */
26 |     cmd = func(env);
27 |     system(cmd);  /* cmd is never freed */
28 |     return EXIT_SUCCESS;
29 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-08.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable size
 8 | - non-reachable
 9 | */
10 | 
11 | volatile int always_false = 0;
12 | 
13 | int main(int argc, char *argv[]) {  /* Parses MEMCPY_SIZE into n, but only uses it in a memcpy guarded by always_false. */
14 |     char dest[16];
15 |     char src[] = "01-memcpy";
16 | 
17 |     char* env_size = getenv("MEMCPY_SIZE");
18 |     if(env_size == NULL) {
19 |         fprintf(stderr, "MEMCPY_SIZE environment variable not set.\n");
20 |         return EXIT_FAILURE;
21 |     }
22 |     int n = atoi(env_size);  /* no bounds check on the parsed size */
23 | 
24 |     if(always_false) {  /* always_false is a volatile global initialised to 0 and never written in this file */
25 |         memcpy(dest, src, n);
26 |         dest[n] = '\0';
27 |         fprintf(stdout, "dest: '%s'\n", dest);
28 |     }
29 | 
30 |     return EXIT_SUCCESS;
31 | }


--------------------------------------------------------------------------------
/tests/data/src/name_mangling-01.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | #include <cstdio>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Function with overloading (C++ name mangling)
 7 | */
 8 | 
 9 | __attribute__ ((noinline, optimize("O0")))
10 | int overloaded_func(char *cmd) {  /* One-argument overload: runs cmd with system() and returns its result. */
11 |     return system(cmd);
12 | }
13 | 
14 | __attribute__ ((noinline, optimize("O0")))
15 | int overloaded_func(char *cmd, int debug) {  /* Two-argument overload: optionally logs cmd, then runs it with system() and returns its result. */
16 |     if(debug) {
17 |         printf("[+] overloaded_func called with cmd='%s'\n", cmd);
18 |     }
19 |     return system(cmd);
20 | }
21 | 
22 | 
23 | int main(int argc, char *argv[]) {  /* Calls both overloads with the CMD environment variable when it is set. */
24 |     char *cmd = getenv("CMD");
25 |     if(cmd != NULL) {
26 |         overloaded_func(cmd);
27 |         overloaded_func(cmd, 1);  /* debug enabled */
28 |     }
29 |     return EXIT_SUCCESS;
30 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-13.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - with tail calls
 9 | - direct recursion
10 | */
11 | 
12 | __attribute__ ((noinline, optimize("O0")))
13 | int system_1(char *cmd, int* cnt) {  /* Decrements *cnt and calls itself until *cnt <= 0, then runs cmd with system(). */
14 |     if(*cnt <= 0) {
15 |         return system(cmd);
16 |     } else {
17 |         (*cnt)--;  /* the counter lives in the caller and is shared across all recursion levels */
18 |         return system_1(cmd, cnt);
19 |     }
20 | }
21 | 
22 | int main(int argc, char *argv[]) {  /* Passes the CMD environment variable to system_1() with argc as the recursion counter. */
23 |     int cnt = argc;
24 |     char *env_cmd = getenv("CMD");
25 |     if(env_cmd == NULL) {
26 |         fprintf(stderr, "CMD environment variable not set.\n");
27 |         return EXIT_FAILURE;
28 |     }
29 |     return system_1(env_cmd, &cnt);
30 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-08.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define BUF_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - memcpy with user-controllable source
11 | */
12 | 
13 | char* src;
14 | 
15 | __attribute__((noinline))
16 | void dummy(int *value) {  /* Sets *value to 0; it does not touch any other data. */
17 |     *value = 0;
18 | }
19 | 
20 | __attribute__((noinline))
21 | void my_getenv(char **env_src) {  /* Stores the MEMCPY_SRC environment variable (or NULL when unset) in *env_src. */
22 |     *env_src = getenv("MEMCPY_SRC");
23 | }
24 | 
25 | int main(int argc, char *argv[]) {  /* Fills the global src via my_getenv() and copies BUF_LEN bytes from it into dest. */
26 |     int value = 1;
27 |     char dest[BUF_LEN];
28 | 
29 |     my_getenv(&src);  /* src is NULL afterwards if MEMCPY_SRC is unset; it is not checked */
30 |     dummy(&value);  /* unrelated out-parameter call between the source and the sink */
31 |     memcpy(dest, src, BUF_LEN);  /* always reads BUF_LEN bytes regardless of the string length */
32 |     fprintf(stdout, "dest: %s, value: %d\n", dest, value);
33 | 
34 |     return EXIT_SUCCESS;
35 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-05.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define BUF_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - memcpy size is not user-controllable
11 | */
12 | 
13 | __attribute__((noinline))
14 | void modify_n(int *n) {  /* Unconditionally overwrites *n with the constant BUF_LEN. */
15 |     *n = BUF_LEN;
16 | }
17 | 
18 | int main(int argc, char *argv[]) {  /* Parses MEMCPY_SIZE into n, then lets modify_n() reset n to BUF_LEN before the memcpy. */
19 |     char dest[BUF_LEN], src[] = "pointer_analysis";
20 |     int n, *n_ptr;
21 | 
22 |     n = BUF_LEN;
23 |     n_ptr = &n;  /* alias of n; modify_n() writes through it */
24 | 
25 |     char* env_n = getenv("MEMCPY_SIZE");
26 |     if(env_n != NULL) {
27 |         n = atoi(env_n);
28 |         modify_n(n_ptr);  /* overwrites the parsed value with BUF_LEN */
29 |         memcpy(dest, src, n);  /* n is BUF_LEN here, which exceeds the size of src */
30 |         fprintf(stdout, "n: '%d'\n", n);
31 |     }
32 | 
33 |     return EXIT_SUCCESS;
34 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-03.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (char**): written and influence on the sink
 7 | */
 8 | 
 9 | __attribute__ ((noinline, optimize("O0")))
10 | int get_cmd(char **out_cmd){  /* Stores the CMD environment variable in *out_cmd and returns 0; when CMD is unset stores "Test", prints it and returns -1. */
11 |     char* env_cmd = getenv("CMD");
12 |     char** out_cmd_cpy = out_cmd;  /* local alias; every write below goes through it */
13 |     if (env_cmd != NULL) {
14 |         *out_cmd_cpy = env_cmd;
15 |         return 0;
16 |     }
17 |     *out_cmd_cpy = "Test";
18 |     printf("%s\n", *out_cmd_cpy);
19 |     return -1;
20 | }
21 | 
22 | int main() {  /* Runs the command delivered through get_cmd()'s out-parameter when get_cmd() returns 0. */
23 |     char *cmd = NULL;
24 |     if (get_cmd(&cmd) == 0) {
25 |         system(cmd);
26 |     } else {
27 |         fprintf(stderr, "CMD environment variable not set.\n");
28 |     }
29 |     return 0;
30 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define BUF_SIZE 8
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - Output parameter 1 (int*): written and influence on the sink
10 | */
11 | 
12 | char dest[BUF_SIZE];
13 | char src[] = "Hello, World!";
14 | 
15 | __attribute__ ((noinline))
16 | int get_size(int* size){  /* Stores atoi(SIZE) in *size and returns 0; returns -1 and leaves *size untouched when SIZE is unset. */
17 |     char* env_size = getenv("SIZE");
18 |     if(env_size != NULL) {
19 |         *size = atoi(env_size);  /* no range check on the parsed value */
20 |         return 0;
21 |     }
22 |     return -1;
23 | }
24 | 
25 | int main() {  /* Copies size bytes from the global src into the global dest, with size supplied by get_size(). */
26 |     int size = 0;
27 |     if (get_size(&size) == 0) {
28 |         memcpy(dest, src, size);  /* size is not validated against BUF_SIZE or the length of src */
29 |     } else {
30 |         fprintf(stderr, "SIZE environment variable not set.\n");
31 |     }
32 |     return 0;
33 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-03.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | int main(int argc, char *argv[]) {  /* Builds a command from SYSTEM_COMMAND by repeatedly formatting cmd into itself, then runs it with system(). */
14 |     char cmd[CMD_LEN];
15 |     
16 |     char *env_cmd = getenv("SYSTEM_COMMAND");
17 |     if(env_cmd == NULL) {
18 |         fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
19 |         return EXIT_FAILURE;
20 |     }
21 |     snprintf(cmd, CMD_LEN, "%s", env_cmd);
22 |     snprintf(cmd, CMD_LEN, "%s %d", cmd, 31337);  /* from here on cmd is both the destination and an argument */
23 |     snprintf(cmd, CMD_LEN, "%s %s %d", cmd, cmd, 31337);
24 |     snprintf(cmd, CMD_LEN, "%s %s %s %d", cmd, cmd, cmd, 31337);  /* one %s per cmd argument, so %d receives the integer and not a pointer */
25 |     return system(cmd);
26 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-04.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable size
 8 | - source in function without inlining
 9 | */
10 | 
11 | __attribute__ ((noinline)) char* my_getenv(const char* name) {  /* Non-inlined pass-through to getenv(name). */
12 |     return getenv(name);
13 | }
14 | 
15 | int main(int argc, char *argv[]) {  /* Copies n bytes of src into dest, where n is parsed from MEMCPY_SIZE obtained via my_getenv(). */
16 |     char dest[16];
17 |     char src[] = "01-memcpy";
18 | 
19 |     char* env_size = my_getenv("MEMCPY_SIZE");
20 |     if(env_size == NULL) {
21 |         fprintf(stderr, "MEMCPY_SIZE environment variable not set.\n");
22 |         return EXIT_FAILURE;
23 |     }
24 |     int n = atoi(env_size);  /* no bounds check on the parsed size */
25 | 
26 |     memcpy(dest, src, n);
27 |     dest[n] = '\0';  /* index is the same unchecked n */
28 |     fprintf(stdout, "dest: '%s'\n", dest);
29 | 
30 |     return EXIT_SUCCESS;
31 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-05.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (char**): written but no influence on the sink
 7 | - Output parameter 2 (char**): written and influence on the sink
 8 | */
 9 | 
10 | __attribute__ ((noinline))
11 | int get_cmd(char **out_msg, char **out_cmd){  /* Stores CMD in *out_cmd and returns 0 when CMD is set; otherwise stores MSG in *out_msg and returns -1. */
12 |     char *env_cmd = getenv("CMD");
13 |     if (env_cmd != NULL) {
14 |         *out_cmd = env_cmd;  /* *out_msg is left untouched on this path */
15 |         return 0;
16 |     }
17 |     *out_msg = getenv("MSG");  /* *out_cmd is left untouched on this path */
18 |     return -1;
19 | }
20 | 
21 | int main() {  /* Runs the command delivered through get_cmd()'s second out-parameter when get_cmd() returns 0. */
22 |     char *msg = NULL;
23 |     char *cmd = NULL;
24 |     if (get_cmd(&msg, &cmd) == 0) {
25 |         system(cmd);  /* msg is never used after the call */
26 |     } else {
27 |         fprintf(stderr, "CMD environment variable not set.\n");
28 |     }
29 |     return 0;
30 | }


--------------------------------------------------------------------------------
/tests/data/src/function_out_params-08.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Output parameter 1 (char**): written in a callee
 7 | */
 8 | 
 9 | __attribute__ ((noinline, optimize("O0")))
10 | void get_cmd(int debug, char** cmd){  /* Stores the CMD environment variable (or NULL when unset) in *cmd and optionally logs it. */
11 |     *cmd = getenv("CMD");
12 |     if(debug) {
13 |         printf("[DEBUG] cmd='%s'\n", *cmd);  /* *cmd may be NULL here */
14 |     }
15 |     return;
16 | }
17 | 
18 | __attribute__ ((noinline, optimize("O0")))
19 | int check_cmd(char** cmd){  /* Lets get_cmd() fill *cmd, then returns 0 when *cmd is non-NULL and -1 otherwise. */
20 |     get_cmd(1, cmd);  /* the out-parameter is written by the callee, not in this function */
21 |     if(*cmd != NULL){
22 |         return 0;
23 |     }
24 |     return -1;
25 | }
26 | 
27 | int main() {  /* Runs the command delivered through check_cmd()'s out-parameter when check_cmd() returns 0. */
28 |     char *cmd = NULL;
29 |     if (check_cmd(&cmd) == 0) {
30 |         system(cmd);
31 |     } else {
32 |         fprintf(stderr, "CMD environment variable not set.\n");
33 |     }
34 |     return 0;
35 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-09.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | __attribute__((noinline))
14 | void dummy(int *value) {  /* Sets *value to 0; it does not touch any other data. */
15 |     *value = 0;
16 | }
17 | 
18 | int main(int argc, char *argv[]) {  /* Copies SYSTEM_COMMAND into cmd with snprintf() and runs cmd with system(). */
19 |     int value = 1;
20 |     char cmd[CMD_LEN];
21 | 
22 |     char *env_cmd = getenv("SYSTEM_COMMAND");
23 |     if(env_cmd == NULL) {
24 |         fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
25 |         return EXIT_FAILURE;
26 |     }
27 |     snprintf(cmd, CMD_LEN, "%s", env_cmd);
28 |     dummy(&value);  /* unrelated out-parameter call between the source and the sink */
29 |     system(cmd);
30 |     fprintf(stdout, "cmd: %s, value: %d\n", cmd, value);
31 | 
32 |     return EXIT_SUCCESS;
33 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-10.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable source and size
 8 | - unexploitable do to validation
 9 | */
10 | 
11 | int main(int argc, char *argv[]) {  /* Copies MEMCPY_SRC into dest only after checking that its length is smaller than dest_size. */
12 |     size_t dest_size = 16;
13 |     char dest[dest_size];
14 |     char* env_src = getenv("MEMCPY_SRC");
15 |     if(env_src == NULL) {
16 |         fprintf(stderr, "MEMCPY_SRC environment variable not set.\n");
17 |         return EXIT_FAILURE;
18 |     }
19 |     size_t src_size = strlen(env_src);
20 |     if(src_size >= dest_size) {  /* >= leaves room for the terminator written below */
21 |         fprintf(stderr, "MEMCPY_SRC size >= %zu.\n", dest_size);
22 |         return EXIT_FAILURE;
23 |     }
24 |     memcpy(dest, env_src, src_size);
25 |     dest[src_size] = '\0';
26 |     fprintf(stdout, "dest: '%s'\n", dest);
27 |     return EXIT_SUCCESS;
28 | }


--------------------------------------------------------------------------------
/tests/slicing/test_serialization.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from mole.core.data import Path
 3 | from tests.slicing.conftest import TestSlicing
 4 | from typing import List
 5 | import binaryninja as bn
 6 | 
 7 | 
 8 | class TestSerialization(TestSlicing):
 9 |     def test_serialization_01(
10 |         self, filenames: List[str] = ["function_calling-02"]
11 |     ) -> None:
12 |         for file in self.load_files(filenames):
13 |             # Load and analyze test binary with Binary Ninja
14 |             bv = bn.load(file)
15 |             bv.update_analysis_and_wait()
16 |             # Analyze test binary
17 |             paths = self.get_paths(bv)
18 |             # Assert results
19 |             for path in paths:
20 |                 assert path == Path.from_dict(bv, path.to_dict()), "serialization"
21 |             bv.file.close()
22 |         return
23 | 


--------------------------------------------------------------------------------
/tests/data/src/name_mangling-05.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | 
 3 | /*
 4 | Testcase Description:
 5 | - Static data member of a class (C++ name mangling)
 6 | */
 7 | 
 8 | struct MyStruct {  /* Struct variant of the static-data-member test case. */
 9 |     static char* cmd;  /* defined right after the struct and initialised from getenv("CMD") */
10 | 
11 |     __attribute__ ((noinline, optimize("O0")))
12 |     static int my_func() {  /* runs the static member cmd with system() and returns its result */
13 |         return system(cmd);
14 |     }
15 | };
16 | char* MyStruct::cmd = getenv("CMD");
17 | 
18 | class MyClass {  /* Class variant of the static-data-member test case. */
19 |     public:
20 |     static char* cmd;  /* defined right after the class and initialised from getenv("CMD") */
21 | 
22 |     __attribute__ ((noinline, optimize("O0")))
23 |     static int my_func() {  /* runs the static member cmd with system() and returns its result */
24 |         return system(cmd);
25 |     }
26 | };
27 | char* MyClass::cmd = getenv("CMD");
28 | 
29 | int main(int argc, char *argv[]) {  /* Calls each static my_func() when the corresponding static cmd member is non-NULL. */
30 |     if(MyStruct::cmd != NULL) {
31 |         MyStruct::my_func();
32 |     }
33 |     if(MyClass::cmd != NULL) {
34 |         MyClass::my_func();
35 |     }
36 |     return EXIT_SUCCESS;
37 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-10.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | __attribute__((noinline))
14 | void dummy(int *value) {  /* Sets *value to 0; it does not touch any other data. */
15 |     *value = 0;
16 | }
17 | 
18 | int main(int argc, char *argv[]) {  /* Formats SYSTEM_COMMAND plus a loop counter into cmd ten times and runs the final cmd with system(). */
19 |     int value = 1;
20 |     char cmd[CMD_LEN];
21 | 
22 |     char *env_cmd = getenv("SYSTEM_COMMAND");
23 |     if(env_cmd == NULL) {
24 |         fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
25 |         return EXIT_FAILURE;
26 |     }
27 |     for(int i=0; i<10; i++) {
28 |         snprintf(cmd, CMD_LEN, "%s %i", env_cmd, i);  /* each iteration overwrites cmd from the start */
29 |         dummy(&value);  /* unrelated out-parameter call inside the loop */
30 |     }
31 |     system(cmd);
32 |     fprintf(stdout, "cmd: %s, value: %d\n", cmd, value);
33 | 
34 |     return EXIT_SUCCESS;
35 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-11.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - with tail calls
 9 | - direct recursion
10 | */
11 | 
12 | __attribute__ ((noinline, optimize("O0")))
13 | char* getenv_2() {  /* Returns getenv("CMD"). */
14 |     return getenv("CMD");
15 | }
16 | 
17 | __attribute__ ((noinline, optimize("O0")))
18 | char* getenv_1(int* cnt) {  /* Decrements *cnt and calls itself until *cnt <= 0, then returns the result of getenv_2(). */
19 |     char* cmd = NULL;
20 |     if(*cnt > 0) {
21 |         (*cnt)--;  /* the counter lives in the caller and is shared across all recursion levels */
22 |         cmd = getenv_1(cnt);
23 |     } else{
24 |         cmd = getenv_2();
25 |     }
26 |     return cmd;
27 | }
28 | 
29 | int main(int argc, char *argv[]) {  /* Obtains CMD through the recursive getenv_1() (argc as counter) and runs it with system(). */
30 |     int cnt = argc;
31 |     char *env_cmd = getenv_1(&cnt);
32 |     if(env_cmd == NULL) {
33 |         fprintf(stderr, "CMD environment variable not set.\n");
34 |         return EXIT_FAILURE;
35 |     }
36 |     return system(env_cmd);
37 | }


--------------------------------------------------------------------------------
/tests/data/src/memcpy-07.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable source and size
 8 | - copying source
 9 | */
10 | 
11 | int main() {  /* Copies MEMCPY_SRC byte by byte into a heap buffer, then memcpy()s that buffer into the 16-byte dest. */
12 |     char dest[16];
13 |     
14 |     char* env_src = getenv("MEMCPY_SRC");
15 |     if(env_src == NULL) {
16 |         fprintf(stderr, "MEMCPY_SRC environment variable not set.\n");
17 |         return EXIT_FAILURE;
18 |     }
19 | 
20 |     size_t size = strlen(env_src);
21 |     char* buf = (char*) malloc(size + 1);  /* +1 for the terminator written after the loop */
22 |     if(buf == NULL) {
23 |         fprintf(stderr, "Failed to allocate memory.\n");
24 |         return EXIT_FAILURE;
25 |     }
26 | 
27 |     for(size_t i=0; i<size; i++) {
28 |         buf[i] = env_src[i];
29 |     }
30 |     buf[size] = '\0';
31 | 
32 |     memcpy(dest, buf, size+1);  /* size is not checked against sizeof(dest) */
33 |     fprintf(stdout, "dest: '%s'\n", dest);
34 |     free(buf);
35 | 
36 |     return EXIT_SUCCESS;
37 | }
38 | 


--------------------------------------------------------------------------------
/tests/data/src/memcpy-05.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - controllable source and size
 8 | - source in function without inlining
 9 | */
10 | 
11 | __attribute__ ((noinline, optimize("O0")))
12 | char* my_getenv(const char* name) {  /* Returns getenv(name) after adding 1 to every byte of the string in place; returns NULL when the variable is unset. */
13 |     char *env = getenv(name);
14 |     if(env == NULL) {
15 |         return NULL;
16 |     }
17 |     for(size_t i=0; i<strlen(env); i++) {  /* strlen(env) is re-evaluated on every iteration */
18 |         env[i] += 1;  /* modifies the buffer returned by getenv() directly */
19 |     }
20 |     return env;
21 | }
22 | 
23 | int main(int argc, char *argv[]) {  /* Copies the string returned by my_getenv("MEMCPY_SRC") into the 16-byte dest using its own length. */
24 |     char dest[16];
25 | 
26 |     char* env_src = my_getenv("MEMCPY_SRC");
27 |     if(env_src == NULL) {
28 |         fprintf(stderr, "MEMCPY_SRC environment variable not set.\n");
29 |         return EXIT_FAILURE;
30 |     }
31 |     size_t n = strlen(env_src);  /* n is not checked against sizeof(dest) */
32 | 
33 |     memcpy(dest, env_src, n);
34 |     dest[n] = '\0';
35 |     fprintf(stdout, "dest: '%s'\n", dest);
36 | 
37 |     return EXIT_SUCCESS;
38 | }


--------------------------------------------------------------------------------
/tests/data/src/name_mangling-06.cpp:
--------------------------------------------------------------------------------
 1 | #include <string>
 2 | 
 3 | /*
 4 | Testcase Description:
 5 | - Operator overloading (C++ name mangling)
 6 | */
 7 | 
 8 | struct MyStruct {  /* Wraps a shell command string; operator+ splices the CMD environment variable into it. */
 9 |     std::string cmd;
10 | 
11 |     MyStruct(const std::string& cmd) : cmd(cmd) {}
12 | 
13 |     MyStruct operator+(const MyStruct &other) {  /* Rewrites this->cmd in place and returns a copy of *this. */
14 |         char *env_cmd = getenv("CMD");  /* NOTE(review): getenv/system/EXIT_SUCCESS are used but only <string> is included in this file */
15 |         if(env_cmd != NULL) {
16 |             this->cmd = std::string(this->cmd + ";" + std::string(env_cmd) + ";" + other.cmd);  /* CMD is placed between the two operands */
17 |         } else {
18 |             this->cmd = std::string(this->cmd + ";" + other.cmd);
19 |         }
20 |         return *this;
21 |     }
22 | 
23 |     __attribute__ ((noinline, optimize("O0")))
24 |     int my_func() {  /* runs this->cmd with system() and returns its result */
25 |         return system(this->cmd.c_str());
26 |     }
27 | };
28 | 
29 | int main(int argc, char *argv[]) {  /* Joins two constant commands with operator+ and executes the combined command. */
30 |     MyStruct s1("echo '>>'");
31 |     MyStruct s2("echo '<<'");
32 |     MyStruct s3 = s1 + s2;  /* operator+ also modifies s1 */
33 |     s3.my_func();
34 |     return EXIT_SUCCESS;
35 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-04.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #define CMD_LEN 64
 6 | 
 7 | /*
 8 | Testcase Description:
 9 | - pointer analysis
10 | - system with user-controllable command
11 | */
12 | 
13 | int main(int argc, char *argv[]) {  /* Builds a command from SYSTEM_COMMAND_STR plus the number parsed from SYSTEM_COMMAND_INT and runs it with system(). */
14 |     char cmd[CMD_LEN];
15 |     
16 |     char *env_cmd_str = getenv("SYSTEM_COMMAND_STR");
17 |     if(env_cmd_str == NULL) {
18 |         fprintf(stderr, "SYSTEM_COMMAND_STR environment variable not set.\n");
19 |         return EXIT_FAILURE;
20 |     }
21 |     char *env_cmd_int = getenv("SYSTEM_COMMAND_INT");
22 |     if(env_cmd_int == NULL) {
23 |         fprintf(stderr, "SYSTEM_COMMAND_INT environment variable not set.\n");
24 |         return EXIT_FAILURE;
25 |     }
26 |     size_t value = atoi(env_cmd_int);  /* the int result of atoi() is converted to size_t */
27 |     
28 |     snprintf(cmd, CMD_LEN, "%s", env_cmd_str);
29 |     size_t *ptr_value = &value;  /* value is read back through this alias */
30 |     snprintf(cmd, CMD_LEN, "%s %zu", cmd, *ptr_value);  /* %zu matches the size_t argument; cmd is both destination and argument */
31 |     return system(cmd);
32 | }


--------------------------------------------------------------------------------
/tests/data/src/function_calling-10.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - with tail calls
 9 | */
10 | 
11 | __attribute__ ((noinline))
12 | char* getenv_2(char *cmd) {  /* Returns getenv(cmd); here cmd is the name of the variable to look up. */
13 |     return getenv(cmd);
14 | }
15 | 
16 | __attribute__ ((noinline))
17 | char* getenv_1() {  /* Returns the CMD environment variable by delegating to getenv_2(). */
18 |     return getenv_2("CMD");
19 | }
20 | 
21 | __attribute__ ((noinline))
22 | char* system_3(char *cmd) {  /* Identity function: returns cmd unchanged and does not call system(). */
23 |     return cmd;
24 | }
25 | 
26 | __attribute__ ((noinline))
27 | char* system_2(char *cmd) {  /* Returns system_3(cmd), which is cmd itself. */
28 |     return system_3(cmd);
29 | }
30 | 
31 | __attribute__ ((noinline))
32 | int system_1(char *cmd) {  /* Routes cmd through system_2()/system_3() and then executes it with system(). */
33 |     cmd = system_2(cmd);  /* the helpers return their argument unchanged */
34 |     return system(cmd);
35 | }
36 | 
37 | int main(int argc, char *argv[]) {  /* Obtains CMD via getenv_1() and, when it is set, passes it to system_1(). */
38 |     char *env_cmd = getenv_1();
39 |     if(env_cmd == NULL) {
40 |         fprintf(stderr, "CMD environment variable not set.\n");
41 |         return EXIT_FAILURE;
42 |     }
43 |     system_1(env_cmd);  /* return value of system_1() is ignored */
44 |     return EXIT_SUCCESS;
45 | }


--------------------------------------------------------------------------------
/docs/03-Customization.md:
--------------------------------------------------------------------------------
 1 | # Customization
 2 | This section documents extension points within *Mole* that allow users to customize and extend its analysis and UI behavior.
 3 | ## Path Grouping Strategy
 4 | To implement a custom path grouping strategy, follow these steps:
 5 | 1. Create a new subclass of [`PathGrouper`](../mole/grouping/__init__.py#L17) within the [grouping](../mole/grouping/) package.
 6 | 2. There is no need to register the strategy manually - its name will be detected dynamically.
 7 | 3. Define a key tuple with the following fields:
 8 |   - `display_name`: A string shown to users in the tree view.
 9 |   - `internal_id`: A unique identifier for the group.
10 |   - `level`: Specifies the group's depth in the tree view hierarchy.
11 | 
12 | **Note**: You can also inherit from existing strategies. For an example, see [`CallgraphPathGrouper`](../mole/grouping/call_graph.py#L10).
13 | 
14 | ----------------------------------------------------------------------------------------------------
15 | [Back-To-README](../README.md#documentation)


--------------------------------------------------------------------------------
/mole/grouping/source_sink.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module implements a source / sink grouping strategy.
 3 | """
 4 | 
 5 | from __future__ import annotations
 6 | from mole.core.data import Path
 7 | from mole.grouping import PathGrouper
 8 | from typing import List, Tuple
 9 | 
10 | 
11 | class SourceSinkPathGrouper(PathGrouper):
12 |     """
13 |     This class implements a strategy that groups by source and sink symbols.
14 |     """
15 | 
16 |     def get_group_keys(self, path: Path, *args, **kwargs) -> List[Tuple[str, str, int]]:
17 |         """
18 |         This method groups paths by source and sink symbols.
19 |         """
20 |         return [
21 |             (f"Source: {path.src_sym_name:s}", path.src_sym_name, 0),
22 |             (
23 |                 f"Sink: {path.snk_sym_name:s}",
24 |                 f"{path.src_sym_name:s}:{path.snk_sym_name:s}",
25 |                 1,
26 |             ),
27 |         ]
28 | 
29 |     def get_strategy_name(self) -> str:
30 |         """
31 |         This method returns the name of this grouping strategy.
32 |         """
33 |         return "Source / Sink"
34 | 


--------------------------------------------------------------------------------
/tests/data/src/function_calling-14.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - with tail calls
 9 | - indirect recursion
10 | */
11 | 
12 | int system_3(char*, int*);
13 | int system_2(char*, int*);
14 | int system_1(char*, int*);
15 | 
16 | __attribute__ ((noinline, optimize("O0")))
17 | int system_3(char* cmd, int* cnt) {  /* Runs cmd with system() once *cnt <= 0; otherwise decrements *cnt and re-enters the chain at system_1(). */
18 |     if(*cnt <= 0) {
19 |         return system(cmd);
20 |     } else {
21 |         (*cnt)--;
22 |         return system_1(cmd, cnt);  /* closes the cycle system_1 -> system_2 -> system_3 -> system_1 */
23 |     }
24 | }
25 | 
26 | __attribute__ ((noinline, optimize("O0")))
27 | int system_2(char* cmd, int* cnt) {  /* Forwards both arguments unchanged to system_3(). */
28 |     return system_3(cmd, cnt);
29 | }
30 | 
31 | __attribute__ ((noinline, optimize("O0")))
32 | int system_1(char* cmd, int* cnt) {  /* Forwards both arguments unchanged to system_2(). */
33 |     return system_2(cmd, cnt);
34 | }
35 | 
36 | int main(int argc, char *argv[]) {  /* Passes the CMD environment variable into the system_1/2/3 call cycle with argc as the counter. */
37 |     int cnt = argc;
38 |     char *env_cmd = getenv("CMD");
39 |     if(env_cmd == NULL) {
40 |         fprintf(stderr, "CMD environment variable not set.\n");
41 |         return EXIT_FAILURE;
42 |     }
43 |     return system_1(env_cmd, &cnt);
44 | }


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Release
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - 'v*.*.*'
 7 | 
 8 | jobs:
 9 |   release:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout Repository
13 |         uses: actions/checkout@v4
14 |       
15 |       - name: Set up Python
16 |         uses: actions/setup-python@v5
17 |         with:
18 |           python-version: '3.x'
19 |       
20 |       - name: Install Dependencies
21 |         run: |
22 |           python -m pip install --upgrade pip
23 |           # python -m pip install setuptools wheel
24 |           pip install .
25 |       
26 |       - name: Build Package
27 |         run: |
28 |           python -m pip install --upgrade build
29 |           python -m build
30 |       
31 |       - name: Create GitHub Release
32 |         uses: ncipollo/release-action@v1
33 |         with:
34 |           tag: ${{ github.ref }}
35 |           name: Release ${{ github.ref }}
36 |           body: |
37 |             New release for tag ${{ github.ref }}
38 |           generateReleaseNotes: true
39 |           draft: false
40 |           prerelease: false
41 |           artifactErrorsFailBuild: true
42 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | import sys
 3 | 
 4 | 
# Load components only when not being run by `pytest`: everything below is
# skipped once a module named "pytest" is present in `sys.modules`.
if "pytest" not in sys.modules:
    # The imports live inside the branch, so they are not executed either when
    # the package is imported under `pytest`.
    from mole.controllers.ai import AiController
    from mole.controllers.config import ConfigController
    from mole.controllers.path import PathController
    from mole.models.config import ConfigModel
    from mole.services.config import ConfigService
    from mole.views.ai import AiView
    from mole.views.config import ConfigView
    from mole.views.path import PathView
    from mole.views.sidebar import MoleSidebar

    # Services
    config_service = ConfigService()

    # Models (built from the configuration returned by the service)
    config_model = ConfigModel(config_service.load_config())

    # Views
    config_view = ConfigView()
    ai_view = AiView()
    path_view = PathView()

    # Controllers (each later controller receives the earlier ones it uses:
    # the AI controller gets the config controller, the path controller both)
    config_ctr = ConfigController(config_service, config_model, config_view)
    ai_ctr = AiController(ai_view, config_ctr)
    path_ctr = PathController(path_view, config_ctr, ai_ctr)

    # Initialize sidebar in Binary Ninja
    sidebar = MoleSidebar(path_view)
    sidebar.init()
36 | 


--------------------------------------------------------------------------------
/tests/data/src/function_calling-01.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - allow function inlining
 8 | - with tail calls
 9 | */
10 | 
/* Innermost source wrapper: returns the result of getenv(cmd) directly. */
char* getenv_2(char *cmd) {
    return getenv(cmd);
}
14 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1a" through getenv_2(). */
char* getenv_1a() {
    return getenv_2("SYSTEM_COMMAND_1a");
}
18 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1b" through getenv_2(). */
char* getenv_1b() {
    return getenv_2("SYSTEM_COMMAND_1b");
}
22 | 
/* Calls getenv() directly with the fixed name "NOT_DANGEROUS". */
char* getenv_1c() {
    return getenv("NOT_DANGEROUS");
}
26 | 
/* Innermost sink wrapper: passes cmd to system() and returns its result. */
int system_2(char *cmd) {
    return system(cmd);
}
30 | 
/* Sink wrapper: forwards cmd to system_2() and returns its result. */
int system_1a(char *cmd) {
    return system_2(cmd);
}
34 | 
/* Sink wrapper: forwards cmd to system_2() and returns its result. */
int system_1b(char *cmd) {
    return system_2(cmd);
}
38 | 
/*
 * Obtains a command from the environment through getenv_1a() or getenv_1b()
 * (selected by argc) and passes it to system_1a(). system_1b() is called with
 * the constant "whoami", and the result of getenv_1c() is discarded.
 * Returns EXIT_FAILURE when the selected environment variable is not set.
 */
int main(int argc, char *argv[]) {
    char *env_cmd;

    if(argc <= 1) {
        env_cmd = getenv_1a();
    } else {
        env_cmd = getenv_1b();
    }
    /* Note: env_cmd is printed before it is checked for NULL. */
    fprintf(stdout, "main: '%s'\n", env_cmd);
    if(env_cmd == NULL) {
        fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
        return EXIT_FAILURE;
    }
    system_1a(env_cmd);
    system_1b("whoami");
    getenv_1c();
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/tests/data/src/function_calling-12.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - with tail calls
 9 | - indirect recursion
10 | */
11 | 
12 | char* getenv_4();
13 | char* getenv_3(int* cnt);
14 | char* getenv_2(int* cnt);
15 | char* getenv_1(int* cnt);
16 | 
/* Innermost source wrapper: returns the result of getenv("CMD"). */
__attribute__ ((noinline, optimize("O0")))
char* getenv_4() {
    return getenv("CMD");
}
21 | 
/*
 * Recursion step: while *cnt is positive it is decremented and the lookup
 * goes back to getenv_1(); otherwise the value is taken from getenv_4().
 */
__attribute__ ((noinline, optimize("O0")))
char* getenv_3(int* cnt) {
    char* cmd = NULL;
    if(*cnt > 0) {
        (*cnt)--;
        return getenv_1(cnt);
    } else {
        cmd = getenv_4();
    }
    return cmd;
}
33 | 
/* Forwards cnt unchanged to getenv_3() and returns its result. */
__attribute__ ((noinline, optimize("O0")))
char* getenv_2(int* cnt) {
    return getenv_3(cnt);
}
38 | 
/* Forwards cnt unchanged to getenv_2() and returns its result. */
__attribute__ ((noinline, optimize("O0")))
char* getenv_1(int* cnt) {
    return getenv_2(cnt);
}
43 | 
/*
 * Fetches the command through getenv_1(), using argc as the initial value of
 * the recursion counter, and passes it to system(). Returns EXIT_FAILURE when
 * no value was obtained, otherwise the value returned by system().
 */
int main(int argc, char *argv[]) {
    int cnt = argc;
    char *env_cmd = getenv_1(&cnt);
    if(env_cmd == NULL) {
        fprintf(stderr, "CMD environment variable not set.\n");
        return EXIT_FAILURE;
    }
    return system(env_cmd);
}


--------------------------------------------------------------------------------
/tests/data/Makefile:
--------------------------------------------------------------------------------
 1 | # Compilation settings (cross-compile with `CC=arm-linux-gcc CXX=arm-linux-g++ EXT=.linux-armv7 make`)
 2 | CC      	?= gcc
 3 | CXX			?= g++
 4 | CFLAGS  	?= -Wall -O3 -g -fno-builtin -U_FORTIFY_SOURCE
 5 | CXXFLAGS	?= -Wall -O3 -g -fno-builtin -U_FORTIFY_SOURCE
 6 | EXT     	?=
 7 | 
 8 | # Directories
 9 | SRC_DIR := src
10 | BIN_DIR := bin
11 | 
12 | # Files
13 | C_SRC_FILES   := $(wildcard $(SRC_DIR)/*.c)
14 | C_BIN_FILES   := $(patsubst $(SRC_DIR)/%.c, $(BIN_DIR)/%$(EXT), $(C_SRC_FILES))
15 | CPP_SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp)
16 | CPP_BIN_FILES := $(patsubst $(SRC_DIR)/%.cpp, $(BIN_DIR)/%$(EXT), $(CPP_SRC_FILES))
17 | 
18 | # Ensure directory bin/ exists
19 | $(shell mkdir -p bin)
20 | 
21 | # Default target
22 | all: $(C_BIN_FILES) $(CPP_BIN_FILES)
23 | 
24 | # Compile .c files
25 | $(BIN_DIR)/%$(EXT): $(SRC_DIR)/%.c
26 | 	$(CC) $(CFLAGS) -o $@ $<
27 | 
28 | # Compile .cpp files
29 | $(BIN_DIR)/%$(EXT): $(SRC_DIR)/%.cpp
30 | 	$(CXX) $(CXXFLAGS) -o $@ $<
31 | 
32 | # Remove binaries
33 | clean:
34 | 	rm -rf $(BIN_DIR)
35 | 
36 | # Compile a single program
37 | $(notdir $(basename $(C_SRC_FILES) $(CPP_SRC_FILES))): %: $(BIN_DIR)/%$(EXT)
38 | 
39 | # Prevent make from confusing targets with file names
40 | .PHONY: all $(notdir $(basename $(C_SRC_FILES) $(CPP_SRC_FILES))) clean


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=42", "wheel"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.setuptools.packages.find]
 6 | where = ["."]
 7 | include = ["mole*"]
 8 | 
 9 | [tool.setuptools.package-data]
10 | "mole" = ["conf/*", "resources/*"]
11 | 
12 | [project]
13 | name = "mole"
14 | version = "0.5.1"
15 | description = "A Binary Ninja plugin to identify interesting paths using static backward slicing"
16 | authors = [
17 |     {name = "Damian Pfammatter"},
18 |     {name = "Sergio Paganoni"}
19 | ]
20 | requires-python = ">=3.10"
21 | dependencies = [
22 |     "ijson==3.3.0",
23 |     "lark==1.2.2",
24 |     "networkx[default]==3.4.2",
25 |     "PyYAML==6.0.2",
26 |     "termcolor==2.4.0",
27 |     "pydantic>=2.0.0",
28 |     "openai >= 1.78.1 , <2.0.0"
29 | ]
30 | 
31 | [project.optional-dependencies]
32 | develop = [
33 |     "debugpy==1.8.1",
34 |     "pre_commit==4.2.0",
35 |     "pytest==8.3.4",
36 |     "pytest-cov==7.0.0",
37 |     "ruff==0.9.9",
38 |     "tomli==2.2.1"
39 | ]
40 | 
41 | [project.scripts]
42 | mole = "mole.cli.main:main"
43 | 
44 | [tool.ruff.lint]
45 | extend-select = ["TID"]
46 | 
47 | [tool.pytest.ini_options]
48 | testpaths = ["tests"]
49 | python_files = ["test_*.py"]
50 | python_classes = ["Test*"]
51 | python_functions = ["test_*"]
52 | pythonpath = ["."]


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Tests
 2 | This directory contains the test suite for the *Mole* project.
 3 | ## Build Test Binaries
 4 | Before running the tests, you must first build the test binaries. Navigate to the tests data directory:
 5 | ```bash
 6 | cd tests/data/
 7 | ```
 8 | Then, compile all test binaries:
 9 | ```bash
10 | make all
11 | ```
12 | To cross-compile the test binaries for a specific target architecture, specify the appropriate compilers. You may also define a custom file extension to distinguish the resulting binaries. For example:
13 | ```bash
14 | CC=arm-linux-gcc CXX=arm-linux-g++ EXT=.linux-armv7 make all
15 | ```
16 | ## Run Tests
17 | In the following commands, we assume you are in the *Mole* project’s root directory:
18 | #### Run All Tests
19 | ```bash
20 | pytest
21 | ```
22 | #### Run Specific Test File
23 | ```bash
24 | pytest tests/test_data.py
25 | pytest tests/slicing/test_pointer.py
26 | ```
27 | #### Run Specific Test Class or Method
28 | ```bash
29 | pytest tests/slicing/test_pointer.py::TestPointerAnalysis
30 | pytest tests/slicing/test_pointer.py::TestPointerAnalysis::test_pointer_analysis_01
31 | ```
32 | #### Run Tests for a Specific Architecture
33 | To run tests only on binaries built for a specific architecture, set the `EXT` parameter to match the extension used during compilation:
34 | ```bash
35 | EXT=".linux-armv7" pytest
36 | ```


--------------------------------------------------------------------------------
/tests/data/src/function_calling-15.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - marking function parameters
 8 | */
 9 | 
/*
 * Innermost source wrapper: returns getenv(cmd). When debug is non-zero a
 * debug line containing cmd is printed first.
 */
__attribute__ ((noinline, optimize("O0")))
char* getenv_2(char *cmd, int debug) {
    if (debug) {
        printf("[DEBUG] getenv_2 called with cmd='%s'\n", cmd);
    }
    return getenv(cmd);
}
17 | 
/*
 * Source wrapper: forwards cmd and debug to getenv_2(). When debug is
 * non-zero a debug marker is printed first.
 */
__attribute__ ((noinline, optimize("O0")))
char* getenv_1(char* cmd, int debug) {
    if (debug) {
        printf("[DEBUG] getenv_1");
    }
    return getenv_2(cmd, debug);
}
25 | 
/*
 * Innermost sink wrapper: passes cmd to system() and returns its result.
 * When debug is non-zero a debug marker is printed first.
 */
__attribute__ ((noinline, optimize("O0")))
int system_2(char *cmd, int debug) {
    if(debug) {
        printf("[DEBUG] system_2");
    }
    return system(cmd);
}
33 | 
/*
 * Sink wrapper: forwards cmd and debug to system_2(). When debug is non-zero
 * a debug marker is printed first.
 */
__attribute__ ((noinline, optimize("O0")))
int system_1(char *cmd, int debug) {
    if(debug) {
        printf("[DEBUG] system_1");
    }
    return system_2(cmd, debug);
}
41 | 
/*
 * Looks up "CMD" through getenv_1() and passes the value to system_1(); the
 * constant debug flag (1) is handed to both wrapper chains as second argument.
 * Returns EXIT_FAILURE when the variable is not set.
 */
int main(int argc, char *argv[]) {
    char *env_cmd;
    int debug = 1;

    env_cmd = getenv_1("CMD", debug);
    if(env_cmd == NULL) {
        fprintf(stderr, "CMD environment variable not set.\n");
        return EXIT_FAILURE;
    }

    system_1(env_cmd, debug);
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/mole/grouping/call_graph.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module implements a call graph grouping strategy.
 3 | """
 4 | 
 5 | from __future__ import annotations
 6 | from mole.core.data import Path
 7 | from mole.grouping.source_sink import SourceSinkPathGrouper
 8 | 
 9 | 
class CallgraphPathGrouper(SourceSinkPathGrouper):
    """
    This class implements a strategy that groups by source and sink symbols, as well as call graphs.
    """

    def get_group_keys(self, path: Path, *args, **kwargs):
        """
        This method groups paths by source and sink symbols, as well as call graphs.

        The keys returned by the parent strategy are extended by one tuple that is built from the
        short names of the source functions of `path.calls`. If the path has more than `max_calls`
        calls (keyword argument, default 6), only the first and last `max_calls / 2` names are kept
        and the omitted middle part is replaced by `"..."`.
        """
        max_calls = kwargs.get("max_calls", 6)
        calls = [call[0].source_function.symbol.short_name for call in path.calls]
        if len(calls) > max_calls:
            # Keep the same number of names from both ends. The tail is sliced with an explicit
            # start index: `calls[-0:]` would return the whole list when `max_calls < 2`, i.e.
            # nothing would be truncated at all.
            half = max(int(max_calls / 2), 0)
            calls = calls[:half] + ["..."] + calls[len(calls) - half :]
        calls = " - ".join(reversed(calls))
        keys = super().get_group_keys(path, *args, **kwargs)
        # NOTE(review): the tuple looks like (display label, unique key, level) - confirm against
        # `SourceSinkPathGrouper.get_group_keys`.
        keys.append(
            (
                f"Calls: {calls:s}",
                f"{path.src_sym_name:s}:{path.snk_sym_name}:{calls:s}",
                2,
            )
        )
        return keys

    def get_strategy_name(self) -> str:
        """
        This method returns the name of this grouping strategy.
        """
        return "Call Graph"
39 | 


--------------------------------------------------------------------------------
/tests/slicing/test_pointer_deref.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from tests.slicing.conftest import TestSlicing
 3 | from typing import List
 4 | 
 5 | 
 6 | class TestPointerDeref(TestSlicing):
 7 |     def test_load_01(self, filenames: List[str] = ["load-01"]) -> None:
 8 |         self.assert_paths(
 9 |             srcs=[("getenv", None)],
10 |             snks=[("system", 1)],
11 |             call_chains=[["main"]],
12 |             filenames=filenames,
13 |         )
14 |         return
15 | 
16 |     def test_load_02(self, filenames: List[str] = ["load-02"]) -> None:
17 |         self.test_load_01(filenames=filenames)
18 |         return
19 | 
20 |     def test_load_03(self, filenames: List[str] = ["load-03"]) -> None:
21 |         self.assert_paths(
22 |             srcs=[("getenv", None)],
23 |             snks=[("system", 1)],
24 |             call_chains=[["main"], ["main"]],
25 |             filenames=filenames,
26 |         )
27 |         return
28 | 
29 |     def test_load_04(self, filenames: List[str] = ["load-04"]) -> None:
30 |         self.test_load_03(filenames=filenames)
31 |         return
32 | 
33 |     def test_load_05(self, filenames: List[str] = ["load-05"]) -> None:
34 |         self.assert_paths(
35 |             srcs=[("getenv", None)],
36 |             snks=[("memcpy", 2), ("memcpy", 3)],
37 |             call_chains=[["main"], ["main"]],
38 |             filenames=filenames,
39 |         )
40 |         return
41 | 


--------------------------------------------------------------------------------
/mole/common/task.py:
--------------------------------------------------------------------------------
 1 | from mole.common.log import log
 2 | from typing import Any, Callable, Optional, Tuple
 3 | import binaryninja as bn
 4 | 
 5 | 
 6 | tag = "Mole.Task"
 7 | 
 8 | 
 9 | class BackgroundTask(bn.BackgroundTaskThread):
10 |     """
11 |     This class implements a general background task.
12 |     """
13 | 
14 |     def __init__(
15 |         self,
16 |         initial_progress_text: str = "",
17 |         can_cancel: bool = False,
18 |         run: Optional[Callable[..., Any]] = None,
19 |         *args: Any,
20 |         **kwargs: Any,
21 |     ) -> None:
22 |         """
23 |         This method initializes the background task.
24 |         """
25 |         super().__init__(initial_progress_text, can_cancel)
26 |         self._run = run
27 |         self._args: Tuple[Any, ...] = args
28 |         self._kwargs: dict[str, Any] = kwargs
29 |         self._results: Any = None
30 |         return
31 | 
32 |     def run(self) -> None:
33 |         """
34 |         This method runs the background task.
35 |         """
36 |         log.info(tag, "Starting background task")
37 |         if self._run:
38 |             self._results = self._run(*self._args, **self._kwargs)
39 |         log.info(tag, "Background task completed")
40 |         return
41 | 
42 |     def results(self) -> Any:
43 |         """
44 |         This method waits for the background task to complete and returns its results.
45 |         """
46 |         self.join()
47 |         return self._results
48 | 


--------------------------------------------------------------------------------
/tests/data/src/function_calling-02.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - with tail calls
 9 | */
10 | 
/* Innermost source wrapper: returns the result of getenv(cmd) directly. */
__attribute__ ((noinline))
char* getenv_2(char *cmd) {
    return getenv(cmd);
}
15 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1a" through getenv_2(). */
__attribute__ ((noinline))
char* getenv_1a() {
    return getenv_2("SYSTEM_COMMAND_1a");
}
20 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1b" through getenv_2(). */
__attribute__ ((noinline))
char* getenv_1b() {
    return getenv_2("SYSTEM_COMMAND_1b");
}
25 | 
/* Calls getenv() directly with the fixed name "NOT_DANGEROUS". */
__attribute__ ((noinline))
char* getenv_1c() {
    return getenv("NOT_DANGEROUS");
}
30 | 
/* Innermost sink wrapper: passes cmd to system() and returns its result. */
__attribute__ ((noinline))
int system_2(char *cmd) {
    return system(cmd);
}
35 | 
/* Sink wrapper: forwards cmd to system_2() and returns its result. */
__attribute__ ((noinline))
int system_1a(char *cmd) {
    return system_2(cmd);
}
40 | 
/* Sink wrapper: forwards cmd to system_2() and returns its result. */
__attribute__ ((noinline))
int system_1b(char *cmd) {
    return system_2(cmd);
}
45 | 
/*
 * Obtains a command from the environment through getenv_1a() or getenv_1b()
 * (selected by argc) and passes it to system_1a(). system_1b() is called with
 * the constant "whoami", and the result of getenv_1c() is discarded.
 * Returns EXIT_FAILURE when the selected environment variable is not set.
 */
int main(int argc, char *argv[]) {
    char *env_cmd;

    if(argc <= 1) {
        env_cmd = getenv_1a();
    } else {
        env_cmd = getenv_1b();
    }
    /* Note: env_cmd is printed before it is checked for NULL. */
    fprintf(stdout, "main: '%s'\n", env_cmd);
    if(env_cmd == NULL) {
        fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
        return EXIT_FAILURE;
    }
    system_1a(env_cmd);
    system_1b("whoami");
    getenv_1c();
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/mole/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from enum import Enum
 3 | from typing import List
 4 | 
 5 | 
 6 | class IndexedLabeledEnum(Enum):
 7 |     def __new__(cls, index: int, label: str = "") -> IndexedLabeledEnum:
 8 |         obj = object.__new__(cls)
 9 |         obj._value_ = index
10 |         obj._label = label
11 |         return obj
12 | 
13 |     @property
14 |     def index(self) -> int:
15 |         return self._value_
16 | 
17 |     @property
18 |     def label(self) -> str:
19 |         return self._label
20 | 
21 |     @classmethod
22 |     def indexes(cls: IndexedLabeledEnum) -> List[int]:
23 |         """
24 |         This method returns a list with the enum members' indexes.
25 |         """
26 |         return [member._value_ for member in cls]
27 | 
28 |     @classmethod
29 |     def labels(cls: IndexedLabeledEnum) -> List[str]:
30 |         """
31 |         This method returns a list with the enum members' labels.
32 |         """
33 |         return [member._label for member in cls]
34 | 
35 |     def __str__(self) -> str:
36 |         return self._label
37 | 
38 |     def __lt__(self, other: object) -> bool:
39 |         if isinstance(other, IndexedLabeledEnum):
40 |             return self._value_ < other._value_
41 |         return NotImplemented
42 | 
43 |     def __eq__(self, other: object) -> bool:
44 |         if isinstance(other, IndexedLabeledEnum):
45 |             return self._value_ == other._value_
46 |         return NotImplemented
47 | 
48 |     def __hash__(self) -> int:
49 |         return hash(self._value)
50 | 


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-15.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | #include <unistd.h>
 5 | 
 6 | #define BUF_SIZE 16
 7 | 
 8 | /*
 9 | Testcase Description:
10 | - getopt param ends up in a call to strcpy
11 | 
12 | Testcase Analysis (linux-x86_64):
13 | - Backward slicing ends in:
14 |   ```
15 |   0x4010f5 src#3 = [0x404020] (MediumLevelILSetVarSsa)
16 |   0x4010f5 [0x404020] (MediumLevelILImport)
17 |   ```
18 | - This corresponds to the source code line:
19 |   ```
20 |   src = optarg;
21 |   ```
22 | - `optarg` is an (external) global variable defined in the `.bss` section:
23 |   ```
24 |   .bss (NOBITS) section started  {0x404020-0x404030}
25 |   00404020  char* optarg = 0x0
26 |   ```
27 | - Our current **pointer analysis** implementation is not able to track global variables. More specifically for the listed example, slicing does not enter `getopt`, since it does not explicitly get `optarg` as a function parameter.
28 | */
29 | 
30 | // External global variable defined in libc
31 | extern char* optarg;
32 | 
/*
 * Parses the command line with getopt() (option string "s:"), remembers the
 * argument of `-s` via the global `optarg` and copies it with strcpy() into
 * the BUF_SIZE-byte stack buffer `dest`. Any other option ends the program
 * with EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
    int opt;
    char dest[BUF_SIZE];
    char *src = NULL;
    
    // Parse command-line options
    while ((opt = getopt(argc, argv, "s:")) != -1) {
        switch (opt) {
            case 's':
                src = optarg;
                break;
            default:
                return EXIT_FAILURE;
        }
    }
    // Copy src to dest (no length check before strcpy)
    if (src != NULL) {
        strcpy(dest, src);
        printf("dest: '%s'\n", dest);
    }
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/docs/01-Installation.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | In the following, we assume that the variables `$BINJA_BIN` and `$BINJA_USR` point to your *Binary Ninja*'s [binary path](https://docs.binary.ninja/guide/index.html#binary-path) and [user folder](https://docs.binary.ninja/guide/index.html#user-folder), respectively. Use the following steps to install *Mole*:
 3 | 
 4 | - Clone the plugin to your *Binary Ninja*'s user folder:
 5 |   ```shell
 6 |   cd $BINJA_USR/plugins/
 7 |   git clone https://github.com/pdamian/mole.git mole-plugin && cd mole-plugin/
 8 |   ```
 9 | - Create and activate a new Python virtual environment for *Mole* (optional, but recommended):
10 |   ```shell
11 |   python3 -m venv venv/mole
12 |   source venv/mole/bin/activate
13 |   ```
14 | - Install *Binary Ninja*'s Python [API](https://docs.binary.ninja/dev/batch.html#install-the-api):
15 |   ```shell
16 |   (mole)$ python $BINJA_BIN/scripts/install_api.py
17 |   ```
18 | - Install *Mole* either in standard or development mode:
19 |   ```shell
20 |   # Standard
21 |   (mole)$ pip install .
22 | 
23 |   # Development
24 |   #   WARNING:
25 |   #   When installed using the -e (editable) flag with pip, Binary Ninja must be launched from
26 |   #   within the activated virtual environment (mole)
27 |   (mole)$ pip install -e .[develop]
28 |   (mole)$ pre-commit install
29 |   (mole)$ $BINJA_BIN/binaryninja &
30 |   ```
31 | - If you are using a virtual environment, consider configuring the corresponding `site-packages` directory in *Binary Ninja*'s settings.
32 | ----------------------------------------------------------------------------------------------------
33 | [Back-To-README](../README.md#documentation)


--------------------------------------------------------------------------------
/tests/data/src/function_calling-03.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - allow function inlining
 8 | - without tail calls
 9 | */
10 | 
/*
 * Innermost source wrapper: stores the result of getenv(cmd), prints it and
 * then returns it (the fprintf() keeps the getenv() call out of tail position).
 */
char* getenv_2(char *cmd) {
    char *env_cmd = getenv(cmd);
    fprintf(stdout, "getenv_2: '%s'\n", env_cmd);
    return env_cmd;
}
16 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1a" through getenv_2(), prints and returns it. */
char* getenv_1a() {
    char *env_cmd = getenv_2("SYSTEM_COMMAND_1a");
    fprintf(stdout, "getenv_1a: '%s'\n", env_cmd);
    return env_cmd;
}
22 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1b" through getenv_2(), prints and returns it. */
char* getenv_1b() {
    char *env_cmd = getenv_2("SYSTEM_COMMAND_1b");
    fprintf(stdout, "getenv_1b: '%s'\n", env_cmd);
    return env_cmd;
}
28 | 
/* Calls getenv() directly with the fixed name "NOT_DANGEROUS", prints and returns the value. */
char* getenv_1c() {
    char *env_cmd = getenv("NOT_DANGEROUS");
    fprintf(stdout, "getenv_1c: '%s'\n", env_cmd);
    return env_cmd;
}
34 | 
/* Innermost sink wrapper: passes cmd to system(), prints the result and returns it. */
int system_2(char *cmd) {
    int res = system(cmd);
    fprintf(stdout, "system_2: '%d'\n", res);
    return res;
}
40 | 
/* Sink wrapper: forwards cmd to system_2(), prints the result and returns it. */
int system_1a(char *cmd) {
    int res = system_2(cmd);
    fprintf(stdout, "system_1a: '%d'\n", res);
    return res;
}
46 | 
/* Sink wrapper: forwards cmd to system_2(), prints the result and returns it. */
int system_1b(char *cmd) {
    int res = system_2(cmd);
    fprintf(stdout, "system_1b: '%d'\n", res);
    return res;
}
52 | 
/*
 * Obtains a command from the environment through getenv_1a() or getenv_1b()
 * (selected by argc) and passes it to system_1a(). system_1b() is called with
 * the constant "whoami", and the result of getenv_1c() is discarded.
 * Returns EXIT_FAILURE when the selected environment variable is not set.
 */
int main(int argc, char *argv[]) {
    char *env_cmd;

    if(argc <= 1) {
        env_cmd = getenv_1a();
    } else {
        env_cmd = getenv_1b();
    }
    /* Note: env_cmd is printed before it is checked for NULL. */
    fprintf(stdout, "main: '%s'\n", env_cmd);
    if(env_cmd == NULL) {
        fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
        return EXIT_FAILURE;
    }
    system_1a(env_cmd);
    system_1b("whoami");
    getenv_1c();
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-12.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | /*
 5 | Testcase Description:
 6 | - Multiple source memory definitions
 7 | */
 8 | 
/*
 * Formats "echo <user_name>" into a buffer obtained from malloc() and stores
 * the buffer in *str_ptr.
 *
 * Returns the result of the second snprintf() call on success and -1 on
 * failure. When the length computation fails *str_ptr is left untouched;
 * when the allocation or the final formatting fails *str_ptr is NULL.
 * On success the caller owns the buffer and has to free() it.
 */
__attribute__ ((noinline))
int create_cmd(char** str_ptr, const char* user_name) {
    const char* fmt = "echo %s";
    // Compute length of formatted string
    int len = snprintf(NULL, 0, fmt, user_name);
    if(len < 0) return -1;
    // Allocate memory for the string
    *str_ptr = (char*) malloc(len + 1);
    if(*str_ptr == NULL) {
        return -1;
    }
    // Write formatted string to allocated memory
    int res = snprintf(*str_ptr, len + 1, fmt, user_name);
    if(res < 0) {
        free(*str_ptr);
        *str_ptr = NULL;
        return -1;
    }
    return res;
}
29 | 
/*
 * Reads USER_ID and USER_NAME from the environment, builds a command string
 * from USER_NAME with create_cmd() and executes it with system().
 * Returns EXIT_FAILURE when a variable is missing, the command cannot be
 * created or system() returns -1.
 */
int main(int argc, char *argv[]) {
    // Source: User inputs via environment variables
    char *env_user_id   = getenv("USER_ID");
    char *env_user_name = getenv("USER_NAME");
    if(env_user_id == NULL || env_user_name == NULL) {
        fprintf(stderr, "Missing environment variables.\n");
        return EXIT_FAILURE;
    }
    int user_id = atoi(env_user_id);
    // Create command string
    // (both branches make the same create_cmd() call, they differ only in the
    // error message; cmd is therefore defined at two distinct call sites)
    char *cmd = NULL;
    if(user_id == 0) {
        if(create_cmd(&cmd, env_user_name) < 0) {
            fprintf(stderr, "Failed to create command for root user '%s'.\n", env_user_name);
            return EXIT_FAILURE;
        }
    } else {
        if(create_cmd(&cmd, env_user_name) < 0) {
            fprintf(stderr, "Failed to create command for user '%s'.\n", env_user_name);
            return EXIT_FAILURE;
        }
    }
    // Sink: Execute command
    if(system(cmd) == -1) {
        fprintf(stderr, "Failed to execute command.\n");
        free(cmd);
        return EXIT_FAILURE;
    }
    free(cmd);
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/tests/data/src/function_calling-04.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - disallow function inlining
 8 | - without tail calls
 9 | */
10 | 
/*
 * Innermost source wrapper: stores the result of getenv(cmd), prints it and
 * then returns it (the fprintf() keeps the getenv() call out of tail position).
 */
__attribute__ ((noinline))
char* getenv_2(char *cmd) {
    char *env_cmd = getenv(cmd);
    fprintf(stdout, "getenv_2: '%s'\n", env_cmd);
    return env_cmd;
}
17 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1a" through getenv_2(), prints and returns it. */
__attribute__ ((noinline))
char* getenv_1a() {
    char *env_cmd = getenv_2("SYSTEM_COMMAND_1a");
    fprintf(stdout, "getenv_1a: '%s'\n", env_cmd);
    return env_cmd;
}
24 | 
/* Source wrapper: looks up "SYSTEM_COMMAND_1b" through getenv_2(), prints and returns it. */
__attribute__ ((noinline))
char* getenv_1b() {
    char *env_cmd = getenv_2("SYSTEM_COMMAND_1b");
    fprintf(stdout, "getenv_1b: '%s'\n", env_cmd);
    return env_cmd;
}
31 | 
/* Calls getenv() directly with the fixed name "NOT_DANGEROUS", prints and returns the value. */
__attribute__ ((noinline))
char* getenv_1c() {
    char *env_cmd = getenv("NOT_DANGEROUS");
    fprintf(stdout, "getenv_1c: '%s'\n", env_cmd);
    return env_cmd;
}
38 | 
/* Innermost sink wrapper: passes cmd to system(), prints the result and returns it. */
__attribute__ ((noinline))
int system_2(char *cmd) {
    int res = system(cmd);
    fprintf(stdout, "system_2: '%d'\n", res);
    return res;
}
45 | 
/* Sink wrapper: forwards cmd to system_2(), prints the result and returns it. */
__attribute__ ((noinline))
int system_1a(char *cmd) {
    int res = system_2(cmd);
    fprintf(stdout, "system_1a: '%d'\n", res);
    return res;
}
52 | 
/* Sink wrapper: forwards cmd to system_2(), prints the result and returns it. */
__attribute__ ((noinline))
int system_1b(char *cmd) {
    int res = system_2(cmd);
    fprintf(stdout, "system_1b: '%d'\n", res);
    return res;
}
59 | 
/*
 * Obtains a command from the environment through getenv_1a() or getenv_1b()
 * (selected by argc) and passes it to system_1a(). system_1b() is called with
 * the constant "whoami", and the result of getenv_1c() is discarded.
 * Returns EXIT_FAILURE when the selected environment variable is not set.
 */
int main(int argc, char *argv[]) {
    char *env_cmd;

    if(argc <= 1) {
        env_cmd = getenv_1a();
    } else {
        env_cmd = getenv_1b();
    }
    /* Note: env_cmd is printed before it is checked for NULL. */
    fprintf(stdout, "main: '%s'\n", env_cmd);
    if(env_cmd == NULL) {
        fprintf(stderr, "SYSTEM_COMMAND environment variable not set.\n");
        return EXIT_FAILURE;
    }
    system_1a(env_cmd);
    system_1b("whoami");
    getenv_1c();
    return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------
/tests/data/src/object_oriented-01.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <cstdlib>
 3 | 
 4 | using namespace std;
 5 | 
 6 | /*
 7 | Testcase Description:
 8 | - Inheritance
 9 | - Without virtual functions (no polymorphism)
10 | */
11 | 
// Base class of the test case. It stores a name and offers my_func(), which
// passes its argument to system(). None of its members is virtual.
class MyParent {
protected:
    // Name printed by the constructors, destructors and my_func(); the pointer
    // is stored as given (no copy is made)
    const char* name;

public:
    // Stores the name and prints a greeting
    __attribute__ ((noinline, optimize("O0")))
    MyParent(const char* name) {
        this->name = name;
        cout << "MyParent Constructor: Hello " << this->name << "!" << endl;
    }

    // Prints a goodbye message (non-virtual destructor)
    __attribute__ ((noinline, optimize("O0")))
    ~MyParent() {
        cout << "MyParent Destructor: Goodbye " << this->name << "!" << endl;
    }

    // Sink: prints the command and executes it with system()
    __attribute__ ((noinline, optimize("O0")))
    void my_func(const char* cmd) {
        cout << "MyParent::my_func: " << this->name << " calls `system('" << cmd << "')`!" << endl;
        system(cmd);
    }
};
34 | 
// Derived class of the test case. Its my_func() has the same signature as
// MyParent::my_func() but calls popen() instead of system(). As the base
// function is not virtual, this version hides it rather than overriding it.
class MyChild : public MyParent {
public:
    // Delegates the name to the MyParent constructor and prints a greeting
    __attribute__ ((noinline, optimize("O0")))
    MyChild(const char* name) : MyParent(name) {
        cout << "MyChild Constructor: Hello " << this->name << "!" << endl;
    }

    // Prints a goodbye message
    __attribute__ ((noinline, optimize("O0")))
    ~MyChild() {
        cout << "MyChild Destructor: Goodbye " << this->name << "!" << endl;
    }

    // Sink: prints the command, opens it with popen() in read mode and closes
    // the stream again if it could be opened
    __attribute__ ((noinline, optimize("O0")))
    void my_func(const char* cmd) {
        cout << "MyChild::my_func: " << this->name << " calls `popen('" << cmd << "', 'r')`!" << endl;
        FILE* fp = popen(cmd, "r");
        if(fp != NULL) {
            pclose(fp);
        }
    }
};
56 | 
57 | __attribute__ ((noinline, optimize("O3")))
58 | int main(int argc, char *argv[]) {  // Passes getenv("CMD") to my_func() of both objects
59 |     MyParent* p = new MyParent("Alice");
60 |     MyParent* c = new MyChild("Bob");  // Static type is MyParent*, dynamic type is MyChild
61 |     char* cmd = getenv("CMD");  // NULL if CMD is not set
62 |     if(cmd != NULL) {
63 |         p->my_func(cmd);
64 |         c->my_func(cmd);  // my_func is non-virtual, so this resolves to MyParent::my_func as well
65 |     }
66 |     delete p;
67 |     delete c;  // NOTE(review): deletes a MyChild through MyParent* without a virtual destructor (undefined behavior); presumably intended for this testcase - confirm
68 |     return EXIT_SUCCESS;
69 | }


--------------------------------------------------------------------------------
/mole/conf/001-settings.yml:
--------------------------------------------------------------------------------
 1 | settings:
 2 |   max_workers:
 3 |     help: maximum number of worker threads that backward slicing uses
 4 |     value: 1
 5 |     min_value: -1
 6 |     max_value: 256
 7 |   fix_func_type:
 8 |     help: whether to fix types of source/sink functions before slicing
 9 |     value: false
10 |   max_call_level:
11 |     help: backward slicing visits called functions up to the given level
12 |     value: 10
13 |     min_value: -1
14 |     max_value: 99
15 |   max_slice_depth:
16 |     help: maximum slice depth to stop the search
17 |     value: 1000
18 |     min_value: -1
19 |     max_value: 9999
20 |   max_memory_slice_depth:
21 |     help: maximum memory slice depth to stop the search
22 |     value: 10
23 |     min_value: -1
24 |     max_value: 9999
25 |   src_highlight_color:
26 |     help: color used to highlight instructions originating from slicing a source function
27 |     value: Orange
28 |     items:
29 |     - Blue
30 |     - Green
31 |     - Cyan
32 |     - Red
33 |     - Magenta
34 |     - Yellow
35 |     - Orange
36 |   snk_highlight_color:
37 |     help: color used to highlight instructions originating from slicing a sink function
38 |     value: Red
39 |     items:
40 |     - Blue
41 |     - Green
42 |     - Cyan
43 |     - Red
44 |     - Magenta
45 |     - Yellow
46 |     - Orange
47 |   path_grouping:
48 |     help: strategy used to group paths
49 |     value: Call Graph
50 |   openai_base_url:
51 |     help: OpenAI API base URL
52 |     value: "https://api.openai.com/v1"
53 |   openai_api_key:
54 |     help: OpenAI API key
55 |     value: ""
56 |   openai_model:
57 |     help: OpenAI model
58 |     value: "o4-mini"
59 |   max_turns:
60 |     help: maximum number of turns in a conversation with the AI
61 |     value: 10
62 |     min_value: 1
63 |     max_value: 256
64 |   max_completion_tokens:
65 |     help: maximum number of tokens in a completion
66 |     value: 4096
67 |     min_value: 0
68 |     max_value: 100000
69 |   temperature:
70 |     help: the sampling temperature to use
71 |     value: 1.0
72 |     min_value: 0.0
73 |     max_value: 2.0


--------------------------------------------------------------------------------
/tests/data/src/object_oriented-02.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <cstdlib>
 3 | 
 4 | using namespace std;
 5 | 
 6 | /*
 7 | Testcase Description:
 8 | - Inheritance
 9 | - With virtual functions (polymorphism)
10 | */
11 | 
12 | class MyParent {  // Polymorphic base class: destructor and my_func are virtual
13 | protected:
14 |     const char* name;  // Pointer passed to the constructor; stored as-is, not copied
15 | 
16 | public:
17 |     __attribute__ ((noinline, optimize("O0")))  // GCC attributes: never inline, compile at -O0
18 |     MyParent(const char* name) {  // Stores `name` and prints a greeting
19 |         this->name = name;
20 |         cout << "MyParent Constructor: Hello " << this->name << "!" << endl;
21 |     }
22 | 
23 |     __attribute__ ((noinline, optimize("O0")))
24 |     virtual ~MyParent() {  // Virtual, so `delete` on a MyParent* also runs the derived destructor
25 |         cout << "MyParent Destructor: Goodbye " << this->name << "!" << endl;
26 |     }
27 | 
28 |     __attribute__ ((noinline, optimize("O0")))
29 |     virtual void my_func(const char* cmd) {  // Logs the call, then passes `cmd` unchanged to system()
30 |         cout << "MyParent::my_func: " << this->name << " calls `system('" << cmd << "')`!" << endl;
31 |         system(cmd);
32 |     }
33 | };
34 | 
35 | class MyChild : public MyParent {  // Derived class overriding the virtual destructor and my_func
36 | public:
37 |     __attribute__ ((noinline, optimize("O0")))
38 |     MyChild(const char* name) : MyParent(name) {  // The base constructor stores `name`
39 |         cout << "MyChild Constructor: Hello " << this->name << "!" << endl;
40 |     }
41 | 
42 |     __attribute__ ((noinline, optimize("O0")))
43 |     ~MyChild() override {  // Prints a farewell; ~MyParent runs afterwards
44 |         cout << "MyChild Destructor: Goodbye " << this->name << "!" << endl;
45 |     }
46 | 
47 |     __attribute__ ((noinline, optimize("O0")))
48 |     void my_func(const char* cmd) override {  // Logs the call, then runs `cmd` via popen() and closes the stream
49 |         cout << "MyChild::my_func: " << this->name << " calls `popen('" << cmd << "', 'r')`!" << endl;
50 |         FILE* fp = popen(cmd, "r");
51 |         if(fp != NULL) {  // popen() returns NULL on failure
52 |             pclose(fp);
53 |         }
54 |     }
55 | };
56 | 
57 | __attribute__ ((noinline, optimize("O3")))
58 | int main(int argc, char *argv[]) {  // Passes getenv("CMD") to my_func() of both objects
59 |     MyParent* p = new MyParent("Alice");
60 |     MyParent* c = new MyChild("Bob");  // Static type is MyParent*, dynamic type is MyChild
61 |     char* cmd = getenv("CMD");  // NULL if CMD is not set
62 |     if(cmd != NULL) {
63 |         p->my_func(cmd);  // Runs MyParent::my_func (system)
64 |         c->my_func(cmd);  // Virtual dispatch: runs MyChild::my_func (popen)
65 |     }
66 |     delete p;
67 |     delete c;  // Virtual destructor: runs ~MyChild, then ~MyParent
68 |     return EXIT_SUCCESS;
69 | }


--------------------------------------------------------------------------------
/tests/data/src/pointer_analysis-13.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <stdarg.h>
 4 | 
 5 | /*
 6 | Testcase Description:
 7 | - Multiple source memory definitions
 8 | - Usage of va structs
 9 | */
10 | 
11 | __attribute__ ((noinline))  // GCC attribute: never inline this function
12 | int create_cmd(char** str_ptr, const char* fmt, ...) {  // printf-style: formats into a malloc'ed buffer stored in *str_ptr
13 |     va_list args, args_cpy;
14 |     // Compute length of formatted string (this pass consumes the copy, `args` stays usable)
15 |     va_start(args, fmt);
16 |     va_copy(args_cpy, args);
17 |     int len = vsnprintf(NULL, 0, fmt, args_cpy);
18 |     va_end(args_cpy);
19 |     if(len < 0) {  // Formatting error: return -1 without touching *str_ptr
20 |         va_end(args);
21 |         return -1;
22 |     }
23 |     // Allocate memory for the string plus its terminating NUL; main() frees it
24 |     *str_ptr = (char*) malloc(len + 1);
25 |     if(*str_ptr == NULL) {  // Allocation failed: return -1
26 |         va_end(args);
27 |         return -1;
28 |     }
29 |     // Write formatted string to allocated memory
30 |     int res = vsnprintf(*str_ptr, len + 1, fmt, args);
31 |     va_end(args);
32 |     return res;  // Result of the second vsnprintf; NOTE(review): if negative, *str_ptr stays allocated
33 | }
34 | 
35 | int main(int argc, char *argv[]) {  // Builds "echo <USER_NAME>" with create_cmd() and passes it to system()
36 |     // Source: User inputs via environment variables
37 |     char *env_user_id   = getenv("USER_ID");
38 |     char *env_user_name = getenv("USER_NAME");
39 |     if(env_user_id == NULL || env_user_name == NULL) {
40 |         fprintf(stderr, "Missing environment variables.\n");
41 |         return EXIT_FAILURE;
42 |     }
43 |     int user_id = atoi(env_user_id);
44 |     // Create command string (both branches use the same format; only the error message differs)
45 |     char *cmd = NULL;
46 |     if(user_id == 0) {
47 |         if(create_cmd(&cmd, "echo %s", env_user_name) < 0) {
48 |             fprintf(stderr, "Failed to create command for root user '%s'.\n", env_user_name);
49 |             return EXIT_FAILURE;
50 |         }
51 |     } else {
52 |         if(create_cmd(&cmd, "echo %s", env_user_name) < 0) {
53 |             fprintf(stderr, "Failed to create command for user '%s'.\n", env_user_name);
54 |             return EXIT_FAILURE;
55 |         }
56 |     }
57 |     // Sink: Execute command (it contains the USER_NAME value as-is)
58 |     if(system(cmd) == -1) {
59 |         fprintf(stderr, "Failed to execute command.\n");
60 |         free(cmd);
61 |         return EXIT_FAILURE;
62 |     }
63 |     free(cmd);  // Release the buffer allocated by create_cmd()
64 |     return EXIT_SUCCESS;
65 | }


--------------------------------------------------------------------------------
/tests/slicing/test_multithreading.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from tests.slicing.conftest import TestSlicing
 3 | from typing import List
 4 | import binaryninja as bn
 5 | 
 6 | 
 7 | class TestMultiThreading(TestSlicing):
 8 |     def test_consistency_01(
 9 |         self, filenames: List[str] = ["function_calling-02"]
10 |     ) -> None:
11 |         for file in self.load_files(filenames):
12 |             # Load and analyze test binary with Binary Ninja
13 |             bv = bn.load(file)
14 |             bv.update_analysis_and_wait()
15 |             # Assert results
16 |             paths = self.get_paths(bv, max_workers=1)
17 |             for max_workers in [2, 4, 8, -1]:
18 |                 paths_mt = self.get_paths(bv, max_workers)
19 |                 for path in paths:
20 |                     if path in paths_mt:
21 |                         paths_mt.remove(path)
22 |                     else:
23 |                         assert False, (
24 |                             f"Inconsistent results with {max_workers:d} workers"
25 |                         )
26 |                 assert not paths_mt, (
27 |                     f"Inconsistent results with {max_workers:d} workers"
28 |                 )
29 |             # Close binary
30 |             bv.file.close()
31 |         return
32 | 
33 |     def test_consistency_02(self, filenames: List[str] = ["name_mangling-01"]) -> None:
34 |         self.test_consistency_01(filenames)
35 |         return
36 | 
37 |     def test_consistency_03(self, filenames: List[str] = ["load-05"]) -> None:
38 |         self.test_consistency_01(filenames)
39 |         return
40 | 
41 |     def test_consistency_04(
42 |         self, filenames: List[str] = ["pointer_analysis-06"]
43 |     ) -> None:
44 |         self.test_consistency_01(filenames)
45 |         return
46 | 
47 |     def test_consistency_05(
48 |         self, filenames: List[str] = ["simple_http_server-03"]
49 |     ) -> None:
50 |         self.test_consistency_01(filenames)
51 |         return
52 | 
53 |     def test_consistency_06(self, filenames: List[str] = ["memcpy-05"]) -> None:
54 |         self.test_consistency_01(filenames)
55 |         return
56 | 


--------------------------------------------------------------------------------
/mole/views/sidebar.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from mole.views.path import PathView
 3 | from typing import Any
 4 | import binaryninjaui as bnui
 5 | import os as os
 6 | import PySide6.QtCore as qtc
 7 | import PySide6.QtGui as qtui
 8 | 
 9 | 
10 | class MoleSidebar(bnui.SidebarWidgetType):
11 |     """
12 |     This class implements the view for the plugin's sidebar.
13 |     """
14 | 
15 |     def __init__(self, sidebar_view: PathView) -> None:
16 |         """
17 |         This method initializes a view (MVC pattern).
18 |         """
19 |         super().__init__(self._init_icon(), "Mole")
20 |         self._sidebar_view = sidebar_view
21 |         return
22 | 
23 |     def _init_icon(self) -> qtui.QImage:
24 |         """
25 |         This method initializes the sidebar's icon.
26 |         """
27 |         icon = qtui.QImage(
28 |             os.path.join(
29 |                 os.path.dirname(os.path.abspath(__file__)), "../resources/icon.png"
30 |             )
31 |         )
32 |         if icon.isNull():
33 |             icon = qtui.QImage(56, 56, qtui.QImage.Format_RGB32)
34 |             icon.fill(0)
35 |             p = qtui.QPainter()
36 |             p.begin(icon)
37 |             p.setFont(qtui.QFont("Open Sans", 12))
38 |             p.setPen(qtui.QColor(255, 255, 255, 255))
39 |             p.drawText(qtc.QRectF(0, 0, 56, 56), qtc.Qt.AlignCenter, "MOLE")
40 |             p.end()
41 |         return icon
42 | 
43 |     def init(self) -> PathView:
44 |         """
45 |         This method registers the sidebar with Binary Ninja.
46 |         """
47 |         bnui.Sidebar.addSidebarWidgetType(self)
48 |         return self
49 | 
50 |     def createWidget(self, frame: Any, data: Any) -> PathView:
51 |         """
52 |         This method creates the sidebar's widget.
53 |         """
54 |         return self._sidebar_view
55 | 
56 |     def defaultLocation(self) -> bnui.SidebarWidgetLocation:
57 |         """
58 |         This method places the widget to the right sidebar.
59 |         """
60 |         return bnui.SidebarWidgetLocation.RightContent
61 | 
62 |     def contextSensitivity(self) -> bnui.SidebarContextSensitivity:
63 |         """
64 |         This method configures the widget to use a single instance that detects changes.
65 |         """
66 |         return bnui.SidebarContextSensitivity.SelfManagedSidebarContext
67 | 


--------------------------------------------------------------------------------
/tests/data/src/object_oriented-03.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <cstdlib>
 3 | 
 4 | using namespace std;
 5 | 
 6 | #define MAX_CMD_LENGTH 256
 7 | 
 8 | /*
 9 | Testcase Description:
10 | - Inheritance
11 | - With virtual functions (polymorphism)
12 | - Using member variable assigned in constructor
13 | */
14 | 
15 | class MyParent {  // Polymorphic base class; my_func builds its command from the stored name
16 | protected:
17 |     const char* name;  // Pointer passed to the constructor; stored as-is, not copied
18 | 
19 | public:
20 |     __attribute__ ((noinline, optimize("O0")))  // GCC attributes: never inline, compile at -O0
21 |     MyParent(const char* name) {  // Stores `name` and prints a greeting
22 |         this->name = name;
23 |         cout << "MyParent Constructor: Hello " << this->name << "!" << endl;
24 |     }
25 | 
26 |     __attribute__ ((noinline, optimize("O0")))
27 |     virtual ~MyParent() {  // Virtual, so `delete` on a MyParent* also runs the derived destructor
28 |         cout << "MyParent Destructor: Goodbye " << this->name << "!" << endl;
29 |     }
30 | 
31 |     __attribute__ ((noinline, optimize("O0")))
32 |     virtual void my_func() {  // Formats "echo Hello <name>!" and passes it to system()
33 |         char cmd[MAX_CMD_LENGTH];
34 |         snprintf(cmd, sizeof(cmd), "echo Hello %s!", this->name);  // Output is truncated to fit `cmd`
35 |         cout << "MyParent::my_func: " << this->name << " calls `system('" << string(cmd) << "'`!" << endl;  // NOTE(review): the message has no ')' after the closing quote
36 |         system(cmd);
37 |     }
38 | };
39 | 
40 | class MyChild : public MyParent {  // Derived class overriding the virtual destructor and my_func
41 | public:
42 |     __attribute__ ((noinline, optimize("O0")))
43 |     MyChild(const char* name) : MyParent(name) {  // The base constructor stores `name`
44 |         cout << "MyChild Constructor: Hello " << this->name << "!" << endl;
45 |     }
46 | 
47 |     __attribute__ ((noinline, optimize("O0")))
48 |     ~MyChild() override {  // Prints a farewell; ~MyParent runs afterwards
49 |         cout << "MyChild Destructor: Goodbye " << this->name << "!" << endl;
50 |     }
51 | 
52 |     __attribute__ ((noinline, optimize("O0")))
53 |     void my_func() override {  // Formats "echo Hello <name>!", runs it via popen() and closes the stream
54 |         char cmd[MAX_CMD_LENGTH];
55 |         snprintf(cmd, sizeof(cmd), "echo Hello %s!", this->name);  // Output is truncated to fit `cmd`
56 |         cout << "MyChild::my_func: " << this->name << " calls `popen('" << string(cmd) << "', 'r')`!" << endl;
57 |         FILE* fp = popen(cmd, "r");
58 |         if(fp != NULL) {  // popen() returns NULL on failure
59 |             pclose(fp);
60 |         }
61 |     }
62 | };
63 | 
64 | __attribute__ ((noinline, optimize("O3")))
65 | int main(int argc, char *argv[]) {  // Feeds PARENT_NAME / CHILD_NAME into the objects' constructors
66 |     char* p_name = getenv("PARENT_NAME");  // NULL if PARENT_NAME is not set
67 |     if(p_name != NULL) {
68 |         MyParent* p = new MyParent(p_name);
69 |         p->my_func();  // Runs MyParent::my_func (system)
70 |         delete p;
71 |     }
72 |     char* c_name = getenv("CHILD_NAME");  // NULL if CHILD_NAME is not set
73 |     if(c_name != NULL) {
74 |         MyParent* c = new MyChild(c_name);
75 |         c->my_func();  // Virtual dispatch: runs MyChild::my_func (popen)
76 |         delete c;  // Virtual destructor: runs ~MyChild, then ~MyParent
77 |     }
78 |     return EXIT_SUCCESS;
79 | }


--------------------------------------------------------------------------------
/tests/slicing/test_mangling.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from tests.slicing.conftest import TestSlicing
 3 | from typing import List
 4 | import pytest
 5 | 
 6 | 
 7 | class TestNameMangling(TestSlicing):
 8 |     def test_name_mangling_01(
 9 |         self, filenames: List[str] = ["name_mangling-01"]
10 |     ) -> None:
11 |         self.assert_paths(
12 |             srcs=[("getenv", None)],
13 |             snks=[("system", 1)],
14 |             call_chains=[["overloaded_func", "main"], ["overloaded_func", "main"]],
15 |             filenames=filenames,
16 |         )
17 |         return
18 | 
19 |     def test_name_mangling_02(
20 |         self, filenames: List[str] = ["name_mangling-02"]
21 |     ) -> None:
22 |         self.assert_paths(
23 |             srcs=[("getenv", None)],
24 |             snks=[("system", 1)],
25 |             call_chains=[["MyStruct::my_func", "main"], ["MyClass::my_func", "main"]],
26 |             filenames=filenames,
27 |         )
28 |         return
29 | 
30 |     def test_name_mangling_03(
31 |         self, filenames: List[str] = ["name_mangling-03"]
32 |     ) -> None:
33 |         self.assert_paths(
34 |             srcs=[("getenv", None)],
35 |             snks=[("system", 1)],
36 |             call_chains=[["ns::my_func", "main"]],
37 |             filenames=filenames,
38 |         )
39 |         return
40 | 
41 |     def test_name_mangling_04(
42 |         self, filenames: List[str] = ["name_mangling-04"]
43 |     ) -> None:
44 |         self.assert_paths(
45 |             srcs=[("getenv", None)],
46 |             snks=[("system", 1)],
47 |             call_chains=[["my_func<int32_t>", "main"]],
48 |             filenames=filenames,
49 |         )
50 |         return
51 | 
52 |     @pytest.mark.xfail
53 |     def test_name_mangling_05(
54 |         self, filenames: List[str] = ["name_mangling-05"]
55 |     ) -> None:
56 |         self.assert_paths(
57 |             srcs=[("getenv", None)],
58 |             snks=[("system", 1)],
59 |             call_chains=[
60 |                 ["MyStruct::my_func", "_GLOBAL__sub_I__ZN8MyStruct3cmdE"],
61 |                 ["MyClass::my_func", "_GLOBAL__sub_I__ZN8MyStruct3cmdE"],
62 |             ],
63 |             filenames=filenames,
64 |         )
65 |         return
66 | 
67 |     @pytest.mark.xfail
68 |     def test_name_mangling_06(
69 |         self, filenames: List[str] = ["name_mangling-06"]
70 |     ) -> None:
71 |         self.assert_paths(
72 |             srcs=[("getenv", None)],
73 |             snks=[("system", 1)],
74 |             call_chains=[["MyStruct::my_func", "main", "MyStruct::operator+"]],
75 |             filenames=filenames,
76 |         )
77 |         return
78 | 


--------------------------------------------------------------------------------
/tests/slicing/test_function_out_params.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from tests.slicing.conftest import TestSlicing
 3 | from typing import List
 4 | import binaryninja as bn
 5 | 
 6 | 
 7 | class TestFunctionOutParams(TestSlicing):
 8 |     def test_function_out_params_01(
 9 |         self, filenames: List[str] = ["function_out_params-01"]
10 |     ) -> None:
11 |         self.assert_paths(
12 |             srcs=[("getenv", None)],
13 |             snks=[("memcpy", 3)],
14 |             call_chains=[["main", "get_size"]],
15 |             filenames=filenames,
16 |         )
17 |         return
18 | 
19 |     def test_function_out_params_02(
20 |         self, filenames: List[str] = ["function_out_params-02"]
21 |     ) -> None:
22 |         self.assert_paths(
23 |             srcs=[("getenv", None)],
24 |             snks=[("system", 1)],
25 |             call_chains=[["main", "get_cmd"]],
26 |             filenames=filenames,
27 |         )
28 |         return
29 | 
30 |     def test_function_out_params_03(
31 |         self, filenames: List[str] = ["function_out_params-03"]
32 |     ) -> None:
33 |         self.test_function_out_params_02(filenames)
34 |         return
35 | 
36 |     def test_function_out_params_04(
37 |         self, filenames: List[str] = ["function_out_params-04"]
38 |     ) -> None:
39 |         self.test_function_out_params_02(filenames)
40 |         return
41 | 
42 |     def test_function_out_params_05(
43 |         self, filenames: List[str] = ["function_out_params-05"]
44 |     ) -> None:
45 |         self.test_function_out_params_02(filenames)
46 |         return
47 | 
48 |     def test_function_out_params_06(
49 |         self, filenames: List[str] = ["function_out_params-06"]
50 |     ) -> None:
51 |         self.assert_paths(
52 |             srcs=[("getenv", None)],
53 |             snks=[("system", 1)],
54 |             call_chains=[["main"]],
55 |             filenames=filenames,
56 |         )
57 |         return
58 | 
59 |     def test_function_out_params_07(
60 |         self, filenames: List[str] = ["function_out_params-07"]
61 |     ) -> None:
62 |         self.test_function_out_params_06(filenames)
63 |         return
64 | 
65 |     def test_function_out_params_08(
66 |         self, filenames: List[str] = ["function_out_params-08"]
67 |     ) -> None:
68 |         def manually_set_types(bv: bn.BinaryView) -> None:
69 |             get_cmd = bv.get_functions_by_name("get_cmd")[0]
70 |             printf_call_site = get_cmd.call_sites[1]
71 |             printf_type, _ = bv.parse_type_string(
72 |                 "int printf(const char* format, char* msg)"
73 |             )
74 |             get_cmd.set_call_type_adjustment(printf_call_site.address, printf_type)
75 |             bv.update_analysis_and_wait()
76 |             return
77 | 
78 |         self.assert_paths(
79 |             srcs=[("getenv", None)],
80 |             snks=[("system", 1)],
81 |             call_chains=[["main", "check_cmd", "get_cmd"]],
82 |             filenames=filenames,
83 |             bv_callback=manually_set_types,
84 |         )
85 |         return
86 | 


--------------------------------------------------------------------------------
/mole/models/config.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from mole.core.data import Configuration, Function, Library, WidgetSetting
 3 | from typing import Dict, List, Literal, Optional
 4 | 
 5 | 
 6 | class ConfigModel:
 7 |     """
 8 |     This class implements a model to handle Mole's configuration.
 9 |     """
10 | 
11 |     def __init__(self, config: Configuration) -> None:
12 |         """
13 |         Initialize the configuration model with optional pre-loaded configuration.
14 | 
15 |         Args:
16 |             config: A Configuration object to initialize the model with.
17 |                     If None, an empty configuration will be created.
18 |         """
19 |         self._config = config
20 |         return
21 | 
22 |     def get(self) -> Configuration:
23 |         """
24 |         This method returns the configuration.
25 |         """
26 |         return self._config
27 | 
28 |     def set(self, config: Configuration) -> None:
29 |         """
30 |         This method sets the configuration.
31 |         """
32 |         self._config = config
33 |         return
34 | 
35 |     def get_libraries(
36 |         self, fun_type: Optional[Literal["Sources", "Sinks"]]
37 |     ) -> Dict[str, Library]:
38 |         """
39 |         This method returns all libraries matching the given type.
40 |         """
41 |         match fun_type:
42 |             case "Sources":
43 |                 return self._config.sources
44 |             case "Sinks":
45 |                 return self._config.sinks
46 |         return {}
47 | 
48 |     def get_functions(
49 |         self,
50 |         lib_name: Optional[str] = None,
51 |         cat_name: Optional[str] = None,
52 |         fun_name: Optional[str] = None,
53 |         fun_type: Optional[Literal["Sources", "Sinks"]] = None,
54 |         fun_enabled: Optional[bool] = None,
55 |     ) -> List[Function]:
56 |         """
57 |         This method returns all functions matching the given attributes. An attribute of `None`
58 |         indicates that the corresponding attribute is irrelevant.
59 |         """
60 |         funs: List[Function] = []
61 |         match fun_type:
62 |             case "Sources":
63 |                 libs = self._config.sources.values()
64 |             case "Sinks":
65 |                 libs = self._config.sinks.values()
66 |             case _:
67 |                 libs = self._config.sources.values() + self._config.sinks.values()
68 |         for lib in libs:
69 |             if lib_name is None or lib.name == lib_name:
70 |                 for cat in lib.categories.values():
71 |                     if cat_name is None or cat.name == cat_name:
72 |                         for fun in cat.functions.values():
73 |                             if fun_name is None or fun.name == fun_name:
74 |                                 if fun_enabled is None or fun.enabled == fun_enabled:
75 |                                     funs.append(fun)
76 |         return funs
77 | 
78 |     def get_setting(self, name: str) -> Optional[WidgetSetting]:
79 |         """
80 |         This method returns the setting with name `name`.
81 |         """
82 |         return self._config.settings.get(name, None)
83 | 


--------------------------------------------------------------------------------
/mole/common/helper/symbol.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from typing import Dict, List, Optional, Set
 3 | import binaryninja as bn
 4 | 
 5 | 
 6 | class SymbolHelper:
 7 |     """
 8 |     This class provides helper functions with respect to symbols.
 9 |     """
10 | 
11 |     @staticmethod
12 |     def get_symbol_by_section(
13 |         bv: bn.BinaryView, symbol_name: str, section_name: str = ".plt"
14 |     ) -> Optional[bn.CoreSymbol]:
15 |         """
16 |         This method returns the symbol with name `symbol_name` belonging to section `section_name`.
17 |         """
18 |         section = bv.get_section_by_name(section_name)
19 |         if section is None:
20 |             return None
21 |         for symbol in bv.symbols.get(symbol_name, []):
22 |             if section.start <= symbol.address < section.end:
23 |                 return symbol
24 |         return None
25 | 
26 |     @staticmethod
27 |     def get_code_refs(
28 |         bv: bn.BinaryView, symbol_names: List[str]
29 |     ) -> Dict[str, Set[bn.MediumLevelILInstruction]]:
30 |         """
31 |         This method determines code references for the provided `symbol_names`. The returned
32 |         dictionary contains individual `symbol_names` as keys, and the corresponding code references
33 |         as values. Code references correspond to `bn.MediumLevelILInstruction`s in SSA form.
34 |         """
35 |         mlil_ssa_code_refs = {}
36 |         for symbol_name in symbol_names:
37 |             for symbol in bv.symbols.get(symbol_name, []):
38 |                 # Check if the symbol is in the PE sections .idata
39 |                 idata = bv.sections.get(".idata")
40 |                 in_idata = idata.start <= symbol.address < idata.end if idata else False
41 |                 # Check if the symbol is in the PE sections .synthetic_builtins
42 |                 synthetic = bv.sections.get(".synthetic_builtins")
43 |                 in_synthetic_builtins = (
44 |                     synthetic.start <= symbol.address < synthetic.end
45 |                     if synthetic
46 |                     else False
47 |                 )
48 |                 # Check if there is code at the symbol address
49 |                 in_code = bv.get_function_at(symbol.address) is not None
50 |                 # Ignore symbols that are neither in code, the .idata or .synthetic_builtins sections
51 |                 if not (in_code or in_idata or in_synthetic_builtins):
52 |                     continue
53 |                 # Store code references
54 |                 mlil_insts: Set[bn.MediumLevelILInstruction] = mlil_ssa_code_refs.get(
55 |                     symbol_name, set()
56 |                 )
57 |                 for code_ref in bv.get_code_refs(symbol.address):
58 |                     # Store all instructions at the code reference address
59 |                     funcs = bv.get_functions_containing(code_ref.address)
60 |                     if funcs is None:
61 |                         continue
62 |                     for func in funcs:
63 |                         if (
64 |                             func is None
65 |                             or func.mlil is None
66 |                             or func.mlil.ssa_form is None
67 |                         ):
68 |                             continue
69 |                         func = func.mlil.ssa_form
70 |                         for inst in func.instructions:
71 |                             if inst.address == code_ref.address:
72 |                                 mlil_insts.add(inst)
73 |                 if mlil_insts:
74 |                     mlil_ssa_code_refs[symbol_name] = mlil_insts
75 |         return mlil_ssa_code_refs
76 | 


--------------------------------------------------------------------------------
/tests/slicing/test_simple_server.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from tests.slicing.conftest import TestSlicing
  3 | from typing import List
  4 | 
  5 | 
  6 | class TestSimpleServer(TestSlicing):
  7 |     def test_simple_http_server_01(
  8 |         self, filenames: List[str] = ["simple_http_server-01"]
  9 |     ) -> None:
 10 |         self.assert_paths(
 11 |             srcs=[("recv", 2)],
 12 |             snks=[("system", 1)],
 13 |             call_chains=[["handle_get_request"], ["handle_post_request"]],
 14 |             filenames=filenames,
 15 |         )
 16 |         return
 17 | 
 18 |     def test_simple_http_server_02(
 19 |         self, filenames: List[str] = ["simple_http_server-02"]
 20 |     ) -> None:
 21 |         self.assert_paths(
 22 |             srcs=[("recv", 2)],
 23 |             snks=[("system", 1)],
 24 |             call_chains=[
 25 |                 ["execute_cgi_command", "handle_get_request", "receive_data"],
 26 |                 ["execute_cgi_command", "handle_post_request", "receive_data"],
 27 |             ],
 28 |             filenames=filenames,
 29 |         )
 30 |         return
 31 | 
 32 |     def test_simple_http_server_03(
 33 |         self, filenames: List[str] = ["simple_http_server-03"]
 34 |     ) -> None:
 35 |         self.assert_paths(
 36 |             srcs=[("recv", 2)],
 37 |             snks=[("system", 1)],
 38 |             call_chains=[
 39 |                 [
 40 |                     "execute_cgi_command",
 41 |                     "wrap_and_execute",
 42 |                     "process_request",
 43 |                     "handle_get_request",
 44 |                     "receive_data",
 45 |                 ],
 46 |                 [
 47 |                     "execute_cgi_command",
 48 |                     "wrap_and_execute",
 49 |                     "process_request",
 50 |                     "handle_post_request",
 51 |                     "receive_data",
 52 |                 ],
 53 |                 [
 54 |                     "execute_cgi_command",
 55 |                     "wrap_and_execute",
 56 |                     "process_request",
 57 |                     "handle_put_request",
 58 |                     "receive_data",
 59 |                 ],
 60 |                 [
 61 |                     "execute_cgi_command",
 62 |                     "wrap_and_execute",
 63 |                     "process_request",
 64 |                     "handle_delete_request",
 65 |                     "receive_data",
 66 |                 ],
 67 |             ],
 68 |             filenames=filenames,
 69 |         )
 70 |         return
 71 | 
 72 |     def test_simple_http_server_04(
 73 |         self, filenames: List[str] = ["simple_http_server-04"]
 74 |     ) -> None:
 75 |         self.assert_paths(
 76 |             srcs=[("recv", 2)],
 77 |             snks=[("system", 1)],
 78 |             call_chains=[
 79 |                 [
 80 |                     "execute_cgi_command",
 81 |                     "process_post_request",
 82 |                     "handle_post_request",
 83 |                     "receive_data",
 84 |                 ],
 85 |                 [
 86 |                     "execute_cgi_command",
 87 |                     "process_post_request",
 88 |                     "handle_post_request",
 89 |                     "receive_data",
 90 |                 ],
 91 |                 [
 92 |                     "execute_cgi_command",
 93 |                     "process_post_request",
 94 |                     "handle_post_request",
 95 |                     "receive_data",
 96 |                 ],
 97 |                 [
 98 |                     "execute_cgi_command",
 99 |                     "process_post_request",
100 |                     "handle_post_request",
101 |                     "receive_data",
102 |                 ],
103 |             ],
104 |             filenames=filenames,
105 |         )
106 |         return
107 | 


--------------------------------------------------------------------------------
/tests/slicing/test_various.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from tests.slicing.conftest import TestSlicing
  3 | from typing import List
  4 | import pytest
  5 | 
  6 | 
  7 | class TestVarious(TestSlicing):
  8 |     def test_gets_01(self, filenames: List[str] = ["gets-01"]) -> None:
  9 |         self.assert_paths(
 10 |             srcs=[("gets", 1)],
 11 |             snks=[("gets", 1)],
 12 |             call_chains=[["main"]],
 13 |             filenames=filenames,
 14 |         )
 15 |         return
 16 | 
 17 |     def test_gets_02(self, filenames: List[str] = ["gets-02"]) -> None:
 18 |         self.assert_paths(
 19 |             srcs=[("gets", 1)],
 20 |             snks=[("gets", 1), ("memcpy", 2)],
 21 |             call_chains=[["main"], ["main"]],
 22 |             filenames=filenames,
 23 |         )
 24 |         return
 25 | 
 26 |     def test_sscanf_01(self, filenames: List[str] = ["sscanf-01"]) -> None:
 27 |         self.assert_paths(
 28 |             srcs=[("getenv", None)],
 29 |             snks=[("sscanf", 1), ("__isoc99_sscanf", 1)],
 30 |             call_chains=[["main"]],
 31 |             filenames=filenames,
 32 |         )
 33 |         return
 34 | 
 35 |     def test_memcpy_01(self, filenames: List[str] = ["memcpy-01"]) -> None:
 36 |         self.assert_paths(
 37 |             srcs=[("getenv", None)],
 38 |             snks=[("memcpy", 3)],
 39 |             call_chains=[["main"]],
 40 |             filenames=filenames,
 41 |         )
 42 |         return
 43 | 
 44 |     def test_memcpy_02(self, filenames: List[str] = ["memcpy-02"]) -> None:
 45 |         self.assert_paths(
 46 |             srcs=[("getenv", None)],
 47 |             snks=[("memcpy", 2), ("memcpy", 3)],
 48 |             call_chains=[["main"], ["main"]],
 49 |             filenames=filenames,
 50 |         )
 51 |         return
 52 | 
 53 |     def test_memcpy_03(self, filenames: List[str] = ["memcpy-03"]) -> None:
 54 |         self.assert_paths(
 55 |             srcs=[("getenv", None)],
 56 |             snks=[("memcpy", 1)],
 57 |             call_chains=[["main"]],
 58 |             filenames=filenames,
 59 |         )
 60 |         return
 61 | 
 62 |     def test_memcpy_04(self, filenames: List[str] = ["memcpy-04"]) -> None:
 63 |         self.assert_paths(
 64 |             srcs=[("getenv", None)],
 65 |             snks=[("memcpy", 3)],
 66 |             call_chains=[["main", "my_getenv"]],
 67 |             filenames=filenames,
 68 |         )
 69 |         return
 70 | 
 71 |     def test_memcpy_05(self, filenames: List[str] = ["memcpy-05"]) -> None:
 72 |         self.assert_paths(
 73 |             srcs=[("getenv", None)],
 74 |             snks=[("memcpy", 2), ("memcpy", 3)],
 75 |             call_chains=[["main", "my_getenv"], ["main", "my_getenv"]],
 76 |             filenames=filenames,
 77 |         )
 78 |         return
 79 | 
 80 |     def test_memcpy_06(self, filenames: List[str] = ["memcpy-06"]) -> None:
 81 |         self.assert_paths(
 82 |             srcs=[],
 83 |             snks=[],
 84 |             call_chains=[],
 85 |             filenames=filenames,
 86 |         )
 87 |         return
 88 | 
 89 |     def test_memcpy_07(self, filenames: List[str] = ["memcpy-07"]) -> None:
 90 |         self.test_memcpy_02(filenames)
 91 |         return
 92 | 
 93 |     @pytest.mark.xfail
 94 |     def test_memcpy_08(self, filenames: List[str] = ["memcpy-08"]) -> None:
 95 |         self.test_memcpy_06(filenames)
 96 |         return
 97 | 
 98 |     def test_memcpy_09(self, filenames: List[str] = ["memcpy-09"]) -> None:
 99 |         self.test_memcpy_06(filenames)
100 |         return
101 | 
102 |     @pytest.mark.xfail
103 |     def test_memcpy_10(self, filenames: List[str] = ["memcpy-10"]) -> None:
104 |         self.test_memcpy_06(filenames)
105 |         return
106 | 
107 |     def test_memcpy_11(self, filenames: List[str] = ["memcpy-11"]) -> None:
108 |         self.test_memcpy_06(filenames)
109 |         return
110 | 


--------------------------------------------------------------------------------
/mole/controllers/ai.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from mole.controllers.config import ConfigController
  3 | from mole.core.data import Path
  4 | from mole.models.ai import AiVulnerabilityReport
  5 | from mole.views.ai import AiView
  6 | from mole.services.ai import AiService
  7 | from typing import Callable, List, Tuple
  8 | import binaryninja as bn
  9 | 
 10 | 
 11 | tag = "Mole.AI"
 12 | 
 13 | 
 14 | class AiController:
 15 |     """
 16 |     This class implements a controller to analyze paths using AI.
 17 |     """
 18 | 
 19 |     def __init__(
 20 |         self,
 21 |         ai_view: AiView,
 22 |         config_ctr: ConfigController,
 23 |     ) -> None:
 24 |         """
 25 |         This method initializes the AI controller.
 26 |         """
 27 |         # Initialization
 28 |         self.ai_view = ai_view.init(self)
 29 |         self.config_ctr = config_ctr
 30 |         return
 31 | 
 32 |     def analyze_paths(
 33 |         self,
 34 |         bv: bn.BinaryView,
 35 |         paths: List[Tuple[int, Path]],
 36 |         analyzed_path: Callable[[int, AiVulnerabilityReport], None],
 37 |     ) -> AiService:
 38 |         """
 39 |         This method starts a service that analyzes each path using AI.
 40 |         """
 41 |         # Get settings
 42 |         max_workers = None
 43 |         max_workers_setting = self.config_ctr.get_setting("max_workers")
 44 |         if max_workers_setting:
 45 |             max_workers = int(max_workers_setting.value)
 46 |             if max_workers <= 0:
 47 |                 max_workers = None
 48 |         base_url = ""
 49 |         base_url_setting = self.config_ctr.get_setting("openai_base_url")
 50 |         if base_url_setting:
 51 |             base_url = str(base_url_setting.value)
 52 |         api_key = ""
 53 |         api_key_setting = self.config_ctr.get_setting("openai_api_key")
 54 |         if api_key_setting:
 55 |             api_key = str(api_key_setting.value)
 56 |         model = ""
 57 |         model_setting = self.config_ctr.get_setting("openai_model")
 58 |         if model_setting:
 59 |             model = str(model_setting.value)
 60 |         max_turns = 0
 61 |         max_turns_setting = self.config_ctr.get_setting("max_turns")
 62 |         if max_turns_setting:
 63 |             max_turns = int(max_turns_setting.value)
 64 |         max_completion_tokens = None
 65 |         max_completion_tokens_setting = self.config_ctr.get_setting(
 66 |             "max_completion_tokens"
 67 |         )
 68 |         if max_completion_tokens_setting:
 69 |             max_completion_tokens = int(max_completion_tokens_setting.value)
 70 |             if max_completion_tokens < 1:
 71 |                 max_completion_tokens = None
 72 |         temperature = None
 73 |         temperature_setting = self.config_ctr.get_setting("temperature")
 74 |         if temperature_setting:
 75 |             temperature = float(temperature_setting.value)
 76 |             if temperature < 0.0 or temperature > 2.0:
 77 |                 temperature = None
 78 |         # Initialize and start AI service
 79 |         ai_service = AiService(
 80 |             bv=bv,
 81 |             paths=paths,
 82 |             analyzed_path=analyzed_path,
 83 |             max_workers=max_workers,
 84 |             base_url=base_url,
 85 |             api_key=api_key,
 86 |             model=model,
 87 |             max_turns=max_turns,
 88 |             max_completion_tokens=max_completion_tokens,
 89 |             temperature=temperature,
 90 |             initial_progress_text="Mole analyzes paths...",
 91 |             can_cancel=True,
 92 |         )
 93 |         ai_service.start()
 94 |         # Return AI service instance
 95 |         return ai_service
 96 | 
 97 |     def show_report(self, report: AiVulnerabilityReport) -> None:
 98 |         """
 99 |         This method shows the AI-generated `report` in the AI view.
100 |         """
101 |         self.ai_view.show_report(report)
102 |         return
103 | 


--------------------------------------------------------------------------------
/development/update_dependencies.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | import json
  3 | import os
  4 | import pathlib
  5 | import tomli
  6 | 
  7 | 
  8 | def extract_dependencies(pyproject_path: pathlib.Path) -> List[str]:
  9 |     """Extract pip dependencies from pyproject.toml file."""
 10 |     with open(pyproject_path, "rb") as f:
 11 |         try:
 12 |             pyproject_data = tomli.load(f)
 13 |         except Exception as e:
 14 |             print(f"Error parsing pyproject.toml: {str(e):s}")
 15 |             return []
 16 | 
 17 |     # Check for dependencies in different possible locations
 18 |     dependencies = []
 19 | 
 20 |     # Check for project.dependencies (PEP 621 format)
 21 |     if "project" in pyproject_data and "dependencies" in pyproject_data["project"]:
 22 |         dependencies.extend(pyproject_data["project"]["dependencies"])
 23 | 
 24 |     # Check for tool.poetry.dependencies (Poetry format)
 25 |     elif "tool" in pyproject_data and "poetry" in pyproject_data["tool"]:
 26 |         poetry_deps = pyproject_data["tool"]["poetry"].get("dependencies", {})
 27 |         # Filter out python dependency and convert dict to requirements format
 28 |         for pkg, version in poetry_deps.items():
 29 |             if pkg != "python":
 30 |                 if isinstance(version, str):
 31 |                     dependencies.append(f"{pkg:s}=={version:s}")
 32 |                 elif isinstance(version, dict) and "version" in version:
 33 |                     dependencies.append(f"{pkg:s}=={version['version']:s}")
 34 |                 else:
 35 |                     dependencies.append(pkg)
 36 | 
 37 |     # Check for tool.flit.metadata.requires (Flit format)
 38 |     elif "tool" in pyproject_data and "flit" in pyproject_data["tool"]:
 39 |         if "metadata" in pyproject_data["tool"]["flit"]:
 40 |             flit_deps = pyproject_data["tool"]["flit"]["metadata"].get("requires", [])
 41 |             dependencies.extend(flit_deps)
 42 | 
 43 |     return sorted(dependencies)
 44 | 
 45 | 
 46 | def update_plugin_json(plugin_json_path: pathlib.Path, dependencies: List[str]) -> None:
 47 |     """Update the dependencies field in plugin.json"""
 48 |     try:
 49 |         with open(plugin_json_path, "r") as f:
 50 |             plugin_data = json.load(f)
 51 | 
 52 |         # Update the dependencies field
 53 |         if "dependencies" not in plugin_data:
 54 |             plugin_data["dependencies"] = {}
 55 | 
 56 |         plugin_data["dependencies"]["pip"] = dependencies
 57 | 
 58 |         # Write back to the file
 59 |         with open(plugin_json_path, "w") as f:
 60 |             json.dump(plugin_data, f, indent=2)
 61 | 
 62 |         print(f"Updated dependencies in '{str(plugin_json_path):s}'")
 63 |     except Exception as e:
 64 |         print(f"Error updating plugin.json: {str(e):s}")
 65 |     return
 66 | 
 67 | 
 68 | def create_requirements_txt(
 69 |     requirements_path: pathlib.Path, dependencies: List[str]
 70 | ) -> None:
 71 |     """Create a requirements.txt file from the dependencies"""
 72 |     try:
 73 |         with open(requirements_path, "w") as f:
 74 |             for dep in dependencies:
 75 |                 f.write(f"{dep:s}\n")
 76 |         print(f"Created requirements.txt at '{str(requirements_path):s}'")
 77 |     except Exception as e:
 78 |         print(f"Error creating requirements.txt: {str(e):s}")
 79 |     return
 80 | 
 81 | 
 82 | def main() -> None:
 83 |     # Get the directory of the current script
 84 |     script_dir = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
 85 |     # pyproject.toml and plugin.json are in the parent folder of the script
 86 |     pyproject_path = script_dir.parent / "pyproject.toml"
 87 |     requirements_path = script_dir.parent / "requirements.txt"
 88 | 
 89 |     if not pyproject_path.exists():
 90 |         print("Error: pyproject.toml not found")
 91 |         return
 92 | 
 93 |     dependencies = extract_dependencies(pyproject_path)
 94 | 
 95 |     # Create requirements.txt
 96 |     create_requirements_txt(requirements_path, dependencies)
 97 |     return
 98 | 
 99 | 
100 | if __name__ == "__main__":
101 |     main()
102 | 


--------------------------------------------------------------------------------
/mole/grouping/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This package contains implementations to group paths. All `PathGrouper` subclasses are imported
  3 | here to be discovered dynamically.
  4 | """
  5 | 
  6 | from __future__ import annotations
  7 | from mole.core.data import Path
  8 | from abc import ABC, abstractmethod
  9 | from typing import Dict, List, Tuple, Type
 10 | import importlib
 11 | import inspect
 12 | import os
 13 | import pkgutil
 14 | import sys
 15 | 
 16 | 
 17 | class PathGrouper(ABC):
 18 |     """
 19 |     This class is an abstract base class for path grouping strategies. Implementations should
 20 |     provide logic for how paths are organized in a tree structure.
 21 |     """
 22 | 
 23 |     @abstractmethod
 24 |     def get_group_keys(self, path: Path, *args, **kwargs) -> List[Tuple[str, str, int]]:
 25 |         """
 26 |         This method returns a list of hierarchy keys for organizing paths. Each key is a tuple of
 27 |         (display_name, internal_id, level). The level indicates the depth in the tree (0=root,
 28 |         1=first level, etc.).
 29 | 
 30 |         Args:
 31 |             path: `Path` object to be grouped
 32 |             args:   Custom positional arguments
 33 |             kwargs: Custom keyword arguments
 34 | 
 35 |         Returns:
 36 |             List of tuples containing (display_name, internal_id, level)
 37 |         """
 38 |         raise NotImplementedError
 39 | 
 40 |     @abstractmethod
 41 |     def get_strategy_name(self) -> str:
 42 |         """
 43 |         This method returns the name of this grouping strategy. This should match the corresponding
 44 |         strategy constant.
 45 |         """
 46 |         raise NotImplementedError
 47 | 
 48 |     @staticmethod
 49 |     def get_all_subclasses() -> List[Type["PathGrouper"]]:
 50 |         """
 51 |         This method recursively returns all subclasses of `PathGrouper`.
 52 | 
 53 |         Returns:
 54 |             List of `PathGrouper` subclass types
 55 |         """
 56 |         all_subclasses = []
 57 |         for subclass in PathGrouper.__subclasses__():
 58 |             all_subclasses.append(subclass)
 59 |             all_subclasses.extend(subclass.__subclasses__())
 60 |         return all_subclasses
 61 | 
 62 |     @staticmethod
 63 |     def get_strategy_map() -> Dict[str, PathGrouper]:
 64 |         """
 65 |         This method returns a mapping of all available strategy names to their implementations.
 66 |         Dynamically discovers all `PathGrouper` subclasses.
 67 | 
 68 |         Returns:
 69 |             Dictionary mapping strategy names to `PathGrouper` instances
 70 |         """
 71 |         strategy_map = {"None": None}
 72 |         # Find all PathGrouper subclasses and instantiate them
 73 |         for cls in PathGrouper.get_all_subclasses():
 74 |             # Skip the abstract base class itself
 75 |             if cls == PathGrouper or inspect.isabstract(cls):
 76 |                 continue
 77 |             try:
 78 |                 instance = cls()
 79 |                 strategy_map[instance.get_strategy_name()] = instance
 80 |             except Exception as e:
 81 |                 print(
 82 |                     f"Error instantiating {cls.__name__:s}: {str(e):s}", file=sys.stderr
 83 |                 )
 84 |         return strategy_map
 85 | 
 86 | 
 87 | def get_all_grouping_strategies() -> List[str]:
 88 |     """
 89 |     This method returns a list of all available strategy names.
 90 | 
 91 |     Returns:
 92 |         List of strategy names as strings
 93 |     """
 94 |     return list(PathGrouper.get_strategy_map().keys())
 95 | 
 96 | 
 97 | def get_grouper(strategy: str) -> PathGrouper:
 98 |     """
 99 |     This method is a factory method to create a grouper based on the strategy.
100 | 
101 |     Args:
102 |         strategy: The strategy name
103 | 
104 |     Returns:
105 |         An instance of the appropriate `PathGrouper` implementation or None if the strategy is
106 |         invalid
107 |     """
108 |     return PathGrouper.get_strategy_map().get(strategy, None)
109 | 
110 | 
# Import every module of this package once `PathGrouper` is defined, so that the subclasses
# declared in those modules exist when the strategy map is built
package_dir = os.path.dirname(__file__)
for _finder, module_name, _is_pkg in pkgutil.iter_modules([package_dir]):
    # Never re-import this module itself (would be a circular import)
    if module_name == "__init__":
        continue
    importlib.import_module(f"{__name__:s}.{module_name:s}")
117 | 


--------------------------------------------------------------------------------
/mole/common/parse.py:
--------------------------------------------------------------------------------
  1 | from lark import Lark, Token, Transformer, v_args
  2 | from mole.common.log import log
  3 | from typing import Callable, Optional
  4 | 
  5 | 
  6 | tag = "Mole.Parse"
  7 | 
  8 | 
  9 | class LogicalExpressionParser:
 10 |     """
 11 |     This class parses logical expressions.
 12 |     """
 13 | 
 14 |     grammar = """
 15 |     start: expr
 16 | 
 17 |     ?expr: term ("or" term)* -> or_expr
 18 |     ?term: factor ("and" factor)* -> and_expr
 19 |     ?factor: atom
 20 |           | "not" factor -> not_expr
 21 |           | "(" expr ")"
 22 |     ?atom: "True" -> true
 23 |          | "true" -> true
 24 |          | "False" -> false
 25 |          | "false" -> false
 26 |          | "i" "==" value -> eq
 27 |          | "i" "!=" value -> neq
 28 |          | "i" ">" value -> gt
 29 |          | "i" "<" value -> lt
 30 |          | "i" ">=" value -> ge
 31 |          | "i" "<=" value -> le
 32 |          | "i" -> var
 33 | 
 34 |     ?value: "-" NUMBER -> neg_number
 35 |       | NUMBER -> number
 36 | 
 37 |     %import common.NUMBER
 38 |     %import common.WS
 39 |     %ignore WS
 40 |     """
 41 | 
 42 |     def __init__(self) -> None:
 43 |         """
 44 |         This method initializes a parser for logical expressions.
 45 |         """
 46 |         self._parser = Lark(
 47 |             grammar=self.grammar,
 48 |             parser="lalr",
 49 |             transformer=LogicalExpressionTransformer(),
 50 |         )
 51 |         return
 52 | 
 53 |     def parse(self, expr: str) -> Optional[Callable[[int], bool]]:
 54 |         """
 55 |         This method parses a logical exression.
 56 |         """
 57 |         if expr:
 58 |             try:
 59 |                 e = self._parser.parse(expr).children[0]
 60 | 
 61 |                 def f(i):
 62 |                     return eval(e)
 63 | 
 64 |                 return f
 65 |             except Exception as e:
 66 |                 log.warn(tag, f"Failed to parse expression '{expr}': {str(e):s}")
 67 |         return None
 68 | 
 69 | 
 70 | @v_args(inline=True)
 71 | class LogicalExpressionTransformer(Transformer):
 72 |     """
 73 |     This class convers Lark trees into logical expressions.
 74 |     """
 75 | 
 76 |     def or_expr(self, *exprs: str) -> str:
 77 |         """
 78 |         This method adds 'or' logic.
 79 |         """
 80 |         return f"({') or ('.join(exprs):s})"
 81 | 
 82 |     def and_expr(self, *exprs: str) -> str:
 83 |         """
 84 |         This method adds 'and' logic.
 85 |         """
 86 |         return f"({') and ('.join(exprs):s})"
 87 | 
 88 |     def not_expr(self, expr: str) -> str:
 89 |         """
 90 |         This method adds 'not' logic.
 91 |         """
 92 |         return f"not ({expr:s})"
 93 | 
 94 |     def eq(self, value: str) -> str:
 95 |         """
 96 |         This method adds '==' logic.
 97 |         """
 98 |         return f"i == {value:s}"
 99 | 
100 |     def neq(self, value: str) -> str:
101 |         """
102 |         This method adds '!=' logic.
103 |         """
104 |         return f"i != {value:s}"
105 | 
106 |     def gt(self, value: str) -> str:
107 |         """
108 |         This method adds '>' logic.
109 |         """
110 |         return f"i > {value:s}"
111 | 
112 |     def lt(self, value: str) -> str:
113 |         """
114 |         This method adds '<' logic.
115 |         """
116 |         return f"i < {value:s}"
117 | 
118 |     def ge(self, value: str) -> str:
119 |         """
120 |         This method adds '>=' logic.
121 |         """
122 |         return f"i >= {value:s}"
123 | 
124 |     def le(self, value: str) -> str:
125 |         """
126 |         This method adds '<=' logic.
127 |         """
128 |         return f"i <= {value:s}"
129 | 
130 |     def var(self) -> str:
131 |         """
132 |         This method adds variables logic.
133 |         """
134 |         return "i"
135 | 
136 |     def neg_number(self, t: Token) -> str:
137 |         """
138 |         This method adds numbers logic.
139 |         """
140 |         return f"-{t.value:s}"
141 | 
142 |     def number(self, t: Token) -> str:
143 |         """
144 |         This method adds numbers logic.
145 |         """
146 |         return f"{t.value:s}"
147 | 
148 |     def true(self) -> str:
149 |         """
150 |         This method adds 'True' logic.
151 |         """
152 |         return "True"
153 | 
154 |     def false(self) -> str:
155 |         """
156 |         This method adds 'False' logic.
157 |         """
158 |         return "False"
159 | 


--------------------------------------------------------------------------------
/tests/data/src/simple_http_server-01.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <unistd.h>
  5 | #include <arpa/inet.h>
  6 | 
  7 | #define PORT 8080
  8 | #define BUFFER_SIZE 256
  9 | 
 10 | /*
 11 | Testcase Description:
 12 | - server example
 13 | - allow function inlining
 14 | */
 15 | 
 16 | void handle_client(int client_socket);
 17 | void execute_cgi_command(const char *buffer);
 18 | void send_response(int client_socket, const char *response);
 19 | int create_server_socket(struct sockaddr_in *address);
 20 | void handle_get_request(int client_socket);
 21 | void handle_post_request(int client_socket);
 22 | char* receive_data(int client_socket, int *size);
 23 | 
/*
 * Program entry point.
 * Creates the listening socket via create_server_socket() and then accepts
 * clients in an endless loop, passing each accepted socket to handle_client().
 * A failing accept() closes the listening socket and exits with EXIT_FAILURE.
 */
int main() {
    int server_fd, client_socket;
    struct sockaddr_in address;
    int addrlen = sizeof(address);

    server_fd = create_server_socket(&address);

    // Clients are served one after another: handle_client() returns before the next accept()
    while (1) {
        if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept");
            close(server_fd);
            exit(EXIT_FAILURE);
        }
        handle_client(client_socket);
    }

    // Not reached: the loop above has no break
    close(server_fd);
    return 0;
}
 43 | 
/*
 * Reads one chunk from the client and dispatches on its first four bytes:
 * "GET " goes to handle_get_request(), "POST" to handle_post_request(), and
 * anything else is answered with a 405 response before the socket is closed.
 * The chunk read here is only used for the dispatch; the request handlers call
 * receive_data() again on the same socket.
 */
void handle_client(int client_socket) {
    int size;
    char *method = receive_data(client_socket, &size);

    // receive_data() failed: nothing to dispatch
    if (method == NULL) {
        close(client_socket);
        return;
    }

    if (strncmp(method, "GET ", 4) == 0) {
        handle_get_request(client_socket);
    } else if (strncmp(method, "POST", 4) == 0) {
        handle_post_request(client_socket);
    } else {
        send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n");
        close(client_socket);
    }

    free(method);
}
 64 | 
/*
 * Handles a GET request: receives one chunk from the client, passes it to
 * execute_cgi_command(), sends a fixed 200 response, then closes the socket
 * and frees the buffer. If receive_data() fails, only the socket is closed.
 */
void handle_get_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    // The received data is handed to the command execution as-is
    execute_cgi_command(buffer);
    send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nGET request received.\n");
    close(client_socket);
    free(buffer);
}
 79 | 
/*
 * Handles a POST request: receives one chunk from the client, passes it to
 * execute_cgi_command(), sends a fixed 200 response, then closes the socket
 * and frees the buffer. If receive_data() fails, only the socket is closed.
 */
void handle_post_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    // The received data is handed to the command execution as-is
    execute_cgi_command(buffer);
    send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nPOST request received.\n");
    close(client_socket);
    free(buffer);
}
 94 | 
/*
 * Runs the text that follows "/cgi-bin/" in `buffer` as a shell command.
 * The command ends at the first space after the prefix; that space is
 * overwritten with '\0' in place (so `buffer` is modified although it is
 * declared const), and the resulting string is passed to system() unchanged.
 * Nothing is executed when the prefix or the terminating space is missing.
 * NOTE(review): presumably the unsanitized system() call is the intended sink
 * of this test case - confirm before changing it.
 */
void execute_cgi_command(const char *buffer) {
    char *cgi_start = strstr(buffer, "/cgi-bin/");
    if (cgi_start) {
        // Skip the prefix so that cgi_start points at the command text
        cgi_start += strlen("/cgi-bin/");
        char *cgi_end = strchr(cgi_start, ' ');
        if (cgi_end) {
            *cgi_end = '\0';
            system(cgi_start);
        }
    }
}
106 | 
/*
 * Writes the NUL-terminated `response` (without the terminator) to
 * `client_socket` with a single write() call. The result of write() is not
 * checked.
 */
void send_response(int client_socket, const char *response) {
    write(client_socket, response, strlen(response));
}
110 | 
111 | int create_server_socket(struct sockaddr_in *address) {
112 |     int server_fd;
113 | 
114 |     if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {
115 |         perror("socket failed");
116 |         exit(EXIT_FAILURE);
117 |     }
118 | 
119 |     address->sin_family = AF_INET;
120 |     address->sin_addr.s_addr = INADDR_ANY;
121 |     address->sin_port = htons(PORT);
122 | 
123 |     if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) {
124 |         perror("bind failed");
125 |         close(server_fd);
126 |         exit(EXIT_FAILURE);
127 |     }
128 | 
129 |     if (listen(server_fd, 3) < 0) {
130 |         perror("listen");
131 |         close(server_fd);
132 |         exit(EXIT_FAILURE);
133 |     }
134 | 
135 |     return server_fd;
136 | }
137 | 
/*
 * Receives one chunk of data from `client_socket`.
 * Allocates BUFFER_SIZE bytes, reads at most BUFFER_SIZE - 1 bytes with a
 * single recv() call and NUL-terminates the data. On success the number of
 * bytes read is stored in `*size` and the buffer is returned; the callers in
 * this file release it with free(). On allocation or recv() failure NULL is
 * returned and `*size` is left untouched.
 */
char* receive_data(int client_socket, int *size) {
    char *buffer = (char *)malloc(BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc");
        return NULL;
    }

    // One byte is kept free for the terminator written below
    int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0);
    if (bytes_read < 0) {
        perror("recv");
        free(buffer);
        return NULL;
    }

    buffer[bytes_read] = '\0';
    *size = bytes_read;
    return buffer;
}


--------------------------------------------------------------------------------
/tests/data/src/simple_http_server-02.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <unistd.h>
  5 | #include <arpa/inet.h>
  6 | 
  7 | #define PORT 8080
  8 | #define BUFFER_SIZE 256
  9 | 
 10 | /*
 11 | Testcase Description:
 12 | - server example
 13 | - disallow function inlining
 14 | */
 15 | 
 16 | __attribute__ ((noinline)) 
 17 | void handle_client(int client_socket);
 18 | 
 19 | __attribute__ ((noinline)) 
 20 | void execute_cgi_command(const char *buffer);
 21 | 
 22 | __attribute__ ((noinline)) 
 23 | void send_response(int client_socket, const char *response);
 24 | 
 25 | __attribute__ ((noinline)) 
 26 | int create_server_socket(struct sockaddr_in *address);
 27 | 
 28 | __attribute__ ((noinline)) 
 29 | void handle_get_request(int client_socket);
 30 | 
 31 | __attribute__ ((noinline)) 
 32 | void handle_post_request(int client_socket);
 33 | 
 34 | __attribute__ ((noinline)) 
 35 | char* receive_data(int client_socket, int *size);
 36 | 
/*
 * Program entry point.
 * Creates the listening socket via create_server_socket() and then accepts
 * clients in an endless loop, passing each accepted socket to handle_client().
 * A failing accept() closes the listening socket and exits with EXIT_FAILURE.
 */
int main() {
    int server_fd, client_socket;
    struct sockaddr_in address;
    int addrlen = sizeof(address);

    server_fd = create_server_socket(&address);

    // Clients are served one after another: handle_client() returns before the next accept()
    while (1) {
        if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept");
            close(server_fd);
            exit(EXIT_FAILURE);
        }
        handle_client(client_socket);
    }

    // Not reached: the loop above has no break
    close(server_fd);
    return 0;
}
 56 | 
/*
 * Reads one chunk from the client and dispatches on its first four bytes:
 * "GET " goes to handle_get_request(), "POST" to handle_post_request(), and
 * anything else is answered with a 405 response before the socket is closed.
 * The chunk read here is only used for the dispatch; the request handlers call
 * receive_data() again on the same socket.
 */
void handle_client(int client_socket) {
    int size;
    char *method = receive_data(client_socket, &size);

    // receive_data() failed: nothing to dispatch
    if (method == NULL) {
        close(client_socket);
        return;
    }

    if (strncmp(method, "GET ", 4) == 0) {
        handle_get_request(client_socket);
    } else if (strncmp(method, "POST", 4) == 0) {
        handle_post_request(client_socket);
    } else {
        send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n");
        close(client_socket);
    }

    free(method);
}
 77 | 
/*
 * Handles a GET request: receives one chunk from the client, passes it to
 * execute_cgi_command(), sends a fixed 200 response, then closes the socket
 * and frees the buffer. If receive_data() fails, only the socket is closed.
 */
void handle_get_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    // The received data is handed to the command execution as-is
    execute_cgi_command(buffer);
    send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nGET request received.\n");
    close(client_socket);
    free(buffer);
}
 92 | 
/*
 * Handles a POST request: receives one chunk from the client, passes it to
 * execute_cgi_command(), sends a fixed 200 response, then closes the socket
 * and frees the buffer. If receive_data() fails, only the socket is closed.
 */
void handle_post_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    // The received data is handed to the command execution as-is
    execute_cgi_command(buffer);
    send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nPOST request received.\n");
    close(client_socket);
    free(buffer);
}
107 | 
/*
 * Runs the text that follows "/cgi-bin/" in `buffer` as a shell command.
 * The command ends at the first space after the prefix; that space is
 * overwritten with '\0' in place (so `buffer` is modified although it is
 * declared const), and the resulting string is passed to system() unchanged.
 * Nothing is executed when the prefix or the terminating space is missing.
 * NOTE(review): presumably the unsanitized system() call is the intended sink
 * of this test case - confirm before changing it.
 */
void execute_cgi_command(const char *buffer) {
    char *cgi_start = strstr(buffer, "/cgi-bin/");
    if (cgi_start) {
        // Skip the prefix so that cgi_start points at the command text
        cgi_start += strlen("/cgi-bin/");
        char *cgi_end = strchr(cgi_start, ' ');
        if (cgi_end) {
            *cgi_end = '\0';
            system(cgi_start);
        }
    }
}
119 | 
/*
 * Writes the NUL-terminated `response` (without the terminator) to
 * `client_socket` with a single write() call. The result of write() is not
 * checked.
 */
void send_response(int client_socket, const char *response) {
    write(client_socket, response, strlen(response));
}
123 | 
124 | int create_server_socket(struct sockaddr_in *address) {  // Create a TCP socket bound to INADDR_ANY:PORT and put it in listening state; exits the process on failure.
125 |     int server_fd;
126 | 
127 |     if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {  // NOTE(review): socket() reports failure as -1, which this "== 0" test does not catch
128 |         perror("socket failed");
129 |         exit(EXIT_FAILURE);
130 |     }
131 | 
132 |     address->sin_family = AF_INET;  // the caller's struct is filled in here
133 |     address->sin_addr.s_addr = INADDR_ANY;
134 |     address->sin_port = htons(PORT);  // port converted to network byte order
135 | 
136 |     if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) {
137 |         perror("bind failed");
138 |         close(server_fd);
139 |         exit(EXIT_FAILURE);
140 |     }
141 | 
142 |     if (listen(server_fd, 3) < 0) {  // backlog argument of 3
143 |         perror("listen");
144 |         close(server_fd);
145 |         exit(EXIT_FAILURE);
146 |     }
147 | 
148 |     return server_fd;  // the listening descriptor
149 | }
150 | 
151 | char* receive_data(int client_socket, int *size) {  // Read one chunk from the socket into a new NUL-terminated heap buffer; NULL on malloc/recv failure.
152 |     char *buffer = (char *)malloc(BUFFER_SIZE);  // callers in this file free() the returned buffer
153 |     if (buffer == NULL) {
154 |         perror("malloc");
155 |         return NULL;
156 |     }
157 | 
158 |     int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0);  // one byte is kept back for the terminator
159 |     if (bytes_read < 0) {
160 |         perror("recv");
161 |         free(buffer);
162 |         return NULL;
163 |     }
164 | 
165 |     buffer[bytes_read] = '\0';  // bytes_read == 0 yields an empty string
166 |     *size = bytes_read;  // only written on the success path
167 |     return buffer;
168 | }


--------------------------------------------------------------------------------
/docs/04-Pointers.md:
--------------------------------------------------------------------------------
  1 | # Pointer Analysis
  2 | ## Pointer and Variable Dereferencing
  3 | See unit-tests `load-01.c`, `load-02.c` and `load-03.c`.
  4 | ## Array Indexing
  5 | Consider the following C source code:
  6 | ```c
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | 
 10 | __attribute__ ((noinline, optimize("O0")))
 11 | int main(int argc, char *argv[]) {
 12 |     if(argc >= 2) {
 13 |         argv[1] = getenv("CMD");    // Source
 14 |         system(argv[1]);            // Sink
 15 |     }
 16 |     return 0;
 17 | }
 18 | ```
 19 | 
 20 | The code is straightforward. It defines a sink function, `system`, which executes the command string stored in `argv[1]`. This argument is assigned a value read from an environment variable, serving as our source.
 21 | 
 22 | **Note**: Yes, assigning to `argv[1]` is intentional. It's just a stand-in for arbitrary array indexing 😉.
 23 | 
 24 | The `main` function's MLIL representation in SSA form is shown below:
 25 | ```
 26 | 00401170    int32_t main(int argc, char** argv)
 27 | 
 28 |  0 @ 0040117d  var_1c#1 = argc#0
 29 |  1 @ 00401180  var_28#1 = argv#0
 30 |  2 @ 00401188  if (var_1c#1 s<= 1) then 3 else 4 @ 0x40118a
 31 |  
 32 |  3 @ 00401188  goto 14 @ 0x4011b7
 33 | 
 34 |  4 @ 0040118a  rax_1#1 = var_28#1
 35 |  5 @ 0040118e  rbx_1#1 = rax_1#1 + 8
 36 |  6 @ 0040119c  rax_2#2, mem#1 = getenv(name: "CMD") @ mem#0 // Source
 37 |  7 @ 004011a1  [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2       // MLIL_STORE: Write quadword to the memory address stored in variable rbx_1#1
 38 |  8 @ 004011a4  rax_3#3 = var_28#1
 39 |  9 @ 004011a8  rax_4#4 = rax_3#3 + 8
 40 | 10 @ 004011ac  rax_5#5 = [rax_4#4].q @ mem#2                // MLIL_LOAD : Read quadword from the memory address stored in variable rax_4#4
 41 | 11 @ 004011af  rdi#1 = rax_5#5
 42 | 12 @ 004011b2  mem#3 = system(line: rdi#1) @ mem#2          // Sink
 43 | 13 @ 004011b2  goto 14 @ 0x4011b7
 44 | 
 45 | 14 @ 004011b7  rax_5#6 = ϕ(rax#0, rax_5#5)
 46 | 15 @ 004011b7  rbx_1#2 = ϕ(rbx#0, rbx_1#1)
 47 | 16 @ 004011b7  rdi#2 = ϕ(argc#0, rdi#1)
 48 | 17 @ 004011b7  mem#4 = ϕ(mem#0, mem#3)
 49 | 18 @ 004011b7  rax_6#7 = 0
 50 | 19 @ 004011c1  return 0
 51 | ```
 52 | 
 53 | If we begin backward slicing from the sink function's parameter `rdi#1`, we may eventually encounter the MLIL_LOAD instruction `[rax_4#4].q @ mem#2` (the use-site). This instruction reads a quadword from the memory address stored in the variable `rax_4#4`. To continue the slicing process, we need to locate the definition-site of `[rax_4#4].q @ mem#2`, that is, the instruction responsible for writing to the corresponding memory region.
 54 | 
 55 | To achieve this, we perform backward slicing on memory versions. The MLIL_LOAD instruction where we stopped has memory version 2 (`@ mem#2`). The instruction defining this memory version is the MLIL_STORE `[rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2`, which writes a quadword to the memory address stored in the variable `rbx_1#1`.
 56 | 
 57 | The slicer should therefore jump from the MLIL_LOAD to the corresponding MLIL_STORE if `rax_4#4` and `rbx_1#1` refer to the same memory location. By manually inspecting the instructions, we can confirm that this is indeed the case, i.e. both point to `argv[1]`.
 58 | ```
 59 | rax_4#4 = rax_3#3 + 8 = var_28#1 + 8 = argv#0 + 8 --> argv[1]
 60 | rbx_1#1 = rax_1#1 + 8 = var_28#1 + 8 = argv#0 + 8 --> argv[1]
 61 | ```
 62 | 
 63 | The above relationship is, however, difficult to infer automatically at the MLIL level. Interestingly, the HLIL captures it directly. That's one of the many beauties of Binary Ninja's multi-level IL design!
 64 | ```
 65 | # Load `argv[1]`
 66 | >>> mlil_load_inst
 67 | <MediumLevelILLoadSsa: [rax_4#4].q @ mem#2>
 68 | 
 69 | >>> mlil_load_inst.hlil.ssa_form
 70 | <HighLevelILArrayIndexSsa: argv#0[1] @ mem#2>
 71 | 
 72 | >>> mlil_load_inst.hlil.ssa_form.src.var, mlil_load_inst.hlil.ssa_form.index.constant
 73 | (<SSAVariable: argv version 0>, 1)  <-- argv[1]
 74 | 
 75 | 
 76 | # Store `argv[1]`
 77 | >>> mlil_store_inst
 78 | <MediumLevelILStoreSsa: [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2>
 79 | 
 80 | >>> mlil_store_inst.hlil.ssa_form
 81 | <HighLevelILAssignMemSsa: argv#0[1] @ mem#1 @ mem#2 = getenv("CMD") @ mem#0 -> mem#1 @ mem#1>
 82 | 
 83 | >>> mlil_store_inst.hlil.ssa_form.dest
 84 | <HighLevelILArrayIndexSsa: argv#0[1] @ mem#1>
 85 | 
 86 | >>> mlil_store_inst.hlil.ssa_form.dest.src.var, mlil_store_inst.hlil.ssa_form.dest.index.constant
 87 | (<SSAVariable: argv version 0>, 1)  <-- argv[1]
 88 | ```
 89 | 
 90 | When reaching `rax_5#5 = [rax_4#4].q @ mem#2` (MLIL_LOAD - use-site), Mole therefore knows it should continue slicing at `[rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2` (MLIL_STORE - def-site):
 91 | ```
 92 | 0x4011ac [rax_4#4].q @ mem#2 (MediumLevelILLoadSsa)
 93 | 
 94 | Follow store instruction '0x4011a1 [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2' since it writes the same
 95 | array element ('argv#0[1]') as load instruction '0x4011ac [rax_4#4].q @ mem#2 (MediumLevelILLoadSsa)'
 96 | 
 97 | 0x4011a1 [rbx_1#1].q = rax_2#2 @ mem#1 -> mem#2 (MediumLevelILStoreSsa)
 98 | ```
 99 | ## Struct Field Dereferencing
100 | See unit-test `load-05.c`.


--------------------------------------------------------------------------------
/plugin.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "pluginmetadataversion": 2,
 3 |   "name": "Mole",
 4 |   "type": [
 5 |     "ui",
 6 |     "helper"
 7 |   ],
 8 |   "api": [
 9 |     "python3"
10 |   ],
11 |   "description": "Uncover interesting paths using static backward slicing",
12 |   "longdescription": "<p align=\"center\">\n  <img src=\"https://i.postimg.cc/mrcXH34C/image-1.png\" alt=\"Mole Logo\"/>\n</p>\n\n**_Mole_** is a *Binary Ninja* plugin designed to identify **interesting paths** in binaries. It performs **static backward slicing** on variables using *Binary Ninja*'s *Medium Level Intermediate Language* (*MLIL*) in its *Static Single Assignment* (*SSA*) form.\n\nIn *Mole*, a **path** refers to the flow of data between a defined source and sink. What constitutes an \"interesting\" path depends on the analysis goals. For instance, when searching for **vulnerabilities**, one might look for paths where untrusted inputs (sources) influence sensitive operations (sinks) in potentially dangerous ways.\n\nThe following list highlights some of *Mole*'s current **features**:\n- **Operation Mode**: *Mole* can be run either within *Binary Ninja*'s UI or in headless mode. Headless mode is particularly useful for scripted analysis across a large number of binaries. Conversely, using *Mole* within the UI is ideal for closely investigating detected paths.\n- **Path Identification**:\n  - **Configuration**: *Mole* allows users to define source and sink functions through Binary Ninja\u2019s UI or configuration files (see Usage). This provides flexibility in selecting sources and sinks based on the specific usage scenario.\n  - **Exploration**: To better understand a path and examine its characteristics, all instructions along the path can be printed or visually highlighted within *Binary Ninja*. Additionally, a side-by-side comparison of two paths can be displayed to quickly identify differences. Similar to instructions, a path's sequence of function calls can be printed or even visualized as a graph.\n  - **Grouping**: To facilitate the identification of similar paths, *Mole* supports multiple grouping strategies. Currently, paths can be grouped based on matching source and sink functions, or by identical call sequences. 
New custom grouping strategies can easily be added to extend and customize this functionality (see Customization).\n  - **Persistence**: Discovered paths can be annotated for clarity or removed if deemed irrelevant. To preserve analysis progress, paths can be saved directly to the target binary's database (*Binary Ninja*'s `.bndb` format). Paths can also be exported - for example, when performing headless analysis across many binaries on a file system, allowing identified paths to be later imported for easier exploration within *Binary Ninja*.\n- **Path Analysis With AI**: *Mole* can interact with local or remote *Large Language Models* (*LLMs*) via the *OpenAI API* (see Usage). The models are used to analyze identified paths and reason whether a given path corresponds to an exploitable vulnerability or not. The *LLM* attempts to classify the vulnerability and assign a severity level, which can help prioritize which paths are worth further investigation by an analyst. *Mole* provides a basic set of tools that the *LLM* can use to request additional information about the binary under analysis. This feature is an initial prototype and has not yet undergone systematic evaluation (e.g., models, prompts, tools). If it proves useful, we plan to improve it in future releases.\n\n- **Inter-Procedural Variable Slicing**: *Mole* supports slicing *MLIL variables* across function boundaries - a task that presents several challenges. For instance, statically determining a function's effective caller(s) is often difficult or even impossible. As a result, the implemented approach is an approximation. While not perfect, it performs reasonably well across a wide range of practical scenarios.\n- **Basic Pointer Analysis**: *Mole* currently implements a simplified strategy for tracking pointer usage. Like inter-procedural slicing, this approach is a simplification with inherent limitations (e.g. it cannot track global variables). 
Nevertheless, it performs well in many practical cases and is planned to be improved in future versions.",
13 |   "license": {
14 |     "name": "Apache-2.0",
15 |     "text": "Copyright (c) 2025 Damian Pfammatter and Sergio Paganoni\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\nhttp://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."
16 |   },
17 |   "platforms": [
18 |     "Darwin",
19 |     "Linux",
20 |     "Windows"
21 |   ],
22 |   "installinstructions": {
23 |     "Darwin": "",
24 |     "Linux": "",
25 |     "Windows": ""
26 |   },
27 |   "version": "0.5.1",
28 |   "author": "Damian Pfammatter and Sergio Paganoni",
29 |   "minimumbinaryninjaversion": 6455
30 | }


--------------------------------------------------------------------------------
/tests/data/src/simple_http_server-04.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <unistd.h>
  5 | #include <arpa/inet.h>
  6 | 
  7 | #define PORT 8080
  8 | #define BUFFER_SIZE 256
  9 | 
 10 | /*
 11 | Testcase Description:
 12 | - server example
 13 | - disallow function inlining
 14 | - generates duplicate paths (phis in process_post_request)
 15 | */
 16 | 
 17 | 
 18 | __attribute__ ((noinline)) 
 19 | void process_post_request(int client_socket, const char *buffer);
 20 | 
 21 | __attribute__ ((noinline)) 
 22 | void handle_client(int client_socket);
 23 | 
 24 | __attribute__ ((noinline)) 
 25 | void execute_cgi_command(const char *buffer);
 26 | 
 27 | __attribute__ ((noinline)) 
 28 | void send_response(int client_socket, const char *response);
 29 | 
 30 | __attribute__ ((noinline)) 
 31 | int create_server_socket(struct sockaddr_in *address);
 32 | 
 33 | __attribute__ ((noinline)) 
 34 | void handle_post_request(int client_socket);
 35 | 
 36 | __attribute__ ((noinline)) 
 37 | char* receive_data(int client_socket, int *size);
 38 | 
 39 | int main() {  // Accept loop: create the listening socket, then handle clients one at a time.
 40 |     int server_fd, client_socket;
 41 |     struct sockaddr_in address;
 42 |     int addrlen = sizeof(address);
 43 | 
 44 |     server_fd = create_server_socket(&address);  // exits the process itself if setup fails
 45 | 
 46 |     while (1) {
 47 |         if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
 48 |             perror("accept");
 49 |             close(server_fd);
 50 |             exit(EXIT_FAILURE);
 51 |         }
 52 |         handle_client(client_socket);  // handled synchronously; the next accept() waits until this returns
 53 |     }
 54 | 
 55 |     close(server_fd);  // unreachable: the loop above is only left through exit()
 56 |     return 0;
 57 | }
 58 | 
 59 | void handle_client(int client_socket) {  // Dispatch on the first received chunk: "POST" is handled, anything else gets a 405.
 60 |     int size;  // filled in by receive_data(); not read again in this function
 61 |     char *method = receive_data(client_socket, &size);  // heap buffer freed at the end; NULL on failure
 62 | 
 63 |     if (method == NULL) {
 64 |         close(client_socket);
 65 |         return;
 66 |     }
 67 | 
 68 |     if (strncmp(method, "POST", 4) == 0) {  // only the first four bytes are compared
 69 |         handle_post_request(client_socket);  // does its own receive_data() call and closes the socket
 70 |     } else {
 71 |         send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n");
 72 |         close(client_socket);
 73 |     }
 74 | 
 75 |     free(method);
 76 | }
 77 | 
 78 | void process_post_request(int client_socket, const char *buffer) {  // Pick a command string out of the request and pass it to execute_cgi_command(); client_socket is not used here.
 79 |     // extract body from buffer
 80 |     char *body = strstr(buffer, "\r\n\r\n");
 81 |     if (body) {
 82 |         body += 4; // skip the "\r\n\r\n"
 83 |     } else {
 84 |         body = (char*)buffer;  // no header/body separator: treat the whole buffer as body
 85 |     }
 86 |     while (1) {  // NOTE(review): body is never advanced, so once a "\r\n" is found this loop does not terminate
 87 |         
 88 |         // for each line in body execute the command
 89 |         char *line = strstr(body, "\r\n");
 90 |         if(line == NULL) {
 91 |             break;
 92 |         }
 93 |         char* cmd = strstr(line, "EXECUTE");
 94 |         if (cmd) {
 95 |             cmd += 8; // skip the "EXECUTE "
 96 |         } else {
 97 |             cmd = line;  // no keyword: fall back to the text starting at the line break
 98 |         }
 99 |         execute_cgi_command(cmd);  // cmd comes from one of two branches above (the "phis" named in the file header)
100 |         
101 |     }
102 | }
103 | 
104 | 
105 | void handle_post_request(int client_socket) {  // Serve one POST: read a chunk, process it, reply 200, close the socket.
106 |     int size;  // filled in by receive_data(); not read again in this function
107 |     char *buffer = receive_data(client_socket, &size);  // heap buffer freed below; NULL on malloc/recv failure
108 | 
109 |     if (buffer == NULL) {
110 |         close(client_socket);  // nothing usable received: drop the connection without a reply
111 |         return;
112 |     }
113 | 
114 |     process_post_request(client_socket, buffer);  // hands the raw client bytes on for command extraction
115 |     send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nPOST request received.\n");
116 |     close(client_socket);
117 |     free(buffer);
118 | }
119 | 
120 | void execute_cgi_command(const char *buffer) {  // Run the text between "/cgi-bin/" and the next space via system().
121 |     char *cgi_start = strstr(buffer, "/cgi-bin/");  // first occurrence of the marker, or NULL
122 |     if (cgi_start) {
123 |         cgi_start += strlen("/cgi-bin/");  // step past the marker to the command text
124 |         char *cgi_end = strchr(cgi_start, ' ');
125 |         if (cgi_end) {  // without a terminating space nothing is executed
126 |             *cgi_end = '\0';  // writes into the caller's buffer even though the parameter is const-qualified
127 |             system(cgi_start);  // NOTE(review): unsanitized input reaches the shell; presumably the intended sink of this test case
128 |         }
129 |     }
130 | }
131 | 
132 | void send_response(int client_socket, const char *response) {  // Write the NUL-terminated response string to the socket.
133 |     write(client_socket, response, strlen(response));  // single write(); its return value (errors, short writes) is ignored
134 | }
135 | 
136 | int create_server_socket(struct sockaddr_in *address) {  // Create a TCP socket bound to INADDR_ANY:PORT and put it in listening state; exits the process on failure.
137 |     int server_fd;
138 | 
139 |     if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {  // NOTE(review): socket() reports failure as -1, which this "== 0" test does not catch
140 |         perror("socket failed");
141 |         exit(EXIT_FAILURE);
142 |     }
143 | 
144 |     address->sin_family = AF_INET;  // the caller's struct is filled in here
145 |     address->sin_addr.s_addr = INADDR_ANY;
146 |     address->sin_port = htons(PORT);  // PORT (8080) converted to network byte order
147 | 
148 |     if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) {
149 |         perror("bind failed");
150 |         close(server_fd);
151 |         exit(EXIT_FAILURE);
152 |     }
153 | 
154 |     if (listen(server_fd, 3) < 0) {  // backlog argument of 3
155 |         perror("listen");
156 |         close(server_fd);
157 |         exit(EXIT_FAILURE);
158 |     }
159 | 
160 |     return server_fd;  // the listening descriptor
161 | }
162 | 
163 | char* receive_data(int client_socket, int *size) {  // Read one chunk from the socket into a new NUL-terminated heap buffer; NULL on malloc/recv failure.
164 |     char *buffer = (char *)malloc(BUFFER_SIZE);  // BUFFER_SIZE is 256; callers in this file free() the returned buffer
165 |     if (buffer == NULL) {
166 |         perror("malloc");
167 |         return NULL;
168 |     }
169 | 
170 |     int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0);  // one byte is kept back for the terminator
171 |     if (bytes_read < 0) {
172 |         perror("recv");
173 |         free(buffer);
174 |         return NULL;
175 |     }
176 | 
177 |     buffer[bytes_read] = '\0';  // bytes_read == 0 yields an empty string
178 |     *size = bytes_read;  // only written on the success path
179 |     return buffer;
180 | }


--------------------------------------------------------------------------------
/tests/slicing/test_pointer.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from tests.slicing.conftest import TestSlicing
  3 | from typing import List
  4 | import pytest
  5 | 
  6 | 
  7 | class TestPointerAnalysis(TestSlicing):
  8 |     def test_pointer_analysis_01(
  9 |         self, filenames: List[str] = ["pointer_analysis-01"]
 10 |     ) -> None:
 11 |         self.assert_paths(
 12 |             srcs=[("getenv", None)],
 13 |             snks=[("system", 1)],
 14 |             call_chains=[["main"]],
 15 |             filenames=filenames,
 16 |         )
 17 |         return
 18 | 
 19 |     def test_pointer_analysis_02(
 20 |         self, filenames: List[str] = ["pointer_analysis-02"]
 21 |     ) -> None:
 22 |         self.test_pointer_analysis_01(filenames)
 23 |         return
 24 | 
 25 |     def test_pointer_analysis_03(
 26 |         self, filenames: List[str] = ["pointer_analysis-03"]
 27 |     ) -> None:
 28 |         self.test_pointer_analysis_01(filenames)
 29 |         return
 30 | 
 31 |     def test_pointer_analysis_04(
 32 |         self, filenames: List[str] = ["pointer_analysis-04"]
 33 |     ) -> None:
 34 |         self.assert_paths(
 35 |             srcs=[("getenv", None)],
 36 |             snks=[("system", 1)],
 37 |             call_chains=[["main"], ["main"]],
 38 |             filenames=filenames,
 39 |         )
 40 |         return
 41 | 
 42 |     def test_pointer_analysis_05(
 43 |         self, filenames: List[str] = ["pointer_analysis-05"]
 44 |     ) -> None:
 45 |         self.assert_paths(
 46 |             srcs=[],
 47 |             snks=[],
 48 |             call_chains=[],
 49 |             filenames=filenames,
 50 |         )
 51 |         return
 52 | 
 53 |     def test_pointer_analysis_06(
 54 |         self, filenames: List[str] = ["pointer_analysis-06"]
 55 |     ) -> None:
 56 |         self.assert_paths(
 57 |             srcs=[("getenv", None)],
 58 |             snks=[("memcpy", 3)],
 59 |             call_chains=[["main", "modify_n"]],
 60 |             filenames=filenames,
 61 |         )
 62 |         return
 63 | 
 64 |     def test_pointer_analysis_07(
 65 |         self, filenames: List[str] = ["pointer_analysis-07"]
 66 |     ) -> None:
 67 |         self.assert_paths(
 68 |             srcs=[("getenv", None)],
 69 |             snks=[("memcpy", 2)],
 70 |             call_chains=[["main", "my_getenv"]],
 71 |             filenames=filenames,
 72 |         )
 73 |         return
 74 | 
 75 |     def test_pointer_analysis_08(
 76 |         self, filenames: List[str] = ["pointer_analysis-08"]
 77 |     ) -> None:
 78 |         self.test_pointer_analysis_07(filenames)
 79 |         return
 80 | 
 81 |     def test_pointer_analysis_09(
 82 |         self, filenames: List[str] = ["pointer_analysis-09"]
 83 |     ) -> None:
 84 |         self.test_pointer_analysis_01(filenames)
 85 |         return
 86 | 
 87 |     def test_pointer_analysis_10(
 88 |         self, filenames: List[str] = ["pointer_analysis-10"]
 89 |     ) -> None:
 90 |         self.test_pointer_analysis_01(filenames)
 91 |         return
 92 | 
 93 |     def test_pointer_analysis_11(
 94 |         self, filenames: List[str] = ["pointer_analysis-11"]
 95 |     ) -> None:
 96 |         self.assert_paths(
 97 |             srcs=[("getenv", None)],
 98 |             snks=[("system", 1)],
 99 |             call_chains=[["execute", "main"]],
100 |             filenames=filenames,
101 |         )
102 |         return
103 | 
104 |     def test_pointer_analysis_12(
105 |         self, filenames: List[str] = ["pointer_analysis-12"]
106 |     ) -> None:
107 |         self.assert_paths(
108 |             srcs=[("getenv", None)],
109 |             snks=[("system", 1)],
110 |             call_chains=[["main"], ["main"]],
111 |             filenames=filenames,
112 |         )
113 |         return
114 | 
115 |     @pytest.mark.xfail
116 |     def test_pointer_analysis_13(
117 |         self, filenames: List[str] = ["pointer_analysis-13"]
118 |     ) -> None:
119 |         self.test_pointer_analysis_12(filenames)
120 |         return
121 | 
122 |     def test_pointer_analysis_14(
123 |         self, filenames: List[str] = ["pointer_analysis-14"]
124 |     ) -> None:
125 |         self.test_pointer_analysis_01(filenames)
126 |         return
127 | 
128 |     @pytest.mark.xfail
129 |     def test_pointer_analysis_15(
130 |         self, filenames: List[str] = ["pointer_analysis-15"]
131 |     ) -> None:
132 |         self.assert_paths(
133 |             srcs=[("getopt", 2)],
134 |             snks=[("strcpy", 2)],
135 |             call_chains=[["main"]],
136 |             filenames=filenames,
137 |         )
138 |         return
139 | 
140 |     def test_pointer_analysis_16(
141 |         self, filenames: List[str] = ["pointer_analysis-16"]
142 |     ) -> None:
143 |         self.assert_paths(
144 |             srcs=[("getenv", None)],
145 |             snks=[("strncat", 2), ("system", 1)],
146 |             call_chains=[["main"], ["main"]],
147 |             filenames=filenames,
148 |         )
149 |         return
150 | 
151 |     def test_pointer_analysis_17(
152 |         self, filenames: List[str] = ["pointer_analysis-17"]
153 |     ) -> None:
154 |         self.assert_paths(
155 |             srcs=[("recv", None), ("recv", 2)],
156 |             snks=[("memcpy", 2), ("memcpy", 3), ("system", 1)],
157 |             call_chains=[["main"], ["main"], ["main"], ["main"]],
158 |             filenames=filenames,
159 |         )
160 |         return
161 | 


--------------------------------------------------------------------------------
/tests/slicing/test_function_calling.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from tests.slicing.conftest import TestSlicing
  3 | from typing import List
  4 | 
  5 | 
  6 | class TestFunctionCalling(TestSlicing):
  7 |     def test_function_calling_01(
  8 |         self, filenames: List[str] = ["function_calling-01"]
  9 |     ) -> None:
 10 |         self.assert_paths(
 11 |             srcs=[("getenv", None)],
 12 |             snks=[("system", 1)],
 13 |             call_chains=[["main"], ["main"]],
 14 |             filenames=filenames,
 15 |         )
 16 |         return
 17 | 
 18 |     def test_function_calling_02(
 19 |         self, filenames: List[str] = ["function_calling-02"]
 20 |     ) -> None:
 21 |         self.assert_paths(
 22 |             srcs=[("getenv", None)],
 23 |             snks=[("system", 1)],
 24 |             call_chains=[
 25 |                 ["system_2", "system_1a", "main", "getenv_1a", "getenv_2"],
 26 |                 ["system_2", "system_1a", "main", "getenv_1b", "getenv_2"],
 27 |             ],
 28 |             filenames=filenames,
 29 |         )
 30 |         return
 31 | 
 32 |     def test_function_calling_03(
 33 |         self, filenames: List[str] = ["function_calling-03"]
 34 |     ) -> None:
 35 |         self.assert_paths(
 36 |             srcs=[("getenv", None)],
 37 |             snks=[("system", 1)],
 38 |             call_chains=[
 39 |                 ["system_1a", "main", "getenv_1a"],
 40 |                 ["system_1a", "main", "getenv_1b"],
 41 |             ],
 42 |             filenames=filenames,
 43 |         )
 44 |         return
 45 | 
 46 |     def test_function_calling_04(
 47 |         self, filenames: List[str] = ["function_calling-04"]
 48 |     ) -> None:
 49 |         self.test_function_calling_02(filenames)
 50 |         return
 51 | 
 52 |     def test_function_calling_05(
 53 |         self, filenames: List[str] = ["function_calling-05"]
 54 |     ) -> None:
 55 |         self.assert_paths(
 56 |             srcs=[("getenv", None)],
 57 |             snks=[("system", 1)],
 58 |             call_chains=[["main"]],
 59 |             filenames=filenames,
 60 |         )
 61 |         return
 62 | 
 63 |     def test_function_calling_06(
 64 |         self, filenames: List[str] = ["function_calling-06"]
 65 |     ) -> None:
 66 |         self.test_function_calling_05(filenames)
 67 |         return
 68 | 
 69 |     def test_function_calling_07(
 70 |         self, filenames: List[str] = ["function_calling-07"]
 71 |     ) -> None:
 72 |         self.assert_paths(
 73 |             srcs=[],
 74 |             snks=[],
 75 |             call_chains=[],
 76 |             filenames=filenames,
 77 |         )
 78 |         return
 79 | 
 80 |     def test_function_calling_08(
 81 |         self, filenames: List[str] = ["function_calling-08"]
 82 |     ) -> None:
 83 |         self.test_function_calling_07(filenames)
 84 |         return
 85 | 
 86 |     def test_function_calling_09(
 87 |         self, filenames: List[str] = ["function_calling-09"]
 88 |     ) -> None:
 89 |         self.test_function_calling_05(filenames)
 90 |         return
 91 | 
 92 |     def test_function_calling_10(
 93 |         self, filenames: List[str] = ["function_calling-10"]
 94 |     ) -> None:
 95 |         self.assert_paths(
 96 |             srcs=[("getenv", None)],
 97 |             snks=[("system", 1)],
 98 |             call_chains=[["system_1", "main", "getenv_1", "getenv_2"]],
 99 |             filenames=filenames,
100 |         )
101 |         return
102 | 
103 |     def test_function_calling_11(
104 |         self, filenames: List[str] = ["function_calling-11"]
105 |     ) -> None:
106 |         self.assert_paths(
107 |             srcs=[("getenv", None)],
108 |             snks=[("system", 1)],
109 |             call_chains=[["main", "getenv_1", "getenv_2"]],
110 |             filenames=filenames,
111 |         )
112 |         return
113 | 
114 |     def test_function_calling_12(
115 |         self, filenames: List[str] = ["function_calling-12"]
116 |     ) -> None:
117 |         self.assert_paths(
118 |             srcs=[("getenv", None)],
119 |             snks=[("system", 1)],
120 |             call_chains=[["main", "getenv_1", "getenv_2", "getenv_3", "getenv_4"]],
121 |             filenames=filenames,
122 |         )
123 |         return
124 | 
125 |     def test_function_calling_13(
126 |         self, filenames: List[str] = ["function_calling-13"]
127 |     ) -> None:
128 |         self.assert_paths(
129 |             srcs=[("getenv", None)],
130 |             snks=[("system", 1)],
131 |             call_chains=[["system_1", "main"]],
132 |             filenames=filenames,
133 |         )
134 |         return
135 | 
136 |     def test_function_calling_14(
137 |         self, filenames: List[str] = ["function_calling-14"]
138 |     ) -> None:
139 |         self.assert_paths(
140 |             srcs=[("getenv", None)],
141 |             snks=[("system", 1)],
142 |             call_chains=[["system_3", "system_2", "system_1", "main"]],
143 |             filenames=filenames,
144 |         )
145 |         return
146 | 
147 |     def test_function_calling_15(
148 |         self, filenames: List[str] = ["function_calling-15"]
149 |     ) -> None:
150 |         self.assert_paths(
151 |             srcs=[("getenv", None)],
152 |             snks=[("system", 1)],
153 |             call_chains=[["system_2", "system_1", "main", "getenv_1", "getenv_2"]],
154 |             filenames=filenames,
155 |         )
156 |         return
157 | 


--------------------------------------------------------------------------------
/tests/slicing/test_object_oriented.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from tests.slicing.conftest import TestSlicing
  3 | from typing import List
  4 | import binaryninja as bn
  5 | import pytest
  6 | 
  7 | 
  8 | class TestObjectOriented(TestSlicing):
  9 |     def test_object_oriented_01(
 10 |         self, filenames: List[str] = ["object_oriented-01"]
 11 |     ) -> None:
 12 |         self.assert_paths(
 13 |             srcs=[("getenv", None)],
 14 |             snks=[("system", 1)],
 15 |             call_chains=[
 16 |                 ["MyParent::my_func", "main"],
 17 |                 ["MyParent::my_func", "main"],
 18 |             ],
 19 |             filenames=filenames,
 20 |         )
 21 |         return
 22 | 
 23 |     @pytest.mark.xfail(
 24 |         reason="Binja returns an invalid code x-ref for `MyParent::VTable::my_func`"
 25 |     )
 26 |     def test_object_oriented_02(
 27 |         self, filenames: List[str] = ["object_oriented-02"]
 28 |     ) -> None:
 29 |         def manually_set_types(bv: bn.BinaryView) -> None:
 30 |             main_func = bv.get_functions_by_name("main")[0]
 31 |             # Manually define class MyParent
 32 |             p_class = """
 33 |             class MyParent __packed
 34 |             {
 35 |                 `MyParent::VTable`* vptr;
 36 |                 char const* name;
 37 |             };
 38 |             """
 39 |             parsed_p_class = bv.parse_types_from_string(p_class)
 40 |             for name, type in parsed_p_class.types.items():
 41 |                 bv.define_user_type(name, type)
 42 |             # Manually set type and name of variable `p` (i.e. `MyParent* p = ...`)
 43 |             p_class_type = bv.get_type_by_name("MyParent")
 44 |             p_new_inst: bn.HighLevelILVarInit = main_func.call_sites[0].hlil
 45 |             p_new_inst.dest.type = bn.Type.pointer(bv.arch, p_class_type)
 46 |             # Manually define class MyChild
 47 |             c_class = """
 48 |             class MyChild __packed
 49 |             {
 50 |                 `MyParent::MyChild::VTable`* vptr;
 51 |                 char const* name;
 52 |             };
 53 |             """
 54 |             parsed_c_class = bv.parse_types_from_string(c_class)
 55 |             for name, type in parsed_c_class.types.items():
 56 |                 bv.define_user_type(name, type)
 57 |             # Manually set type and name of variable `c` (i.e. `MyChild* c = ...`)
 58 |             c_class_type = bv.get_type_by_name("MyChild")
 59 |             c_new_inst: bn.HighLevelILVarInit = main_func.call_sites[2].hlil
 60 |             c_new_inst.dest.type = bn.Type.pointer(bv.arch, c_class_type)
 61 |             return
 62 | 
 63 |         self.assert_paths(
 64 |             srcs=[("getenv", None)],
 65 |             snks=[("system", 1), ("popen", 1)],
 66 |             call_chains=[
 67 |                 ["MyParent::my_func", "main"],
 68 |                 ["MyChild::my_func", "main"],
 69 |             ],
 70 |             filenames=filenames,
 71 |             bv_callback=manually_set_types,
 72 |         )
 73 |         return
 74 | 
 75 |     @pytest.mark.xfail(
 76 |         reason="Mole fails to track def-site MLIL_STORE_STRUCT (in constructor) of use-site MLIL_LOAD_STRUCT for member variable `this->name`."
 77 |     )
 78 |     def test_object_oriented_03(
 79 |         self, filenames: List[str] = ["object_oriented-03"]
 80 |     ) -> None:
 81 |         def manually_set_types(bv: bn.BinaryView) -> None:
 82 |             main_func = bv.get_functions_by_name("main")[0]
 83 |             # Manually define class MyParent
 84 |             p_class = """
 85 |             class MyParent __packed
 86 |             {
 87 |                 `MyParent::VTable`* vptr;
 88 |                 char const* name;
 89 |             };
 90 |             """
 91 |             parsed_p_class = bv.parse_types_from_string(p_class)
 92 |             for name, type in parsed_p_class.types.items():
 93 |                 bv.define_user_type(name, type)
 94 |             # Manually set type and name of variable `p` (i.e. `MyParent* p = ...`)
 95 |             p_class_type = bv.get_type_by_name("MyParent")
 96 |             p_new_inst: bn.HighLevelILVarInit = main_func.call_sites[1].hlil
 97 |             p_new_inst.dest.type = bn.Type.pointer(bv.arch, p_class_type)
 98 |             # Manually define class MyChild
 99 |             c_class = """
100 |             class MyChild __packed
101 |             {
102 |                 `MyParent::MyChild::VTable`* vptr;
103 |                 char const* name;
104 |             };
105 |             """
106 |             parsed_c_class = bv.parse_types_from_string(c_class)
107 |             for name, type in parsed_c_class.types.items():
108 |                 bv.define_user_type(name, type)
109 |             # Manually set type and name of variable `c` (i.e. `MyChild* c = ...`)
110 |             c_class_type = bv.get_type_by_name("MyChild")
111 |             c_new_inst: bn.HighLevelILVarInit = main_func.call_sites[6].hlil
112 |             c_new_inst.dest.type = bn.Type.pointer(bv.arch, c_class_type)
113 |             return
114 | 
115 |         self.assert_paths(
116 |             srcs=[("getenv", None)],
117 |             snks=[("system", 1), ("popen", 1)],
118 |             call_chains=[
119 |                 ["MyParent::my_func", "main"],
120 |                 ["MyChild::my_func", "main"],
121 |             ],
122 |             filenames=filenames,
123 |             bv_callback=manually_set_types,
124 |         )
125 |         return
126 | 


--------------------------------------------------------------------------------
/development/update_description.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional
  2 | import json
  3 | import os
  4 | import re
  5 | 
  6 | 
  7 | def readme_to_json_string(
  8 |     readme_filename="README.md", save_test_file=True
  9 | ) -> Optional[str]:
 10 |     """
 11 |     Reads the README file and returns its content as a JSON-escaped string.
 12 |     Only keeps the first section content and removes all other sections.
 13 | 
 14 |     Args:
 15 |         readme_filename (str): The name of the README file.
 16 |         save_test_file (bool): Whether to save a test file with processed
 17 |                                content.
 18 | 
 19 |     Returns:
 20 |         str: The JSON-escaped string content of the README file (including
 21 |              quotes), or None if the file cannot be read.
 22 |     """
 23 |     script_dir = os.path.dirname(os.path.abspath(__file__))
 24 |     parent_dir = os.path.dirname(script_dir)
 25 |     readme_path = os.path.join(parent_dir, readme_filename)
 26 | 
 27 |     if not os.path.exists(readme_path):
 28 |         print(f"Error: File '{readme_path:s}' not found")
 29 |         return None
 30 | 
 31 |     try:
 32 |         with open(readme_path, "r", encoding="utf-8") as f:
 33 |             content = f.read()
 34 | 
 35 |         # Find the first occurrence of '#' indicating a heading (the root heading)
 36 |         start_index = content.find("#")
 37 |         if start_index != -1:
 38 |             # Find the end of the first heading (next newline)
 39 |             end_of_first_heading = content.find("\n", start_index)
 40 |             if end_of_first_heading != -1:
 41 |                 # Skip the root heading and start from the next line
 42 |                 filtered_content = content[end_of_first_heading + 1 :].lstrip()
 43 |             else:
 44 |                 # If no newline after heading (unlikely), use original content
 45 |                 filtered_content = content
 46 |         else:
 47 |             # If no heading found, use the original content
 48 |             filtered_content = content
 49 | 
 50 |         # Find the second heading (which marks the end of first section)
 51 |         second_heading_index = filtered_content.find("\n#")
 52 |         if second_heading_index != -1:
 53 |             # Only keep content up to the second heading
 54 |             processed_content = filtered_content[:second_heading_index].strip()
 55 |         else:
 56 |             # If no second heading, keep all content
 57 |             processed_content = filtered_content
 58 | 
 59 |         # Replace markdown links [text](url) with just the text
 60 |         processed_content = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", processed_content)
 61 | 
 62 |         # Save the processed content to a test file if requested
 63 |         if save_test_file:
 64 |             test_file_path = os.path.join("/tmp", "processed_readme.md")
 65 |             with open(test_file_path, "w", encoding="utf-8") as test_file:
 66 |                 test_file.write(processed_content)
 67 |             print(f"Saved processed markdown to '{test_file_path:s}'")
 68 | 
 69 |         # Use json.dumps to correctly escape the string for JSON embedding
 70 |         json_string = json.dumps(processed_content)
 71 |         return json_string
 72 |     except Exception as e:
 73 |         print(f"Error reading or processing file '{readme_path:s}': {str(e):s}")
 74 |         return None
 75 | 
 76 | 
 77 | def update_plugin_json(readme_content: str) -> bool:
 78 |     """
 79 |     Updates the longdescription attribute in the plugin.json file.
 80 | 
 81 |     Args:
 82 |         readme_content (str): The README content to use for longdescription
 83 | 
 84 |     Returns:
 85 |         bool: True if successful, False otherwise
 86 |     """
 87 |     script_dir = os.path.dirname(os.path.abspath(__file__))
 88 |     parent_dir = os.path.dirname(script_dir)
 89 |     plugin_json_path = os.path.join(parent_dir, "plugin.json")
 90 | 
 91 |     if not os.path.exists(plugin_json_path):
 92 |         print(f"Error: plugin.json not found at '{plugin_json_path:s}'")
 93 |         return False
 94 | 
 95 |     try:
 96 |         # Read the existing plugin.json
 97 |         with open(plugin_json_path, "r", encoding="utf-8") as f:
 98 |             plugin_data = json.load(f)
 99 | 
100 |         # Update the longdescription attribute
101 |         plugin_data["longdescription"] = readme_content
102 | 
103 |         # Write back to the file with pretty formatting
104 |         with open(plugin_json_path, "w", encoding="utf-8") as f:
105 |             json.dump(plugin_data, f, indent=2)
106 | 
107 |         print(f"Successfully updated longdescription in '{plugin_json_path:s}'")
108 |         return True
109 |     except Exception as e:
110 |         print(f"Error updating plugin.json: {str(e):s}")
111 |         return False
112 | 
113 | 
if __name__ == "__main__":
    escaped_readme = readme_to_json_string()

    # A falsy result means readme_to_json_string already printed an error
    # message, so there is nothing left to do in that case.
    if escaped_readme:
        # The returned string is JSON-escaped and wrapped in quotes. Since it
        # gets embedded into another JSON structure, json.loads is used to
        # strip the outer quotes and unescape the content before updating
        # the plugin.json file.
        update_plugin_json(json.loads(escaped_readme))
128 | 


--------------------------------------------------------------------------------
/mole/cli/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from mole.common.log import log
  3 | from mole.models.config import ConfigModel
  4 | from mole.services.config import ConfigService
  5 | from mole.services.path import PathService
  6 | from typing import Dict, List
  7 | import argparse as ap
  8 | import binaryninja as bn
  9 | import hashlib as hl
 10 | import json as json
 11 | import os as os
 12 | import yaml as yaml
 13 | 
 14 | 
 15 | def main() -> None:
 16 |     """
 17 |     This function is used to process a given binary in headless mode.
 18 |     """
 19 |     # Parse arguments
 20 |     description = """
 21 |     Mole is a Binary Ninja plugin designed to identify interesting paths in binaries. It performs
 22 |     static backward slicing on variables using BN's MLIL in its SSA form. The plugin can be run
 23 |     either in BN's UI or in headless mode.
 24 |     """
 25 |     parser = ap.ArgumentParser(
 26 |         description=description, formatter_class=ap.ArgumentDefaultsHelpFormatter
 27 |     )
 28 |     parser.add_argument("file", help="file to analyze")
 29 |     parser.add_argument("--config_file", help="custom configuration file to use")
 30 |     parser.add_argument(
 31 |         "--log_level",
 32 |         choices=["error", "warning", "info", "debug"],
 33 |         default="debug",
 34 |         help="log level",
 35 |     )
 36 |     parser.add_argument(
 37 |         "--max_workers",
 38 |         type=int,
 39 |         default=None,
 40 |         help="maximum number of worker threads that backward slicing uses",
 41 |     )
 42 |     parser.add_argument(
 43 |         "--fix_func_type",
 44 |         action="store_true",
 45 |         help="whether to fix function types during analysis",
 46 |     )
 47 |     parser.add_argument(
 48 |         "--max_call_level",
 49 |         type=int,
 50 |         default=None,
 51 |         help="backward slicing visits called functions up to the given level",
 52 |     )
 53 |     parser.add_argument(
 54 |         "--max_slice_depth",
 55 |         type=int,
 56 |         default=None,
 57 |         help="maximum slice depth to stop the search",
 58 |     )
 59 |     parser.add_argument(
 60 |         "--max_memory_slice_depth",
 61 |         type=int,
 62 |         default=None,
 63 |         help="maximum memory slice depth to stop the search",
 64 |     )
 65 |     parser.add_argument(
 66 |         "--export_paths_to_json_file", help="export identified paths in JSON format"
 67 |     )
 68 |     parser.add_argument(
 69 |         "--export_paths_to_yml_file", help="export identified paths in YAML format"
 70 |     )
 71 |     parser.add_argument(
 72 |         "--save_bndb", help="save BN database file with analysis results"
 73 |     )
 74 |     args = vars(parser.parse_args())
 75 | 
 76 |     # Change properties of logger
 77 |     log.change_properties(level=args["log_level"], runs_headless=True)
 78 |     try:
 79 |         # Load and analyze binary with Binary Ninja
 80 |         bv = bn.load(args["file"])
 81 |         bv.update_analysis_and_wait()
 82 |         # Analyze binary with Mole
 83 |         slicer = PathService(
 84 |             bv=bv,
 85 |             config_model=ConfigModel(ConfigService(args["config_file"]).load_config()),
 86 |             max_workers=args["max_workers"],
 87 |             fix_func_type=args["fix_func_type"],
 88 |             max_call_level=args["max_call_level"],
 89 |             max_slice_depth=args["max_slice_depth"],
 90 |             max_memory_slice_depth=args["max_memory_slice_depth"],
 91 |         )
 92 |         slicer.start()
 93 |         paths = slicer.paths()
 94 |         # Export identified paths
 95 |         if (
 96 |             args["export_paths_to_yml_file"]
 97 |             or args["export_paths_to_json_file"]
 98 |             or args["save_bndb"]
 99 |         ):
100 |             # Calculate SHA1 hash of binary
101 |             sha1_hash = hl.sha1(bv.file.raw.read(0, bv.file.raw.end)).hexdigest()
102 |             # Serialize paths
103 |             s_paths: List[Dict] = []
104 |             for path in paths:
105 |                 s_path = path.to_dict()
106 |                 s_path["sha1"] = sha1_hash
107 |                 s_paths.append(s_path)
108 |             # Write JSON data (default)
109 |             if args["export_paths_to_json_file"]:
110 |                 fp = args["export_paths_to_json_file"]
111 |                 fp = os.path.abspath(os.path.expanduser(os.path.expandvars(fp)))
112 |                 with open(fp, "w") as f:
113 |                     json.dump(s_paths, f, indent=2)
114 |             # Write YAML data
115 |             if args["export_paths_to_yml_file"]:
116 |                 fp = args["export_paths_to_yml_file"]
117 |                 fp = os.path.abspath(os.path.expanduser(os.path.expandvars(fp)))
118 |                 with open(os.path.abspath(fp), "w") as f:
119 |                     yaml.safe_dump(
120 |                         s_paths,
121 |                         f,
122 |                         sort_keys=False,
123 |                         default_style=None,
124 |                         default_flow_style=False,
125 |                         encoding="utf-8",
126 |                     )
127 |             # Write BN database
128 |             if args["save_bndb"]:
129 |                 bv.store_metadata("mole_paths", json.dumps(s_paths))
130 |                 fp = args["save_bndb"]
131 |                 fp = os.path.abspath(os.path.expanduser(os.path.expandvars(fp)))
132 |                 bv.create_database(fp)
133 |         # Close binary
134 |         bv.file.close()
135 |     except KeyboardInterrupt:
136 |         log.info(msg="Keyboard interrupt caught")
137 |     except Exception as e:
138 |         log.error(msg=f"Exception caught: '{str(e):s}'")
139 |     return
140 | 
141 | 
142 | if __name__ == "__main__":
143 |     main()
144 | 


--------------------------------------------------------------------------------
/mole/core/ai.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from mole.common.helper.function import FunctionHelper
  3 | from mole.common.log import log
  4 | import binaryninja as bn
  5 | 
  6 | 
  7 | def get_il_code(func: bn.Function, il_type: str) -> str:
  8 |     """
  9 |     This method dispatches the IL type `il_type` and returns the corresponding code of the function
 10 |     `func`.
 11 |     """
 12 |     il_func = None
 13 |     match il_type.upper():
 14 |         case "PSEUDO_C":
 15 |             return FunctionHelper.get_pseudo_c_code(func)
 16 |         case "HLIL":
 17 |             il_func = func.hlil
 18 |         case "MLIL":
 19 |             il_func = func.mlil
 20 |         case "LLIL":
 21 |             il_func = func.llil
 22 |     return FunctionHelper.get_il_code(il_func)
 23 | 
 24 | 
 25 | def get_code_for_functions_containing(
 26 |     bv: bn.BinaryView,
 27 |     addr: str,
 28 |     il_type: str,
 29 |     tag: str = None,
 30 | ) -> str:
 31 |     """
 32 |     This method returns code of functions containing address `addr`, in the specified BNIL
 33 |     representation `il_type`.
 34 |     """
 35 |     log.info(
 36 |         tag,
 37 |         f"Tool call 'get_code_for_functions_containing(addr={addr:s}, il_type={il_type:s})'",
 38 |     )
 39 |     res_code = ""
 40 |     try:
 41 |         _addr = int(addr, 0)
 42 |         il_type = il_type.upper()
 43 |         func_code = []
 44 |         funcs = bv.get_functions_containing(_addr)
 45 |         if funcs is None:
 46 |             res_code = f"No functions found containing address `0x{_addr:x}`"
 47 |         else:
 48 |             for func in funcs:
 49 |                 header = f"{il_type:s} code of function `0x{func.start:x}: {str(func):s}`, which contains address `0x{_addr:x}`:"
 50 |                 code = get_il_code(func, il_type)
 51 |                 func_code.append(header + "\n```\n" + code + "\n```\n")
 52 |                 log.debug(
 53 |                     tag,
 54 |                     f"Return {il_type:s} code of function '0x{func.start:x}: {str(func):s}'",
 55 |                 )
 56 |             res_code = "\n".join(func_code)
 57 |     except Exception as e:
 58 |         msg = f"Failed to get {il_type:s} code of functions containing address '{addr:s}': {str(e):s}"
 59 |         log.warn(tag, msg)
 60 |         res_code = msg
 61 |     return res_code
 62 | 
 63 | 
 64 | def get_code_for_functions_by_name(
 65 |     bv: bn.BinaryView,
 66 |     name: str,
 67 |     il_type: str,
 68 |     tag: str = None,
 69 | ) -> str:
 70 |     """
 71 |     This method returns code of functions with name `name`, in the specified BNIL representation
 72 |     `il_type`.
 73 |     """
 74 |     log.info(
 75 |         tag,
 76 |         f"Tool call 'get_code_for_functions_by_name(name={name:s}, il_type={il_type:s})'",
 77 |     )
 78 |     res_code = ""
 79 |     try:
 80 |         il_type = il_type.upper()
 81 |         func_code = []
 82 |         for func in bv.get_functions_by_name(name):
 83 |             header = f"{il_type:s} code of function `0x{func.start:x}: {str(func):s}`:"
 84 |             code = get_il_code(func, il_type)
 85 |             func_code.append(header + "\n```\n" + code + "\n```\n")
 86 |             log.debug(
 87 |                 tag,
 88 |                 f"Return {il_type:s} code of function '0x{func.start:x}: {str(func):s}'",
 89 |             )
 90 |         res_code = "\n".join(func_code)
 91 |     except Exception as e:
 92 |         msg = f"Failed to get {il_type:s} code of functions with name '{name:s}': {str(e):s}"
 93 |         log.warn(tag, msg)
 94 |         res_code = msg
 95 |     return res_code
 96 | 
 97 | 
 98 | def get_callers_by_address(
 99 |     bv: bn.BinaryView,
100 |     addr: str,
101 |     tag: str = None,
102 | ) -> str:
103 |     """
104 |     This method returns the callers of functions containing address `addr`.
105 |     """
106 |     log.info(tag, f"Tool call 'get_callers_by_address(addr={addr:s})'")
107 |     res_callers = ""
108 |     try:
109 |         _addr = int(addr, 0)
110 |         callers = []
111 |         funcs = bv.get_functions_containing(_addr)
112 |         if funcs is None:
113 |             res_callers = f"No functions found containing address `0x{_addr:x}`"
114 |         else:
115 |             for func in funcs:
116 |                 header = f"Callers of function `0x{func.start:x}: {str(func):s}`, which contains address `0x{_addr:x}`:"
117 |                 func_callers = "\n".join(
118 |                     f"- `0x{caller.start:x}`: `{caller.symbol.short_name:s}`"
119 |                     for caller in func.callers
120 |                 )
121 |                 callers.append(header + "\n" + func_callers + "\n")
122 |             res_callers = "\n".join(callers)
123 |     except Exception as e:
124 |         msg = f"Failed to get callers of functions containing address '{addr:s}': {str(e):s}"
125 |         log.warn(tag, msg)
126 |         res_callers = msg
127 |     return res_callers
128 | 
129 | 
def get_callers_by_name(
    bv: bn.BinaryView,
    name: str,
    tag: str | None = None,
) -> str:
    """
    This method returns the callers of functions with name `name`.

    The result lists, per function, the start address and short symbol name of every caller. If no
    function has the given name, or if an error occurs, a corresponding message is returned instead
    (errors are also logged as warnings).
    """
    log.info(tag, f"Tool call 'get_callers_by_name(name={name:s})'")
    res_callers = ""
    try:
        funcs = bv.get_functions_by_name(name)
        # Report a missing function explicitly instead of returning an empty string
        if not funcs:
            res_callers = f"No functions found with name `{name:s}`"
        else:
            callers = []
            for func in funcs:
                header = f"Callers of function `0x{func.start:x}: {str(func):s}`:"
                func_callers = "\n".join(
                    f"- `0x{caller.start:x}`: `{caller.symbol.short_name:s}`"
                    for caller in func.callers
                )
                callers.append(header + "\n" + func_callers + "\n")
            res_callers = "\n".join(callers)
    except Exception as e:
        msg = f"Failed to get callers of functions with name '{name:s}': {str(e):s}"
        log.warn(tag, msg)
        res_callers = msg
    return res_callers
155 | 


--------------------------------------------------------------------------------
/tests/slicing/conftest.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from mole.common.log import log
  3 | from mole.core.data import Path
  4 | from mole.models.config import ConfigModel
  5 | from mole.services.config import ConfigService
  6 | from mole.services.path import PathService
  7 | from typing import Callable, List, Optional, Tuple
  8 | import atexit
  9 | import binaryninja as bn
 10 | import os
 11 | import pytest
 12 | 
 13 | 
 14 | tested_files = set()
 15 | atexit_registered = False
 16 | 
 17 | 
 18 | def print_tested_files() -> None:
 19 |     print(f"\nTested slicing on {len(tested_files):d} files")
 20 |     return
 21 | 
 22 | 
 23 | if not atexit_registered:
 24 |     atexit.register(print_tested_files)
 25 |     atexit_registered = True
 26 | 
 27 | 
 28 | class TestSlicing:
 29 |     """
 30 |     This class implements general functionality for slicing tests.
 31 |     """
 32 | 
 33 |     @pytest.fixture(autouse=True)
 34 |     def setup(self) -> None:
 35 |         log.change_properties(level="debug", runs_headless=True)
 36 |         config_file = os.path.join(
 37 |             os.path.dirname(os.path.abspath(__file__)), "../../mole/conf/003-libc.yml"
 38 |         )
 39 |         config = ConfigService().import_config(config_file)
 40 |         self._model = ConfigModel(config)
 41 |         self._ext = os.environ.get("EXT", None)
 42 |         return
 43 | 
 44 |     def load_files(self, names: List[str]) -> List[str]:
 45 |         """
 46 |         This method returns all files in the `testcases` directory matching `name` but ignoring the
 47 |         file extension.
 48 |         """
 49 |         directory = os.path.join(os.path.dirname(__file__), "..", "data", "bin")
 50 |         files = []
 51 |         for dirpath, _, filenames in os.walk(directory):
 52 |             for filename in filenames:
 53 |                 name, ext = os.path.splitext(filename)
 54 |                 if name in names:
 55 |                     if self._ext is None or self._ext == ext:
 56 |                         files.append(os.path.join(dirpath, filename))
 57 |                         tested_files.add(filename)
 58 |         return files
 59 | 
 60 |     def get_paths(
 61 |         self,
 62 |         bv: bn.BinaryView,
 63 |         max_workers: int | None = 1,
 64 |         fix_func_type: bool | None = False,
 65 |         max_call_level: int = 5,
 66 |         max_slice_depth: int = -1,
 67 |         max_memory_slice_depth: int = -1,
 68 |         enable_all_funs: bool = False,
 69 |     ) -> List[Path]:
 70 |         """
 71 |         This method is a helper to find paths.
 72 |         """
 73 |         slicer = PathService(
 74 |             bv=bv,
 75 |             config_model=self._model,
 76 |             max_workers=max_workers,
 77 |             fix_func_type=fix_func_type,
 78 |             max_call_level=max_call_level,
 79 |             max_slice_depth=max_slice_depth,
 80 |             max_memory_slice_depth=max_memory_slice_depth,
 81 |             enable_all_funs=enable_all_funs,
 82 |         )
 83 |         slicer.start()
 84 |         return slicer.paths()
 85 | 
 86 |     def assert_paths(
 87 |         self,
 88 |         srcs: List[Tuple[str, Optional[int]]],
 89 |         snks: List[Tuple[str, Optional[int]]],
 90 |         call_chains: List[List[str]],
 91 |         filenames: List[str],
 92 |         bv_callback: Optional[Callable[[bn.BinaryView], None]] = lambda bv: None,
 93 |     ) -> None:
 94 |         # Ensure relevant source functions are enabled
 95 |         src_names = [src[0] for src in srcs]
 96 |         src_funs = self._model.get_functions("libc", fun_type="Sources")
 97 |         for src_fun in src_funs:
 98 |             if src_fun.name in src_names:
 99 |                 src_fun.enabled = True
100 |         # Ensure relevant sink functions are enabled
101 |         snk_names = [snk[0] for snk in snks]
102 |         snk_funs = self._model.get_functions("libc", fun_type="Sinks")
103 |         for snk_fun in snk_funs:
104 |             if snk_fun.name in snk_names:
105 |                 snk_fun.enabled = True
106 |         # Iterate over all test files
107 |         for file in self.load_files(filenames):
108 |             # Load and analyze test binary with Binary Ninja
109 |             bv = bn.load(file)
110 |             bv.update_analysis_and_wait()
111 |             bv_callback(bv)
112 |             # Find paths in test binary with backward slicing
113 |             paths = self.get_paths(bv)
114 |             # Determine call chains
115 |             _call_chains = []
116 |             for path in paths:
117 |                 _call_chains.append(
118 |                     [call[0].source_function.symbol.short_name for call in path.calls]
119 |                 )
120 |                 # Assert source
121 |                 assert isinstance(
122 |                     path.insts[-1],
123 |                     bn.MediumLevelILCallSsa | bn.MediumLevelILTailcallSsa,
124 |                 ), "invalid source instruction type"
125 |                 assert (
126 |                     path.src_sym_name,
127 |                     path.src_par_idx,
128 |                 ) in srcs, "invalid source"
129 |                 # Assert sink
130 |                 assert isinstance(
131 |                     path.insts[0],
132 |                     bn.MediumLevelILCallSsa | bn.MediumLevelILTailcallSsa,
133 |                 ), "invalid sink instruction type"
134 |                 assert (
135 |                     path.snk_sym_name,
136 |                     path.snk_par_idx,
137 |                 ) in snks, "invalid sink"
138 |             # Assert call chains
139 |             for call_chain in call_chains:
140 |                 if call_chain in _call_chains:
141 |                     _call_chains.remove(call_chain)
142 |                 else:
143 |                     assert False, "invalid call chains"
144 |             assert not _call_chains, "invalid call chains"
145 |             # Close test binary
146 |             bv.file.close()
147 |         return
148 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Publish Release](https://github.com/pdamian/mole/actions/workflows/release.yml/badge.svg)](https://github.com/pdamian/mole/actions/workflows/release.yml)
 2 | [![Release](https://img.shields.io/github/v/release/cyber-defence-campus/mole)](https://img.shields.io/github/v/release/cyber-defence-campus/mole)
 3 | # Mole
 4 | 
 5 | <p align="center">
 6 |   <img src="https://i.postimg.cc/mrcXH34C/image-1.png" alt="Mole Logo"/>
 7 | </p>
 8 | 
 9 | **_Mole_** is a *Binary Ninja* plugin designed to identify **interesting paths** in binaries. It performs **static backward slicing** on variables using *Binary Ninja*'s [*Medium Level Intermediate Language* (*MLIL*)](https://docs.binary.ninja/dev/bnil-mlil.html) in its *Static Single Assignment* (*SSA*) form.
10 | 
11 | In *Mole*, a **path** refers to the flow of data between a defined source and sink. What constitutes an "interesting" path depends on the analysis goals. For instance, when searching for **vulnerabilities**, one might look for paths where untrusted inputs (sources) influence sensitive operations (sinks) in potentially dangerous ways.
12 | 
13 | The following list highlights some of *Mole*'s current **features**:
14 | - **Operation Mode**: *Mole* can be run either within *Binary Ninja*'s UI or in headless mode. Headless mode is particularly useful for scripted analysis across a large number of binaries. Conversely, using *Mole* within the UI is ideal for closely investigating detected paths.
15 | - **Path Identification**:
 16 |   - **Configuration**: *Mole* allows users to define source and sink functions through *Binary Ninja*'s UI or configuration files (see [Usage](./docs/02-Usage.md#source-and-sink-functions)). This provides flexibility in selecting sources and sinks based on the specific usage scenario.
17 |   - **Exploration**: To better understand a path and examine its characteristics, all instructions along the path can be printed or visually highlighted within *Binary Ninja*. Additionally, a side-by-side comparison of two paths can be displayed to quickly identify differences. Similar to instructions, a path's sequence of function calls can be printed or even visualized as a graph.
18 |   - **Grouping**: To facilitate the identification of similar paths, *Mole* supports multiple grouping strategies. Currently, paths can be grouped based on matching source and sink functions, or by identical call sequences. New custom grouping strategies can easily be added to extend and customize this functionality (see [Customization](./docs/03-Customization.md#path-grouping-strategy)).
19 |   - **Persistence**: Discovered paths can be annotated for clarity or removed if deemed irrelevant. To preserve analysis progress, paths can be saved directly to the target binary's database (*Binary Ninja*'s `.bndb` format). Paths can also be exported - for example, when performing headless analysis across many binaries on a file system, allowing identified paths to be later imported for easier exploration within *Binary Ninja*.
20 | - **Path Analysis With AI**: *Mole* can interact with local or remote *Large Language Models* (*LLMs*) via the *OpenAI API* (see [Usage](./docs/02-Usage.md#ai-analysis-mode)). The models are used to analyze identified paths and reason whether a given path corresponds to an exploitable vulnerability or not. The *LLM* attempts to classify the vulnerability and assign a severity level, which can help prioritize which paths are worth further investigation by an analyst. *Mole* provides a basic set of tools that the *LLM* can use to request additional information about the binary under analysis. This feature is an initial prototype and has not yet undergone systematic evaluation (e.g., models, prompts, tools). If it proves useful, we plan to improve it in future releases.
21 | 
22 | - **Inter-Procedural Variable Slicing**: *Mole* supports slicing *MLIL variables* across function boundaries - a task that presents several challenges. For instance, statically determining a function's effective caller(s) is often difficult or even impossible. As a result, the implemented approach is an approximation. While not perfect, it performs reasonably well across a wide range of practical scenarios.
23 | - **Basic Pointer Analysis**: *Mole* currently implements a simplified strategy for tracking pointer usage. Like inter-procedural slicing, this approach is a simplification with inherent limitations (e.g. it cannot track global variables). Nevertheless, it performs well in many practical cases and is planned to be improved in future versions.
24 | 
25 | ## Usage Example
26 | See *Mole* in action as it exposes [CVE-2021-4045](https://www.hacefresko.com/posts/tp-link-tapo-c200-unauthenticated-rce), an unauthenticated command injection flaw in the *TP-Link Tapo C200* IP camera (**click the image below to watch on YouTube**):
27 | 
28 | [![Mole Usage Example](https://img.youtube.com/vi/jCn7ePhjYNk/maxresdefault.jpg)](https://www.youtube.com/watch?v=jCn7ePhjYNk)
29 | 
30 | ## Documentation
31 | 1. [Installation](./docs/01-Installation.md)
32 | 2. [Usage](./docs/02-Usage.md)
33 | 3. [Customization](./docs/03-Customization.md)
34 | 
35 | ## Contribute or Share Your Experience
 36 | *Mole* is currently a **work in progress**. If you encounter a bug, have a useful new unit test that highlights a false positive or negative, or have a suggestion for a new feature, please consider opening an issue or contributing via a pull request. Also note that the current [unit tests](./test/src/) have only been verified on `linux-x86_64` and `linux-armv7` binaries so far.
37 | 
38 | If you have an interesting **success story** - such as finding a vulnerability with the help of *Mole* - we would love to hear about it! Feel free to share your experience with us.
39 | 
40 | ## Contributors
41 | - [Damian Pfammatter](https://github.com/pdamian), [Cyber-Defence Campus (armasuisse S+T)](https://www.cydcampus.admin.ch/en)
42 | - [Daniel Hulliger](https://github.com/dhulliger), [Cyber-Defence Campus (armasuisse S+T)](https://www.cydcampus.admin.ch/en)
43 | - [Sergio Paganoni](https://github.com/wizche)
44 | 


--------------------------------------------------------------------------------
/mole/common/log.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from datetime import datetime
  3 | from termcolor import colored
  4 | from typing import List, Literal
  5 | import binaryninja as bn
  6 | import sys as sys
  7 | 
  8 | 
  9 | class Logger:
 10 |     """
 11 |     This class prints messages to the console or Binary Ninja's log.
 12 |     """
 13 | 
 14 |     _levels = ["debug", "info", "warning", "error"]
 15 | 
 16 |     def __init__(
 17 |         self,
 18 |         level: Literal["debug", "info", "warning", "error"] = "debug",
 19 |         runs_headless: bool = False,
 20 |     ) -> None:
 21 |         """
 22 |         This method initializes a `Logger` that can be used to write messages of a given `level`
 23 |         (and above) to Binary Ninja's log and to stdout/stderr.
 24 |         """
 25 |         self._logger = bn.Logger(0, "Plugin: Mole")
 26 |         self.change_properties(level, runs_headless)
 27 |         self.find_attached_debugger()
 28 |         return
 29 | 
 30 |     def change_properties(
 31 |         self,
 32 |         level: Literal["debug", "info", "warning", "error"] = "debug",
 33 |         runs_headless: bool = False,
 34 |     ) -> None:
 35 |         """
 36 |         This method changes the properties of a `Logger`.
 37 |         """
 38 |         self._level = self._levels.index(level)
 39 |         self._runs_headless = runs_headless
 40 |         return
 41 | 
 42 |     def find_attached_debugger(self) -> None:
 43 |         """
 44 |         This method checks whether or not a debugger is attached.
 45 |         """
 46 |         self._runs_debugger = any(
 47 |             module.startswith("debugpy") for module in sys.modules
 48 |         )
 49 |         return
 50 | 
 51 |     def get_level(self) -> str:
 52 |         """
 53 |         This method returns the configured log level.
 54 |         """
 55 |         return self._levels[self._level]
 56 | 
 57 |     def _tag_msg(
 58 |         self,
 59 |         tag: str = None,
 60 |         msg: str = None,
 61 |     ) -> str:
 62 |         """
 63 |         This method concatenates tag `tag` to the message `msg`.
 64 |         """
 65 |         m = ""
 66 |         if tag:
 67 |             m = f"[{tag:s}]"
 68 |         if msg:
 69 |             m = f"{m:s} {msg:s}"
 70 |         return m.strip()
 71 | 
 72 |     def _print(
 73 |         self,
 74 |         tag: str,
 75 |         msg: str,
 76 |         color: str,
 77 |         on_color: str = None,
 78 |         print_raw: bool = False,
 79 |         attrs: List[str] = [],
 80 |         file=sys.stdout,
 81 |     ) -> None:
 82 |         """
 83 |         This method prints the message `msg` to the console.
 84 |         """
 85 |         if not print_raw:
 86 |             now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 87 |             head = f"[{now:s}] [{tag:s}] "
 88 |         else:
 89 |             head = ""
 90 |         print(
 91 |             colored(f"{head:s}{msg:s}", color=color, on_color=on_color, attrs=attrs),
 92 |             file=file,
 93 |             flush=True,
 94 |         )
 95 |         return
 96 | 
 97 |     def debug(
 98 |         self,
 99 |         tag: str = None,
100 |         msg: str = None,
101 |         color: str = "magenta",
102 |         on_color: str = None,
103 |         print_raw: bool = False,
104 |         attrs: List[str] = [],
105 |     ) -> None:
106 |         """
107 |         This method prints a tagged message of log level debug to the console or Binary Ninja's log.
108 |         """
109 |         text = self._tag_msg(tag, msg)
110 |         if self._level > 0:
111 |             return
112 |         if not self._runs_headless and not self._runs_debugger:
113 |             self._logger.log_debug(text)
114 |         else:
115 |             self._print(
116 |                 "DEBG",
117 |                 text,
118 |                 color=color,
119 |                 on_color=on_color,
120 |                 print_raw=print_raw,
121 |                 attrs=attrs,
122 |                 file=sys.stdout,
123 |             )
124 |         return
125 | 
126 |     def info(
127 |         self,
128 |         tag: str = None,
129 |         msg: str = None,
130 |         color: str = "blue",
131 |         on_color: str = None,
132 |         print_raw: bool = False,
133 |         attrs: List[str] = [],
134 |     ) -> None:
135 |         """
136 |         This method prints a tagged message of log level info to the console or Binary Ninja's log.
137 |         """
138 |         text = self._tag_msg(tag, msg)
139 |         if self._level > 1:
140 |             return
141 |         if not self._runs_headless and not self._runs_debugger:
142 |             self._logger.log_info(text)
143 |         else:
144 |             self._print(
145 |                 "INFO",
146 |                 text,
147 |                 color=color,
148 |                 on_color=on_color,
149 |                 print_raw=print_raw,
150 |                 attrs=attrs,
151 |                 file=sys.stdout,
152 |             )
153 |         return
154 | 
155 |     def warn(
156 |         self,
157 |         tag: str = None,
158 |         msg: str = None,
159 |         color: str = "yellow",
160 |         on_color: str = None,
161 |         print_raw: bool = False,
162 |         attrs: List[str] = [],
163 |     ) -> None:
164 |         """
165 |         This method prints a tagged message of log level warn to the console or Binary Ninja's log.
166 |         """
167 |         text = self._tag_msg(tag, msg)
168 |         if self._level > 2:
169 |             return
170 |         if not self._runs_headless and not self._runs_debugger:
171 |             self._logger.log_warn(text)
172 |         else:
173 |             self._print(
174 |                 "WARN",
175 |                 text,
176 |                 color=color,
177 |                 on_color=on_color,
178 |                 print_raw=print_raw,
179 |                 attrs=attrs,
180 |                 file=sys.stderr,
181 |             )
182 |         return
183 | 
184 |     def error(
185 |         self,
186 |         tag: str = None,
187 |         msg: str = None,
188 |         color: str = "red",
189 |         on_color: str = None,
190 |         print_raw: bool = False,
191 |         attrs: List[str] = [],
192 |     ) -> None:
193 |         """
194 |         This method prints a tagged message of log level error to the console or Binary Ninja's log.
195 |         """
196 |         text = self._tag_msg(tag, msg)
197 |         if self._level > 3:
198 |             return
199 |         if not self._runs_headless and not self._runs_debugger:
200 |             self._logger.log_error(text)
201 |         else:
202 |             self._print(
203 |                 "ERRO",
204 |                 text,
205 |                 color=color,
206 |                 on_color=on_color,
207 |                 print_raw=print_raw,
208 |                 attrs=attrs,
209 |                 file=sys.stderr,
210 |             )
211 |         return
212 | 
213 | 
# Module-level logger instance, created with the default properties
# (level "debug", not headless)
log = Logger()
215 | 


--------------------------------------------------------------------------------
/tests/data/src/simple_http_server-03.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <unistd.h>
  5 | #include <arpa/inet.h>
  6 | 
  7 | #define PORT 8080
  8 | #define BUFFER_SIZE 256
  9 | 
 10 | /*
 11 | Testcase Description:
 12 | - server example
 13 | - disallow function inlining
 14 | */
 15 | 
/*
 * Forward declarations. Every helper carries the noinline attribute, in line
 * with the "disallow function inlining" note in the testcase description.
 */
__attribute__ ((noinline)) 
void handle_client(int client_socket);

__attribute__ ((noinline)) 
void execute_cgi_command(const char *buffer);

__attribute__ ((noinline)) 
void send_response(int client_socket, const char *response);

__attribute__ ((noinline)) 
int create_server_socket(struct sockaddr_in *address);

__attribute__ ((noinline)) 
void handle_get_request(int client_socket);

__attribute__ ((noinline)) 
void handle_post_request(int client_socket);

__attribute__ ((noinline)) 
char* receive_data(int client_socket, int *size);

__attribute__ ((noinline)) 
void log_request(const char *method, const char *buffer);

__attribute__ ((noinline)) 
void process_request(int client_socket, const char *method, const char *buffer);

__attribute__ ((noinline)) 
void handle_put_request(int client_socket);

__attribute__ ((noinline)) 
void handle_delete_request(int client_socket);

__attribute__ ((noinline)) 
void wrap_and_execute(const char *buffer);
 51 | 
/*
 * Entry point: creates the listening socket and then serves clients one at a
 * time in an endless accept loop.
 */
int main() {
    int server_fd, client_socket;
    struct sockaddr_in address;
    int addrlen = sizeof(address);

    server_fd = create_server_socket(&address);

    while (1) {
        /* Wait for the next client; a failing accept() terminates the process */
        if ((client_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept");
            close(server_fd);
            exit(EXIT_FAILURE);
        }
        /* The client is handled synchronously before the next accept() */
        handle_client(client_socket);
    }

    /* Not reached: the loop above has no break */
    close(server_fd);
    return 0;
}
 71 | 
/*
 * Receives one chunk from the client, dispatches on the method token at its
 * start and frees the chunk afterwards.
 *
 * On a receive failure the socket is closed and nothing is dispatched.
 */
void handle_client(int client_socket) {
    int size;
    char *method = receive_data(client_socket, &size);

    if (method == NULL) {
        close(client_socket);
        return;
    }

    /*
     * Only the leading bytes are compared; "GET " and "PUT " include the
     * trailing space. Each handler calls receive_data() on the socket again.
     */
    if (strncmp(method, "GET ", 4) == 0) {
        handle_get_request(client_socket);
    } else if (strncmp(method, "POST", 4) == 0) {
        handle_post_request(client_socket);
    } else if (strncmp(method, "PUT ", 4) == 0) {
        handle_put_request(client_socket);
    } else if (strncmp(method, "DELETE", 6) == 0) {
        handle_delete_request(client_socket);
    } else {
        /* Unknown method: the socket is closed here; the handlers above close it themselves */
        send_response(client_socket, "HTTP/1.1 405 Method Not Allowed\r\nContent-Type: text/plain\r\n\r\nMethod Not Allowed.\n");
        close(client_socket);
    }

    free(method);
}
 96 | 
/*
 * Receives a chunk from the client, logs it and passes it to
 * process_request() with method "GET". The chunk is freed afterwards.
 *
 * On a receive failure the socket is closed here; otherwise process_request()
 * closes it.
 */
void handle_get_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("GET", buffer);
    process_request(client_socket, "GET", buffer);
    free(buffer);
}
110 | 
/*
 * Receives a chunk from the client, logs it and passes it to
 * process_request() with method "POST". The chunk is freed afterwards.
 *
 * On a receive failure the socket is closed here; otherwise process_request()
 * closes it.
 */
void handle_post_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("POST", buffer);
    process_request(client_socket, "POST", buffer);
    free(buffer);
}
124 | 
/*
 * Receives a chunk from the client, logs it and passes it to
 * process_request() with method "PUT", which answers PUT with a 501 response.
 * The chunk is freed afterwards.
 *
 * On a receive failure the socket is closed here; otherwise process_request()
 * closes it.
 */
void handle_put_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("PUT", buffer);
    process_request(client_socket, "PUT", buffer);
    free(buffer);
}
138 | 
/*
 * Receives a chunk from the client, logs it and passes it to
 * process_request() with method "DELETE", which answers DELETE with a 501
 * response. The chunk is freed afterwards.
 *
 * On a receive failure the socket is closed here; otherwise process_request()
 * closes it.
 */
void handle_delete_request(int client_socket) {
    int size;
    char *buffer = receive_data(client_socket, &size);

    if (buffer == NULL) {
        close(client_socket);
        return;
    }

    log_request("DELETE", buffer);
    process_request(client_socket, "DELETE", buffer);
    free(buffer);
}
152 | 
/*
 * Prints the method name and the received buffer to stdout. The format
 * string is constant; method and buffer are only passed as %s arguments.
 */
void log_request(const char *method, const char *buffer) {
    printf("Received %s request: %s\n", method, buffer);
}
156 | 
/*
 * Handles a received request according to its method name.
 *
 * For "GET" and "POST" the buffer is forwarded to wrap_and_execute() and a
 * 200 response is sent; every other method gets a 501 response. The client
 * socket is closed in both cases.
 */
void process_request(int client_socket, const char *method, const char *buffer) {
    if (strcmp(method, "GET") == 0 || strcmp(method, "POST") == 0) {
        wrap_and_execute(buffer);
        send_response(client_socket, "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nRequest received.\n");
    } else {
        send_response(client_socket, "HTTP/1.1 501 Not Implemented\r\nContent-Type: text/plain\r\n\r\nRequest method not implemented.\n");
    }
    close(client_socket);
}
166 | 
/*
 * Looks for "/cgi-bin/" in the buffer and passes the text that follows it,
 * up to the next space, to system(). Nothing happens if either the marker or
 * the terminating space is missing.
 *
 * NOTE(review): the command string is taken unfiltered from the received
 * data. This is presumably the intended vulnerable flow of this test case -
 * confirm against the slicing tests before changing it.
 */
void execute_cgi_command(const char *buffer) {
    char *cgi_start = strstr(buffer, "/cgi-bin/");
    if (cgi_start) {
        /* Skip the marker itself so that cgi_start points at the command text */
        cgi_start += strlen("/cgi-bin/");
        char *cgi_end = strchr(cgi_start, ' ');
        if (cgi_end) {
            /* Terminates the command in place; this writes into the caller's buffer despite the const parameter */
            *cgi_end = '\0';
            system(cgi_start);
        }
    }
}
178 | 
/*
 * Writes the NUL-terminated response string to the client socket. The return
 * value of write() is not checked.
 */
void send_response(int client_socket, const char *response) {
    write(client_socket, response, strlen(response));
}
182 | 
183 | int create_server_socket(struct sockaddr_in *address) {
184 |     int server_fd;
185 | 
186 |     if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {
187 |         perror("socket failed");
188 |         exit(EXIT_FAILURE);
189 |     }
190 | 
191 |     address->sin_family = AF_INET;
192 |     address->sin_addr.s_addr = INADDR_ANY;
193 |     address->sin_port = htons(PORT);
194 | 
195 |     if (bind(server_fd, (struct sockaddr *)address, sizeof(*address)) < 0) {
196 |         perror("bind failed");
197 |         close(server_fd);
198 |         exit(EXIT_FAILURE);
199 |     }
200 | 
201 |     if (listen(server_fd, 3) < 0) {
202 |         perror("listen");
203 |         close(server_fd);
204 |         exit(EXIT_FAILURE);
205 |     }
206 | 
207 |     return server_fd;
208 | }
209 | 
/*
 * Reads one chunk of data from the client socket into a newly allocated,
 * NUL-terminated buffer of BUFFER_SIZE bytes.
 *
 * size: receives the number of bytes read; only written on success.
 *
 * Returns the buffer, or NULL if malloc() or recv() failed. The callers in
 * this file release the buffer with free().
 */
char* receive_data(int client_socket, int *size) {
    char *buffer = (char *)malloc(BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc");
        return NULL;
    }

    /* A single recv() of at most BUFFER_SIZE - 1 bytes leaves room for the terminator */
    int bytes_read = recv(client_socket, buffer, BUFFER_SIZE - 1, 0);
    if (bytes_read < 0) {
        perror("recv");
        free(buffer);
        return NULL;
    }

    buffer[bytes_read] = '\0';
    *size = bytes_read;
    return buffer;
}
228 | 
/*
 * Thin wrapper that forwards the buffer unchanged to execute_cgi_command(),
 * adding one call level between process_request() and the system() call.
 */
void wrap_and_execute(const char *buffer) {
    execute_cgi_command(buffer);
}
232 | 


--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from mole.core.data import Category, Configuration, Library
  3 | from mole.core.data import SinkFunction, SourceFunction
  4 | from mole.core.data import (
  5 |     ComboboxSetting,
  6 |     DoubleSpinboxSetting,
  7 |     SpinboxSetting,
  8 |     TextSetting,
  9 | )
 10 | from mole.services.config import ConfigService
 11 | from typing import Generator, IO
 12 | import pytest
 13 | import tempfile
 14 | 
 15 | 
 16 | @pytest.fixture
 17 | def temp_file() -> Generator[IO[str], None, None]:
 18 |     """Provides a temporary file for testing."""
 19 |     tf = tempfile.NamedTemporaryFile(mode="w+", delete=False)
 20 |     yield tf
 21 |     tf.close()
 22 |     return
 23 | 
 24 | 
 25 | @pytest.fixture
 26 | def config_service() -> ConfigService:
 27 |     """Provides a ConfigService instance."""
 28 |     return ConfigService()
 29 | 
 30 | 
 31 | @pytest.fixture
 32 | def test_config() -> Configuration:
 33 |     """Provides a test Configuration object."""
 34 |     return Configuration(
 35 |         sources={
 36 |             "manual": Library(name="manual", categories={}),
 37 |             "libc": Library(
 38 |                 name="libc",
 39 |                 categories={
 40 |                     "Environment Accesses": Category(
 41 |                         name="Environment Accesses",
 42 |                         functions={
 43 |                             "getenv": SourceFunction(
 44 |                                 name="getenv",
 45 |                                 symbols=["getenv", "__builtin_getenv"],
 46 |                                 synopsis="char* getenv(const char* name)",
 47 |                                 enabled=True,
 48 |                                 par_cnt="i == 1",
 49 |                                 par_slice="False",
 50 |                             )
 51 |                         },
 52 |                     )
 53 |                 },
 54 |             ),
 55 |         },
 56 |         sinks={
 57 |             "manual": Library(name="manual", categories={}),
 58 |             "libc": Library(
 59 |                 name="libc",
 60 |                 categories={
 61 |                     "Memory Copy": Category(
 62 |                         name="Memory Copy",
 63 |                         functions={
 64 |                             "memcpy": SinkFunction(
 65 |                                 name="memcpy",
 66 |                                 symbols=["memcpy", "__builtin_memcpy"],
 67 |                                 synopsis="void* memcpy(void* dest, const void* src, size_t n)",
 68 |                                 enabled=True,
 69 |                                 par_cnt="i == 3",
 70 |                                 par_slice="True",
 71 |                             )
 72 |                         },
 73 |                     )
 74 |                 },
 75 |             ),
 76 |         },
 77 |         settings={
 78 |             "max_workers": SpinboxSetting(
 79 |                 name="max_workers",
 80 |                 value=-1,
 81 |                 min_value=-1,
 82 |                 max_value=256,
 83 |                 help="maximum number of worker thread that backward slicing uses",
 84 |             ),
 85 |             "max_call_level": SpinboxSetting(
 86 |                 name="max_call_level",
 87 |                 value=5,
 88 |                 min_value=-1,
 89 |                 max_value=99,
 90 |                 help="backward slicing visits called functions up to the given level",
 91 |             ),
 92 |             "max_slice_depth": SpinboxSetting(
 93 |                 name="max_slice_depth",
 94 |                 value=-1,
 95 |                 min_value=-1,
 96 |                 max_value=9999,
 97 |                 help="maximum slice depth to stop the search",
 98 |             ),
 99 |             "max_memory_slice_depth": SpinboxSetting(
100 |                 name="max_memory_slice_depth",
101 |                 value=-1,
102 |                 min_value=-1,
103 |                 max_value=9999,
104 |                 help="maximum memory slice depth to stop the search",
105 |             ),
106 |             "src_highlight_color": ComboboxSetting(
107 |                 name="src_highlight_color",
108 |                 value="Orange",
109 |                 items=[
110 |                     "Blue",
111 |                     "Green",
112 |                     "Cyan",
113 |                     "Red",
114 |                     "Magenta",
115 |                     "Yellow",
116 |                     "Orange",
117 |                 ],
118 |                 help="color used to highlight instructions originating from slicing a source function",
119 |             ),
120 |             "snk_highlight_color": ComboboxSetting(
121 |                 name="snk_highlight_color",
122 |                 value="Red",
123 |                 items=[
124 |                     "Blue",
125 |                     "Green",
126 |                     "Cyan",
127 |                     "Red",
128 |                     "Magenta",
129 |                     "Yellow",
130 |                     "Orange",
131 |                 ],
132 |                 help="color used to highlight instructions originating from slicing a sink function",
133 |             ),
134 |             "path_grouping": ComboboxSetting(
135 |                 name="path_grouping",
136 |                 value="Call Graph",
137 |                 items=["Call Graph", "Source / Sink", "None"],
138 |                 help="strategy used to group paths",
139 |             ),
140 |             "openai_base_url": TextSetting(
141 |                 name="openai_base_url",
142 |                 value="https://api.openai.com/v1",
143 |                 help="OpenAI API base URL",
144 |             ),
145 |             "openai_api_key": TextSetting(
146 |                 name="openai_api_key",
147 |                 value="",
148 |                 help="OpenAI API key",
149 |             ),
150 |             "openai_model": TextSetting(
151 |                 name="openai_model",
152 |                 value="o4-mini",
153 |                 help="OpenAI model",
154 |             ),
155 |             "max_turns": SpinboxSetting(
156 |                 name="max_turns",
157 |                 value=10,
158 |                 min_value=2,
159 |                 max_value=256,
160 |                 help="maximum number of turns in a conversation with the AI",
161 |             ),
162 |             "max_completion_tokens": SpinboxSetting(
163 |                 name="max_completion_tokens",
164 |                 value=4096,
165 |                 min_value=-1,
166 |                 max_value=100000,
167 |                 help="maximum number of tokens in a completion",
168 |             ),
169 |             "temperature": DoubleSpinboxSetting(
170 |                 name="temperature",
171 |                 value=1.0,
172 |                 min_value=0.0,
173 |                 max_value=2.0,
174 |                 help="the sampling temperature to use",
175 |             ),
176 |         },
177 |     )
178 | 
179 | 
class TestData:
    """
    Unit tests covering the data classes.
    """

    def test_serialize_configuration(
        self,
        temp_file: IO[str],
        config_service: ConfigService,
        test_config: Configuration,
    ) -> None:
        """
        Round-trips a `Configuration` through export and import and expects an equal object.
        """
        expected: Configuration = test_config
        # Serialize: write the configuration to the temporary file
        config_service.export_config(expected, temp_file.name)
        # Rewind the fixture's own handle before reading the file back
        temp_file.seek(0)
        # Deserialize: load the configuration from the same path
        restored = config_service.import_config(temp_file.name)
        # The round trip must not change the configuration
        assert expected == restored, "Serialization error of 'Configuration'"
        return
200 | 


--------------------------------------------------------------------------------