├── README.md
├── featureExtractor
├── bcat_client
│ ├── pom.xml
│ └── src
│ │ └── main
│ │ └── java
│ │ └── thusca
│ │ └── bcat
│ │ └── client
│ │ ├── ClientApplication.java
│ │ ├── consumer
│ │ ├── BinFileFeatureExtractTest.java
│ │ ├── Task2ExtractCoreFedora.java
│ │ ├── TaskExtractFeatureLibs13.java
│ │ └── TaskProcessTargets.java
│ │ ├── entity
│ │ ├── BaseFile.java
│ │ ├── BinFileFeature.java
│ │ ├── BinaryFile.java
│ │ ├── FeatureExtractStatus.java
│ │ └── FunctionFeature.java
│ │ ├── model
│ │ └── CIdModel.java
│ │ ├── service
│ │ ├── ExtractService.java
│ │ ├── GetBinFeatureService.java
│ │ ├── GetBinFileService.java
│ │ └── SaveToJsonService.java
│ │ └── utils
│ │ ├── BinaryAnalyzer.java
│ │ ├── FileUtil.java
│ │ ├── LibmagicJnaWrapper.java
│ │ ├── LibmagicJnaWrapperBean.java
│ │ ├── StatusMsg.java
│ │ └── libghidra
│ │ ├── LibGhidra.java
│ │ ├── LibHeadlessAnalyzer.java
│ │ ├── LibHeadlessErrorLogger.java
│ │ ├── LibHeadlessOptions.java
│ │ ├── LibHeadlessScript.java
│ │ ├── LibHeadlessTimedTaskMonitor.java
│ │ └── LibProgramHandler.java
└── pom.xml
├── main
├── __init__.py
└── torch
│ ├── __init__.py
│ ├── analyze_results.py
│ ├── b2sfinder_afcg.py
│ ├── base_afcg.py
│ ├── build_milvus_database.py
│ ├── core_fedora_embeddings.py
│ ├── dataset.py
│ ├── eval.py
│ ├── eval_re_large.py
│ ├── func2vec.py
│ ├── function_vector_channel.py
│ ├── generate_vec_index.py
│ ├── get_data_gemini_format.py
│ ├── get_threshold.py
│ ├── get_validation_pairs.py
│ ├── milvus_mod.py
│ ├── run.sh
│ ├── torch_main.py
│ ├── torch_model.py
│ ├── utils.py
│ └── utils_loss.py
└── requirements.txt
/README.md:
--------------------------------------------------------------------------------
1 | # binary_tpl_detection
2 |
3 | Dataset url: https://figshare.com/s/4a007e78f29243531b8c
4 |
5 | ## Feature Extractor
6 | - The extractor extracts features from all binary files under a given directory and saves them to a JSON file.
7 | - Input: directory
8 | - Output: two files, stored in a given target directory.
9 | - Information such as running time is stored in the `status` file.
10 | - Extracted features are stored in the features file, such as `9760608.json`. The format of this json is a list of BinaryFile entity.
11 | - It is recommended to put your task code under `consumer` directory (in `featureExtractor/bcat_client/src/main/java/thusca/bcat/client/consumer`). See the example in `consumer/BinFileFeatureExtractTest.java`
12 |
13 | ### Pre-requisites
14 | Basic knowledge about Java Development, Springboot and Annotation Development.
15 | For example, if you use an IDE such as VS Code or IntelliJ IDEA, a basic Java development environment needs to be installed, such as `Java Extension Pack` and `MAVEN for JAVA`. Note that the code uses Lombok annotations and Spring Boot, so the IDE may need the extensions `Lombok Annotations Support` and `Spring Boot Tools` to debug or run it. In addition, LibmagicJnaWrapper depends on libmagic to determine file types; please install this library and modify the paths in LibmagicJnaWrapper.java. It can be easily installed with the apt/brew command on Linux/macOS.
16 |
17 | ### Build Artifact
18 | Env:
19 | - Java: Java 11.
20 | - IntelliJ IDEA. (We have found that the extractor artifact works reliably only when it is built with IntelliJ IDEA. Tested successfully with IntelliJ IDEA 2021.2 on Windows.)
21 |
22 | Steps:
23 | 1. Ghidra: 9.1.2. The file `ghidra.jar` is stored under `/user/lib/ghidra.jar`; you should put it under `/featureExtractor/bcat_client/lib` first.
24 | 2. Open IDEA and open the project "binary_lib_detection-main\featureExtractor". Wait until indexing finishes; if an error occurs, try reopening/cleaning the project.
25 | 3. File -> Project Structure -> Project SDK, select Java SDK 11.
26 | 4. File -> Project Structure -> Artifacts -> "+" -> jar -> from modules with dependencies -> Module ("bcat_client") -> Main Class ("ClientApplication") -> JAR files from libraries (select `copy to the output directory and link via manifest`)
27 | 5. The jars will be generated at path: featureExtractor\out\artifacts\bcat_client_jar, with `bcat_client.jar` inside.
28 |
29 | ### Task
30 | Methods for all tasks are stored under the directory `/consumer`.
31 | Building database: Code: `Task2ExtractCoreFedora.java`, Data: `FedoraLib_Dataset`. Set the save path and extract all features to build the TPL feature database. We use the directory `../data/CoreFedoraFeatureJson0505` to represent the save path.
32 |
33 | ### Run
34 | Zip the bcat_client_jar folder and upload to a Linux server, unzip, and run:
35 | ```shell
36 | java -jar bcat_client.jar
37 | ```
38 |
39 | Note: Java 11 required.
40 |
41 | ## Func similarity Model
42 | This model is used to determine if two functions are similar based on [Gemini](https://github.com/xiaojunxu/dnn-binary-code-similarity) Network.
43 |
44 | Preparation and Data
45 | Data is stored in `../data/vector_deduplicate_gemini_format_less_compilation_cases`.
46 | or Cross-5C_Dataset.7z on figshare.
47 |
48 | By default, we use the path `../data` under `main/torch` to store the data. Please copy them under it.
49 |
50 | ### Environment Step
51 | The network is written using Torch 1.8 in Python 3.8. Torch installation is based on cuda 11.
52 |
53 | ```
54 | conda create -n tpldetection python=3.8 ipykernel
55 | bash
56 | conda activate tpldetection
57 | pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
58 | pip install -r requirements.txt
59 | ```
60 |
61 | Milvus v1.1.1 (vector search engine) is necessary for function retrieval. It requires Docker 19.03 or higher.
62 | ref: https://milvus.io/docs/v1.1.1/milvus_docker-gpu.md
63 | ```shell
64 | sudo docker pull milvusdb/milvus:1.1.1-gpu-d061621-330cc6
65 | mkdir -p /home/$USER/milvus/conf
66 | cd /home/$USER/milvus/conf
67 | wget https://raw.githubusercontent.com/milvus-io/milvus/v1.1.1/core/conf/demo/server_config.yaml
68 |
69 | sudo docker run -d --name milvus_gpu_1.1.1 --gpus all \
70 | -p 19530:19530 \
71 | -p 19121:19121 \
72 | -v /home/$USER/milvus/db:/var/lib/milvus/db \
73 | -v /home/$USER/milvus/conf:/var/lib/milvus/conf \
74 | -v /home/$USER/milvus/logs:/var/lib/milvus/logs \
75 | -v /home/$USER/milvus/wal:/var/lib/milvus/wal \
76 | milvusdb/milvus:1.1.1-gpu-d061621-330cc6
77 | ```
78 |
79 | ## Run
80 | Run the following command to train the model:
81 | ```shell
82 | # train/validation dataset: /data/func_comparison/vector_deduplicate_our_format_less_compilation_cases/train_test
83 | # test dataset: /data/func_comparison/vector_deduplicate_our_format_less_compilation_cases/valid
84 | cd main/torch
85 | bash run.sh
86 | ```
87 | A trained model is saved under `../data/7fea_contra_torch_b128/saved_model/`
88 |
89 | ## Library detection
90 |
91 | ### Database
92 | #### Embedding
93 | raw feature database: `../data/CoreFedoraFeatureJson0505`
94 |
95 | Embeddings:
96 | set the path `../data/CoreFedoraFeatureJson0505` as `args.fedora_js`.
97 | You can use multiprocessing to speed this up; the code is written in `core_fedora_embeddings.py` as follows:
98 | ```python
99 | with Pool(10) as p:
100 | p.starmap(core_fedora_embedding, [(i, True) for i in range(10)])
101 | ```
102 | all embeddings are saved under the `args.save_path`.
103 | We use the path `../data/7fea_contra_torch_b128/core_funcs` to represent it.
104 |
105 | #### Indexing and Building Milvus dataset
106 | Run `build_milvus_database.py` to build the function vector database using Milvus.
107 |
108 | The function `get_bin_fcg` is used to generate an index file that maps each binary to its functions, to accelerate processing.
109 |
110 | `get_bin2func_num` generates an index from each binary to the number of functions in it.
111 |
112 |
113 | #### Detection
114 | Data: detection_targets. First, extract features from APKs. See the method `localExtractOSSPoliceApks` in `TaskProcessTargets.java` under the directory `consumer`. We use the directory `../data/detection_targets/feature_json` to save all extracted features.
115 |
116 | see the function `detect_v2` in function_vector_channel.
117 | Other methods + FCG Filter can be seen in files `xxx_afcg.py`.
118 | Baselines are under the directory `/related_work`.
119 |
120 | We combine basic feature channel (B2SFinder(basic features) + FCG Filter) and function vector channel together to report the final results.
121 |
122 | All files named `analyze_results.py` are used to calculate precision and recall.
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | bcat
5 | thusca
6 | 0.0.1-SNAPSHOT
7 |
8 | 4.0.0
9 |
10 | bcat_client
11 |
12 |
13 |
14 | com.google.guava
15 | guava
16 | 20.0
17 |
18 |
19 |
20 | ghidra
21 | ghidra
22 | 1.0
23 | system
24 | ${project.basedir}/lib/ghidra.jar
25 |
26 |
27 | com.sun.jna
28 | jna
29 | 3.0.9
30 | compile
31 |
32 |
33 | com.github.junrar
34 | junrar
35 | 3.0.0
36 |
37 |
38 |
39 | org.springframework.boot
40 | spring-boot-configuration-processor
41 | true
42 |
43 |
44 | org.springframework.boot
45 | spring-boot-test
46 |
47 |
48 | org.springframework.boot
49 | spring-boot-starter-test
50 | test
51 |
52 |
53 | org.junit.jupiter
54 | junit-jupiter-api
55 | 5.6.2
56 |
57 |
58 | net.sf.sevenzipjbinding
59 | sevenzipjbinding
60 | 9.20-2.00beta
61 |
62 |
63 | org.apache.logging.log4j
64 | log4j-api
65 | 2.13.3
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | src/main/java
74 |
75 | **/*.properties
76 | **/*.xml
77 |
78 | false
79 |
80 |
81 | src/main/resources
82 |
83 |
84 |
85 |
86 |
87 |
88 | org.springframework.boot
89 | spring-boot-maven-plugin
90 | 2.3.4.RELEASE
91 |
92 | thusca.bcat.client.ClientApplication
93 |
94 |
95 |
96 |
97 | repackage
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/ClientApplication.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | @SpringBootApplication // marks this class as the Spring Boot application class
7 | public class ClientApplication { // entry point of the feature-extractor client
8 | public static void main(String[] args){ // hands the command-line arguments to Spring Boot
9 | SpringApplication.run(ClientApplication.class, args);
10 | }
11 | }
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/consumer/BinFileFeatureExtractTest.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.consumer;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 | import org.springframework.beans.factory.ObjectFactory;
6 | import org.springframework.beans.factory.annotation.Autowired;
7 | import org.springframework.beans.factory.annotation.Qualifier;
8 | import org.springframework.boot.ApplicationArguments;
9 | import org.springframework.boot.ApplicationRunner;
10 | import org.springframework.stereotype.Component;
11 | import thusca.bcat.client.service.ExtractService;
12 |
13 | //@Component
14 | public class BinFileFeatureExtractTest implements ApplicationRunner {
15 | @Autowired
16 | @Qualifier("ExtractService")
17 | ObjectFactory extractServiceObjectFactory;
18 |
19 | @Override
20 | public void run(ApplicationArguments args) throws Exception {
21 | Logger logger = LoggerFactory.getLogger(this.getClass());
22 | String ghidraTmp = "/mnt/c/Users/user/Desktop/tmp/ghidraTmp";
23 | String unzippedPackagePath = "/mnt/c/Users/user/Desktop/tmp/binaryTarget/test";
24 | String jsonFileRootPath = "/mnt/c/Users/user/Desktop/tmp/saveJson";
25 | int packageId = 12345678;
26 | long startTime = System.currentTimeMillis();
27 | try {
28 | ExtractService extractService = extractServiceObjectFactory.getObject();
29 | extractService.init(unzippedPackagePath, jsonFileRootPath, ghidraTmp, packageId);
30 | extractService.executable();
31 | logger.info(Thread.currentThread().getName() + " [Done]: "+ packageId);
32 | } catch (Exception e) {
33 | e.printStackTrace();
34 | }
35 | long endTime = System.currentTimeMillis();
36 | logger.info("running time: " + (endTime - startTime)/1000 + "s");
37 | }
38 | }
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/consumer/Task2ExtractCoreFedora.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.consumer;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 | import org.springframework.beans.factory.annotation.Autowired;
6 | import org.springframework.beans.factory.annotation.Value;
7 | import org.springframework.boot.ApplicationArguments;
8 | import org.springframework.boot.ApplicationRunner;
9 | import org.springframework.stereotype.Component;
10 | import org.springframework.beans.factory.ObjectFactory;
11 | import thusca.bcat.client.service.ExtractService;
12 |
13 | import java.io.File;
14 | import java.util.concurrent.*;
15 |
16 | /**
17 |  * Task that walks a three-level dataset directory and extracts features for each package.
18 |  *
19 |  * <p>Directories are read as {@code rootPath/firstLevel/secondLevel/packageDir}. The name of
20 |  * each package directory is parsed as the integer package id, and its features are saved
21 |  * under {@code jsonFilePath/firstLevel/secondLevel/packageId}. A package whose JSON file
22 |  * already exists is skipped.
23 |  */
24 | // @Component
25 | public class Task2ExtractCoreFedora implements ApplicationRunner {
26 |
27 |     private final Logger logger = LoggerFactory.getLogger(this.getClass());
28 |
29 |     // tmp path
30 |     @Value("${ghidra.tmp.path}")
31 |     private String ghidraTmp;
32 |
33 |     // save path
34 |     @Value("${json.file.path}")
35 |     private String jsonFilePath;
36 |
37 |     // Pool settings belong to a parallel mode that is not active; they are kept so the
38 |     // injected configuration keys stay the same.
39 |     @Value("${core.pool.size}")
40 |     private int CORE_POOL_SIZE;
41 |     @Value("${core.pool.size}")
42 |     private int MAX_POOL_SIZE;
43 |     private static final int QUEUE_CAPACITY = 150;
44 |     private static final Long KEEP_ALIVE_TIME = 1L;
45 |
46 |     // Typed factory so getObject() yields ExtractService without a cast.
47 |     @Autowired
48 |     ObjectFactory<ExtractService> extractServiceObjectFactory;
49 |
50 |     private String rootPath = "../data/FedoraLib_Dataset";
51 |
52 |     /**
53 |      * Runs the extraction over the whole dataset, logs the elapsed time in milliseconds and
54 |      * terminates the JVM.
55 |      *
56 |      * @param args application arguments (not used)
57 |      */
58 |     @Override
59 |     public void run(ApplicationArguments args) throws Exception {
60 |         logger.info("Client start......");
61 |         long startTime = System.currentTimeMillis();
62 |
63 |         extractPackage();
64 |
65 |         long endTime = System.currentTimeMillis();
66 |         logger.info("run time:" + (endTime - startTime) + "ms");
67 |         System.exit(0);
68 |     }
69 |
70 |     /**
71 |      * Processes every package directory sequentially. Unreadable directories and package
72 |      * directories whose name is not an integer are logged and skipped instead of aborting
73 |      * the run.
74 |      */
75 |     public void extractPackage() {
76 |         for (File firstLevel : listDirectories(new File(rootPath))) {
77 |             String firstLevelId = firstLevel.getName();
78 |             for (File secondLevel : listDirectories(firstLevel)) {
79 |                 String secondLevelId = secondLevel.getName();
80 |                 for (File packageDir : listDirectories(secondLevel)) {
81 |                     String packageName = packageDir.getName();
82 |                     String savePath = jsonFilePath + "/" + firstLevelId + "/" + secondLevelId + "/" + packageName;
83 |                     if (new File(savePath, packageName + ".json").exists()) {
84 |                         logger.info("package has been processed: " + packageName);
85 |                         continue;
86 |                     }
87 |                     int packageId;
88 |                     try {
89 |                         packageId = Integer.parseInt(packageName);
90 |                     } catch (NumberFormatException e) {
91 |                         logger.warn("skipping package directory with non-numeric name: {}", packageDir);
92 |                         continue;
93 |                     }
94 |                     logger.info("package to be processed: " + packageName);
95 |                     process(packageDir.toString(), savePath, ghidraTmp, packageId);
96 |                 }
97 |             }
98 |         }
99 |     }
100 |
101 |     /**
102 |      * Extracts the features of one package and logs the time taken in seconds.
103 |      * Exceptions are logged with their stack trace and do not propagate.
104 |      *
105 |      * @param packageDir path of the package directory, passed to {@code ExtractService.init}
106 |      * @param savePath save directory, passed to {@code ExtractService.init}
107 |      * @param ghidraTmp temporary directory, passed to {@code ExtractService.init}
108 |      * @param packageId numeric package id, passed to {@code ExtractService.init}
109 |      */
110 |     public void process(String packageDir, String savePath, String ghidraTmp, int packageId) {
111 |         long startTime = System.currentTimeMillis();
112 |         try {
113 |             ExtractService extractService = extractServiceObjectFactory.getObject();
114 |             extractService.init(packageDir, savePath, ghidraTmp, packageId);
115 |             extractService.executable();
116 |             logger.info(Thread.currentThread().getName() + " extracted:" + packageDir);
117 |         } catch (Exception e) {
118 |             logger.error("exception in processing: {}", packageDir, e);
119 |         }
120 |         logger.info("run time:" + (System.currentTimeMillis() - startTime) / 1000 + "s");
121 |     }
122 |
123 |     /** Returns the sub-directories of {@code parent}, or an empty array if it cannot be listed. */
124 |     private File[] listDirectories(File parent) {
125 |         // File.listFiles returns null when the path is not a directory or an I/O error occurs.
126 |         File[] directories = parent.listFiles(File::isDirectory);
127 |         if (directories == null) {
128 |             logger.warn("cannot list directory: {}", parent);
129 |             return new File[0];
130 |         }
131 |         return directories;
132 |     }
133 | }
134 |
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/consumer/TaskExtractFeatureLibs13.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.consumer;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 | import org.springframework.beans.factory.annotation.Autowired;
6 | import org.springframework.boot.ApplicationArguments;
7 | import org.springframework.boot.ApplicationRunner;
8 | import org.springframework.beans.factory.annotation.Value;
9 | import org.springframework.stereotype.Component;
10 | import thusca.bcat.client.entity.BinaryFile;
11 | import thusca.bcat.client.entity.FeatureExtractStatus;
12 | import thusca.bcat.client.service.GetBinFileService;
13 | import thusca.bcat.client.utils.FileUtil;
14 | import org.springframework.beans.factory.ObjectFactory;
15 | import thusca.bcat.client.service.ExtractService;
16 |
17 | import java.io.File;
18 | import java.io.IOException;
19 | import java.util.List;
20 |
21 | / @Component
22 | public class TaskExtractFeatureLibs13 implements ApplicationRunner {
23 | private final Logger logger = LoggerFactory.getLogger(this.getClass());
24 | @Autowired
25 | ObjectFactory extractServiceObjectFactory;
26 |
27 | @Override
28 | public void run(ApplicationArguments args) throws Exception {
29 | logger.info("Client start......");
30 | long startTime = System.currentTimeMillis();
31 |
32 | localExtract();
33 |
34 | long endTime = System.currentTimeMillis();
35 | logger.info(" " + (endTime - startTime) + "ms");
36 | System.exit(0);
37 | }
38 |
39 | public void localExtract() {
40 | String libsPath = "/mnt/c/Users/user/Desktop/data/binaryfiles13repos";
41 | String ghidraTmp = "/mnt/c/Users/user/Desktop/tmp/ghidraTmp";
42 | String jsonFileRootPath = "/mnt/c/Users/user/Desktop/data/featureJson";
43 | File prefixFile = new File(libsPath);
44 |
45 | for (File lib : prefixFile.listFiles()) {
46 | if (!lib.isDirectory()) {
47 | continue;
48 | }
49 | String[] sufNames = lib.toString().split("/", -1);
50 | String libName = sufNames[sufNames.length - 1];
51 | System.out.println(libName);
52 |
53 | for (File compilationCase : lib.listFiles()){
54 | if (!compilationCase.isDirectory()) {
55 | continue;
56 | }
57 | sufNames = compilationCase.toString().split("/", -1);
58 | String caseName = sufNames[sufNames.length - 1];
59 | System.out.println(caseName);
60 | long startTime = System.currentTimeMillis();
61 | String savePath = jsonFileRootPath + "/" + libName + "/" + caseName;
62 | try{
63 | ExtractService extractService = extractServiceObjectFactory.getObject();
64 | extractService.init(compilationCase.toString(), savePath, ghidraTmp, 0);
65 | extractService.executable();
66 | logger.info(Thread.currentThread().getName() + " 提取完成: " + (System.currentTimeMillis()-startTime) / 1000 + "s");
67 | } catch (Exception e) {
68 | e.printStackTrace();
69 | }
70 | }
71 | }
72 | }
73 | }
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/consumer/TaskProcessTargets.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.consumer;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 | import org.springframework.beans.factory.annotation.Autowired;
6 | import org.springframework.boot.ApplicationArguments;
7 | import org.springframework.boot.ApplicationRunner;
8 | import org.springframework.beans.factory.annotation.Value;
9 | import org.springframework.stereotype.Component;
10 | import thusca.bcat.client.entity.BinaryFile;
11 | import thusca.bcat.client.entity.FeatureExtractStatus;
12 | import thusca.bcat.client.service.GetBinFileService;
13 | import thusca.bcat.client.utils.FileUtil;
14 | import org.springframework.beans.factory.ObjectFactory;
15 | import thusca.bcat.client.service.ExtractService;
16 |
17 | import java.io.File;
18 | import java.io.IOException;
19 | import java.util.List;
20 |
21 | /**
22 |  * Task that extracts features from detection targets stored under fixed local directories.
23 |  *
24 |  * <p>{@link #run} currently calls {@link #localExtractOSSPoliceApks()};
25 |  * {@link #localExtractLibDXApks()} is available but not invoked.
26 |  */
27 | // @Component
28 | public class TaskProcessTargets implements ApplicationRunner {
29 |     private final Logger logger = LoggerFactory.getLogger(this.getClass());
30 |
31 |     // Typed factory so getObject() yields ExtractService without a cast.
32 |     @Autowired
33 |     ObjectFactory<ExtractService> extractServiceObjectFactory;
34 |
35 |     /**
36 |      * Runs the extraction, logs the total time in milliseconds and terminates the JVM.
37 |      *
38 |      * @param args application arguments (not used)
39 |      */
40 |     @Override
41 |     public void run(ApplicationArguments args) throws Exception {
42 |         logger.info("Client start......");
43 |         long startTime = System.currentTimeMillis();
44 |
45 |         localExtractOSSPoliceApks();
46 |         // localExtractLibDXApks();
47 |
48 |         long endTime = System.currentTimeMillis();
49 |         logger.info("time cost: " + (endTime - startTime) + "ms");
50 |         System.exit(0);
51 |     }
52 |
53 |     /**
54 |      * Extracts features for every entry directly under {@code libsPath} and saves them
55 |      * under {@code jsonFileRootPath/entryName/}.
56 |      */
57 |     public void localExtractOSSPoliceApks() {
58 |         String libsPath = "/mnt/c/Users/user/Desktop/detection_targets";
59 |         String ghidraTmp = "/mnt/c/Users/user/Desktop/tmp/ghidraTmp";
60 |         String jsonFileRootPath = "/mnt/c/Users/user/Desktop/data/featureJson";
61 |
62 |         for (File lib : listEntries(new File(libsPath))) {
63 |             String libName = lib.getName();
64 |             System.out.println(libName);
65 |
66 |             String savePath = jsonFileRootPath + "/" + libName + "/";
67 |             extract(lib, savePath, ghidraTmp, " done: ");
68 |         }
69 |     }
70 |
71 |     /**
72 |      * Extracts features for every {@code app/target} entry under {@code libsPath} and saves
73 |      * them under {@code jsonFileRootPath/appName/targetName/}. Targets whose save directory
74 |      * already exists are skipped.
75 |      */
76 |     public void localExtractLibDXApks() {
77 |         String libsPath = "/mnt/c/Users/user/Desktop/detection_targets/unzipped_packages/DesktopApps";
78 |         String ghidraTmp = "/mnt/c/Users/user/Desktop/tmp/ghidraTmp";
79 |         String jsonFileRootPath = "/mnt/c/Users/user/Desktop/detection_targets/features/libdx_desktop";
80 |
81 |         for (File app : listEntries(new File(libsPath))) {
82 |             String appName = app.getName();
83 |             System.out.println(appName);
84 |             // listEntries yields nothing for a plain file, where app.listFiles() would be null.
85 |             for (File target : listEntries(app)) {
86 |                 String targetName = target.getName();
87 |                 System.out.println(targetName);
88 |                 String savePath = jsonFileRootPath + "/" + appName + "/" + targetName + "/";
89 |                 if (new File(savePath).exists()) {
90 |                     continue;
91 |                 }
92 |                 extract(target, savePath, ghidraTmp, " 提取完成: ");
93 |             }
94 |         }
95 |     }
96 |
97 |     /** Runs one extraction; logs the elapsed seconds on success and the exception on failure. */
98 |     private void extract(File target, String savePath, String ghidraTmp, String doneLabel) {
99 |         long startTime = System.currentTimeMillis();
100 |         try {
101 |             ExtractService extractService = extractServiceObjectFactory.getObject();
102 |             extractService.init(target.toString(), savePath, ghidraTmp, 0);
103 |             extractService.executable();
104 |             logger.info(Thread.currentThread().getName() + doneLabel + (System.currentTimeMillis() - startTime) / 1000 + "s");
105 |         } catch (Exception e) {
106 |             // Route the failure through SLF4J (with stack trace) instead of printStackTrace().
107 |             logger.error("feature extraction failed for {}", target, e);
108 |         }
109 |     }
110 |
111 |     /** Returns the entries of {@code dir}, or an empty array if it cannot be listed. */
112 |     private File[] listEntries(File dir) {
113 |         // File.listFiles returns null when the path is not a directory or an I/O error occurs.
114 |         File[] entries = dir.listFiles();
115 |         if (entries == null) {
116 |             logger.warn("cannot list directory: {}", dir);
117 |             return new File[0];
118 |         }
119 |         return entries;
120 |     }
121 | }
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/entity/BaseFile.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.entity;
2 |
3 | import java.io.File;
4 |
5 | import lombok.Data;
6 |
7 | @Data
8 | public class BaseFile {
9 |     protected String filePath;
10 |     protected String fileName;
11 |     protected Boolean isProcessed = false;
12 |     protected long byteSize;
13 |
14 |     /** Builds the entry from a path; the file name is the last element of that path. */
15 |     public BaseFile(String filePath) {
16 |         this(filePath, new File(filePath).getName());
17 |     }
18 |
19 |     /** Builds the entry from a path and an explicit file name; the size is read from the path. */
20 |     public BaseFile(String filePath, String fileName) {
21 |         File onDisk = new File(filePath);
22 |         this.filePath = filePath;
23 |         this.fileName = fileName;
24 |         this.byteSize = onDisk.length();
25 |     }
26 | }
27 |
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/entity/BinFileFeature.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.entity;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | import lombok.Data;
7 |
8 | @Data
9 | public class BinFileFeature { // features collected for one binary file
10 | private String fileName;
11 | private String fileType;
12 | private List importFunctionNames = new ArrayList<>(); // NOTE(review): raw List; elements are presumably String names — confirm
13 | private List exportFunctionNames = new ArrayList<>(); // NOTE(review): raw List; elements are presumably String names — confirm
14 | private List stringConstants = new ArrayList<>(); // NOTE(review): raw List; elements are presumably String — confirm
15 | private List functions = new ArrayList<>(); // NOTE(review): raw List; elements are presumably FunctionFeature — confirm
16 | }
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/entity/BinaryFile.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.entity;
2 |
3 | import lombok.Data;
4 | @Data // NOTE(review): Lombok @Data on a subclass; by default the generated equals/hashCode do not include BaseFile's fields — confirm this is intended
5 | public class BinaryFile extends BaseFile { // a BaseFile plus its extracted feature object and type information
6 | protected BinFileFeature binFileFeature;
7 | private String formattedFileName;
8 | private String fileType;
9 |
10 | public BinaryFile(String filePath) { // file name is derived from the path by the BaseFile constructor
11 | super(filePath);
12 | }
13 |
14 | public BinaryFile(String filePath, String fileName) { // file name supplied explicitly
15 | super(filePath, fileName);
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/entity/FeatureExtractStatus.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.entity;
2 |
3 | import lombok.Data;
4 |
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | @Data
9 | public class FeatureExtractStatus { // status record: per-file success/failure entries plus timing fields
10 | private boolean getBinFiles = false;
11 | private List binFileNameList = new ArrayList<>(); // NOTE(review): raw List; presumably binary file names (String) — confirm
12 | private long getBinFileTime = 0; // NOTE(review): time unit is not visible here — confirm
13 |
14 | private List successfullyExtractedBinFeatureList = new ArrayList<>(); // filled by addSuccessfullyExtractedBinFeature
15 | private List failedExtractedBinFeatureList = new ArrayList<>(); // filled by addFailedExtractedBinFeature
16 | private long getBinFeatureTime = 0;
17 |
18 | private List successfullySavedJsonList = new ArrayList<>(); // filled by addSuccessfullySavedJson
19 | private List failedSavedJsonList = new ArrayList<>(); // filled by addfailedSavedJson
20 | private long saveJsonTime = 0;
21 |
22 | public int extracted = 0; // NOTE(review): the only public field in this class — confirm that is intended
23 | private int extractedStatus = 0; // NOTE(review): meaning of the status codes is not visible here — confirm
24 | private long extractedTime = 0;
25 |
26 | private List errorMessages = new ArrayList<>(); // NOTE(review): raw List; presumably String messages — confirm
27 |
28 | public void addSuccessfullyExtractedBinFeature(String binFileName, long time, long byteSize) { // appends a success entry
29 | successfullyExtractedBinFeatureList.add(new successfullyExtractedBinFeature(binFileName, time, byteSize));
30 | }
31 |
32 | public void addFailedExtractedBinFeature(String binFileName, String errorMessage) { // appends a failure entry with its error message
33 | failedExtractedBinFeatureList.add(new failedExtractedBinFeature(binFileName, errorMessage));
34 | }
35 |
36 | public void addSuccessfullySavedJson(String binFileName, long time, long byteSize) { // appends a saved-JSON success entry
37 | successfullySavedJsonList.add(new successfullySavedJson(binFileName, time, byteSize));
38 | }
39 |
40 | public void addfailedSavedJson(String binFileName, String errorMessage) { // appends a saved-JSON failure entry; name casing differs from the sibling add* methods
41 | failedSavedJsonList.add(new failedSavedJson(binFileName, errorMessage));
42 | }
43 |
44 |
45 | }
46 |
47 | @Data
48 | class successfullyExtractedBinFeature { // package-private entry for one successfully extracted binary
49 | private String binFileName;
50 | private long byteSize;
51 | private long time; // NOTE(review): time unit is not visible here — confirm
52 | successfullyExtractedBinFeature(String binFileName, long time, long byteSize) { // argument order is (name, time, byteSize), unlike the field order
53 | this.binFileName = binFileName;
54 | this.time = time;
55 | this.byteSize = byteSize;
56 | }
57 | }
58 |
59 | @Data
60 | class failedExtractedBinFeature { // package-private entry for one binary whose feature extraction failed
61 | private String binFileName;
62 | private String errorMessage;
63 | failedExtractedBinFeature(String binFileName, String errorMessage) { // stores both arguments unchanged
64 | this.binFileName = binFileName;
65 | this.errorMessage = errorMessage;
66 | }
67 | }
68 |
69 | @Data
70 | class successfullySavedJson { // package-private entry for one successfully saved JSON result
71 | private String binFileName;
72 | private long time; // NOTE(review): time unit is not visible here — confirm
73 | private long byteSize;
74 | successfullySavedJson(String binFileName, long time, long byteSize) { // stores all three arguments unchanged
75 | this.binFileName = binFileName;
76 | this.time = time;
77 | this.byteSize = byteSize;
78 | }
79 | }
80 |
81 | @Data
82 | class failedSavedJson { // package-private entry for one JSON result that could not be saved
83 | private String binFileName;
84 | private String errorMessage;
85 | failedSavedJson(String binFileName, String errorMessage) { // stores both arguments unchanged
86 | this.binFileName = binFileName;
87 | this.errorMessage = errorMessage;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/featureExtractor/bcat_client/src/main/java/thusca/bcat/client/entity/FunctionFeature.java:
--------------------------------------------------------------------------------
1 | package thusca.bcat.client.entity;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.List;
6 | import java.util.Map;
7 |
8 | import lombok.Data;
9 |
10 | @Data
11 | public class FunctionFeature {
12 | private String functionName = "";
13 | private String functionType = "";
14 | private List args = new ArrayList<>();
15 | private String functionSignature = "";
16 | private String entryPoint = "";
17 | private Boolean isExportFunction;
18 | private Boolean isImportFunction;
19 | private Boolean isThunkFunction;
20 | private Boolean isInline;
21 | private String memoryBlock = "";
22 | private int edges;
23 | private int nodes;
24 | private int exits;
25 | private int complexity;
26 | private String cfSignature = "";
27 | private String cfBody = "";
28 | private int variables;
29 |
30 | private List instructionBytes = new ArrayList<>();
31 | private List instructions = new ArrayList<>();
32 | private List opcodes = new ArrayList<>();
33 | private List pcodeInstr = new ArrayList<>();
34 | private List callingFunctionAddresses = new ArrayList<>();
35 | private List callingFunctionsByPointer = new ArrayList<>();
36 | private List calledFunctionAddresses = new ArrayList<>();
37 | private List calledFunctionsByPointer = new ArrayList<>();
38 | private List calledStrings = new ArrayList<>();
39 | private List calledImports = new ArrayList<>();
40 | private List calledData = new ArrayList<>();
41 | private int[] pcodes = new int[]{};
42 |
43 | private List