├── .gitignore ├── DEVELOPING.md ├── LICENSE ├── Module.manifest ├── README.md ├── assets └── IntelliJ-Ghidra-0.5.0.zip ├── build.gradle ├── data ├── README.txt ├── buildLanguage.xml ├── languages │ ├── skel.cspec │ ├── skel.ldefs │ ├── skel.opinion │ ├── skel.pspec │ ├── skel.sinc │ └── skel.slaspec └── sleighArgs.txt ├── demo ├── README.md ├── TypeConstraint_1539724c_global_morph.json ├── TypeConstraint_746192c2_range_morph.json ├── TypeConstraint_f2b22cd2_final.json ├── TypeConstraint_f8591481_final_DI.json └── varType.json ├── extension.properties ├── ghidra_scripts ├── GroundTruth.java └── TypeForge.java ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── imgs ├── TypeForge_overview.png ├── figure_develop-1.png ├── figure_develop-2.png ├── figure_develop-3.png └── figure_develop-4.png ├── lib ├── README.txt ├── jackson-annotations-2.13.0.jar ├── jackson-core-2.13.0.jar ├── jackson-databind-2.13.0.jar ├── jgrapht-core-1.5.1.jar └── jheaps-0.13.jar ├── os ├── linux_x86_64 │ └── README.txt ├── mac_x86_64 │ └── README.txt └── win_x86_64 │ └── README.txt ├── scripts ├── .python-version ├── GraphExplorer.py ├── GroundTruthExtractor.py ├── README.md ├── TypeInference.py ├── config.yml ├── judge │ ├── README.md │ ├── double_elimination.py │ ├── llm.py │ └── main.py ├── requirements.txt └── uv.lock └── src ├── main ├── java │ └── typeforge │ │ ├── analyzer │ │ ├── Generator.java │ │ ├── ReTyper.java │ │ └── TypeAnalyzer.java │ │ ├── base │ │ ├── dataflow │ │ │ ├── AccessPoints.java │ │ │ ├── ConflictGraph.java │ │ │ ├── KSet.java │ │ │ ├── Layout.java │ │ │ ├── Range.java │ │ │ ├── TFG │ │ │ │ ├── TFGManager.java │ │ │ │ ├── TypeFlowGraph.java │ │ │ │ ├── TypeFlowPath.java │ │ │ │ └── TypeFlowPathManager.java │ │ │ ├── UnionFind.java │ │ │ ├── constraint │ │ │ │ ├── SizeSource.java │ │ │ │ ├── Skeleton.java │ │ │ │ └── TypeConstraint.java │ │ │ ├── expression │ │ │ │ ├── NMAE.java │ │ │ │ ├── NMAEManager.java 
│ │ │ │ └── ParsedExpr.java │ │ │ └── solver │ │ │ │ ├── ConstPropagator.java │ │ │ │ ├── ExternalHandler.java │ │ │ │ ├── InterSolver.java │ │ │ │ ├── IntraSolver.java │ │ │ │ ├── LayoutPropagator.java │ │ │ │ ├── PCodeVisitor.java │ │ │ │ └── TypeHintCollector.java │ │ ├── graph │ │ │ ├── CallGraph.java │ │ │ ├── GraphBase.java │ │ │ └── SDGraph.java │ │ ├── node │ │ │ ├── CallSite.java │ │ │ ├── DataTypeNode.java │ │ │ ├── FunctionNode.java │ │ │ └── NodeBase.java │ │ ├── parallel │ │ │ └── PrepareFunctionNodeCallback.java │ │ └── passes │ │ │ ├── SlidingWindowProcessor.java │ │ │ └── Window.java │ │ └── utils │ │ ├── DataTypeHelper.java │ │ ├── DecompilerHelper.java │ │ ├── FunctionHelper.java │ │ ├── Global.java │ │ ├── GraphHelper.java │ │ ├── HighSymbolHelper.java │ │ ├── Logging.java │ │ └── TCHelper.java └── resources │ └── log4j2_default.xml └── test └── java ├── README.test.txt └── typeforge └── base └── dataflow ├── NMAETest.java └── types └── LayoutTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle/ 3 | .idea 4 | Inferred 5 | GhidraScriptLog/ 6 | bin/ 7 | TypeForge_GroundTruth/ 8 | TypeForge_Inference/ 9 | .venv 10 | .env 11 | __pycache__/ -------------------------------------------------------------------------------- /DEVELOPING.md: -------------------------------------------------------------------------------- 1 | # How to Develop 2 | Writing a simple Ghidra Script is straightforward, but developing a complex Ghidra Extension can be challenging, especially when setting up the initial development environment. 3 | Ghidra officially supports **Eclipse** for Extension development. However, since **Eclipse** is not very user-friendly, TypeForge uses **IntelliJ IDEA** and **intellij-ghidra** plugin for development. 4 | (Note: The latest version of Ghidra supports plugin development in **VSCode**, but TypeForge has not been tested in this environment.) 5 | 6 | ## Setup 7 | 1. 
[intellij-ghidra](https://github.com/garyttierney/intellij-ghidra) is an IDEA plugin that enables developers to work with Ghidra in an integrated environment, providing features such as API completion, compilation, and debugging. Testing has confirmed that IntelliJ IDEA (version **2024.1.7**) can run this plugin properly. 8 | 2. A version of intellij-ghidra compatible with IDEA (version 2024.1.7) has been pre-compiled and stored in the [assets](./assets/IntelliJ-Ghidra-0.5.0.zip) directory. Open IDEA, click on `File -> Settings -> Plugins`, and choose to install the plugin (the `.zip` file) from local disk. 9 | 10 |  11 | 12 | 3. Follow the [usage guide](https://github.com/garyttierney/intellij-ghidra) for initial plugin configuration. 13 | 4. Configure the **TypeForge** project, making sure to check the `headless` option. You can specify a log path to save the plugin's output. The specific parameters are consistent with command-line usage parameters (no need to specify the `analyzeHeadless` path again). 14 | 15 |  16 | 17 |  18 | 19 |  -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Yanzhong Wang 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. 
Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | If you use this code in academic work, you must: 16 | - Cite the original paper: TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries (DOI: 10.1109/SP61157.2025.00193) 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /Module.manifest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/Module.manifest -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries 2 | 3 | [](https://doi.ieeecomputersociety.org/10.1109/SP61157.2025.00193) 4 | [](https://www.ccf.org.cn/Academic_Evaluation/) 5 | [](./LICENSE) 6 | [](https://github.com/noobone123/typeforge/stargazers) 7 | [](https://github.com/noobone123/typeforge) 8 | 9 | > We are continuously maintaining and updating this project, aiming to provide more user-friendly features and higher efficiency. 10 | 11 | This is the implementation of the paper titled "TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries". For more details about TypeForge, please refer to [our S&P 2025 paper](https://www.computer.org/csdl/proceedings-article/sp/2025/223600c847/26hiVajYJwY). 12 | 13 | ## What is TypeForge? 14 |
87 | * So some functions can be seen as root nodes, yet fail the `getCallingFunctions().isEmpty()` check.
88 | * We need to detect these root nodes and add them.
89 | *
90 | * @return true if the function has no direct caller in the whole program, false otherwise
91 | */
92 | public static boolean confirmNoDirectCaller(Function func) {
93 | boolean noCaller = true;
94 |
95 | for (var caller : func.getCallingFunctions(TaskMonitor.DUMMY)) {
96 | var callerInsts = Global.currentProgram.getListing().getInstructions(caller.getBody(), true);
97 | for (var inst : callerInsts) {
98 | if (inst.getMnemonicString().equals("CALL")) {
99 | var instFlows = inst.getFlows();
100 | if (instFlows.length >= 1) {
101 | for (var flow : instFlows) {
102 | Function calledFunc = Global.currentProgram.getFunctionManager().getFunctionAt(flow);
103 | if (calledFunc != null && calledFunc.equals(func)) {
104 | noCaller = false;
105 | return noCaller;
106 | }
107 | }
108 | }
109 | }
110 | }
111 | }
112 |
113 | return noCaller;
114 | }
115 |
116 |
117 | public static Address getAddress(long offset) {
118 | return Global.currentProgram.getAddressFactory().getDefaultAddressSpace().getAddress(offset);
119 | }
120 |
121 | public static Function getFunction(long offset) {
122 | return Global.currentProgram.getFunctionManager().getFunctionAt(getAddress(offset));
123 | }
124 | }
125 |
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/Global.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.app.script.GhidraScript;
4 | import ghidra.program.model.listing.Program;
5 | import ghidra.program.flatapi.FlatProgramAPI;
6 | /**
7 | * The global state of the current analysis.
8 | */
9 | public class Global {
10 | public static Program currentProgram;
11 | public static FlatProgramAPI flatAPI;
12 | public static GhidraScript ghidraScript;
13 | public static String outputDirectory;
14 | public static long startAddress;
15 |
16 | public static long typeAnalysisBeginTime;
17 | public static long typeAnalysisEndTime;
18 | public static long retypingBeginTime;
19 | public static long retypingEndTime;
20 | public static long prepareAnalysisBeginTime;
21 | public static long prepareAnalysisEndTime;
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/GraphHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import typeforge.base.node.DataTypeNode;
4 | import typeforge.base.node.NodeBase;
5 | import typeforge.base.graph.SDGraph;
6 | import ghidra.program.model.data.DataType;
7 |
8 | import java.io.BufferedWriter;
9 | import java.io.IOException;
10 | import java.nio.file.Files;
11 | import java.nio.file.Paths;
12 | import java.util.Set;
13 |
14 | public class GraphHelper {
15 | /**
16 | * Dump the SDGraph to a dot file
17 | */
18 | public static void dumpSDGraph(SDGraph sdg, String filename) {
19 | StringBuilder dotBuilder = new StringBuilder();
20 |
21 | Set