├── .gitignore ├── DEVELOPING.md ├── LICENSE ├── Module.manifest ├── README.md ├── assets └── IntelliJ-Ghidra-0.5.0.zip ├── build.gradle ├── data ├── README.txt ├── buildLanguage.xml ├── languages │ ├── skel.cspec │ ├── skel.ldefs │ ├── skel.opinion │ ├── skel.pspec │ ├── skel.sinc │ └── skel.slaspec └── sleighArgs.txt ├── demo ├── README.md ├── TypeConstraint_1539724c_global_morph.json ├── TypeConstraint_746192c2_range_morph.json ├── TypeConstraint_f2b22cd2_final.json ├── TypeConstraint_f8591481_final_DI.json └── varType.json ├── extension.properties ├── ghidra_scripts ├── GroundTruth.java └── TypeForge.java ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── imgs ├── TypeForge_overview.png ├── figure_develop-1.png ├── figure_develop-2.png ├── figure_develop-3.png └── figure_develop-4.png ├── lib ├── README.txt ├── jackson-annotations-2.13.0.jar ├── jackson-core-2.13.0.jar ├── jackson-databind-2.13.0.jar ├── jgrapht-core-1.5.1.jar └── jheaps-0.13.jar ├── os ├── linux_x86_64 │ └── README.txt ├── mac_x86_64 │ └── README.txt └── win_x86_64 │ └── README.txt ├── scripts ├── .python-version ├── GraphExplorer.py ├── GroundTruthExtractor.py ├── README.md ├── TypeInference.py ├── config.yml ├── judge │ ├── README.md │ ├── double_elimination.py │ ├── llm.py │ └── main.py ├── requirements.txt └── uv.lock └── src ├── main ├── java │ └── typeforge │ │ ├── analyzer │ │ ├── Generator.java │ │ ├── ReTyper.java │ │ └── TypeAnalyzer.java │ │ ├── base │ │ ├── dataflow │ │ │ ├── AccessPoints.java │ │ │ ├── ConflictGraph.java │ │ │ ├── KSet.java │ │ │ ├── Layout.java │ │ │ ├── Range.java │ │ │ ├── TFG │ │ │ │ ├── TFGManager.java │ │ │ │ ├── TypeFlowGraph.java │ │ │ │ ├── TypeFlowPath.java │ │ │ │ └── TypeFlowPathManager.java │ │ │ ├── UnionFind.java │ │ │ ├── constraint │ │ │ │ ├── SizeSource.java │ │ │ │ ├── Skeleton.java │ │ │ │ └── TypeConstraint.java │ │ │ ├── expression │ │ │ │ ├── NMAE.java │ │ │ │ ├── NMAEManager.java │ │ │ │ └── ParsedExpr.java │ │ │ └── solver │ │ │ │ ├── ConstPropagator.java │ │ │ │ ├── ExternalHandler.java │ │ │ │ ├── InterSolver.java │ │ │ │ ├── IntraSolver.java │ │ │ │ ├── LayoutPropagator.java │ │ │ │ ├── PCodeVisitor.java │ │ │ │ └── TypeHintCollector.java │ │ ├── graph │ │ │ ├── CallGraph.java │ │ │ ├── GraphBase.java │ │ │ └── SDGraph.java │ │ ├── node │ │ │ ├── CallSite.java │ │ │ ├── DataTypeNode.java │ │ │ ├── FunctionNode.java │ │ │ └── NodeBase.java │ │ ├── parallel │ │ │ └── PrepareFunctionNodeCallback.java │ │ └── passes │ │ │ ├── SlidingWindowProcessor.java │ │ │ └── Window.java │ │ └── utils │ │ ├── DataTypeHelper.java │ │ ├── DecompilerHelper.java │ │ ├── FunctionHelper.java │ │ ├── Global.java │ │ ├── GraphHelper.java │ │ ├── HighSymbolHelper.java │ │ ├── Logging.java │ │ └── TCHelper.java └── resources │ └── log4j2_default.xml └── test └── java ├── README.test.txt └── typeforge └── base └── dataflow ├── NMAETest.java └── types └── LayoutTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle/ 3 | .idea 4 | Inferred 5 | GhidraScriptLog/ 6 | bin/ 7 | TypeForge_GroundTruth/ 8 | TypeForge_Inference/ 9 | .venv 10 | .env 11 | __pycache__/ -------------------------------------------------------------------------------- /DEVELOPING.md: -------------------------------------------------------------------------------- 1 | # How to Develop 2 | Writing a simple Ghidra Script is straightforward, but developing a complex Ghidra Extension can be 
challenging, especially when setting up the initial development environment. 3 | Ghidra officially supports **Eclipse** for Extension development. However, since **Eclipse** is not very user-friendly, TypeForge uses **IntelliJ IDEA** and the **intellij-ghidra** plugin for development. 4 | (Note: The latest version of Ghidra supports plugin development in **VSCode**, but TypeForge has not been tested in this environment.) 5 | 6 | ## Setup 7 | 1. [intellij-ghidra](https://github.com/garyttierney/intellij-ghidra) is an IDEA plugin that enables developers to work with Ghidra in an integrated environment, providing features such as API completion, compilation, and debugging. Testing has confirmed that IntelliJ IDEA (version **2024.1.7**) can run this plugin properly. 8 | 2. A version of intellij-ghidra compatible with IDEA (version 2024.1.7) has been pre-compiled and stored in the [assets](./assets/IntelliJ-Ghidra-0.5.0.zip) directory. Open IDEA, click on `File -> Settings -> Plugins`, and choose to install the plugin (the `.zip` file) from the local disk. 9 | 10 | ![install-plugin](./imgs/figure_develop-4.png) 11 | 12 | 3. Follow the [usage guide](https://github.com/garyttierney/intellij-ghidra) for initial plugin configuration. 4. Configure the **TypeForge** project, making sure to check the `headless` option. You can specify a log path to save the plugin's output. The remaining parameters are identical to the command-line usage parameters (there is no need to specify the `analyzeHeadless` path again). 14 | 15 | ![install-plugin](./imgs/figure_develop-1.png) 16 | 17 | ![install-plugin](./imgs/figure_develop-3.png) 18 | 19 | ![install-plugin](./imgs/figure_develop-2.png) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Yanzhong Wang 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | If you use this code in academic work, you must: 16 | - Cite the original paper: TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries (DOI: 10.1109/SP61157.2025.00193) 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Module.manifest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/Module.manifest -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries 2 | 3 | [![IEEE DOI](https://img.shields.io/badge/S%26P%202025-10.1109%2FSP61157.2025.00193-00629A?logo=ieee&logoColor=00629A&labelColor=E6F2FF)](https://doi.ieeecomputersociety.org/10.1109/SP61157.2025.00193) 4 | [![CCF-A](https://img.shields.io/badge/CCF_A-Security_%26_Privacy-FFD700?logo=star&logoColor=003A5D)](https://www.ccf.org.cn/Academic_Evaluation/) 5 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](./LICENSE) 6 | [![GitHub Stars](https://img.shields.io/github/stars/noobone123/typeforge?style=social)](https://github.com/noobone123/typeforge/stargazers) 7 | [![Last Commit](https://img.shields.io/github/last-commit/noobone123/typeforge/dev?color=blue&label=last-commit)](https://github.com/noobone123/typeforge) 8 | 9 | > We are continuously maintaining and updating this project, aiming to provide more user-friendly features and higher efficiency. 10 | 11 | This is the implementation of the paper titled "TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries". For more details about TypeForge, please refer to [our S&P 2025 paper](https://www.computer.org/csdl/proceedings-article/sp/2025/223600c847/26hiVajYJwY). 12 | 13 | ## What is TypeForge? 14 |
15 | <img src="./imgs/TypeForge_overview.png" alt="overview"> 16 | 
17 | 18 | TypeForge aims to recover composite data types (such as structures, unions, etc.) in stripped binaries. Compared to existing methods, TypeForge provides higher efficiency and accuracy. 19 | - TypeForge is divided into **two phases**: a *Program Analysis phase* and an *LLM-assisted Refinement phase*. The first phase is sufficient for common reverse engineering tasks, while the second phase further improves the accuracy of the phase-one results. 20 | - TypeForge is currently implemented as a [Ghidra Extension](https://ghidra-sre.org/InstallationGuide.html#GhidraExtensionNotes). We welcome other developers to port it to platforms like [IDA Pro](https://hex-rays.com/ida-pro), [Binary Ninja](https://binary.ninja/), and [Angr](https://github.com/angr/angr). 21 | 22 | 23 | ## Project Structure 24 | 25 | ``` 26 | typeforge/ # Project root 27 | ├── ... 28 | ├── build.gradle # Gradle build configuration 29 | ├── extension.properties # Extension properties 30 | ├── src/ # Main source code of TypeForge 31 | │ ├── main/java/typeforge 32 | │ │ ├── analyzer/ # Entry functions for various program analyses 33 | │ │ ├── base/ # Underlying components for program analysis algorithms 34 | │ │ │ ├── dataflow/ # Data flow analysis (including data flow abstractions, intra/inter-procedural Solvers) 35 | │ │ │ ├── graph/ # CallGraph 36 | │ │ │ ├── node/ # Binary functions and CallSites 37 | │ │ │ ├── parallel/ # Parallel processing Callbacks 38 | │ │ │ └── passes/ # Passes used for synthesizing possible type declarations 39 | │ │ └── utils/ # Other useful functions for binary analysis 40 | │ └── test/ 41 | ├── ghidra_scripts/ # Ghidra scripts 42 | │ ├── TypeForge.java # Main TypeForge script 43 | │ └── GroundTruth.java # Ground truth extractor (from binaries with debug symbols) 44 | ├── scripts/ # Useful Python scripts 45 | │ ├── judge/ # LLM-assisted double elimination process 46 | │ ├── GraphExplorer.py # (Debugging purpose) Explore a dumped Type Flow Graph 47 | │ ├── GroundTruthExtractor.py # Ground truth extractor (wrapper; actually calls GroundTruth.java) 48 | │ └── TypeInference.py # Type Inference (wrapper; actually calls TypeForge.java) 49 | ├── lib/ 50 | └── ... 51 | ``` 52 | 53 | ## Building and Installing 54 | ### Building as a Ghidra extension 55 | 1. Clone this repo: 56 | 57 | ```bash 58 | git clone https://github.com/noobone123/TypeForge.git 59 | ``` 60 | 2. Install a JDK and Ghidra (version 11.0.3 has been tested). 61 | Download Ghidra from [here](https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.0.3_build/ghidra_11.0.3_PUBLIC_20240410.zip) and follow the Ghidra [installation instructions](https://github.com/NationalSecurityAgency/ghidra/blob/Ghidra_11.0.3_build/GhidraDocs/InstallationGuide.html). 62 | 3. Modify `ghidraInstallDir` to **YOUR Ghidra installation directory** in `build.gradle`. 63 | 4. Build the Ghidra extension: 64 | 65 | ```bash 66 | cd TypeForge 67 | gradle buildExtension 68 | # After building, you will find the extension zip file here: 69 | ls -alh ./dist/ghidra_11.0.3_PUBLIC_[your-build-time]_TypeForge.zip 70 | ``` 71 | 72 | ### Installing 73 | Please refer to the following commands to unzip and install the compiled Ghidra Extension. 
74 | 75 | ```bash 76 | cp ./dist/ghidra_11.0.3_PUBLIC_[your-build-time]_TypeForge.zip \ 77 | [YOUR-Ghidra-Installation-Directory]/Ghidra/Extensions 78 | cd [YOUR-Ghidra-Installation-Directory]/Ghidra/Extensions 79 | unzip ghidra_11.0.3_PUBLIC_[your-build-time]_TypeForge.zip 80 | ``` 81 | 82 | ## Getting Started 83 | ### Type Inference (Headless Mode) 84 | 85 | After installing TypeForge, just run the following for a single stripped binary: 86 | ```bash 87 | [YOUR-Ghidra-Installation-Directory]/support/analyzeHeadless \ 88 | [YOUR-Ghidra-Project-Directory] [YOUR-Project-Name] \ 89 | -deleteProject -import [YOUR-Stripped-Binary] \ 90 | -postScript TypeForge.java output=[Your-output-dir] 91 | ``` 92 | 93 | After a while, you will see the Type Inference results (JSON files) saved in `[Your-output-dir]`. For details about these JSON files, please refer to the [demo](./demo/README.md). These JSON files are then fed into *Phase 2* for refinement. For more information, please refer to [judge](./scripts/judge/README.md). 94 | 95 | For **batch processing**, please refer to [scripts](./scripts/README.md). 96 | 97 | > We are currently developing additional features to directly import Type Inference results into Ghidra projects. 98 | 99 | > For more information about Ghidra Headless Mode, please refer to [this guide](https://static.grumpycoder.net/pixel/support/analyzeHeadlessREADME.html). 100 | 101 | ### Extract the Ground Truth 102 | You can also extract the ground truth of composite data types from a binary with debug information (note that Ghidra currently does **NOT** support the DWARF-5 debug format, so you need to specify `-gdwarf-4` during compilation; see the example commands further below). 103 | For more details, please refer to [scripts](./scripts/README.md). 104 | 105 | ### Run in Ghidra GUI Mode 106 | In development... 107 | 108 | ## Developing and Debugging 109 | TypeForge is developed using [IntelliJ IDEA](https://www.jetbrains.com/idea/download/other.html) (version 2024.1.7) and the [intellij-ghidra](https://github.com/garyttierney/intellij-ghidra) plugin. For detailed development guidelines, please refer to [How To Develop](./DEVELOPING.md). 
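The following sketch illustrates the `-gdwarf-4` requirement from the *Extract the Ground Truth* section above. The source and binary names are hypothetical, and the `output=` argument for `GroundTruth.java` is an assumption made by analogy with `TypeForge.java`'s interface; see [scripts](./scripts/README.md) for the authoritative invocation.

```bash
# Compile a binary with DWARF-4 debug info (Ghidra cannot parse DWARF-5 yet).
# "demo.c" and "demo" are hypothetical names.
gcc -g -gdwarf-4 -o demo demo.c

# Extract the ground truth headlessly. The output= argument is assumed to
# mirror TypeForge.java; verify against scripts/README.md.
[YOUR-Ghidra-Installation-Directory]/support/analyzeHeadless \
    [YOUR-Ghidra-Project-Directory] [YOUR-Project-Name] \
    -deleteProject -import ./demo \
    -postScript GroundTruth.java output=[Your-ground-truth-dir]
```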
110 | 111 | ## Contributors 112 | TypeForge is written and maintained by: 113 | - [h1k0](https://github.com/noobone123) h1k0naka@outlook.com 114 | - [liyilin](https://github.com/li-yilin-30) liyilin2023@iie.ac.cn 115 | 116 | ## Cite 117 | 118 | If you use `TypeForge` for your academic work, please cite the following paper: 119 | ``` 120 | @inproceedings{typeforge, 121 | title = {TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries}, 122 | author = {Wang, Yanzhong and Liang, Ruigang and Li, Yilin and Hu, Peiwei and Chen, Kai and Zhang, Bolun}, 123 | booktitle = {2025 IEEE Symposium on Security and Privacy (SP)}, 124 | pages = {2847--2864}, 125 | year = {2025}, 126 | publisher = {IEEE Computer Society}, 127 | doi = {10.1109/SP61157.2025.00193}, 128 | } 129 | ``` -------------------------------------------------------------------------------- /assets/IntelliJ-Ghidra-0.5.0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/assets/IntelliJ-Ghidra-0.5.0.zip -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | /* ### 2 | * IP: GHIDRA 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // Builds a Ghidra Extension for a given Ghidra installation. 17 | // 18 | // An absolute path to the Ghidra installation directory must be supplied either by setting the 19 | // GHIDRA_INSTALL_DIR environment variable or Gradle project property: 20 | // 21 | // > export GHIDRA_INSTALL_DIR=<Absolute path to Ghidra installation> 22 | // > gradle 23 | // 24 | // or 25 | // 26 | // > gradle -PGHIDRA_INSTALL_DIR=<Absolute path to Ghidra installation> 27 | // 28 | // Gradle should be invoked from the directory of the project to build. Please see the 29 | // application.gradle.version property in <GHIDRA_INSTALL_DIR>/Ghidra/application.properties 30 | // for the correct version of Gradle to use for the Ghidra installation you specify. 
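//
// For example, with a hypothetical installation path (substitute your own):
//
//   export GHIDRA_INSTALL_DIR=/opt/ghidra_11.0.3_PUBLIC
//   gradle buildExtension
//
// or, passing the property directly:
//
//   gradle -PGHIDRA_INSTALL_DIR=/opt/ghidra_11.0.3_PUBLIC buildExtension
//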
31 | 32 | //----------------------START "DO NOT MODIFY" SECTION------------------------------ 33 | def ghidraInstallDir = "/home/h1k0/tools/ghidra_11.0.3_PUBLIC" // Modify `ghidraInstallDir` to your Ghidra installation directory 34 | 35 | if (System.env.GHIDRA_INSTALL_DIR) { 36 | ghidraInstallDir = System.env.GHIDRA_INSTALL_DIR 37 | } 38 | else if (project.hasProperty("GHIDRA_INSTALL_DIR")) { 39 | ghidraInstallDir = project.getProperty("GHIDRA_INSTALL_DIR") 40 | } 41 | 42 | if (ghidraInstallDir) { 43 | apply from: new File(ghidraInstallDir).getCanonicalPath() + "/support/buildExtension.gradle" 44 | } 45 | else { 46 | throw new GradleException("GHIDRA_INSTALL_DIR is not defined!") 47 | } 48 | //----------------------END "DO NOT MODIFY" SECTION------------------------------- 49 | 50 | sourceSets { 51 | main { 52 | java { 53 | srcDirs = ['src/main/java', 'ghidra_scripts'] 54 | } 55 | resources { 56 | srcDirs = ['src/main/resources'] 57 | } 58 | } 59 | } 60 | 61 | repositories { 62 | // Declare dependency repositories here. This is not needed if dependencies are manually 63 | // dropped into the lib/ directory. 64 | // See https://docs.gradle.org/current/userguide/declaring_repositories.html for more info. 65 | // Ex: mavenCentral() 66 | mavenCentral() 67 | } 68 | 69 | dependencies { 70 | // Any external dependencies added here will automatically be copied to the lib/ directory when 71 | // this extension is built. 72 | 73 | implementation 'com.fasterxml.jackson.core:jackson-core:2.13.0' 74 | implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.0' 75 | implementation 'com.fasterxml.jackson.core:jackson-annotations:2.13.0' 76 | implementation 'org.jgrapht:jgrapht-core:1.5.1' 77 | 78 | testImplementation('org.junit.jupiter:junit-jupiter-api:5.8.2') 79 | testRuntimeOnly('org.junit.jupiter:junit-jupiter-engine:5.8.2') 80 | 81 | testImplementation 'org.mockito:mockito-core:4.0.0' 82 | testImplementation 'org.mockito:mockito-junit-jupiter:4.0.0' 83 | } 84 | 85 | test { 86 | useJUnitPlatform() 87 | } 88 | 89 | // Exclude additional files from the built extension 90 | buildExtension.exclude '.idea/**' 91 | buildExtension.exclude '.git/**' 92 | buildExtension.exclude 'GhidraScriptLog/**' 93 | buildExtension.exclude 'TypeForge_Inference/**' 94 | buildExtension.exclude 'demo/**' 95 | buildExtension.exclude 'scripts/**' 96 | buildExtension.exclude 'imgs/**' 97 | -------------------------------------------------------------------------------- /data/README.txt: -------------------------------------------------------------------------------- 1 | The "data" directory is intended to hold data files that will be used by this module and will 2 | not end up in the .jar file, but will be present in the zip or tar file. Typically, data 3 | files are placed here rather than in the resources directory if the user may need to edit them. 4 | 5 | An optional data/languages directory can exist for the purpose of containing various Sleigh language 6 | specification files and importer opinion files. 7 | 8 | The data/buildLanguage.xml is used for building the contents of the data/languages directory. 9 | 10 | The skel language definition has been commented-out within the skel.ldefs file so that the 11 | skeleton language does not show-up within Ghidra. 12 | 13 | See the Sleigh language documentation (docs/languages/index.html) for details Sleigh language 14 | specification syntax. 
15 | -------------------------------------------------------------------------------- /data/buildLanguage.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /data/languages/skel.cspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /data/languages/skel.ldefs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 20 | 21 | -------------------------------------------------------------------------------- /data/languages/skel.opinion: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 13 | -------------------------------------------------------------------------------- /data/languages/skel.pspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /data/languages/skel.sinc: -------------------------------------------------------------------------------- 1 | # sleigh include file for Skeleton language instructions 2 | 3 | define token opbyte (8) 4 | op0_8 = (0,7) 5 | op6_2 = (6,7) 6 | 7 | dRegPair4_2 = (4,5) 8 | pRegPair4_2 = (4,5) 9 | sRegPair4_2 = (4,5) 10 | qRegPair4_2 = (4,5) 11 | qRegPair4_2a = (4,5) 12 | qRegPair4_2b = (4,5) 13 | rRegPair4_2 = (4,5) 14 | 15 | reg3_3 = (3,5) 16 | bits3_3 = (3,5) 17 | 18 | bits0_4 = (0,3) 19 | 20 | reg0_3 = (0,2) 21 | bits0_3 = (0,2) 22 | ; 23 | 24 | define token data8 (8) 25 | imm8 = (0,7) 26 | sign8 = (7,7) 27 | simm8 = (0,7) signed 28 | ; 29 | 30 | define token data16 (16) 31 | timm4 = (12,15) 32 | imm16 = (0,15) 33 | sign16 = (15,15) 34 | simm16 = (0,15) signed 35 | ; 36 | 37 | attach variables [ reg0_3 reg3_3 ] [ B C D E H L _ A ]; 38 | 39 | attach variables [ sRegPair4_2 dRegPair4_2 ] [ BC DE HL SP ]; 40 | 41 | attach variables [ qRegPair4_2 ] [ BC DE HL AF ]; 42 | attach variables [ qRegPair4_2a ] [ B D H A ]; 43 | attach variables [ qRegPair4_2b ] [ C E L F ]; 44 | 45 | attach variables [ pRegPair4_2 ] [ BC DE IX SP ]; 46 | attach variables [ rRegPair4_2 ] [ BC DE IY SP ]; 47 | 48 | ################################################################ 49 | # Macros 50 | ################################################################ 51 | 52 | macro setResultFlags(result) { 53 | $(Z_flag) = (result == 0); 54 | $(S_flag) = (result s< 0); 55 | } 56 | 57 | macro setAddCarryFlags(op1,op2) 
{ 58 | $(C_flag) = (carry(op1,zext($(C_flag))) || carry(op2,op1 + zext($(C_flag)))); 59 | } 60 | 61 | macro setAddFlags(op1,op2) { 62 | $(C_flag) = carry(op1,op2); 63 | } 64 | 65 | macro setSubtractCarryFlags(op1,op2) { 66 | notC = ~$(C_flag); 67 | $(C_flag) = ((op1 < sext(notC)) || (op2 < (op1 - sext(notC)))); 68 | } 69 | 70 | macro setSubtractFlags(op1,op2) { 71 | $(C_flag) = (op1 < op2); 72 | } 73 | 74 | macro push16(val16) { 75 | SP = SP - 2; 76 | *:2 SP = val16; 77 | } 78 | 79 | macro pop16(ret16) { 80 | ret16 = *:2 SP; 81 | SP = SP + 2; 82 | } 83 | 84 | macro push8(val8) { 85 | SP = SP - 1; 86 | ptr:2 = SP; 87 | *:1 ptr = val8; 88 | } 89 | 90 | macro pop8(ret8) { 91 | ptr:2 = SP; 92 | ret8 = *:1 ptr; 93 | SP = SP + 1; 94 | } 95 | 96 | ################################################################ 97 | 98 | ixMem8: (IX+simm8) is IX & simm8 { ptr:2 = IX + simm8; export *:1 ptr; } 99 | ixMem8: (IX-val) is IX & simm8 & sign8=1 [ val = -simm8; ] { ptr:2 = IX + simm8; export *:1 ptr; } 100 | 101 | iyMem8: (IY+simm8) is IY & simm8 { ptr:2 = IY + simm8; export *:1 ptr; } 102 | iyMem8: (IY-val) is IY & simm8 & sign8=1 [ val = -simm8; ] { ptr:2 = IY + simm8; export *:1 ptr; } 103 | 104 | Addr16: imm16 is imm16 { export *:1 imm16; } 105 | 106 | Mem16: (imm16) is imm16 { export *:2 imm16; } 107 | 108 | RelAddr8: loc is simm8 [ loc = inst_next + simm8; ] { export *:1 loc; } 109 | 110 | cc: "NZ" is bits3_3=0x0 { c:1 = ($(Z_flag) == 0); export c; } 111 | cc: "Z" is bits3_3=0x1 { c:1 = $(Z_flag); export c; } 112 | cc: "NC" is bits3_3=0x2 { c:1 = ($(C_flag) == 0); export c; } 113 | cc: "C" is bits3_3=0x3 { c:1 = $(C_flag); export c; } 114 | cc: "PO" is bits3_3=0x4 { c:1 = ($(PV_flag) == 0); export c; } 115 | cc: "PE" is bits3_3=0x5 { c:1 = $(PV_flag); export c; } 116 | cc: "P" is bits3_3=0x6 { c:1 = ($(S_flag) == 0); export c; } 117 | cc: "M" is bits3_3=0x7 { c:1 = $(S_flag); export c; } 118 | 119 | cc2: "NZ" is bits3_3=0x4 { c:1 = ($(Z_flag) == 0); export c; } 120 | cc2: "Z" is bits3_3=0x5 { c:1 = $(Z_flag); export c; } 121 | cc2: "NC" is bits3_3=0x6 { c:1 = ($(C_flag) == 0); export c; } 122 | cc2: "C" is bits3_3=0x7 { c:1 = $(C_flag); export c; } 123 | 124 | ################################################################ 125 | 126 | 127 | :LD IX,Mem16 is op0_8=0xdd & IX; op0_8=0x2a; Mem16 { 128 | IX = Mem16; 129 | } 130 | 131 | :LD IY,Mem16 is op0_8=0xfd & IY; op0_8=0x2a; Mem16 { 132 | IY = Mem16; 133 | } 134 | 135 | :LD Mem16,HL is op0_8=0x22 & HL; Mem16 { 136 | Mem16 = HL; 137 | } 138 | 139 | :LD Mem16,dRegPair4_2 is op0_8=0xed; op6_2=0x1 & dRegPair4_2 & bits0_4=0x3; Mem16 { 140 | Mem16 = dRegPair4_2; 141 | } 142 | 143 | :LD Mem16,IX is op0_8=0xdd & IX; op0_8=0x22; Mem16 { 144 | Mem16 = IX; 145 | } 146 | 147 | :LD Mem16,IY is op0_8=0xfd & IY; op0_8=0x22; Mem16 { 148 | Mem16 = IY; 149 | } 150 | 151 | :NEG is op0_8=0xed; op0_8=0x44 { 152 | $(PV_flag) = (A == 0x80); 153 | $(C_flag) = (A != 0); 154 | A = -A; 155 | setResultFlags(A); 156 | } 157 | 158 | :SET bits3_3,ixMem8 is op0_8=0xdd; op0_8=0xcb; ixMem8; op6_2=0x3 & bits3_3 & bits0_3=0x6 { 159 | mask:1 = (1 << bits3_3); 160 | val:1 = ixMem8; 161 | ixMem8 = val | mask; 162 | } 163 | 164 | :SET bits3_3,iyMem8 is op0_8=0xfd; op0_8=0xcb; iyMem8; op6_2=0x3 & bits3_3 & bits0_3=0x6 { 165 | mask:1 = (1 << bits3_3); 166 | val:1 = iyMem8; 167 | iyMem8 = val | mask; 168 | } 169 | 170 | :JP Addr16 is op0_8=0xc3; Addr16 { 171 | goto Addr16; 172 | } 173 | 174 | :JP cc,Addr16 is op6_2=0x3 & cc & bits0_3=0x2; Addr16 { 175 | if (!cc) goto Addr16; 176 | } 177 | 
178 | :JR RelAddr8 is op0_8=0x18; RelAddr8 { 179 | goto RelAddr8; 180 | } 181 | 182 | :JR cc2,RelAddr8 is op6_2=0x0 & cc2 & bits0_3=0x0; RelAddr8 { 183 | if (cc2) goto RelAddr8; 184 | } 185 | 186 | :JP (HL) is op0_8=0xe9 & HL { 187 | goto [HL]; 188 | } 189 | 190 | :JP (IX) is op0_8=0xdd & IX; op0_8=0xe9 { 191 | goto [IX]; 192 | } 193 | 194 | :JP (IY) is op0_8=0xfd & IY; op0_8=0xe9 { 195 | goto [IY]; 196 | } 197 | 198 | :CALL Addr16 is op0_8=0xcd; Addr16 { 199 | push16(&:2 inst_next); 200 | call Addr16; 201 | } 202 | 203 | :CALL cc,Addr16 is op6_2=0x3 & cc & bits0_3=0x4; Addr16 { 204 | if (!cc) goto inst_next; 205 | push16(&:2 inst_next); 206 | call Addr16; 207 | } 208 | 209 | :RET is op0_8=0xc9 { 210 | pop16(PC); 211 | ptr:2 = zext(PC); 212 | return [ptr]; 213 | } 214 | 215 | :RET cc is op6_2=0x3 & cc & bits0_3=0x0 { 216 | if (!cc) goto inst_next; 217 | pop16(PC); 218 | ptr:2 = zext(PC); 219 | return [ptr]; 220 | } 221 | -------------------------------------------------------------------------------- /data/languages/skel.slaspec: -------------------------------------------------------------------------------- 1 | # sleigh specification file for Skeleton Processor 2 | # >> see docs/languages/sleigh.htm or sleigh.pdf for Sleigh syntax 3 | # Other language modules (see Ghidra/Processors) may provide better examples 4 | # when creating a new language module. 5 | 6 | define endian=little; 7 | define alignment=1; 8 | 9 | define space ram type=ram_space size=2 default; 10 | 11 | define space io type=ram_space size=2; 12 | define space register type=register_space size=1; 13 | 14 | define register offset=0x00 size=1 [ F A C B E D L H I R ]; 15 | define register offset=0x00 size=2 [ AF BC DE HL ]; 16 | define register offset=0x20 size=1 [ A_ F_ B_ C_ D_ E_ H_ L_ ]; # Alternate registers 17 | define register offset=0x20 size=2 [ AF_ BC_ DE_ HL_ ]; # Alternate registers 18 | 19 | define register offset=0x40 size=2 [ _ PC SP IX IY ]; 20 | 21 | define register offset=0x50 size=1 [ rCBAR rCBR rBBR ]; 22 | 23 | # Define context bits (if defined, size must be multiple of 4-bytes) 24 | define register offset=0xf0 size=4 contextreg; 25 | 26 | define context contextreg 27 | assume8bitIOSpace = (0,0) 28 | ; 29 | 30 | # Flag bits (?? manual is very confusing - could be typos!) 31 | @define C_flag "F[0,1]" # C: Carry 32 | @define N_flag "F[1,1]" # N: Add/Subtract 33 | @define PV_flag "F[2,1]" # PV: Parity/Overflow 34 | @define H_flag "F[4,1]" # H: Half Carry 35 | @define Z_flag "F[6,1]" # Z: Zero 36 | @define S_flag "F[7,1]" # S: Sign 37 | 38 | # Include contents of skel.sinc file 39 | @include "skel.sinc" 40 | -------------------------------------------------------------------------------- /data/sleighArgs.txt: -------------------------------------------------------------------------------- 1 | # Add sleigh compiler options to this file (one per line) which will 2 | # be used when compiling each language within this module. 3 | # All options should start with a '-' character. 4 | # 5 | # IMPORTANT: The -a option should NOT be specified 6 | # -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Demo 2 | This directory contains several JSON files that represent type inference results from `lighttpd`. 
3 | 4 | ## varType.json 5 | This file serves as an index mapping from Ghidra-decompiled variables to TypeForge-inferred types, as shown below: 6 | ```json 7 | "0x13834f" : { // Function Entry Address 8 | "Name" : "pcre_keyvalue_burl_percent_high_UTF8", // Function Name 9 | "Parameters" : { // Function Parameters 10 | "0x13834f:param_1" : { 11 | "Name" : "param_1", 12 | "desc" : "pointer", 13 | "TypeConstraint" : "TypeConstraint_451eec67" // Corresponding TypeConstraint 14 | } 15 | }, 16 | "LocalVariables" : { // Function Local Variables 17 | "0x13834f:stack[-0x28]" : { // Stack Variable (with stack offset) 18 | "Name" : "local_28", 19 | "desc" : "pointer", 20 | "TypeConstraint" : "TypeConstraint_8792a6aa" 21 | }, 22 | "0x13834f:RegUniq[0x138371]" : { // Register Variables (with address where this varnode is defined) 23 | "Name" : "iVar2", 24 | "desc" : "pointer", 25 | "TypeConstraint" : "TypeConstraint_8792a6aa" 26 | }, 27 | "0x13834f:RegUniq[0x1383d5]" : { 28 | "Name" : "lVar3", 29 | "desc" : "pointer", 30 | "TypeConstraint" : "TypeConstraint_c076aa34" 31 | } 32 | } 33 | } 34 | ``` 35 | 36 | ## xxx_final.json 37 | Indicates that this composite data type does not need to enter the refinement stage. 38 | ```json 39 | { 40 | "ForgedStruct_213" : { 41 | "desc" : "Structure", 42 | "layout" : { // Member Layout 43 | "0x4" : { 44 | "desc" : "Primitive", 45 | "size" : 4, 46 | "type" : "int", 47 | "name" : "field_0x4" 48 | }, 49 | "0x8" : { 50 | "desc" : "Pointer", 51 | "size" : 8, 52 | "type" : "void *", 53 | "name" : "ref_0x8_TypeConstraint_4c5a3461" 54 | }, 55 | // ... 56 | }, 57 | "ptrRef" : { // Pointer Reference Relationship 58 | "0x8" : { // Reference member offset 59 | "refSkt" : "TypeConstraint_4c5a3461", // Pointee TypeConstraint 60 | "ptrLevel" : 1 // Pointer level: 1 for *, 2 for **, ... 61 | }, 62 | "0x28" : { 63 | "refSkt" : "TypeConstraint_05d81b5b", 64 | "ptrLevel" : 1 65 | }, 66 | "0x30" : { 67 | "refSkt" : "TypeConstraint_05d81b5b", 68 | "ptrLevel" : 1 69 | } 70 | }, 71 | "nest" : { // Nested Relationship 72 | "0x28" : "TypeConstraint_b1d2b2a7" // Nested member offset 73 | }, 74 | "anonTypes" : { }, 75 | "decompilerInferred" : { 76 | "composite" : [ ], 77 | "array" : [ ], 78 | "primitive" : [ ] 79 | } 80 | } 81 | } 82 | ``` 83 | 84 | ## xxx_global_morph.json 85 | Indicates that refinement is needed, and the TypeConstraint as a whole can be interpreted as two different types. The `decompiledCode` field in the JSON corresponds to different variants of decompiled code. 86 | 87 | ## xxx_range_morph.json 88 | Indicates that refinement is needed, and certain member ranges within the TypeConstraint can be interpreted as multiple types. The member range is marked with the `"startOffset"` and `"endOffset"` fields. 89 | The `decompiledCode` field in the JSON corresponds to different variants of decompiled pseudocode. 90 | 91 | ## xxx_final_DI.json 92 | Indicates that this type is Decompiler-Inferred, typically representing library-defined composite data types, such as `sockaddr`. 
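To make the formats above concrete, here is a minimal exploration session. It assumes `jq` is installed and is run from this directory; the function address and constraint names come from the sample files shipped here.

```bash
# List the parameters and local variables TypeForge typed for the function
# at 0x13834f, together with their TypeConstraint ids.
jq '."0x13834f" | {Parameters, LocalVariables}' varType.json

# Inspect the member layout synthesized for one referenced constraint.
jq '.ForgedStruct_213.layout' TypeConstraint_f2b22cd2_final.json

# For a globally morphing constraint, list the candidate interpretations.
jq '.globalMorph | keys' TypeConstraint_1539724c_global_morph.json
```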
-------------------------------------------------------------------------------- /demo/TypeConstraint_1539724c_global_morph.json: -------------------------------------------------------------------------------- 1 | { 2 | "globalMorph" : { 3 | "dword" : { 4 | "desc" : "Primitive", 5 | "type" : "dword", 6 | "decompiledCode" : { 7 | "0x13dfe0" : "\nundefined8 fdevent_pipe_cloexec(dword *param_1,uint param_2)\n\n{\n int iVar1;\n undefined8 uVar2;\n \n iVar1 = pipe2((int *)param_1,0x80000);\n if (iVar1 == 0) {\nLAB_0013e05f:\n if (0x10000 < param_2) {\n fcntl64(param_1[1],0x407,param_2);\n }\n uVar2 = 0;\n }\n else {\n iVar1 = pipe((int *)param_1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(*param_1,2,1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(param_1[1],2,1);\n if (iVar1 == 0) goto LAB_0013e05f;\n }\n }\n uVar2 = 0xffffffff;\n }\n return uVar2;\n}\n\n", 8 | "0x15dcf7" : "\nundefined8 fdlog_pipe_init(undefined8 param_1,dword *param_2,undefined4 param_3)\n\n{\n undefined8 *puVar1;\n undefined8 uVar2;\n \n if ((DAT_00179068 & 3) == 0) {\n ck_realloc_u32(&fdlog_pipes,DAT_00179068,4,0x18);\n }\n puVar1 = (undefined8 *)((ulong)DAT_00179068 * 0x18 + fdlog_pipes);\n DAT_00179068 = DAT_00179068 + 1;\n *(dword *)((long)puVar1 + 0xc) = *param_2;\n *(undefined4 *)(puVar1 + 1) = param_3;\n puVar1[2] = log_monotonic_secs;\n uVar2 = fdlog_init(param_1,param_2[1],3);\n *puVar1 = uVar2;\n return *puVar1;\n}\n\n" 9 | } 10 | }, 11 | "ForgedStruct_8" : { 12 | "desc" : "Structure", 13 | "layout" : { 14 | "0x0" : { 15 | "desc" : "Primitive", 16 | "size" : 4, 17 | "type" : "dword", 18 | "name" : "field_0x0" 19 | }, 20 | "0x4" : { 21 | "desc" : "Primitive", 22 | "size" : 4, 23 | "type" : "int", 24 | "name" : "field_0x4" 25 | } 26 | }, 27 | "ptrRef" : { }, 28 | "nest" : { }, 29 | "anonTypes" : { }, 30 | "decompilerInferred" : { 31 | "composite" : [ ], 32 | "array" : [ ], 33 | "primitive" : [ ] 34 | }, 35 | "decompiledCode" : { 36 | "0x13dfe0" : "\nundefined8 fdevent_pipe_cloexec(ForgedStruct_8 *param_1,uint param_2)\n\n{\n int iVar1;\n undefined8 uVar2;\n \n iVar1 = pipe2((int *)param_1,0x80000);\n if (iVar1 == 0) {\nLAB_0013e05f:\n if (0x10000 < param_2) {\n fcntl64(param_1->field_0x4,0x407,param_2);\n }\n uVar2 = 0;\n }\n else {\n iVar1 = pipe((int *)param_1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(param_1->field_0x0,2,1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(param_1->field_0x4,2,1);\n if (iVar1 == 0) goto LAB_0013e05f;\n }\n }\n uVar2 = 0xffffffff;\n }\n return uVar2;\n}\n\n", 37 | "0x15dcf7" : "\nundefined8 fdlog_pipe_init(undefined8 param_1,ForgedStruct_8 *param_2,undefined4 param_3)\n\n{\n undefined8 *puVar1;\n undefined8 uVar2;\n \n if ((DAT_00179068 & 3) == 0) {\n ck_realloc_u32(&fdlog_pipes,DAT_00179068,4,0x18);\n }\n puVar1 = (undefined8 *)((ulong)DAT_00179068 * 0x18 + fdlog_pipes);\n DAT_00179068 = DAT_00179068 + 1;\n *(dword *)((long)puVar1 + 0xc) = param_2->field_0x0;\n *(undefined4 *)(puVar1 + 1) = param_3;\n puVar1[2] = log_monotonic_secs;\n uVar2 = fdlog_init(param_1,param_2->field_0x4,3);\n *puVar1 = uVar2;\n return *puVar1;\n}\n\n" 38 | } 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /demo/TypeConstraint_746192c2_range_morph.json: -------------------------------------------------------------------------------- 1 | { 2 | "rangeMorph" : [ { 3 | "startOffset" : "0x8", 4 | "endOffset" : "0x10", 5 | "types" : { 6 | "ForgedStruct_87" : { 7 | "desc" : "Structure", 8 | "layout" : { 9 | "0x0" : { 10 | "desc" : "Primitive", 11 | "size" : 8, 12 | "type" : 
"qword", 13 | "name" : "field_0x0" 14 | }, 15 | "0x8" : { 16 | "desc" : "Pointer", 17 | "size" : 8, 18 | "type" : "void *", 19 | "name" : "field_0x8" 20 | } 21 | }, 22 | "ptrRef" : { }, 23 | "nest" : { }, 24 | "anonTypes" : { }, 25 | "decompilerInferred" : { 26 | "composite" : [ ], 27 | "array" : [ ], 28 | "primitive" : [ ] 29 | }, 30 | "decompiledCode" : { 31 | "0x1391bf" : "\nundefined4 chunk_buffer_prepare_append(void **param_1,ulong param_2)\n\n{\n void *pvVar1;\n void *__src;\n void *pvVar2;\n uint uVar3;\n undefined4 uVar4;\n ulong uVar5;\n ForgedStruct_87 *ppvVar6;\n undefined4 local_10;\n \n uVar3 = buffer_string_space(param_1);\n if (uVar3 < param_2) {\n if (*(int *)(param_1 + 1) == 0) {\n uVar5 = 1;\n }\n else {\n uVar5 = (ulong)*(uint *)(param_1 + 1);\n }\n ppvVar6 = (ForgedStruct_87 *)chunk_buffer_acquire_sz(param_2 + uVar5);\n pvVar1 = param_1[1];\n __src = *param_1;\n pvVar2 = ppvVar6->field_0x8;\n *param_1 = (void *)ppvVar6->field_0x0;\n param_1[1] = pvVar2;\n ppvVar6->field_0x0 = (qword)__src;\n ppvVar6->field_0x8 = pvVar1;\n local_10 = SUB84(pvVar1,0);\n *(undefined4 *)(param_1 + 1) = local_10;\n if (*(int *)(param_1 + 1) != 0) {\n memcpy(*param_1,__src,(ulong)pvVar1 & 0xffffffff);\n }\n chunk_buffer_release(ppvVar6);\n }\n uVar4 = buffer_string_space(param_1);\n return uVar4;\n}\n\n", 32 | "0x1308c8" : "\nvoid buffer_clear(ForgedStruct_87 *param_1)\n\n{\n *(undefined4 *)¶m_1->field_0x8 = 0;\n return;\n}\n\n", 33 | "0x13876a" : "\nvoid buffer_append_buffer(undefined8 param_1,ForgedStruct_87 *param_2)\n\n{\n undefined4 uVar1;\n \n uVar1 = buffer_clen(param_2);\n buffer_append_string_len(param_1,param_2->field_0x0,uVar1);\n return;\n}\n\n", 34 | "0x13913a" : "\nvoid chunk_buffer_yield(qword *param_1)\n\n{\n void *pvVar1;\n qword qVar2;\n void *pvVar3;\n ForgedStruct_87 *puVar4;\n \n if ((ulong)*(uint *)((long)param_1 + 0xc) != (chunk_buf_sz | 1)) {\n puVar4 = (ForgedStruct_87 *)chunk_buffer_acquire_sz(chunk_buf_sz);\n pvVar1 = (void *)param_1[1];\n qVar2 = *param_1;\n pvVar3 = puVar4->field_0x8;\n *param_1 = puVar4->field_0x0;\n param_1[1] = (qword)pvVar3;\n puVar4->field_0x0 = qVar2;\n puVar4->field_0x8 = pvVar1;\n chunk_buffer_release(puVar4);\n }\n return;\n}\n\n", 35 | "0x1386b6" : "\nbool buffer_is_blank(ForgedStruct_87 *param_1)\n\n{\n return *(uint *)¶m_1->field_0x8 < 2;\n}\n\n", 36 | "0x130996" : "\nvoid buffer_move(ForgedStruct_87 *param_1,undefined8 *param_2)\n\n{\n void *pvVar1;\n qword qVar2;\n void *pvVar3;\n \n buffer_clear(param_1);\n pvVar1 = (void *)param_2[1];\n qVar2 = *param_2;\n pvVar3 = param_1->field_0x8;\n *param_2 = param_1->field_0x0;\n param_2[1] = pvVar3;\n param_1->field_0x0 = qVar2;\n param_1->field_0x8 = pvVar1;\n return;\n}\n\n" 37 | } 38 | }, 39 | "ForgedStruct_86" : { 40 | "desc" : "Structure", 41 | "layout" : { 42 | "0x0" : { 43 | "desc" : "Primitive", 44 | "size" : 8, 45 | "type" : "qword", 46 | "name" : "field_0x0" 47 | }, 48 | "0x8" : { 49 | "desc" : "Primitive", 50 | "size" : 4, 51 | "type" : "dword", 52 | "name" : "field_0x8" 53 | } 54 | }, 55 | "ptrRef" : { }, 56 | "nest" : { }, 57 | "anonTypes" : { }, 58 | "decompilerInferred" : { 59 | "composite" : [ ], 60 | "array" : [ ], 61 | "primitive" : [ ] 62 | }, 63 | "decompiledCode" : { 64 | "0x1391bf" : "\nundefined4 chunk_buffer_prepare_append(void **param_1,ulong param_2)\n\n{\n void *pvVar1;\n void *__src;\n void *pvVar2;\n uint uVar3;\n undefined4 uVar4;\n ulong uVar5;\n ForgedStruct_86 *ppvVar6;\n undefined4 local_10;\n \n uVar3 = buffer_string_space(param_1);\n if (uVar3 < param_2) 
{\n if (*(int *)(param_1 + 1) == 0) {\n uVar5 = 1;\n }\n else {\n uVar5 = (ulong)*(uint *)(param_1 + 1);\n }\n ppvVar6 = (ForgedStruct_86 *)chunk_buffer_acquire_sz(param_2 + uVar5);\n pvVar1 = param_1[1];\n __src = *param_1;\n pvVar2 = *(void **)&ppvVar6->field_0x8;\n *param_1 = (void *)ppvVar6->field_0x0;\n param_1[1] = pvVar2;\n ppvVar6->field_0x0 = (qword)__src;\n *(void **)&ppvVar6->field_0x8 = pvVar1;\n local_10 = SUB84(pvVar1,0);\n *(undefined4 *)(param_1 + 1) = local_10;\n if (*(int *)(param_1 + 1) != 0) {\n memcpy(*param_1,__src,(ulong)pvVar1 & 0xffffffff);\n }\n chunk_buffer_release(ppvVar6);\n }\n uVar4 = buffer_string_space(param_1);\n return uVar4;\n}\n\n", 65 | "0x1308c8" : "\nvoid buffer_clear(ForgedStruct_86 *param_1)\n\n{\n param_1->field_0x8 = 0;\n return;\n}\n\n", 66 | "0x13876a" : "\nvoid buffer_append_buffer(undefined8 param_1,ForgedStruct_86 *param_2)\n\n{\n undefined4 uVar1;\n \n uVar1 = buffer_clen(param_2);\n buffer_append_string_len(param_1,param_2->field_0x0,uVar1);\n return;\n}\n\n", 67 | "0x13913a" : "\nvoid chunk_buffer_yield(qword *param_1)\n\n{\n qword qVar1;\n qword qVar2;\n qword qVar3;\n ForgedStruct_86 *puVar4;\n \n if ((ulong)*(uint *)((long)param_1 + 0xc) != (chunk_buf_sz | 1)) {\n puVar4 = (ForgedStruct_86 *)chunk_buffer_acquire_sz(chunk_buf_sz);\n qVar1 = param_1[1];\n qVar2 = *param_1;\n qVar3 = *(qword *)&puVar4->field_0x8;\n *param_1 = puVar4->field_0x0;\n param_1[1] = qVar3;\n puVar4->field_0x0 = qVar2;\n *(qword *)&puVar4->field_0x8 = qVar1;\n chunk_buffer_release(puVar4);\n }\n return;\n}\n\n", 68 | "0x1386b6" : "\nbool buffer_is_blank(ForgedStruct_86 *param_1)\n\n{\n return param_1->field_0x8 < 2;\n}\n\n", 69 | "0x130996" : "\nvoid buffer_move(ForgedStruct_86 *param_1,undefined8 *param_2)\n\n{\n undefined8 uVar1;\n qword qVar2;\n undefined8 uVar3;\n \n buffer_clear(param_1);\n uVar1 = param_2[1];\n qVar2 = *param_2;\n uVar3 = *(undefined8 *)&param_1->field_0x8;\n *param_2 = param_1->field_0x0;\n param_2[1] = uVar3;\n param_1->field_0x0 = qVar2;\n *(undefined8 *)&param_1->field_0x8 = uVar1;\n return;\n}\n\n" 70 | } 71 | } 72 | } 73 | } ] 74 | } -------------------------------------------------------------------------------- /demo/TypeConstraint_f2b22cd2_final.json: -------------------------------------------------------------------------------- 1 | { 2 | "ForgedStruct_213" : { 3 | "desc" : "Structure", 4 | "layout" : { 5 | "0x4" : { 6 | "desc" : "Primitive", 7 | "size" : 4, 8 | "type" : "int", 9 | "name" : "field_0x4" 10 | }, 11 | "0x8" : { 12 | "desc" : "Pointer", 13 | "size" : 8, 14 | "type" : "void *", 15 | "name" : "ref_0x8_TypeConstraint_4c5a3461" 16 | }, 17 | "0x18" : { 18 | "desc" : "Primitive", 19 | "size" : 8, 20 | "type" : "qword", 21 | "name" : "field_0x18" 22 | }, 23 | "0x20" : { 24 | "desc" : "Primitive", 25 | "size" : 8, 26 | "type" : "qword", 27 | "name" : "field_0x20" 28 | }, 29 | "0x28" : { 30 | "desc" : "Pointer", 31 | "size" : 8, 32 | "type" : "void *", 33 | "name" : "ref_0x28_TypeConstraint_05d81b5b" 34 | }, 35 | "0x30" : { 36 | "desc" : "Pointer", 37 | "size" : 8, 38 | "type" : "void *", 39 | "name" : "ref_0x30_TypeConstraint_05d81b5b" 40 | } 41 | }, 42 | "ptrRef" : { 43 | "0x8" : { 44 | "refSkt" : "TypeConstraint_4c5a3461", 45 | "ptrLevel" : 1 46 | }, 47 | "0x28" : { 48 | "refSkt" : "TypeConstraint_05d81b5b", 49 | "ptrLevel" : 1 50 | }, 51 | "0x30" : { 52 | "refSkt" : "TypeConstraint_05d81b5b", 53 | "ptrLevel" : 1 54 | } 55 | }, 56 | "nest" : { 57 | "0x28" : "TypeConstraint_b1d2b2a7" 58 | }, 59 | "anonTypes" : { }, 60 | 
"decompilerInferred" : { 61 | "composite" : [ ], 62 | "array" : [ ], 63 | "primitive" : [ ] 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /demo/TypeConstraint_f8591481_final_DI.json: -------------------------------------------------------------------------------- 1 | { 2 | "desc" : "DecompilerInferred", 3 | "decompilerInferred" : { 4 | "composite" : [ "sockaddr" ], 5 | "array" : [ ], 6 | "primitive" : [ ] 7 | } 8 | } -------------------------------------------------------------------------------- /extension.properties: -------------------------------------------------------------------------------- 1 | name=@typeforge@ 2 | description=TypeForge aims to recover composite data types (such as structures, unions, etc.) in stripped binaries. Compared to existing methods, TypeForge provides higher efficiency and accuracy. 3 | author=h1k0naka 4 | createdOn=2024-11-10 5 | version=@0.1.0@ 6 | -------------------------------------------------------------------------------- /ghidra_scripts/TypeForge.java: -------------------------------------------------------------------------------- 1 | import ghidra.app.script.GhidraScript; 2 | import ghidra.program.model.lang.Language; 3 | import ghidra.program.model.listing.Function; 4 | 5 | import typeforge.analyzer.Generator; 6 | import typeforge.analyzer.ReTyper; 7 | import typeforge.analyzer.TypeAnalyzer; 8 | import typeforge.base.graph.CallGraph; 9 | import typeforge.utils.*; 10 | import org.apache.commons.io.FileUtils; 11 | 12 | import java.io.IOException; 13 | import java.util.List; 14 | import java.util.Set; 15 | import java.io.File; 16 | 17 | public class TypeForge extends GhidraScript { 18 | @Override 19 | protected void run() throws Exception { 20 | 21 | println("====================== TypeForge ======================"); 22 | 23 | if(!Logging.init()) { 24 | return; 25 | } 26 | if (!prepare()) { 27 | return; 28 | } 29 | 30 | List mainFunc = Global.currentProgram.getListing().getGlobalFunctions("main"); 31 | DataTypeHelper.prepare(); 32 | 33 | if (mainFunc.isEmpty()) { 34 | Logging.warn("TypeForge","No main function found"); 35 | return; 36 | } 37 | Logging.info("TypeForge","Number of main functions: " + mainFunc.size()); 38 | 39 | long startAnalysisTime = System.currentTimeMillis(); 40 | 41 | // Function node and CallGraph Statistics 42 | Set meaningfulFunctions = FunctionHelper.getMeaningfulFunctions(); 43 | Logging.info("TypeForge","Number of meaningful functions: " + meaningfulFunctions.size()); 44 | 45 | CallGraph cg = CallGraph.getCallGraph(); 46 | 47 | Global.typeAnalysisBeginTime = System.currentTimeMillis(); 48 | TypeAnalyzer analyzer = new TypeAnalyzer(cg); 49 | analyzer.run(); 50 | Global.typeAnalysisEndTime = System.currentTimeMillis(); 51 | 52 | Generator generator = new Generator(analyzer.interSolver.typeHintCollector, 53 | analyzer.interSolver.exprManager); 54 | generator.run(); 55 | generator.explore(); 56 | 57 | Global.retypingBeginTime = System.currentTimeMillis(); 58 | ReTyper reTyper = new ReTyper(generator.getFinalSkeletons()); 59 | reTyper.run(); 60 | Global.retypingEndTime = System.currentTimeMillis(); 61 | 62 | Logging.info("TypeForge","Type Analysis time: " + (Global.typeAnalysisEndTime - Global.typeAnalysisBeginTime) / 1000.00 + "s"); 63 | Logging.info("TypeForge","ReTyping time: " + (Global.retypingEndTime - Global.retypingBeginTime) / 1000.00 + "s"); 64 | Logging.info("TypeForge","Total time: " + (Global.retypingEndTime - Global.typeAnalysisBeginTime) / 1000.00 + "s"); 
65 | Logging.info("TypeForge", "Prepare Analysis time: " + (Global.prepareAnalysisEndTime - Global.prepareAnalysisBeginTime) / 1000.00 + "s"); 66 | } 67 | 68 | protected boolean prepare() { 69 | parseArgs(); 70 | prepareOutputDirectory(); 71 | 72 | Global.currentProgram = this.currentProgram; 73 | Global.flatAPI = this; 74 | Global.ghidraScript = this; 75 | 76 | Language language = this.currentProgram.getLanguage(); 77 | if (language == null) { 78 | Logging.error("TypeForge","Language not found"); 79 | return false; 80 | } else { 81 | Logging.info("TypeForge","Language: " + language.getLanguageID()); 82 | return true; 83 | } 84 | } 85 | 86 | protected void parseArgs() { 87 | String[] args = getScriptArgs(); 88 | for (String arg : args) { 89 | Logging.info("TypeForge", "Arg: " + arg); 90 | // split the arguments string by "=" 91 | String[] argParts = arg.split("="); 92 | if (argParts.length != 2) { 93 | Logging.error("TypeForge", "Invalid argument: " + arg); 94 | System.exit(1); 95 | } 96 | 97 | String key = argParts[0]; 98 | String value = argParts[1]; 99 | 100 | if (key.equals("output")) { 101 | Global.outputDirectory = value; 102 | } else if (key.equals("start_addr")) { 103 | Global.startAddress = Long.decode(value); 104 | } else { 105 | Logging.error("TypeForge", "Invalid argument: " + arg); 106 | System.exit(1); 107 | } 108 | } 109 | } 110 | 111 | protected void prepareOutputDirectory() { 112 | if (Global.outputDirectory == null) { 113 | Logging.error("TypeForge","Output directory not specified"); 114 | System.exit(1); 115 | } 116 | 117 | File outputDir = new File(Global.outputDirectory); 118 | // If the output directory does not exist, create it 119 | if (!outputDir.exists()) { 120 | if (!outputDir.mkdirs()) { 121 | Logging.error("TypeForge", "Failed to create output directory"); 122 | System.exit(1); 123 | } 124 | } else { 125 | try { 126 | FileUtils.cleanDirectory(outputDir); 127 | } catch (IOException e) { 128 | throw new RuntimeException(e); 129 | } 130 | } 131 | } 132 | } -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | # This is normally unused 84 | # shellcheck disable=SC2034 85 | APP_BASE_NAME=${0##*/} 86 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 87 | APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit 88 | 89 | # Use the maximum available, or set MAX_FD != -1 to use that value. 
90 | MAX_FD=maximum 91 | 92 | warn () { 93 | echo "$*" 94 | } >&2 95 | 96 | die () { 97 | echo 98 | echo "$*" 99 | echo 100 | exit 1 101 | } >&2 102 | 103 | # OS specific support (must be 'true' or 'false'). 104 | cygwin=false 105 | msys=false 106 | darwin=false 107 | nonstop=false 108 | case "$( uname )" in #( 109 | CYGWIN* ) cygwin=true ;; #( 110 | Darwin* ) darwin=true ;; #( 111 | MSYS* | MINGW* ) msys=true ;; #( 112 | NONSTOP* ) nonstop=true ;; 113 | esac 114 | 115 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 116 | 117 | 118 | # Determine the Java command to use to start the JVM. 119 | if [ -n "$JAVA_HOME" ] ; then 120 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 121 | # IBM's JDK on AIX uses strange locations for the executables 122 | JAVACMD=$JAVA_HOME/jre/sh/java 123 | else 124 | JAVACMD=$JAVA_HOME/bin/java 125 | fi 126 | if [ ! -x "$JAVACMD" ] ; then 127 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 128 | 129 | Please set the JAVA_HOME variable in your environment to match the 130 | location of your Java installation." 131 | fi 132 | else 133 | JAVACMD=java 134 | if ! command -v java >/dev/null 2>&1 135 | then 136 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | fi 142 | 143 | # Increase the maximum file descriptors if we can. 144 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 145 | case $MAX_FD in #( 146 | max*) 147 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 148 | # shellcheck disable=SC2039,SC3045 149 | MAX_FD=$( ulimit -H -n ) || 150 | warn "Could not query maximum file descriptor limit" 151 | esac 152 | case $MAX_FD in #( 153 | '' | soft) :;; #( 154 | *) 155 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 156 | # shellcheck disable=SC2039,SC3045 157 | ulimit -n "$MAX_FD" || 158 | warn "Could not set maximum file descriptor limit to $MAX_FD" 159 | esac 160 | fi 161 | 162 | # Collect all arguments for the java command, stacking in reverse order: 163 | # * args from the command line 164 | # * the main class name 165 | # * -classpath 166 | # * -D...appname settings 167 | # * --module-path (only if needed) 168 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 169 | 170 | # For Cygwin or MSYS, switch paths to Windows format before running java 171 | if "$cygwin" || "$msys" ; then 172 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 173 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 174 | 175 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 176 | 177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 178 | for arg do 179 | if 180 | case $arg in #( 181 | -*) false ;; # don't mess with options #( 182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 183 | [ -e "$t" ] ;; #( 184 | *) false ;; 185 | esac 186 | then 187 | arg=$( cygpath --path --ignore --mixed "$arg" ) 188 | fi 189 | # Roll the args list around exactly as many times as the number of 190 | # args, so each arg winds up back in the position where it started, but 191 | # possibly modified. 192 | # 193 | # NB: a `for` loop captures its iteration list before it begins, so 194 | # changing the positional parameters here affects neither the number of 195 | # iterations, nor the values presented in `arg`. 
196 | shift # remove old arg 197 | set -- "$@" "$arg" # push replacement arg 198 | done 199 | fi 200 | 201 | 202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 204 | 205 | # Collect all arguments for the java command: 206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 207 | # and any embedded shellness will be escaped. 208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 209 | # treated as '${Hostname}' itself on the command line. 210 | 211 | set -- \ 212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 213 | -classpath "$CLASSPATH" \ 214 | org.gradle.wrapper.GradleWrapperMain \ 215 | "$@" 216 | 217 | # Stop when "xargs" is not available. 218 | if ! command -v xargs >/dev/null 2>&1 219 | then 220 | die "xargs is not available" 221 | fi 222 | 223 | # Use "xargs" to parse quoted args. 224 | # 225 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 226 | # 227 | # In Bash we could simply go: 228 | # 229 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 230 | # set -- "${ARGS[@]}" "$@" 231 | # 232 | # but POSIX shell has neither arrays nor command substitution, so instead we 233 | # post-process each arg (as a line of input to sed) to backslash-escape any 234 | # character that might be a shell metacharacter, then use eval to reverse 235 | # that process (while maintaining the separation between arguments), and wrap 236 | # the whole thing up as a single "set" statement. 237 | # 238 | # This will of course break if any of these variables contains a newline or 239 | # an unmatched quote. 240 | # 241 | 242 | eval "set -- $( 243 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 244 | xargs -n1 | 245 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 246 | tr '\n' ' ' 247 | )" '"$@"' 248 | 249 | exec "$JAVACMD" "$@" 250 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%"=="" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%"=="" set DIRNAME=. 29 | @rem This is normally unused 30 | set APP_BASE_NAME=%~n0 31 | set APP_HOME=%DIRNAME% 32 | 33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 
34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 35 | 36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 38 | 39 | @rem Find java.exe 40 | if defined JAVA_HOME goto findJavaFromJavaHome 41 | 42 | set JAVA_EXE=java.exe 43 | %JAVA_EXE% -version >NUL 2>&1 44 | if %ERRORLEVEL% equ 0 goto execute 45 | 46 | echo. 47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 48 | echo. 49 | echo Please set the JAVA_HOME variable in your environment to match the 50 | echo location of your Java installation. 51 | 52 | goto fail 53 | 54 | :findJavaFromJavaHome 55 | set JAVA_HOME=%JAVA_HOME:"=% 56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 57 | 58 | if exist "%JAVA_EXE%" goto execute 59 | 60 | echo. 61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 62 | echo. 63 | echo Please set the JAVA_HOME variable in your environment to match the 64 | echo location of your Java installation. 65 | 66 | goto fail 67 | 68 | :execute 69 | @rem Setup the command line 70 | 71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 72 | 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if %ERRORLEVEL% equ 0 goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | set EXIT_CODE=%ERRORLEVEL% 85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 87 | exit /b %EXIT_CODE% 88 | 89 | :mainEnd 90 | if "%OS%"=="Windows_NT" endlocal 91 | 92 | :omega 93 | -------------------------------------------------------------------------------- /imgs/TypeForge_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/TypeForge_overview.png -------------------------------------------------------------------------------- /imgs/figure_develop-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-1.png -------------------------------------------------------------------------------- /imgs/figure_develop-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-2.png -------------------------------------------------------------------------------- /imgs/figure_develop-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-3.png -------------------------------------------------------------------------------- /imgs/figure_develop-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-4.png -------------------------------------------------------------------------------- /lib/README.txt: 
-------------------------------------------------------------------------------- 1 | The "lib" directory is intended to hold Jar files which this module is dependent upon. Jar files 2 | may be placed in this directory manually, or automatically by Gradle (resolved from Maven repositories) via the dependencies block 3 | of this module's build.gradle file. -------------------------------------------------------------------------------- /lib/jackson-annotations-2.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jackson-annotations-2.13.0.jar -------------------------------------------------------------------------------- /lib/jackson-core-2.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jackson-core-2.13.0.jar -------------------------------------------------------------------------------- /lib/jackson-databind-2.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jackson-databind-2.13.0.jar -------------------------------------------------------------------------------- /lib/jgrapht-core-1.5.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jgrapht-core-1.5.1.jar -------------------------------------------------------------------------------- /lib/jheaps-0.13.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jheaps-0.13.jar -------------------------------------------------------------------------------- /os/linux_x86_64/README.txt: -------------------------------------------------------------------------------- 1 | The "os/linux_x86_64" directory is intended to hold Linux native binaries 2 | which this module is dependent upon. This directory may be eliminated for a specific 3 | module if native binaries are not provided for the corresponding platform. 4 | -------------------------------------------------------------------------------- /os/mac_x86_64/README.txt: -------------------------------------------------------------------------------- 1 | The "os/mac_x86_64" directory is intended to hold macOS (OS X) native binaries 2 | which this module is dependent upon. This directory may be eliminated for a specific 3 | module if native binaries are not provided for the corresponding platform. 4 | -------------------------------------------------------------------------------- /os/win_x86_64/README.txt: -------------------------------------------------------------------------------- 1 | The "os/win_x86_64" directory is intended to hold MS Windows native binaries (.exe) 2 | which this module is dependent upon. This directory may be eliminated for a specific 3 | module if native binaries are not provided for the corresponding platform.
4 | -------------------------------------------------------------------------------- /scripts/.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /scripts/GraphExplorer.py: -------------------------------------------------------------------------------- 1 | import pydot 2 | import argparse 3 | import os 4 | import networkx as nx 5 | 6 | def load_graph(dot_file): 7 | """ 8 | Load a graph from a DOT file and return a networkx Graph object. 9 | """ 10 | graph = pydot.graph_from_dot_file(dot_file)[0] 11 | 12 | G = nx.Graph() 13 | 14 | for edge in graph.get_edges(): 15 | src = edge.get_source() 16 | dst = edge.get_destination() 17 | label = edge.get_attributes().get('label', '') # Safely get label attribute 18 | 19 | # Remove quotes from the node names 20 | src = src.replace('"', '') 21 | dst = dst.replace('"', '') 22 | 23 | # Add edges to the networkx graph (as undirected edges) 24 | G.add_edge(src, dst, label=label) 25 | G.add_edge(dst, src, label=label) # Add the reverse edge as well for undirected graph 26 | print(f"Added edge: {src} <-> {dst} ({label})") 27 | 28 | return G 29 | 30 | def load_graphs(dot_dir): 31 | """ 32 | Load all .dot files in the specified directory and return a big networkx Graph object. 33 | """ 34 | big_G = nx.Graph() 35 | 36 | print(f"Loading graphs from {dot_dir}") 37 | for filename in os.listdir(dot_dir): 38 | if filename.endswith(".dot"): 39 | dot_file = os.path.join(dot_dir, filename) 40 | graph = pydot.graph_from_dot_file(dot_file)[0] 41 | 42 | for edge in graph.get_edges(): 43 | src = edge.get_source().replace('"', '') 44 | dst = edge.get_destination().replace('"', '') 45 | label = edge.get_attributes().get('label', '') 46 | 47 | # Add edges to the networkx graph (as undirected edges) 48 | big_G.add_edge(src, dst, label=label) 49 | big_G.add_edge(dst, src, label=label) # Add the reverse edge as well for undirected graph 50 | print(f"Added edge: {src} <-> {dst} ({label})") 51 | 52 | print(f"Loaded graph from {dot_file}") 53 | 54 | return big_G 55 | 56 | 57 | def find_shortest_path(G, src, dst): 58 | """ 59 | Find the shortest path from source node to destination node in the graph. 60 | Return the shortest path as a list of nodes and edges. 61 | """ 62 | try: 63 | shortest_path_nodes = nx.shortest_path(G, source=src, target=dst) 64 | shortest_path_edges = list(zip(shortest_path_nodes[:-1], shortest_path_nodes[1:])) 65 | return shortest_path_nodes, shortest_path_edges 66 | except nx.NetworkXNoPath: 67 | return None, None 68 | 69 | def print_colored_path(shortest_path_nodes, shortest_path_edges, G): 70 | """ 71 | Print the shortest path with edges colored and formatted. 
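    Nodes are printed in blue and edge labels in red using ANSI escape codes, so the output is best viewed in a color-capable terminal.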
72 | """ 73 | if shortest_path_nodes: 74 | for i, node in enumerate(shortest_path_nodes): 75 | if i < len(shortest_path_edges): 76 | edge = shortest_path_edges[i] 77 | edge_label = G.edges[edge]['label'] 78 | print(f"\033[34m{node}\033[0m --- (\033[31m{edge_label}\033[0m) --- \033[34m{edge[1]}\033[0m") 79 | else: 80 | print(f"\033[34m{node}\033[0m") 81 | else: 82 | print("No path found.") 83 | 84 | 85 | def main(G: nx.Graph): 86 | while True: 87 | src_node = input("Enter source node: ").strip() 88 | if src_node not in G.nodes: 89 | print("Node not found in the graph.") 90 | continue 91 | 92 | dst_node = input("Enter destination node: ").strip() 93 | if dst_node not in G.nodes: 94 | print("Node not found in the graph.") 95 | continue 96 | 97 | shortest_path_nodes, shortest_path_edges = find_shortest_path(G, src_node, dst_node) 98 | print_colored_path(shortest_path_nodes, shortest_path_edges, G) 99 | 100 | 101 | if __name__ == "__main__": 102 | # Parse command line arguments 103 | parser = argparse.ArgumentParser(description="Graph Explorer") 104 | 105 | # -p: dot file path, specify 1 dot file to load and explore 106 | # -d: dot files directory, specify a directory to load and explore all dot files in it 107 | # -p and -d are mutually exclusive 108 | group = parser.add_mutually_exclusive_group(required=True) 109 | group.add_argument("-p", "--path", help="Path to the DOT file") 110 | group.add_argument("-d", "--dir", help="Path to the directory containing DOT files") 111 | 112 | args = parser.parse_args() 113 | 114 | if args.path: 115 | G = load_graph(args.path) 116 | elif args.dir: 117 | G = load_graphs(args.dir) 118 | 119 | main(G) 120 | 121 | -------------------------------------------------------------------------------- /scripts/GroundTruthExtractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import argparse 4 | import shutil 5 | 6 | # Modify your Ghidra project name here 7 | project_name = "binaries_osprey" 8 | script_name = "GroundTruth.java" 9 | 10 | def is_elf(file_path): 11 | with open(file_path, 'rb') as f: 12 | magic_number = f.read(4) 13 | return magic_number == b'\x7fELF' 14 | 15 | def analyze_elf_files(ghidra_path, project_dir, binary_dir, output_dir): 16 | for root, dirs, files in os.walk(binary_dir): 17 | for file in files: 18 | file_path = os.path.join(root, file) 19 | if is_elf(file_path): 20 | ghidra_headless = os.path.join(ghidra_path, "support", "analyzeHeadless") 21 | output_subdir = os.path.join(output_dir, file) 22 | if not os.path.exists(output_subdir): 23 | os.makedirs(output_subdir) 24 | else: 25 | shutil.rmtree(output_subdir) 26 | os.makedirs(output_subdir) 27 | analyze_file(ghidra_headless, project_dir, file_path, output_subdir) 28 | 29 | def analyze_file(headless_path, project_dir, binary_path, output_dir): 30 | command = [ 31 | headless_path, 32 | project_dir, 33 | project_name, 34 | "-deleteProject", 35 | "-import", 36 | binary_path, 37 | "-postScript", 38 | script_name, 39 | f"output={output_dir}" 40 | ] 41 | print(f"Analyzing {binary_path}...") 42 | try: 43 | print(f"Running command: {command}") 44 | subprocess.run(command, check=True, env=os.environ.copy()) 45 | print(f"Analysis complete for {binary_path}") 46 | except subprocess.CalledProcessError as e: 47 | print(f"Failed to analyze {binary_path}: {e}") 48 | exit(1) 49 | 50 | def main(): 51 | parser = argparse.ArgumentParser(description="Analyze ELF files with Ghidra analyzeHeadless.") 52 | parser.add_argument("--ghidra", 
required=True, help="Path to the Ghidra Home.") 53 | parser.add_argument("--project_dir", required=True, help="Directory for the Ghidra project.") 54 | parser.add_argument("--binary_dir", required=True, help="Directory containing ELF binaries.") 55 | parser.add_argument("--output_dir", required=True, help="Directory to store the output.") 56 | 57 | args = parser.parse_args() 58 | 59 | # if the project already exists, delete project_name.rep and project_name.gpr 60 | if os.path.exists(os.path.join(args.project_dir, project_name + ".rep")): 61 | shutil.rmtree(os.path.join(args.project_dir, project_name + ".rep")) 62 | if os.path.exists(os.path.join(args.project_dir, project_name + ".gpr")): 63 | os.remove(os.path.join(args.project_dir, project_name + ".gpr")) 64 | 65 | # if output_dir already exists, delete it 66 | if os.path.exists(args.output_dir): 67 | shutil.rmtree(args.output_dir) 68 | os.makedirs(args.output_dir) 69 | 70 | analyze_elf_files(args.ghidra, args.project_dir, args.binary_dir, args.output_dir) 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # TypeForge (Python Scripts) 2 | 3 | ## Setup 4 | TypeForge uses `uv` to manage Python packages and dependencies. To set up, you should: 5 | 1. Install the Python package manager `uv` 6 | 2. Create a new virtual environment and install the dependencies 7 | 8 | ```bash 9 | uv venv .venv 10 | uv pip install --requirement requirements.txt 11 | ``` 12 | 3. Activate the virtual environment 13 | 14 | ```bash 15 | source .venv/bin/activate 16 | ``` 17 | 18 | If you want to add other packages, just run: 19 | ```bash 20 | uv pip install [package] 21 | uv pip freeze > requirements.txt 22 | # `uv add` needs a `pyproject.toml`, which this project does not provide. 23 | ``` 24 | 25 | ## Type Inference (Batch Mode) 26 | If a series of binaries needs to be processed, you should: 27 | 1. Update `config.yml` to specify the required metadata. 28 | 2. Prepare the dataset: 29 | The directory structure for each project in the dataset should follow this format: `dataset_root/project_name`. Each project should contain pairs of binaries: one with debug symbols (named `binary_name`, used only for Ground Truth Extraction) and one stripped binary (named `binary_name.strip`, used during Type Inference). 30 | 3. Update the `projects` field in `config.yml` to include all projects you want to process. All binaries under these projects will be processed. 31 | 4. Run the script: 32 | 33 | ```bash 34 | python3 ./TypeInference.py 35 | ``` 36 | 37 | ## Extract Ground Truth (Batch Mode) 38 | The preparation steps are the same as above.
You only need to modify the following code in `TypeInference.py`: 39 | 40 | ```python 41 | # Set `infer = False` to collect ground truth instead of performing inference 42 | run_ghidra_headless_on_project(pathlib.Path(dataset_root) / proj, infer = False) 43 | ``` -------------------------------------------------------------------------------- /scripts/TypeInference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import yaml 4 | import subprocess 5 | import re 6 | 7 | def check_directories_exist(): 8 | for proj_name in os.listdir(dataset_root): 9 | proj_path = pathlib.Path(dataset_root) / proj_name 10 | for binary_name in os.listdir(proj_path): 11 | if ".strip" not in binary_name: 12 | binary_gt_dir = pathlib.Path(gt_root) / proj_name / binary_name 13 | if not binary_gt_dir.exists(): 14 | print(f"Warning: {binary_gt_dir} does not exist") 15 | # create the directory 16 | os.makedirs(binary_gt_dir) 17 | print(f"Created {binary_gt_dir}") 18 | else: 19 | print(f"Found {binary_gt_dir}") 20 | 21 | binary_infer_dir = pathlib.Path(infer_root) / proj_name / binary_name 22 | if not binary_infer_dir.exists(): 23 | print(f"Warning: {binary_infer_dir} does not exist") 24 | # create the directory 25 | os.makedirs(binary_infer_dir) 26 | print(f"Created {binary_infer_dir}") 27 | else: 28 | print(f"Found {binary_infer_dir}") 29 | 30 | 31 | def check_consistency(): 32 | check_directories_exist() 33 | 34 | def inference_on_binary(command): 35 | try: 36 | result = subprocess.run(command, capture_output=True, text=True, check=True) 37 | print(f"STDOUT:\n{result.stdout}") 38 | print(f"STDERR:\n{result.stderr}") 39 | output = result.stdout + result.stderr 40 | 41 | analyze_time = None 42 | retype_time = None 43 | total_time = None 44 | 45 | # Don't just look at last 10 lines, search through all output 46 | for line in output.splitlines(): 47 | if 'Type Analysis time' in line: 48 | match = re.search(r"Type Analysis time:\s*([0-9.]+)s", line) 49 | if match: 50 | analyze_time = match.group(1) 51 | elif 'ReTyping time' in line: 52 | match = re.search(r"ReTyping time:\s*([0-9.]+)s", line) 53 | if match: 54 | retype_time = match.group(1) 55 | elif 'Total time' in line: 56 | match = re.search(r"Total time:\s*([0-9.]+)s", line) 57 | if match: 58 | total_time = match.group(1) 59 | 60 | print(f"Analysis time: {analyze_time if analyze_time else 'None'}s") 61 | print(f"ReType time: {retype_time if retype_time else 'None'}s") 62 | print(f"Total time: {total_time if total_time else 'None'}s") 63 | 64 | return analyze_time, retype_time, total_time 65 | 66 | except subprocess.CalledProcessError as e: 67 | print(f"Error: {e}") 68 | print(f"Output: {e.output}") 69 | return None, None, None 70 | 71 | def get_gt_on_binary(command): 72 | try: 73 | subprocess.run(command, check=True, capture_output=True, text=True) 74 | except subprocess.CalledProcessError as e: 75 | print(f"Error: {e}") 76 | print(f"Output: {e.output}") 77 | 78 | 79 | def check_infer_exists(target_dir): 80 | skt_exists = False 81 | vartype_exists = False 82 | for file in os.listdir(target_dir): 83 | if "TypeConstraint_" in file: 84 | skt_exists = True 85 | if "varType" in file: 86 | vartype_exists = True 87 | return skt_exists and vartype_exists 88 | 89 | def check_gt_exists(target_dir): 90 | typelib_exists = False 91 | vartype_exists = False 92 | for file in os.listdir(target_dir): 93 | if "typeLib" in file: 94 | typelib_exists = True 95 | if "varType" in file: 96 | vartype_exists = True 97 | 
return typelib_exists and vartype_exists 98 | 99 | def run_ghidra_headless_on_project(project_path, infer: bool = True): 100 | dataflow_time = 0 101 | retype_time = 0 102 | total_time = 0 103 | 104 | for binary_name in os.listdir(project_path): 105 | if infer: 106 | if ".strip" in binary_name: 107 | binary_path = pathlib.Path(project_path) / binary_name 108 | binary_output_dir = pathlib.Path(infer_root) / project_path.name / binary_name[:-6] 109 | command = [ghidra_headless, project_dir, project_name, "-deleteProject", 110 | "-import", binary_path.resolve(), "-postScript", "TypeForge.java", 111 | f"output={binary_output_dir.resolve()}"] 112 | 113 | print(f"Inferring on {binary_path} ...") 114 | print(f"Command: {command}") 115 | times = inference_on_binary(command) 116 | if check_infer_exists(binary_output_dir): 117 | print("Inference successful") 118 | dataflow_time += float(times[0] or 0) # "or 0" guards against timings that failed to parse 119 | retype_time += float(times[1] or 0) 120 | total_time += float(times[2] or 0) 121 | else: 122 | print(f"Inferring on {binary_path} failed") 123 | else: 124 | if ".strip" not in binary_name: 125 | binary_path = pathlib.Path(project_path) / binary_name 126 | binary_output_dir = pathlib.Path(gt_root) / project_path.name / binary_name 127 | command = [ghidra_headless, project_dir, project_name, "-deleteProject", 128 | "-import", binary_path.resolve(), "-postScript", "GroundTruth.java", 129 | f"output={binary_output_dir.resolve()}"] 130 | print(f"Collecting GT on {binary_path} ...") 131 | print(f"Command: {command}") 132 | get_gt_on_binary(command) 133 | if check_gt_exists(binary_output_dir): 134 | print("GT collection successful") 135 | else: 136 | print(f"GT collection on {binary_path} failed") 137 | 138 | if infer: 139 | print(f"Total dataflow time: {dataflow_time}s") 140 | print(f"Total retype time: {retype_time}s") 141 | print(f"Total time: {total_time}s") 142 | 143 | if __name__ == "__main__": 144 | 145 | config_yml = pathlib.Path(__file__).parent / "config.yml" 146 | with open(config_yml, "r") as f: 147 | config = yaml.safe_load(f) 148 | 149 | dataset_root = config["inference"]["dataset"] 150 | gt_root = config["inference"]["gt"] 151 | infer_root = config["inference"]["infer"] 152 | 153 | ghidra_headless = config["ghidra"]["headless"] 154 | project_dir = config["ghidra"]["project_dir"] 155 | project_name = config["ghidra"]["project_name"] 156 | 157 | check_consistency() 158 | 159 | projects_to_inference = config["inference"]["projects"] 160 | 161 | project_statistics = {} 162 | 163 | for proj in projects_to_inference: 164 | print(f"Running inference on project: {proj}") 165 | run_ghidra_headless_on_project(pathlib.Path(dataset_root) / proj, infer = True) 166 | # run_ghidra_headless_on_project(pathlib.Path(dataset_root) / proj, infer = False) -------------------------------------------------------------------------------- /scripts/config.yml: -------------------------------------------------------------------------------- 1 | inference: 2 | dataset: "./TypeForge_Dataset" # the input dataset directory 3 | gt: "./TypeForge_GT" # the extracted ground truth directory 4 | infer: "./TypeForge_Infer" # the inference output directory 5 | projects: # the projects to be analyzed 6 | - "lighttpd" 7 | 8 | ghidra: 9 | headless: "/home/h1k0/tools/ghidra_11.0.3_PUBLIC/support/analyzeHeadless" # the path to the ghidra headless analyzer 10 | project_dir: "/home/h1k0/tmp" # the directory where the ghidra projects are stored 11 | project_name: "TypeForge_headless_project" # the name of the ghidra project
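# Note: TypeInference.py resolves each entry under `projects` as <dataset>/<project_name>;
# e.g. adding a second (hypothetical) project "zlib" would also process ./TypeForge_Dataset/zlib,
# which must contain the binary pairs <name> and <name>.strip described in scripts/README.md.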
-------------------------------------------------------------------------------- /scripts/judge/README.md: -------------------------------------------------------------------------------- 1 | # LLM-Assisted Double Elimination 2 | 3 | ## Setup 4 | 1. Create a `.env` file in the current directory and fill it in as follows: 5 | 6 | ```bash 7 | LANGSMITH_TRACING="false" 8 | LANGSMITH_API_KEY="[your_langsmith_apikey]" 9 | LANGSMITH_PROJECT="typeforge" # or any other project name 10 | OPENAI_API_KEY="[your_openai_apikey]" 11 | BASE_URL="[your_url]" 12 | MODEL="gpt-4.1-mini" 13 | ``` 14 | 2. Prepare the directory containing the inferred type constraints (a series of JSON files). 15 | 16 | ## Judge 17 | 1. Run `uv run main.py [inferred_dir]` to refine the inferred results. 18 | 2. The JSON file with the suffix `_morph_final.json` contains the final inferred type. -------------------------------------------------------------------------------- /scripts/judge/llm.py: -------------------------------------------------------------------------------- 1 | import os, asyncio 2 | from typing import Tuple, List, Literal, Optional, Any 3 | from langchain.chat_models import init_chat_model 4 | from langchain_core.messages import HumanMessage, SystemMessage 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from pydantic import BaseModel, Field 7 | import random 8 | 9 | system_template = """ 10 | You are an experienced reverse engineering expert. 11 | Please assess the readability of each pair of the following decompiled code snippets, where differences originate from some variables being assigned different types. 12 | You should disregard differences in variable and type names, and instead focus on both: 13 | 1. The syntactic clarity of the code, and 14 | 2. The logical rationality of its contextual semantics. 15 | 16 | Please return 0 if decompiled_code_0 has better readability, or 1 if decompiled_code_1 has better readability. 17 | """ 18 | 19 | prompt_template = ChatPromptTemplate.from_messages( 20 | [ 21 | ("system", system_template), 22 | ("user", "decompiled_code_0:\n{code1}\n\ndecompiled_code_1:\n{code2}\n") 23 | ] 24 | ) 25 | 26 | class ReadabilityJudgment(BaseModel): 27 | choice: Literal[0, 1] = Field( 28 | description = "0 if decompiled_code_0 has better readability, 1 if decompiled_code_1 has better readability." 29 | ) 30 | 31 | async def judge_code_pair(code_pair: Tuple[str, str], max_retries: int = 3, timeout: float = 5.0) -> int: 32 | """ 33 | Judge a pair of code snippets for readability.
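    Falls back to a uniformly random choice when the chat model cannot be initialized or all retries time out (see the retry loop below).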
34 | 35 | Args: 36 | code_pair: A tuple of two code snippets to compare 37 | max_retries: Maximum number of retries when timeout occurs 38 | timeout: Timeout in seconds for each attempt 39 | 40 | Returns: 41 | 0 if the first code is more readable, 1 if the second is more readable 42 | """ 43 | llm = None 44 | prompt = prompt_template.invoke({ 45 | "code1": code_pair[0], 46 | "code2": code_pair[1] 47 | }) 48 | 49 | retries = 0 50 | while retries < max_retries: 51 | try: 52 | # Use try/except to handle potential import errors 53 | try: 54 | if llm is None: # Only initialize if not already initialized 55 | llm = init_chat_model( 56 | model=os.environ.get("MODEL"), 57 | temperature=0.4, 58 | base_url=os.environ.get("BASE_URL"), 59 | ) 60 | except (ImportError, AttributeError) as e: 61 | print(f"Failed to initialize chat model: {e}") 62 | return random.choice([0, 1]) 63 | 64 | structured_llm = llm.with_structured_output(ReadabilityJudgment) 65 | try: 66 | result = await asyncio.wait_for(structured_llm.ainvoke(prompt), timeout=timeout) 67 | print(f"Judge result: {result.choice}") 68 | return result.choice 69 | except asyncio.TimeoutError: 70 | retries += 1 71 | print(f"Timeout occurred, attempt {retries}/{max_retries}") 72 | if retries == max_retries: 73 | print("Max retries reached, returning random choice") 74 | return random.choice([0, 1]) 75 | continue 76 | 77 | except Exception as e: 78 | print(f"Exception occurred in judge_code_pair: {e}") 79 | # Return a random choice in case of error instead of crashing the entire process 80 | return random.choice([0, 1]) 81 | finally: 82 | # Ensure resources are cleaned up 83 | if llm and hasattr(llm, 'aclose') and callable(llm.aclose): 84 | try: 85 | await llm.aclose() 86 | except Exception as e: 87 | print(f"Error closing LLM: {e}") 88 | llm = None # Reset llm for next retry 89 | 90 | async def judge_readability(decompiled_code_pairs: List[Tuple[str, str]]) -> List[int]: 91 | """ 92 | Judge readability of decompiled code pairs concurrently. 93 | 94 | Args: 95 | decompiled_code_pairs: List of tuples of decompiled code pairs 96 | 97 | Returns: 98 | List of judgments (0 or 1) for each pair 99 | """ 100 | if not decompiled_code_pairs: 101 | print("No code pairs to judge") 102 | return [] 103 | 104 | # If the number of code pairs is too large, we need to sample them. 
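    # (The cap of 10 pairs below is the script's fixed sampling budget; it bounds per-round LLM cost and latency.)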
105 | original_len = len(decompiled_code_pairs) 106 | if len(decompiled_code_pairs) > 10: 107 | decompiled_code_pairs = random.sample(decompiled_code_pairs, 10) 108 | print(f"Randomly sample {len(decompiled_code_pairs)} code pairs from {original_len}") 109 | 110 | print(f"Judging {len(decompiled_code_pairs)} code pairs") 111 | 112 | tasks = [] 113 | # Create task for each code pair 114 | for i, code_pair in enumerate(decompiled_code_pairs): 115 | tasks.append(judge_code_pair(code_pair)) 116 | 117 | # Process all pairs concurrently with proper error handling 118 | try: 119 | results = await asyncio.gather(*tasks, return_exceptions=True) 120 | # Handle any exceptions in results 121 | final_results = [] 122 | for i, result in enumerate(results): 123 | if isinstance(result, Exception): 124 | print(f"Error in pair {i}: {result}") 125 | final_results.append(random.choice([0, 1])) 126 | else: 127 | final_results.append(result) 128 | return final_results 129 | except Exception as e: 130 | print(f"Error in judge_readability: {e}") 131 | # Return fallback results if needed 132 | return [random.choice([0, 1]) for _ in range(len(decompiled_code_pairs))] 133 | 134 | if __name__ == "__main__": 135 | pass -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==24.1.0 2 | annotated-types==0.7.0 3 | anyio==4.9.0 4 | certifi==2025.1.31 5 | charset-normalizer==3.4.1 6 | distro==1.9.0 7 | dotenv==0.9.9 8 | greenlet==3.2.0 9 | h11==0.14.0 10 | httpcore==1.0.8 11 | httpx==0.28.1 12 | idna==3.10 13 | iniconfig==2.1.0 14 | jiter==0.9.0 15 | jsonpatch==1.33 16 | jsonpointer==3.0.0 17 | langchain==0.3.23 18 | langchain-core==0.3.54 19 | langchain-openai==0.3.14 20 | langchain-text-splitters==0.3.8 21 | langsmith==0.3.32 22 | networkx==3.4.2 23 | openai==1.75.0 24 | orjson==3.10.16 25 | packaging==24.2 26 | pluggy==1.5.0 27 | pydantic==2.11.3 28 | pydantic-core==2.33.1 29 | pydot==4.0.0 30 | pyparsing==3.2.3 31 | pytest==8.3.5 32 | python-dotenv==1.1.0 33 | pyyaml==6.0.2 34 | regex==2024.11.6 35 | requests==2.32.3 36 | requests-toolbelt==1.0.0 37 | sniffio==1.3.1 38 | sqlalchemy==2.0.40 39 | tenacity==9.1.2 40 | tiktoken==0.9.0 41 | tqdm==4.67.1 42 | typing-extensions==4.13.2 43 | typing-inspection==0.4.0 44 | urllib3==2.4.0 45 | zstandard==0.23.0 46 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/AccessPoints.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import typeforge.base.dataflow.expression.NMAE; 4 | import typeforge.utils.DataTypeHelper; 5 | import typeforge.utils.Logging; 6 | import ghidra.program.model.data.*; 7 | import ghidra.program.model.listing.Function; 8 | import ghidra.program.model.pcode.PcodeOp; 9 | 10 | import java.util.HashSet; 11 | import java.util.Map; 12 | import java.util.HashMap; 13 | import java.util.Objects; 14 | import java.util.Set; 15 | 16 | public class AccessPoints { 17 | 18 | public enum AccessType { 19 | LOAD, 20 | STORE, 21 | ARGUMENT, 22 | RETURN_VALUE, 23 | INDIRECT 24 | } 25 | 26 | /** 27 | * AP records an access point of a symbolExpr 28 | * access type can be load, store or ... 29 | * an access point always associates with a TypeDescriptor. 
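 * For example (illustrative): a 4-byte load from the expression *(param_1 + 8) is recorded as an AP holding that LOAD PcodeOp, the enclosing Function, dataType int, and AccessType.LOAD.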
30 | */ 31 | public static class AP { 32 | public final PcodeOp pcodeOp; 33 | public final Function func; 34 | public DataType dataType; 35 | 36 | /** accessType: one of: 37 | * 0: load 38 | * 1: store 39 | * 2: argument 40 | * 3: return value; 4: indirect 41 | */ 42 | public AccessType accessType; 43 | 44 | public AP(PcodeOp pcodeOp, DataType type, AccessType accessType, Function func) { 45 | this.pcodeOp = pcodeOp; 46 | if (accessType != AccessType.ARGUMENT) { 47 | assert type != null; 48 | this.dataType = type; 49 | } 50 | else { 51 | this.dataType = null; 52 | } 53 | this.accessType = accessType; 54 | this.func = func; 55 | } 56 | 57 | @Override 58 | public int hashCode() { 59 | return Objects.hash(pcodeOp, accessType); 60 | } 61 | 62 | @Override 63 | public boolean equals(Object obj) { 64 | if (obj instanceof AP other) { 65 | if (accessType != AccessType.ARGUMENT) { 66 | if (!pcodeOp.equals(other.pcodeOp)) return false; 67 | return accessType == other.accessType; 68 | } else { 69 | return pcodeOp.equals(other.pcodeOp) && accessType == other.accessType; 70 | } 71 | } 72 | return false; 73 | } 74 | } 75 | 76 | public static class APSet { 77 | public final Set<AP> apSet; 78 | public boolean isSameSizeType = true; 79 | public int maxDTSize = -1; 80 | public int minDTSize = -1; 81 | public int DTSize = -1; 82 | public DataType mostAccessedDT = null; 83 | public Set<DataType> allDTs = new HashSet<>(); 84 | 85 | public APSet() { 86 | this.apSet = new HashSet<>(); 87 | } 88 | 89 | public APSet(APSet other) { 90 | this.apSet = new HashSet<>(other.apSet); 91 | this.isSameSizeType = other.isSameSizeType; 92 | this.maxDTSize = other.maxDTSize; 93 | this.minDTSize = other.minDTSize; 94 | this.DTSize = other.DTSize; 95 | this.mostAccessedDT = other.mostAccessedDT; 96 | this.allDTs = new HashSet<>(other.allDTs); 97 | } 98 | 99 | public void addAll(Set<AP> apSet) { 100 | this.apSet.addAll(apSet); 101 | } 102 | 103 | public boolean addAP(AP ap) { 104 | return apSet.add(ap); 105 | } 106 | 107 | public Set<AP> getApSet() { 108 | return apSet; 109 | } 110 | 111 | public int getAPCount() { 112 | return apSet.size(); 113 | } 114 | 115 | public Map<DataType, Integer> getTypeFreq() { 116 | Map<DataType, Integer> typeFreq = new HashMap<>(); 117 | for (var ap: apSet) { 118 | typeFreq.putIfAbsent(ap.dataType, 0); 119 | typeFreq.put(ap.dataType, typeFreq.get(ap.dataType) + 1); 120 | } 121 | return typeFreq; 122 | } 123 | 124 | public void postHandle() { 125 | /* Avoid using undefined data type */ 126 | for (var ap: apSet) { 127 | if (ap.dataType instanceof Undefined || ap.dataType instanceof DefaultDataType) { 128 | ap.dataType = DataTypeHelper.getDataTypeInSize(ap.dataType.getLength()); 129 | } else if (ap.dataType instanceof Pointer && ((Pointer) ap.dataType).getDataType() instanceof Undefined) { 130 | var dt = DataTypeHelper.getDataTypeInSize(ap.dataType.getLength()); 131 | ap.dataType = DataTypeHelper.getPointerDT(dt, 1); 132 | } 133 | } 134 | 135 | isSameSizeType = AccessPoints.ifAPSetHoldsSameSizeType(apSet); 136 | if (isSameSizeType) { 137 | DTSize = AccessPoints.getDataTypeSize(apSet); 138 | maxDTSize = DTSize; 139 | minDTSize = DTSize; 140 | } else { 141 | maxDTSize = AccessPoints.getMaxSizeInAPSet(apSet); 142 | minDTSize = AccessPoints.getMinSizeInAPSet(apSet); 143 | } 144 | 145 | mostAccessedDT = AccessPoints.getMostAccessedDT(apSet); 146 | allDTs = AccessPoints.getDataTypes(apSet); 147 | } 148 | } 149 | 150 | 151 | /** 152 | * Each SymbolExpr in a function may be accessed by multiple PcodeOps with different types.
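 * (e.g., the same field expression may be read as a 4-byte int by one PcodeOp and stored as an 8-byte pointer by another; sizes here are illustrative.)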
153 | * So we need to record all the access points of each SymbolExpr. 154 | */ 155 | 156 | /** Expressions in memAccessMap: (param + 1) means there is a load/store into (param + 1), loaded value can be represented as *(param + 1) */ 157 | private final Map<NMAE, Set<AP>> fieldExprToAccessMap; 158 | 159 | public AccessPoints() { 160 | fieldExprToAccessMap = new HashMap<>(); 161 | } 162 | 163 | public void addFieldAccessPoint(NMAE symExpr, PcodeOp op, DataType type, AccessType accessType, Function func) { 164 | fieldExprToAccessMap.putIfAbsent(symExpr, new HashSet<>()); 165 | fieldExprToAccessMap.get(symExpr).add(new AP(op, type, accessType, func)); 166 | Logging.trace("AccessPoints", String.format("Add Field Access %s for [%s] with type [%s]", accessType, symExpr, type.getName())); 167 | } 168 | 169 | public Set<AP> getFieldAccessPoints(NMAE symExpr) { 170 | return fieldExprToAccessMap.get(symExpr); 171 | } 172 | 173 | public static boolean ifAPSetHoldsSameSizeType(Set<AP> apSet) { 174 | if (apSet.isEmpty()) { 175 | return false; 176 | } 177 | var firstAP = apSet.iterator().next(); 178 | var firstDT = firstAP.dataType; 179 | for (var ap : apSet) { 180 | if (!(firstDT.getLength() == ap.dataType.getLength())) { 181 | return false; 182 | } 183 | } 184 | return true; 185 | } 186 | 187 | public static int getMaxSizeInAPSet(Set<AP> apSet) { 188 | if (apSet.isEmpty()) { 189 | return 0; 190 | } 191 | var maxSize = 0; 192 | for (var ap : apSet) { 193 | if (ap.dataType.getLength() > maxSize) { 194 | maxSize = ap.dataType.getLength(); 195 | } 196 | } 197 | return maxSize; 198 | } 199 | 200 | public static int getMinSizeInAPSet(Set<AP> apSet) { 201 | if (apSet.isEmpty()) { 202 | return 0; 203 | } 204 | var minSize = Integer.MAX_VALUE; 205 | for (var ap : apSet) { 206 | if (ap.dataType.getLength() < minSize) { 207 | minSize = ap.dataType.getLength(); 208 | } 209 | } 210 | return minSize; 211 | } 212 | 213 | public static DataType getMostAccessedDT(Set<AP> apSet) { 214 | Map<DataType, Integer> apCount = new HashMap<>(); 215 | apSet.forEach(ap -> { 216 | apCount.putIfAbsent(ap.dataType, 0); 217 | apCount.put(ap.dataType, apCount.get(ap.dataType) + 1); 218 | }); 219 | 220 | /* Find DataType with Max access count */ 221 | var maxCount = 0; 222 | DataType maxDT = null; 223 | for (var entry: apCount.entrySet()) { 224 | if (entry.getValue() > maxCount) { 225 | maxCount = entry.getValue(); 226 | maxDT = entry.getKey(); 227 | } 228 | } 229 | return maxDT; 230 | } 231 | 232 | public static Set<DataType> getDataTypes(Set<AP> apSet) { 233 | Set<DataType> dataTypes = new HashSet<>(); 234 | for (var ap: apSet) { 235 | dataTypes.add(ap.dataType); 236 | } 237 | return dataTypes; 238 | } 239 | 240 | public static int getDataTypeSize(Set<AP> apSet) { 241 | if (apSet.isEmpty()) { 242 | return 0; 243 | } 244 | return apSet.iterator().next().dataType.getLength(); 245 | } 246 | } 247 | 248 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/ConflictGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import org.jgrapht.Graph; 4 | import org.jgrapht.graph.SimpleGraph; 5 | import typeforge.utils.Logging; 6 | 7 | import java.util.Comparator; 8 | import java.util.NoSuchElementException; 9 | import java.util.Set; 10 | 11 | public class ConflictGraph<T> { 12 | // Edge type enum 13 | public enum EdgeType { 14 | INTERSEC, 15 | NOINTERSEC 16 | } 17 | 18 | // Custom edge class 19 | public static class ConflictEdge { 20 | private final EdgeType type; 21 | 22 |
public ConflictEdge(EdgeType type) { 23 | this.type = type; 24 | } 25 | 26 | public EdgeType getType() { 27 | return type; 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "ConflictEdge[" + type + "]"; 33 | } 34 | } 35 | 36 | private final Graph<T, ConflictEdge> graph; 37 | 38 | public ConflictGraph() { 39 | // Create an undirected graph 40 | this.graph = new SimpleGraph<>(ConflictEdge.class); 41 | } 42 | 43 | // Add a vertex to the graph 44 | public boolean addVertex(T vertex) { 45 | return graph.addVertex(vertex); 46 | } 47 | 48 | // Add an edge of type INTERSEC between two vertices 49 | public void addIntersecEdge(T source, T target) { 50 | addVertex(source); 51 | addVertex(target); 52 | graph.addEdge(source, target, new ConflictEdge(EdgeType.INTERSEC)); 53 | Logging.debug("ConflictGraph", String.format("Add Intersection Conflict Graph edge: %s ---%s---> %s", source, EdgeType.INTERSEC, target)); 54 | } 55 | 56 | // Add an edge of type NOINTERSEC between two vertices 57 | public void addNoIntersecEdge(T source, T target) { 58 | addVertex(source); 59 | addVertex(target); 60 | graph.addEdge(source, target, new ConflictEdge(EdgeType.NOINTERSEC)); 61 | Logging.debug("ConflictGraph", String.format("Add No Intersection Conflict Graph edge: %s ---%s---> %s", source, EdgeType.NOINTERSEC, target)); 62 | } 63 | 64 | // Get all vertices 65 | public Set<T> getVertices() { 66 | return graph.vertexSet(); 67 | } 68 | 69 | // Get all edges 70 | public Set<ConflictEdge> getEdges() { 71 | return graph.edgeSet(); 72 | } 73 | 74 | // Find the node with the most NOINTERSEC connections (highest NOINTERSEC degree) 75 | public T findNodeWithMostNoIntersecConnections() { 76 | Set<T> vertices = graph.vertexSet(); 77 | return vertices.stream() 78 | .max(Comparator.comparingInt(vertex -> { 79 | // Count only NOINTERSEC edges for this vertex 80 | return (int) graph.edgesOf(vertex).stream() 81 | .filter(edge -> edge.getType() == EdgeType.NOINTERSEC) 82 | .count(); 83 | })) 84 | .orElseThrow(() -> new NoSuchElementException("No vertex found with NOINTERSEC connections")); 85 | } 86 | 87 | public boolean hasIntersecConnections() { 88 | return graph.edgeSet().stream() 89 | .anyMatch(edge -> edge.getType() == EdgeType.INTERSEC); 90 | } 91 | 92 | public boolean hasNoIntersecConnections() { 93 | return graph.edgeSet().stream() 94 | .anyMatch(edge -> edge.getType() == EdgeType.NOINTERSEC); 95 | } 96 | 97 | public void removeAllNoIntersecEdgesOfNode(T vertex) { 98 | // Create a copy to avoid concurrent modification 99 | Set<ConflictEdge> edgesToRemove = graph.edgesOf(vertex).stream() 100 | .filter(edge -> edge.getType() == EdgeType.NOINTERSEC) 101 | .collect(java.util.stream.Collectors.toSet()); 102 | 103 | // Remove each edge 104 | for (ConflictEdge edge : edgesToRemove) { 105 | graph.removeEdge(edge); 106 | } 107 | } 108 | 109 | // Get the number of edges for a specific vertex 110 | public int getConnectionCount(T vertex) { 111 | return graph.degreeOf(vertex); 112 | } 113 | 114 | // Get the underlying graph 115 | public Graph<T, ConflictEdge> getGraph() { 116 | return graph; 117 | } 118 | 119 | // Get edges of a specific type 120 | public Set<ConflictEdge> getEdgesOfType(EdgeType type) { 121 | return graph.edgeSet().stream() 122 | .filter(edge -> edge.getType() == type) 123 | .collect(java.util.stream.Collectors.toSet()); 124 | } 125 | 126 | public int getEdgesCountOfType(EdgeType type) { 127 | return (int) graph.edgeSet().stream() 128 | .filter(edge -> edge.getType() == type) 129 | .count(); 130 | } 131 | 132 | @Override 133 | public String toString() { 134 | return "ConflictGraph{vertices=" +
graph.vertexSet().size() + 135 | ", edges=" + graph.edgeSet().size() + "}"; 136 | } 137 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/KSet.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import typeforge.utils.Logging; 4 | 5 | import java.util.HashSet; 6 | import java.util.Iterator; 7 | 8 | /** 9 | * KSet is a set with a maximum size. 10 | * If the set is full, then the add operation will return false. 11 | * @param <E> the element type 12 | */ 13 | public class KSet<E> implements Iterable<E> { 14 | private final HashSet<E> set; 15 | private final int maxSize; 16 | 17 | public KSet(int maxSize) { 18 | this.maxSize = maxSize; 19 | this.set = new HashSet<>(); 20 | } 21 | 22 | public boolean add(E element) { 23 | if (set.size() >= maxSize) { 24 | Logging.warn("KSet", "Set is full, cannot add element: " + element); 25 | return false; 26 | } 27 | return set.add(element); 28 | } 29 | 30 | public boolean isEmpty() { 31 | return set.isEmpty(); 32 | } 33 | 34 | public boolean contains(E element) { 35 | return set.contains(element); 36 | } 37 | 38 | public void merge(KSet<E> other) { 39 | for (E element : other.set) { 40 | if (this.set.size() >= this.maxSize) { 41 | break; 42 | } 43 | this.add(element); 44 | } 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return set.toString(); 50 | } 51 | 52 | @Override 53 | public Iterator<E> iterator() { 54 | return set.iterator(); 55 | } 56 | 57 | public void clear() { 58 | set.clear(); 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/Layout.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import typeforge.base.dataflow.constraint.Skeleton; 4 | 5 | import java.util.*; 6 | 7 | /** 8 | * Layout class is used to describe the layout of a composite data type. 9 | * Actually, layout is just a List of intervals, where each interval is a pair of offset and size.
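 * For example (illustrative): fields accessed at offset 0 with size 8 and at offset 8 with size 4 produce the intervals [(0, {8}), (8, {4})].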
10 | */ 11 | public class Layout { 12 | 13 | public static class Interval { 14 | public long offset; 15 | public Set<Integer> sizes; 16 | 17 | public Interval(long offset, Set<Integer> sizes) { 18 | this.offset = offset; 19 | this.sizes = sizes; 20 | } 21 | 22 | @Override 23 | public boolean equals(Object o) { 24 | if (this == o) return true; 25 | if (o == null || getClass() != o.getClass()) return false; 26 | Interval interval = (Interval) o; 27 | return offset == interval.offset && sizes.equals(interval.sizes); 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | return Objects.hash(offset, sizes); 33 | } 34 | } 35 | 36 | public List<Interval> intervals; 37 | 38 | public Layout(Skeleton constraint) { 39 | intervals = new ArrayList<>(); 40 | constraint.fieldAccess.forEach((offset, aps) -> { 41 | Set<Integer> sizes = new HashSet<>(); 42 | for (var ap: aps.getApSet()) { 43 | sizes.add(ap.dataType.getLength()); 44 | } 45 | intervals.add(new Interval(offset, sizes)); 46 | }); 47 | } 48 | 49 | public Layout(List<Interval> intervals) { 50 | this.intervals = intervals; 51 | } 52 | 53 | @Override 54 | public int hashCode() { 55 | return Objects.hash( 56 | intervals.stream().map(interval -> interval.offset).toArray() 57 | ); 58 | } 59 | 60 | @Override 61 | public boolean equals(Object o) { 62 | if (this == o) return true; 63 | if (o == null || getClass() != o.getClass()) return false; 64 | Layout layout = (Layout) o; 65 | 66 | if (this.intervals.size() != layout.intervals.size()) return false; 67 | for (int i = 0; i < this.intervals.size(); i++) { 68 | if (this.intervals.get(i).offset != layout.intervals.get(i).offset) { 69 | return false; 70 | } 71 | } 72 | return true; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/Range.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import java.util.Objects; 4 | import java.util.Set; 5 | 6 | public class Range { 7 | private final long start; 8 | private final long end; 9 | 10 | public Range(Long start, Long end) { 11 | this.start = start; 12 | this.end = end; 13 | } 14 | 15 | public long getStart() { 16 | return start; 17 | } 18 | 19 | public long getEnd() { 20 | return end; 21 | } 22 | 23 | static public boolean ifRangeInRanges(Range range, Set<Range> existRanges) { 24 | for (var r: existRanges) { 25 | if (range.getStart() >= r.getStart() && range.getEnd() <= r.getEnd()) { 26 | return true; 27 | } 28 | } 29 | return false; 30 | } 31 | 32 | @Override 33 | public int hashCode() { 34 | return Objects.hash(start, end); 35 | } 36 | 37 | @Override 38 | public boolean equals(Object o) { 39 | if (this == o) return true; 40 | if (o == null || getClass() != o.getClass()) return false; 41 | Range range = (Range) o; 42 | return Objects.equals(start, range.start) && 43 | Objects.equals(end, range.end); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/TFG/TypeFlowGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.TFG; 2 | 3 | import org.jgrapht.Graph; 4 | import org.jgrapht.alg.connectivity.ConnectivityInspector; 5 | import org.jgrapht.alg.connectivity.KosarajuStrongConnectivityInspector; 6 | import org.jgrapht.alg.interfaces.StrongConnectivityAlgorithm; 7 | import org.jgrapht.graph.DefaultDirectedGraph; 8 | import org.jgrapht.graph.DefaultEdge; 9 | import org.jgrapht.Graphs; 10 |
11 | import typeforge.base.dataflow.constraint.Skeleton; 12 | import typeforge.utils.Logging; 13 | 14 | import java.util.*; 15 | 16 | public class TypeFlowGraph<T> { 17 | public enum EdgeType { 18 | CALL, 19 | RETURN, 20 | DATAFLOW, 21 | ALIAS, 22 | } 23 | 24 | public static class TypeFlowEdge extends DefaultEdge { 25 | private final EdgeType type; 26 | 27 | public TypeFlowEdge(EdgeType type) { 28 | this.type = type; 29 | } 30 | 31 | public EdgeType getType() { 32 | return type; 33 | } 34 | 35 | @Override 36 | public String toString() { 37 | var source = this.getSource(); 38 | var target = this.getTarget(); 39 | return String.format("%s ---%s---> %s", source, type, target); 40 | } 41 | } 42 | 43 | private final Graph<T, TypeFlowEdge> graph; 44 | private final UUID uuid; 45 | private final String shortUUID; 46 | 47 | public TypeFlowPathManager<T> pathManager; 48 | public Skeleton finalSkeleton; 49 | 50 | public TypeFlowGraph() { 51 | graph = new DefaultDirectedGraph<>(TypeFlowEdge.class); 52 | uuid = UUID.randomUUID(); 53 | shortUUID = uuid.toString().substring(0, 8); 54 | 55 | pathManager = new TypeFlowPathManager<>(this); 56 | 57 | Logging.trace("TypeFlowGraph", String.format("Create TypeFlowGraph_%s", shortUUID)); 58 | } 59 | 60 | public String getShortUUID() { 61 | return shortUUID; 62 | } 63 | 64 | public void addEdge(T src, T dst, EdgeType edgeType) { 65 | graph.addVertex(src); 66 | graph.addVertex(dst); 67 | graph.addEdge(src, dst, new TypeFlowEdge(edgeType)); 68 | Logging.trace("TypeFlowGraph", String.format("TypeFlowGraph_%s Add edge: %s ---%s---> %s", shortUUID, src, edgeType, dst)); 69 | } 70 | 71 | public void removeEdge(T src, T dst) { 72 | graph.removeEdge(src, dst); 73 | Logging.debug("TypeFlowGraph", String.format("TypeFlowGraph_%s Remove edge: %s ---> %s", shortUUID, src, dst)); 74 | } 75 | 76 | public void removeNode(T node) { 77 | graph.removeVertex(node); 78 | Logging.trace("TypeFlowGraph", String.format("TypeFlowGraph_%s Remove node: %s", shortUUID, node)); 79 | } 80 | 81 | /** 82 | * A graph is invalid if it contains isolated nodes (nodes with no in- or out-edges). 83 | */ 84 | public boolean isValid() { 85 | // If there is only one node, it is valid 86 | if (graph.vertexSet().size() == 1) { 87 | return true; 88 | } else if (graph.vertexSet().isEmpty()) { 89 | Logging.error("TypeFlowGraph", 90 | String.format("Unexpected empty graph: %s", this)); 91 | return false; 92 | } else { 93 | boolean isValid = true; 94 | for (var node: getNodes()) { 95 | if (graph.inDegreeOf(node) == 0 && graph.outDegreeOf(node) == 0) { 96 | isValid = false; 97 | break; 98 | } 99 | } 100 | 101 | return isValid; 102 | } 103 | } 104 | 105 | public int getNumNodes() { 106 | return graph.vertexSet().size(); 107 | } 108 | 109 | public Set<T> getNodes() { 110 | return graph.vertexSet(); 111 | } 112 | 113 | public Set<TypeFlowEdge> getEdges() { 114 | return graph.edgeSet(); 115 | } 116 | 117 | public Graph<T, TypeFlowEdge> getGraph() { 118 | return graph; 119 | } 120 | 121 | public Set<T> getForwardNeighbors(T node) { 122 | var result = new HashSet<T>(); 123 | for (var edge: graph.outgoingEdgesOf(node)) { 124 | var target = graph.getEdgeTarget(edge); 125 | result.add(target); 126 | } 127 | return result; 128 | } 129 | 130 | public void mergeGraph(TypeFlowGraph<T> other) { 131 | for (T vertex: other.getNodes()) { 132 | graph.addVertex(vertex); 133 | } 134 | 135 | Set<TypeFlowEdge> edges = other.getGraph().edgeSet(); 136 | for (TypeFlowEdge edge: edges) { 137 | T src = other.getGraph().getEdgeSource(edge); 138 | T dst = other.getGraph().getEdgeTarget(edge); 139 | var edgeType = edge.getType(); 140 | 141 |
TypeFlowEdge existingEdge = graph.getEdge(src, dst); 142 | if (existingEdge == null) { 143 | graph.addEdge(src, dst, new TypeFlowEdge(edgeType)); 144 | } else if (existingEdge.getType() != edgeType) { 145 | Logging.warn("TypeFlowGraph", String.format("%s Merge conflict: %s ---> %s", other, src, dst)); 146 | } else { 147 | continue; 148 | } 149 | } 150 | 151 | Logging.trace("TypeFlowGraph", String.format("TypeFlowGraph_%s Merge with %s", shortUUID, other)); 152 | } 153 | 154 | 155 | public List<Set<T>> getConnectedComponents() { 156 | ConnectivityInspector<T, TypeFlowEdge> inspector = new ConnectivityInspector<>(graph); 157 | return inspector.connectedSets(); 158 | } 159 | 160 | public StrongConnectivityAlgorithm<T, TypeFlowEdge> getStrongConnectedComponentsAlg() { 161 | return new KosarajuStrongConnectivityInspector<>(graph); 162 | } 163 | 164 | public boolean rebuildPathManager() { 165 | if (getNumNodes() <= 1) { 166 | return false; 167 | } 168 | this.pathManager = new TypeFlowPathManager<>(this); 169 | this.pathManager.initialize(); 170 | return true; 171 | } 172 | 173 | public String toGraphviz() { 174 | StringBuilder builder = new StringBuilder(); 175 | builder.append("digraph TypeFlowGraph_").append(shortUUID).append(" {\n"); 176 | for (TypeFlowEdge edge : graph.edgeSet()) { 177 | T src = graph.getEdgeSource(edge); 178 | T dst = graph.getEdgeTarget(edge); 179 | builder.append(" \"").append(src).append("\" -> \"").append(dst) 180 | .append("\" [label=\"").append(edge.getType()).append("\"];\n"); 181 | } 182 | builder.append("}"); 183 | return builder.toString(); 184 | } 185 | 186 | /** 187 | * Write the partial TFG for a given NMAE node into one graphviz file. 188 | * @param node The node to dump the TFG for 189 | * @param maxDepth Max graph edge depth around the node 190 | */ 191 | public String toPartialGraphviz(T node, int maxDepth) { 192 | if (!graph.containsVertex(node)) { 193 | return "digraph Empty {\n}"; 194 | } 195 | 196 | Set<TypeFlowEdge> includedEdges = new HashSet<>(); 197 | 198 | // BFS to find nodes within maxDepth 199 | Map<T, Integer> distanceMap = new HashMap<>(); 200 | Queue<T> queue = new LinkedList<>(); 201 | 202 | // Start with the given node 203 | queue.add(node); 204 | distanceMap.put(node, 0); 205 | 206 | // Process outgoing edges (forward direction) 207 | while (!queue.isEmpty()) { 208 | T current = queue.poll(); 209 | int currentDistance = distanceMap.get(current); 210 | 211 | if (currentDistance < maxDepth) { 212 | // Process outgoing edges 213 | for (TypeFlowEdge edge : graph.outgoingEdgesOf(current)) { 214 | T target = graph.getEdgeTarget(edge); 215 | if (!distanceMap.containsKey(target) || distanceMap.get(target) > currentDistance + 1) { 216 | distanceMap.put(target, currentDistance + 1); 217 | includedEdges.add(edge); 218 | queue.add(target); 219 | } else { 220 | includedEdges.add(edge); 221 | } 222 | } 223 | } 224 | } 225 | 226 | // Reset for backward traversal 227 | queue.clear(); 228 | queue.add(node); 229 | Map<T, Integer> reverseDistanceMap = new HashMap<>(); 230 | reverseDistanceMap.put(node, 0); 231 | 232 | // Process incoming edges (backward direction) 233 | while (!queue.isEmpty()) { 234 | T current = queue.poll(); 235 | int currentDistance = reverseDistanceMap.get(current); 236 | 237 | if (currentDistance < maxDepth) { 238 | // Process incoming edges 239 | for (TypeFlowEdge edge : graph.incomingEdgesOf(current)) { 240 | T source = graph.getEdgeSource(edge); 241 | if (!reverseDistanceMap.containsKey(source) || reverseDistanceMap.get(source) > currentDistance + 1) { 242 | reverseDistanceMap.put(source, currentDistance + 1); 243
| includedEdges.add(edge); 244 | queue.add(source); 245 | } else { 246 | includedEdges.add(edge); 247 | } 248 | } 249 | } 250 | } 251 | 252 | // Generate graphviz representation 253 | StringBuilder builder = new StringBuilder(); 254 | builder.append("digraph Partial_TypeFlowGraph_").append(shortUUID).append(" {\n"); 255 | 256 | // Highlight the center node 257 | builder.append(" \"").append(node).append("\" [style=filled, fillcolor=lightblue];\n"); 258 | 259 | // Add all edges 260 | for (TypeFlowEdge edge : includedEdges) { 261 | T src = graph.getEdgeSource(edge); 262 | T dst = graph.getEdgeTarget(edge); 263 | builder.append(" \"").append(src).append("\" -> \"").append(dst) 264 | .append("\" [label=\"").append(edge.getType()).append("\"];\n"); 265 | } 266 | 267 | builder.append("}"); 268 | return builder.toString(); 269 | } 270 | 271 | public TypeFlowGraph<T> createCopy() { 272 | Logging.trace("TypeFlowGraph", "Create copy of " + this); 273 | TypeFlowGraph<T> copy = new TypeFlowGraph<>(); 274 | Graphs.addGraph(copy.graph, this.graph); 275 | return copy; 276 | } 277 | 278 | @Override 279 | public String toString() { 280 | return "TypeFlowGraph_" + shortUUID; 281 | } 282 | } 283 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/TFG/TypeFlowPath.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.TFG; 2 | import generic.stl.Pair; 3 | import typeforge.base.dataflow.expression.NMAE; 4 | import typeforge.base.dataflow.expression.NMAEManager; 5 | import typeforge.base.dataflow.constraint.Skeleton; 6 | import typeforge.utils.Logging; 7 | import org.jgrapht.GraphPath; 8 | 9 | import java.util.*; 10 | 11 | public class TypeFlowPath<T> { 12 | TypeFlowGraph<T> graph; 13 | public final UUID uuid = UUID.randomUUID(); 14 | public final String shortUUID = uuid.toString().substring(0, 8); 15 | public List<T> nodes; 16 | public List<TypeFlowGraph.TypeFlowEdge> edges; 17 | public Skeleton finalSkeletonOnPath = null; 18 | public boolean conflict = false; 19 | public TypeFlowGraph.TypeFlowEdge conflictEdge = null; 20 | public T start; 21 | public T end; 22 | public Set<TypeFlowGraph.TypeFlowEdge> evilEdges; 23 | 24 | /** 25 | * Map[SUB_PATH_LENGTH, Map[HASH_CODE, SUB_PATH_NODES]] 26 | */ 27 | public Map<Integer, Map<Integer, List<T>>> subPathsOfLengthWithHash = new HashMap<>(); 28 | 29 | public TypeFlowPath(TypeFlowGraph<T> graph, GraphPath<T, TypeFlowGraph.TypeFlowEdge> path) { 30 | this.graph = graph; 31 | // update nodes; 32 | this.nodes = path.getVertexList(); 33 | this.edges = path.getEdgeList(); 34 | 35 | this.start = nodes.get(0); 36 | this.end = nodes.get(nodes.size() - 1); 37 | this.evilEdges = new HashSet<>(); 38 | } 39 | 40 | public TypeFlowPath(TypeFlowGraph<T> graph, List<T> nodes, List<TypeFlowGraph.TypeFlowEdge> edges) { 41 | this.graph = graph; 42 | this.nodes = nodes; 43 | this.edges = edges; 44 | 45 | this.start = nodes.get(0); 46 | this.end = nodes.get(nodes.size() - 1); 47 | this.evilEdges = new HashSet<>(); 48 | } 49 | 50 | /** 51 | * Try to merge the skeletons of each node on the path, in the forward direction. 52 | * If all merges succeed without any conflict, return true and update finalSkeletonOnPath. 53 | * If any merge fails, return false and update evilEdges.
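 * <p>
 * Illustrative call pattern (a sketch; the driver code around it is hypothetical,
 * but the method and fields shown are the real ones declared in this class):
 * <pre>{@code
 * TypeFlowPath<NMAE> path = ...; // some path extracted from a TFG
 * if (!path.tryMergeLayoutForwardOnPath(exprManager)) {
 *     // path.conflict is now true, and path.conflictEdge (when non-null)
 *     // marks the edge where the layouts stopped being mergeable
 * } else {
 *     Skeleton merged = path.finalSkeletonOnPath;
 * }
 * }</pre>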
54 | * @param exprManager NMAE Manager 55 | * @return true if no conflict, false if conflict 56 | */ 57 | public boolean tryMergeLayoutForwardOnPath(NMAEManager exprManager) { 58 | Logging.debug("TypeFlowPath", String.format("Try merge by path: %s", this)); 59 | Skeleton mergedSkt = new Skeleton(); 60 | for (var i = 0; i < nodes.size(); i++) { 61 | var curNode = nodes.get(i); 62 | NMAE curExpr = (NMAE) curNode; 63 | // TODO: should we also merge current node's alias? 64 | var curExprSkt = exprManager.getSkeleton(curExpr); 65 | if (curExprSkt == null) { 66 | continue; 67 | } 68 | var success = mergedSkt.tryMergeLayoutStrict(curExprSkt); 69 | if (!success) { 70 | Logging.warn("TypeFlowPath", 71 | String.format("Layout Conflict when forward merging Skeletons on path for %s", curExpr)); 72 | Logging.warn("TypeFlowPath", 73 | String.format("Merged Skeleton: %s", mergedSkt.dumpLayout(2))); 74 | Logging.warn("TypeFlowPath", 75 | String.format("Current Skeleton: %s", curExprSkt.dumpLayout(2))); 76 | conflict = true; 77 | if (i > 0) { 78 | var prevNode = nodes.get(i - 1); 79 | conflictEdge = this.graph.getGraph().getEdge( 80 | prevNode, 81 | curNode 82 | ); 83 | Logging.warn("TypeFlowPath", 84 | String.format("Marked Layout Conflict Edge: %s", conflictEdge)); 85 | } 86 | return false; 87 | } 88 | } 89 | finalSkeletonOnPath = mergedSkt; 90 | return true; 91 | } 92 | 93 | 94 | public void findEvilEdges(int rightBoundIndex, int leftBoundIndex) { 95 | if (leftBoundIndex == -1) { 96 | Logging.warn("TypeAliasPath", "Cannot find leftBoundIndex when finding evil edges"); 97 | evilEdges.add(edges.get(rightBoundIndex - 1)); 98 | } 99 | else if (leftBoundIndex == rightBoundIndex) { 100 | Logging.debug("TypeAliasPath", "LB == RB"); 101 | evilEdges.add(edges.get(rightBoundIndex)); 102 | evilEdges.add(edges.get(rightBoundIndex - 1)); 103 | } 104 | else if (leftBoundIndex > rightBoundIndex) { 105 | Logging.debug("TypeAliasPath", "LB > RB"); 106 | evilEdges.add(edges.get(leftBoundIndex)); 107 | evilEdges.add(edges.get(rightBoundIndex - 1)); 108 | for (int i = rightBoundIndex; i < leftBoundIndex; i++) { 109 | evilEdges.add(edges.get(i)); 110 | } 111 | } 112 | /* leftBoundIndex < rightBoundIndex, this is what we expect */ 113 | else { 114 | Logging.debug("TypeAliasPath", "LB < RB"); 115 | for (int i = leftBoundIndex; i < rightBoundIndex; i++) { 116 | evilEdges.add(edges.get(i)); 117 | } 118 | } 119 | 120 | for (var edge: evilEdges) { 121 | Logging.debug("TypeAliasPath", String.format("Found Evil Edge: %s", edge)); 122 | } 123 | } 124 | 125 | 126 | public Set getConnectedEdges(T node) { 127 | var result = new HashSet(); 128 | var nodeIdx = nodes.indexOf(node); 129 | if (nodeIdx != -1) { 130 | if (nodeIdx > 0) { 131 | result.add(edges.get(nodeIdx - 1)); 132 | } 133 | if (nodeIdx < nodes.size() - 1) { 134 | result.add(edges.get(nodeIdx)); 135 | } 136 | } 137 | return result; 138 | } 139 | 140 | public void createSubPathsOfLength(int length) { 141 | if (length < 1) { 142 | return; 143 | } 144 | for (int i = 0; i < nodes.size() - length + 1; i++) { 145 | var subPathNodes = nodes.subList(i, i + length); 146 | var hash = getPathsHashCode(subPathNodes); 147 | if (!subPathsOfLengthWithHash.containsKey(length)) { 148 | subPathsOfLengthWithHash.put(length, new HashMap<>()); 149 | } 150 | if (!subPathsOfLengthWithHash.get(length).containsKey(hash)) { 151 | subPathsOfLengthWithHash.get(length).put(hash, subPathNodes); 152 | } 153 | } 154 | } 155 | 156 | public int getPathsHashCode(List path) { 157 | int hash = 0; 158 | for (var 
t : path) { 159 | hash = 31 * hash + t.hashCode(); 160 | } 161 | return hash; 162 | } 163 | 164 | @Override 165 | public int hashCode() { 166 | return edges.hashCode() + nodes.hashCode(); 167 | } 168 | 169 | @Override 170 | public boolean equals(Object obj) { 171 | if (this == obj) { 172 | return true; 173 | } 174 | if (obj == null || getClass() != obj.getClass()) { 175 | return false; 176 | } 177 | TypeFlowPath<?> other = (TypeFlowPath<?>) obj; 178 | return this.nodes.equals(other.nodes) && this.edges.equals(other.edges); // compare contents, not hash codes 179 | } 180 | 181 | @Override 182 | public String toString() { 183 | StringBuilder builder = new StringBuilder(); 184 | builder.append(String.format("Path-%s: ", shortUUID)); 185 | builder.append(nodes.get(0)); 186 | for (int i = 0; i < edges.size(); i++) { 187 | builder.append(String.format(" --- %s ---> ", edges.get(i).getType())); 188 | if (i + 1 < nodes.size()) { 189 | builder.append(nodes.get(i + 1)); 190 | } 191 | } 192 | return builder.toString(); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/UnionFind.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import java.util.HashMap; 4 | import java.util.HashSet; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | public class UnionFind<T> { 9 | private final Map<T, T> parent; 10 | private final Map<T, Integer> rank; 11 | 12 | public UnionFind() { 13 | this.parent = new HashMap<>(); 14 | this.rank = new HashMap<>(); 15 | } 16 | 17 | // Add a new element to the union-find structure 18 | public void add(T element) { 19 | if (!parent.containsKey(element)) { 20 | parent.put(element, element); 21 | rank.put(element, 0); 22 | } 23 | } 24 | 25 | // Find the root of the element with path compression 26 | public T find(T element) { 27 | if (!parent.containsKey(element)) { 28 | throw new IllegalArgumentException("Element not found in UnionFind structure"); 29 | } 30 | 31 | if (!parent.get(element).equals(element)) { 32 | parent.put(element, find(parent.get(element))); // Path compression 33 | } 34 | return parent.get(element); 35 | } 36 | 37 | // Union two elements by rank 38 | public void union(T element1, T element2) { 39 | T root1 = find(element1); 40 | T root2 = find(element2); 41 | 42 | if (!root1.equals(root2)) { 43 | int rank1 = rank.get(root1); 44 | int rank2 = rank.get(root2); 45 | 46 | if (rank1 > rank2) { 47 | parent.put(root2, root1); 48 | } else if (rank1 < rank2) { 49 | parent.put(root1, root2); 50 | } else { 51 | parent.put(root2, root1); 52 | rank.put(root1, rank1 + 1); 53 | } 54 | } 55 | } 56 | 57 | // Check if two elements are in the same set 58 | public boolean connected(T element1, T element2) { 59 | return find(element1).equals(find(element2)); 60 | } 61 | 62 | public boolean contains(T element) { 63 | return parent.containsKey(element); 64 | } 65 | 66 | public Set<T> getCluster(T element) { 67 | Set<T> cluster = new HashSet<>(); 68 | T root = find(element); 69 | for (T key : parent.keySet()) { 70 | if (find(key).equals(root)) { 71 | cluster.add(key); 72 | } 73 | } 74 | return cluster; 75 | } 76 | 77 | public Set<Set<T>> getClusters() { 78 | Map<T, Set<T>> clusters = new HashMap<>(); 79 | for (T element : parent.keySet()) { 80 | T root = find(element); 81 | clusters.computeIfAbsent(root, k -> new HashSet<>()).add(element); 82 | } 83 | return new HashSet<>(clusters.values()); 84 | } 85 | 86 | public void initializeWithCluster(Set<T> cluster) { 87 | T first = null; // remember the first element seen; every other element is unioned with it 88 | for (T element : cluster) { 89 | 
add(element); 90 | if (first == null) { 91 | first = element; 92 | } else { 93 | union(first, element); 94 | } 95 | } 96 | } 97 | } 98 | 99 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/constraint/SizeSource.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.constraint; 2 | 3 | import typeforge.base.dataflow.expression.NMAE; 4 | import typeforge.base.node.CallSite; 5 | import typeforge.utils.Logging; 6 | 7 | /** 8 | * Tracks the source of size information for a Skeleton. 9 | */ 10 | public class SizeSource { 11 | 12 | public enum SourceType { 13 | CALLSITE, // Size determined from a function call 14 | EXPRESSION // Size determined from an expression within a function 15 | } 16 | 17 | private final long size; 18 | private final SourceType sourceType; 19 | private final NMAE expression; // For expression sources only 20 | private final CallSite callSite; // For call site sources only 21 | 22 | /** 23 | * Creates a SizeSource from a function call 24 | */ 25 | public SizeSource(long size, CallSite callSite) { 26 | this.size = size; 27 | this.sourceType = SourceType.CALLSITE; 28 | this.callSite = callSite; 29 | this.expression = null; 30 | } 31 | 32 | /** 33 | * Creates a SizeSource from an expression 34 | */ 35 | public SizeSource(long size, NMAE expression) { 36 | this.size = size; 37 | this.sourceType = SourceType.EXPRESSION; 38 | this.expression = expression; 39 | this.callSite = null; 40 | } 41 | 42 | public long getSize() { 43 | return size; 44 | } 45 | 46 | public SourceType getSourceType() { 47 | return sourceType; 48 | } 49 | 50 | @Override 51 | public String toString() { 52 | if (sourceType == SourceType.CALLSITE) { 53 | return String.format("SizeSource{size=0x%x, callsite=%s}", 54 | size, callSite); 55 | } else { 56 | return String.format("SizeSource{size=0x%x, expr=%s}", 57 | size, expression); 58 | } 59 | } 60 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/expression/ParsedExpr.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.expression; 2 | 3 | import typeforge.utils.Logging; 4 | 5 | import java.util.Optional; 6 | 7 | public class ParsedExpr { 8 | public NMAE base = null; 9 | public NMAE offset = null; 10 | public NMAE index = null; 11 | public NMAE scale = null; 12 | public long offsetValue = 0; 13 | 14 | public static Optional parseFieldAccessExpr(NMAE expr) { 15 | ParsedExpr parsedExpr = new ParsedExpr(); 16 | 17 | if (expr.getNestedExpr().isDereference()) { 18 | parsedExpr.base = expr.getNestedExpr(); 19 | parsedExpr.offsetValue = 0L; 20 | } 21 | else if (expr.getNestedExpr().isRootSymExpr()) { 22 | parsedExpr.base = expr.getNestedExpr(); 23 | parsedExpr.offsetValue = 0L; 24 | } 25 | else if (expr.getNestedExpr().isReference()) { 26 | parsedExpr.base = expr.getNestedExpr(); 27 | parsedExpr.offsetValue = 0L; 28 | } 29 | else { 30 | parsedExpr.base = expr.getNestedExpr().getBase(); 31 | parsedExpr.offset = expr.getNestedExpr().getOffset(); 32 | parsedExpr.index = expr.getNestedExpr().getIndex(); 33 | parsedExpr.scale = expr.getNestedExpr().getScale(); 34 | 35 | if (parsedExpr.offset != null) { 36 | if (!parsedExpr.offset.isNormalConst()) { 37 | Logging.warn("ParsedExpr", String.format("Offset is not a constant: %s, Skipping...", expr)); 38 | return Optional.empty(); 39 | } else { 40 | 
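                // The offset expression is a plain constant: fold it into offsetValue
                // so downstream layout code can work with a concrete byte offset.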
parsedExpr.offsetValue = parsedExpr.offset.getConstant(); 41 | } 42 | } else { 43 | parsedExpr.offsetValue = 0L; 44 | } 45 | } 46 | 47 | return Optional.of(parsedExpr); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/solver/ExternalHandler.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.solver; 2 | 3 | import typeforge.base.dataflow.expression.NMAEManager; 4 | import typeforge.base.node.CallSite; 5 | import typeforge.utils.Logging; 6 | 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | 10 | /** 11 | * Handler for external function calls. 12 | */ 13 | public class ExternalHandler { 14 | 15 | /** 16 | * Base handler class for external functions 17 | */ 18 | public static abstract class Handler { 19 | /** 20 | * Process an external function call 21 | */ 22 | public abstract void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager); 23 | } 24 | 25 | public static class Malloc extends Handler { 26 | @Override 27 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 28 | var ptrExprs = intraSolver.getOrCreateDataFlowFacts(callSite.receiver); 29 | for (var expr: ptrExprs) { 30 | Logging.debug("ExternalHandler.Malloc", 31 | String.format("Set composite of skeleton: %s to true", expr)); 32 | var skeleton = exprManager.getOrCreateSkeleton(expr); 33 | skeleton.setComposite(true); 34 | 35 | var mallocSize = callSite.arguments.get(0); 36 | if (mallocSize.isConstant()) { 37 | skeleton.setSizeFromCallSite(mallocSize.getOffset(), callSite); 38 | Logging.debug("ExternalHandler.Malloc", 39 | String.format("(malloc) Set size of skeleton : %s to 0x%x", expr, callSite.arguments.get(0).getOffset())); 40 | } 41 | } 42 | } 43 | } 44 | 45 | public static class Calloc extends Handler { 46 | @Override 47 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 48 | var ptrExprs = intraSolver.getOrCreateDataFlowFacts(callSite.receiver); 49 | for (var expr: ptrExprs) { 50 | Logging.debug("ExternalHandler.Calloc", 51 | String.format("Set composite of skeleton: %s to true", expr)); 52 | var skeleton = exprManager.getOrCreateSkeleton(expr); 53 | skeleton.setComposite(true); 54 | 55 | var nmemblock = callSite.arguments.get(0); 56 | var memsize = callSite.arguments.get(1); 57 | if (nmemblock.isConstant() && memsize.isConstant()) { 58 | skeleton.setSizeFromCallSite(nmemblock.getOffset() * memsize.getOffset(), callSite); 59 | Logging.debug("ExternalHandler.Calloc", 60 | String.format("(calloc) Set size of skeleton: %s to 0x%x", expr, nmemblock.getOffset() * memsize.getOffset())); 61 | } 62 | } 63 | } 64 | } 65 | 66 | /** 67 | * Handler for memset function. 68 | * For memset-like functions, the first pointer argument is treated as a composite type. 69 | * Because in the vast majority of scenarios, memset is used to initialize composite types, 70 | * regardless of whether their length is a constant. 
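 * <p>
 * Dispatch sketch (the caller here is hypothetical; {@code handle} is the real
 * entry point defined at the bottom of this class):
 * <pre>{@code
 * // e.g. for a resolved call: memset(p, 0, 0x40)
 * ExternalHandler.handle(callSite, "memset", intraSolver, exprManager);
 * // every expression that may flow into `p` is now marked composite,
 * // and 0x40 is recorded as a size hint from this call site
 * }</pre>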
71 | */ 72 | public static class Memset extends Handler { 73 | @Override 74 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 75 | var lengthArg = callSite.arguments.get(2); 76 | 77 | var ptrExprs = intraSolver.getOrCreateDataFlowFacts(callSite.arguments.get(0)); 78 | for (var expr: ptrExprs) { 79 | Logging.debug("ExternalHandler.Memset", 80 | String.format("(memset) Set composite of skeleton: %s to true", expr)); 81 | var skeleton = exprManager.getOrCreateSkeleton(expr); 82 | skeleton.setComposite(true); 83 | 84 | if (lengthArg.isConstant()) { 85 | skeleton.setSizeFromCallSite(lengthArg.getOffset(), callSite); 86 | Logging.debug("ExternalHandler.Memset", 87 | String.format("(memset) Set size of skeleton: %s to %d", expr, lengthArg.getOffset())); 88 | } 89 | } 90 | } 91 | } 92 | 93 | /** 94 | * Handler for memcpy function. 95 | * For memcpy-like functions, the dst and src pointer arguments are treated as composite types 96 | * only if the length argument is a constant. 97 | * Because in other cases, the memcpy function is used to copy data from *char[] 98 | */ 99 | public static class Memcpy extends Handler { 100 | @Override 101 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 102 | var dstVn = callSite.arguments.get(0); 103 | var srcVn = callSite.arguments.get(1); 104 | var lengthVn = callSite.arguments.get(2); 105 | if (!intraSolver.isTracedVn(dstVn) || !intraSolver.isTracedVn(srcVn)) { 106 | return; 107 | } 108 | var dstExprs = intraSolver.getOrCreateDataFlowFacts(dstVn); 109 | var srcExprs = intraSolver.getOrCreateDataFlowFacts(srcVn); 110 | for (var dstExpr : dstExprs) { 111 | for (var srcExpr : srcExprs) { 112 | var dstSkt = exprManager.getOrCreateSkeleton(dstExpr); 113 | var srcSkt = exprManager.getOrCreateSkeleton(srcExpr); 114 | 115 | if (lengthVn.isConstant()) { 116 | dstSkt.setComposite(true); 117 | dstSkt.setSizeFromCallSite(lengthVn.getOffset(), callSite); 118 | srcSkt.setComposite(true); 119 | srcSkt.setSizeFromCallSite(lengthVn.getOffset(), callSite); 120 | Logging.debug("ExternalHandler.Memcpy", 121 | String.format("(memcpy) Set size and composite from %s -> %s with size %d", srcExpr, dstExpr, lengthVn.getOffset())); 122 | } 123 | } 124 | } 125 | } 126 | } 127 | 128 | // Map of function names to their handlers 129 | private static final Map HANDLERS = new HashMap<>(); 130 | 131 | static { 132 | HANDLERS.put("memset", new Memset()); 133 | HANDLERS.put("memcpy", new Memcpy()); 134 | HANDLERS.put("mempcpy", new Memcpy()); 135 | HANDLERS.put("malloc", new Malloc()); 136 | HANDLERS.put("calloc", new Calloc()); 137 | // `calloc` and `malloc` are always used for allocating heap buffer for composite types 138 | // while `realloc` is used for reallocating heap buffer for `char*` 139 | } 140 | 141 | /** 142 | * Handle an external function call 143 | */ 144 | public static void handle(CallSite callSite, String funcName, IntraSolver intraSolver, NMAEManager exprManager) { 145 | Handler handler = HANDLERS.get(funcName); 146 | if (handler != null) { 147 | handler.handle(callSite, intraSolver, exprManager); 148 | } 149 | } 150 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/solver/LayoutPropagator.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.solver; 2 | 3 | import typeforge.base.dataflow.TFG.TFGManager; 4 | import 
typeforge.base.dataflow.TFG.TypeFlowGraph; 5 | import typeforge.base.dataflow.constraint.Skeleton; 6 | import typeforge.base.dataflow.expression.NMAE; 7 | import typeforge.base.dataflow.expression.NMAEManager; 8 | import typeforge.utils.Logging; 9 | 10 | import java.util.LinkedList; 11 | import java.util.Queue; 12 | import java.util.Set; 13 | 14 | /** 15 | * Propagates layout information through the whole-program TFG and 16 | * uses the result to find the evil edges. 17 | */ 18 | public class LayoutPropagator { 19 | 20 | InterSolver interSolver; 21 | NMAEManager exprManager; 22 | TFGManager graphManager; 23 | 24 | public LayoutPropagator(InterSolver interSolver) { 25 | this.interSolver = interSolver; 26 | this.exprManager = interSolver.exprManager; 27 | this.graphManager = interSolver.graphManager; 28 | } 29 | 30 | public void run() { 31 | // Step 1: process all the TFGs in a first pass 32 | processAllGraphsFirstPass(); 33 | 34 | // Reorganize the TFGs 35 | graphManager.reOrganize(); 36 | 37 | // Step 2: process the conflicting graphs in the work list 38 | processConflictGraphs(); 39 | } 40 | 41 | private void processConflictGraphs() { 42 | // Step 2: iteratively process the conflicting graphs in the work list 43 | Queue<TypeFlowGraph<NMAE>> workList = new LinkedList<>(); 44 | 45 | for (var graph: graphManager.getGraphs()) { 46 | if (!graphManager.isProcessableGraph(graph)) { 47 | continue; 48 | } 49 | addToWorkListIfConflict(workList, graph); 50 | } 51 | 52 | while (!workList.isEmpty()) { 53 | TypeFlowGraph<NMAE> graph = workList.poll(); 54 | 55 | graph.pathManager.initialize(); 56 | var hasPathMergeConflict = graph.pathManager.tryMergeLayoutFormSamePathsForward(exprManager); 57 | var hasSourceMergeConflict = graph.pathManager.tryMergeLayoutFromSameSourceForward(exprManager); 58 | if (hasPathMergeConflict || hasSourceMergeConflict) { 59 | Logging.error("LayoutPropagator", 60 | "Should not have any merge conflict after the first pass in theory, please check the code."); 61 | } 62 | // The following propagation should, in theory, be redundant at this point 63 | var hasBFSConflict = graph.pathManager.propagateLayoutFromSourcesBFS(); 64 | if (hasBFSConflict) { 65 | Logging.error("LayoutPropagator", 66 | "Should not have any BFS conflict after the first pass in theory, please check the code."); 67 | } 68 | 69 | graph.pathManager.resolveMultiSourceConflicts(); 70 | /* remember to remove the evil edges related to multi-source conflicts */ 71 | for (var edge: graph.pathManager.evilEdgesInMultiSourceResolving) { 72 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 73 | } 74 | 75 | var newGraphs = graphManager.reOrganizeTFG(graph); 76 | for (var newGraph: newGraphs) { 77 | if (!graphManager.isProcessableGraph(newGraph)) { 78 | continue; 79 | } 80 | addToWorkListIfConflict(workList, newGraph); 81 | } 82 | } 83 | } 84 | 85 | private void processAllGraphsFirstPass() { 86 | // Step 1 87 | for (var graph: graphManager.getGraphs()) { 88 | Logging.debug("LayoutPropagator", String.format("*********************** Handle Graph %s ***********************", graph)); 89 | 90 | if (!graphManager.isProcessableGraph(graph)) { 91 | continue; 92 | } 93 | 94 | graph.pathManager.initialize(); 95 | graph.pathManager.tryMergeLayoutFormSamePathsForward(exprManager); 96 | graph.pathManager.tryMergeLayoutFromSameSourceForward(exprManager); 97 | 98 | // Remove evil edges found while aggregating layout information. 99 | // These edges include alias edges.
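                // For example, if two aliased expressions disagree on the field layout at
                // some offset, the alias edge between them was marked evil by the merges
                // above and is dropped here so it cannot pollute later propagation.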
100 | for (var edge: graph.pathManager.evilEdgesInPerPath) { 101 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 102 | } 103 | for (var edge: graph.pathManager.evilEdgesInSourceAggregate) { 104 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 105 | } 106 | /* Backward edges must be removed before BFS, 107 | as the previous merge was based on TFGPath, 108 | and the subsequent BFS will not involve path. */ 109 | for (var edge: graph.pathManager.backwardEdges) { 110 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 111 | } 112 | 113 | graph.pathManager.propagateLayoutFromSourcesBFS(); 114 | 115 | /* remember to remove the evil edges related to BFS */ 116 | for (var edge: graph.pathManager.evilEdgesInPropagateBFS) { 117 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 118 | } 119 | } 120 | } 121 | 122 | private void addToWorkListIfConflict(Queue> workList, TypeFlowGraph graph) { 123 | var connectedComponents = graph.getConnectedComponents(); 124 | if (connectedComponents.size() > 1) { 125 | Logging.error("LayoutPropagator", 126 | String.format("Now Each Graph should have only one connected component, but %d", connectedComponents.size())); 127 | System.exit(1); 128 | } 129 | 130 | var connects = connectedComponents.get(0); 131 | var success = graphManager.tryToMergeAllNodesSkeleton(graph, connects, exprManager); 132 | // IMPORTANT: If not success in merging, means some conflict nodes are not detected by previous propagateLayoutFromSourcesBFS. 133 | // This is because if the mergedSkeleton from different source has no intersection in their path, their conflicts will not be detected. 134 | // So we need to rebuild the path Manager there and detect them. 135 | if (!success) { 136 | workList.add(graph); 137 | Logging.info("LayoutPropagator", 138 | String.format("Graph: %s (%d) has been added into work list ...", graph, connects.size())); 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/graph/CallGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.graph; 2 | 3 | import typeforge.base.node.FunctionNode; 4 | import typeforge.base.node.NodeBase; 5 | import typeforge.utils.DecompilerHelper; 6 | import typeforge.utils.Global; 7 | import typeforge.utils.FunctionHelper; 8 | import typeforge.utils.Logging; 9 | 10 | import java.util.*; 11 | 12 | import ghidra.app.decompiler.DecompInterface; 13 | import ghidra.app.decompiler.DecompileResults; 14 | import ghidra.program.model.address.Address; 15 | import ghidra.program.model.listing.Function; 16 | import ghidra.program.model.pcode.HighFunction; 17 | import ghidra.util.task.TaskMonitor; 18 | 19 | public class CallGraph extends GraphBase { 20 | /** The cache of function nodes */ 21 | public final Set functionNodes = new HashSet<>(); 22 | 23 | /** The cache of root nodes to nodes */ 24 | public final Map> rootToNodes = new HashMap<>(); 25 | 26 | /** The cache of address to node */ 27 | public final Map addrToNode = new HashMap<>(); 28 | 29 | /** Possible root nodes of the call graph */ 30 | public Set roots; 31 | 32 | public Set leafNodes = new HashSet<>(); 33 | 34 | /** 35 | * Get the Whole Program's call graph. 
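 * <p>
 * Usage sketch (assumes a program has already been loaded into
 * {@code Global.currentProgram}):
 * <pre>{@code
 * CallGraph cg = CallGraph.getCallGraph();
 * cg.decompileAllFunctions();
 * for (FunctionNode leaf : cg.leafNodes) {
 *     // bottom-up analyses can start from the leaf functions
 * }
 * }</pre>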
36 | * We do not resolve indirect calls here, and we consider each function 37 | * without a caller as a root node of the call graph. So the whole program may 38 | * contain multiple root nodes in the call graph. 39 | * @return the whole-program CallGraph 40 | */ 41 | public static CallGraph getCallGraph() { 42 | Set<Function> possibleRoots = new HashSet<>(); 43 | 44 | for (var func : Global.currentProgram.getListing().getFunctions(true)) { 45 | // These functions should not be seen as nodes of a call graph 46 | if (!FunctionHelper.isMeaningfulFunction(func)) { 47 | continue; 48 | } 49 | 50 | // If the function does not have a caller, or it is the 'main' function, 51 | // it is a root node of the whole program's call graph. 52 | // WARNING: Ghidra's getCallingFunctions() may not work correctly, so we need to 53 | // check and complete the call graph manually. 54 | if (func.getCallingFunctions(TaskMonitor.DUMMY).isEmpty() || FunctionHelper.isMainFunction(func)) { 55 | possibleRoots.add(func); 56 | } else if (FunctionHelper.confirmNoDirectCaller(func)) { 57 | possibleRoots.add(func); 58 | } 59 | } 60 | 61 | Logging.info("CallGraph", String.format( 62 | "Found %d possible root nodes of the call graph", 63 | possibleRoots.size() 64 | )); 65 | Logging.info("CallGraph", possibleRoots.toString()); 66 | 67 | return new CallGraph(possibleRoots); 68 | } 69 | 70 | /** 71 | * Decompile each function in the CallGraph. 72 | * Failures are logged; successful results are discarded by this pass. 73 | */ 74 | public void decompileAllFunctions() { 75 | DecompInterface ifc = DecompilerHelper.setUpDecompiler(null); 76 | try { 77 | if (!ifc.openProgram(Global.currentProgram)) { 78 | Logging.error("CallGraph", "Failed to use the decompiler"); 79 | return; 80 | } 81 | 82 | for (var funcNode : functionNodes) { 83 | Function func = funcNode.value; 84 | HighFunction highFunc = null; 85 | DecompileResults decompileRes = ifc.decompileFunction(func, 30, TaskMonitor.DUMMY); 86 | if (!decompileRes.decompileCompleted()) { 87 | Logging.error("CallGraph", "Decompile failed for function " + func.getName()); 88 | continue; 89 | } else { 90 | Logging.debug("CallGraph", "Decompile function " + func.getName()); 91 | } 92 | } 93 | 94 | } finally { 95 | ifc.dispose(); 96 | } 97 | } 98 | 99 | 100 | /** 101 | * Create a call graph with the given root functions. 102 | * We do not use Ghidra's `getCalledFunctions()` API here to build the call graph, 103 | * because it may not work correctly. 104 | * @param possibleRoots the possible root nodes of the call graph 105 | */ 106 | private CallGraph(Set<Function> possibleRoots) { 107 | roots = new HashSet<>(possibleRoots); 108 | 109 | for (Function root : roots) { 110 | buildCallGraph(root); 111 | } 112 | 113 | 114 | // Update each FunctionNode's properties 115 | for (var funcNode : functionNodes) { 116 | if (funcNode.succ.isEmpty()) { 117 | funcNode.isLeaf = true; 118 | leafNodes.add(funcNode); 119 | } 120 | 121 | if (FunctionHelper.isMeaningfulFunction(funcNode.value)) { 122 | funcNode.isMeaningful = true; 123 | } 124 | 125 | if (FunctionHelper.isNormalFunction(funcNode.value)) { 126 | funcNode.isNormal = true; 127 | } 128 | 129 | if (funcNode.value.isExternal() || funcNode.value.isThunk()) { 130 | funcNode.isExternal = true; 131 | } 132 | 133 | addrToNode.put(funcNode.value.getEntryPoint(), funcNode); 134 | } 135 | } 136 | 137 | 138 | /** 139 | * Build the call graph with the given root function.
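 * <p>
 * Implementation note: instead of Ghidra's {@code getCalledFunctions()}, the body
 * below scans the CALL instructions of each visited function and follows their
 * resolved flow targets; indirect calls whose targets cannot be resolved produce
 * no flows and are skipped.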
140 | * @param root the root function of the call graph 141 | */ 142 | public void buildCallGraph(Function root) { 143 | LinkedList workList = new LinkedList<>(); 144 | Set visited = new HashSet<>(); 145 | var currentProgram = Global.currentProgram; 146 | 147 | workList.add(root); 148 | visited.add(root); 149 | 150 | while (!workList.isEmpty()) { 151 | Function cur = workList.remove(); 152 | var funcInsts = currentProgram.getListing().getInstructions(cur.getBody(), true); 153 | boolean hasIndirectCallee = false; 154 | for (var inst : funcInsts) { 155 | if (inst.getMnemonicString().equals("CALL")) { 156 | // If Call instruction is indirect that can't be resolved, flows will be empty 157 | var instFlows = inst.getFlows(); 158 | if (instFlows.length >= 1) { 159 | hasIndirectCallee = true; 160 | for (var flow : instFlows) { 161 | Function calledFunc = currentProgram.getFunctionManager().getFunctionAt(flow); 162 | if (calledFunc != null) { 163 | addEdge(cur, calledFunc); 164 | if (!visited.contains(calledFunc)) { 165 | visited.add(calledFunc); 166 | if (FunctionHelper.isMeaningfulFunction(calledFunc)) { 167 | workList.add(calledFunc); 168 | } 169 | } 170 | } else { 171 | Logging.error("CallGraph", "Function not found at " + flow); 172 | } 173 | } 174 | } else { 175 | Logging.trace("CallGraph", "Indirect call at " + inst.getAddress()); 176 | } 177 | } 178 | } 179 | 180 | if (!hasIndirectCallee) { 181 | getNode(cur); 182 | visited.add(cur); 183 | } 184 | } 185 | rootToNodes.put(root, visited); 186 | } 187 | 188 | 189 | @Override 190 | protected NodeBase createNode(Function value, int node_id) { 191 | FunctionNode funcNode = new FunctionNode(value, node_id); 192 | functionNodes.add(funcNode); 193 | return funcNode; 194 | } 195 | 196 | @Override 197 | public FunctionNode getNode(Function value) { 198 | return (FunctionNode) super.getNode(value); 199 | } 200 | 201 | public FunctionNode getNodebyAddr(Address addr) { 202 | return addrToNode.get(addr); 203 | } 204 | 205 | public Set getCallees(FunctionNode caller) { 206 | Set res = new HashSet<>(); 207 | for (var callee : caller.succ) { 208 | res.add((FunctionNode)callee); 209 | } 210 | return res; 211 | } 212 | 213 | public Set getCallers(FunctionNode callee) { 214 | Set res = new HashSet<>(); 215 | for (var caller : callee.pred) { 216 | res.add((FunctionNode)caller); 217 | } 218 | return res; 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/graph/GraphBase.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.graph; 2 | 3 | import typeforge.base.node.NodeBase; 4 | 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.LinkedList; 8 | import java.util.Map; 9 | import java.util.Set; 10 | 11 | public abstract class GraphBase { 12 | 13 | /** Graph's id */ 14 | protected int id = -1; 15 | 16 | /** Whether the graph has been changed */ 17 | protected boolean changed = false; 18 | 19 | /** Map from node's value to node */ 20 | private final Map> valueToNode = new HashMap<>(); 21 | 22 | /** Map from node's id to node's value */ 23 | protected final Map idToValueMap = new HashMap<>(); 24 | 25 | /** Map from node's value to node's id */ 26 | protected final Map valueToIdMap = new HashMap<>(); 27 | 28 | /** Node id in the graph */ 29 | public int node_id = 0; 30 | 31 | /** 32 | * An array of integers, where the indexes represent the id of each node and 33 | * the values are the depth-first 
numbering. 34 | */ 35 | protected int[] depthFirstNums = null; 36 | 37 | /** 38 | * Get a Node for the given value from the graph. 39 | * This may create a new node if needed. 40 | * @param value The node's value 41 | * @return the graph node. 42 | */ 43 | public NodeBase getNode(T value) { 44 | if (valueToNode.containsKey(value)) { 45 | return valueToNode.get(value); 46 | } 47 | 48 | NodeBase res = createNode(value, node_id); 49 | 50 | valueToNode.put(value, res); 51 | idToValueMap.put(node_id, value); 52 | valueToIdMap.put(value, node_id); 53 | node_id++; 54 | changed = true; 55 | return res; 56 | } 57 | 58 | /** 59 | * Create a graph edge with source and destination. 60 | * This also creates the graph node of the given parameters if needed. 61 | * @param from the source node's value 62 | * @param to the destination node's value 63 | */ 64 | public void addEdge(T from, T to) { 65 | NodeBase src = getNode(from); 66 | NodeBase dst = getNode(to); 67 | if (src.succ.contains(dst)) { 68 | changed = false; 69 | return; 70 | } 71 | src.succ.add(dst); 72 | dst.pred.add(src); 73 | changed = true; 74 | } 75 | 76 | /** 77 | * Delete a graph edge with source and destination. 78 | * @param from the source node's value 79 | * @param to the destination node's value 80 | */ 81 | public void deleteEdge(T from, T to) { 82 | NodeBase src = getNode(from); 83 | NodeBase dst = getNode(to); 84 | 85 | if (src.succ.remove(dst)) { 86 | changed = true; 87 | } 88 | if (dst.pred.remove(src)) { 89 | changed = true; 90 | } 91 | } 92 | 93 | /** 94 | * Return a list of the value's successors 95 | * @param value the node value 96 | * @return Return a list of the value's successors 97 | */ 98 | public Set getSuccs(T value) { 99 | NodeBase tmp = getNode(value); 100 | Set res = new HashSet<>(); 101 | for (NodeBase node : tmp.succ) { 102 | res.add(node.value); 103 | } 104 | return res; 105 | } 106 | 107 | /** 108 | * Return a list of the node's successors 109 | * @param node the node 110 | * @return Return a list of the node's successors 111 | */ 112 | public Set> getSuccNodes(NodeBase node) { 113 | return node.succ; 114 | } 115 | 116 | /** 117 | * Return a list of the value's predecessors 118 | * @param value the node value 119 | * @return Return a list of the value's predecessors 120 | */ 121 | public Set getPreds(T value) { 122 | NodeBase tmp = getNode(value); 123 | Set res = new HashSet<>(); 124 | for (NodeBase node : tmp.pred) { 125 | res.add(node.value); 126 | } 127 | return res; 128 | } 129 | 130 | /** 131 | * Return a list of the node's predecessors 132 | * @param node the node 133 | * @return Return a list of the node's predecessors 134 | */ 135 | public Set> getPredNodes(NodeBase node) { 136 | return node.pred; 137 | } 138 | 139 | 140 | /** 141 | * Get all nodes in the graph 142 | * @return a set of all nodes in the graph 143 | */ 144 | public Set> getAllNodes() { 145 | Set> res = new HashSet<>(); 146 | for (var entry : valueToNode.entrySet()) { 147 | res.add(entry.getValue()); 148 | } 149 | return res; 150 | } 151 | 152 | /** 153 | * Check if the graph has a path from src to dst 154 | * @param from The src node 155 | * @param to The dst node 156 | * @return True if it has a path from src to dst 157 | */ 158 | public boolean hasPath(T from, T to) { 159 | NodeBase src = getNode(from); 160 | NodeBase dst = getNode(to); 161 | if (src == null || dst == null) { 162 | return false; 163 | } 164 | 165 | LinkedList> workList = new LinkedList<>(); 166 | Set> visited = new HashSet<>(); 167 | workList.add(src); 168 | 
visited.add(src); 169 | while (!workList.isEmpty()) { 170 | var cur = workList.remove(); 171 | for (var succ : getSuccNodes(cur)) { 172 | if (succ == dst) { // reached the destination node 173 | return true; 174 | } 175 | if (visited.contains(succ)) { 176 | continue; 177 | } 178 | visited.add(succ); 179 | workList.add(succ); 180 | } 181 | } 182 | return false; 183 | } 184 | 185 | 186 | public int getNodeCount() { 187 | return node_id; 188 | } 189 | 190 | 191 | /** 192 | * Create a graph node with the given value. 193 | * @param value the node's value 194 | * @return the graph node 195 | */ 196 | protected abstract NodeBase<T> createNode(T value, int node_id); 197 | } 198 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/graph/SDGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.graph; 2 | 3 | import java.util.*; 4 | 5 | import typeforge.base.node.DataTypeNode; 6 | import typeforge.base.node.NodeBase; 7 | import typeforge.utils.Logging; 8 | import ghidra.program.model.data.*; 9 | import ghidra.program.model.data.Enum; 10 | 11 | /** 12 | * Structure Dependency Graph 13 | */ 14 | public class SDGraph extends GraphBase<DataType> { 15 | 16 | /** The cache of SDGraphs */ 17 | private static final Map<DataType, SDGraph> sdGraphCache = new HashMap<>(); 18 | 19 | /** 20 | * Get the SDGraph of the given data type. If the SDGraph does 21 | * not exist, a new one will be created. 22 | * @param root the root data type of the SDGraph 23 | * @return the SDGraph 24 | */ 25 | public static SDGraph getSDGraph(DataType root) { 26 | if (sdGraphCache.containsKey(root)) { 27 | return sdGraphCache.get(root); 28 | } 29 | SDGraph sdg = new SDGraph(root); 30 | sdGraphCache.put(root, sdg); 31 | return sdg; 32 | } 33 | 34 | /** 35 | * Create an SDGraph with the given root data type. 36 | * @param root the root data type 37 | */ 38 | private SDGraph(DataType root) { 39 | Logging.debug("SDGraph", root.toString()); 40 | if (!(root instanceof Structure st)) { 41 | Logging.error("SDGraph", "The root data type is not a structure"); 42 | return; 43 | } 44 | 45 | buildAll(st); 46 | } 47 | 48 | /** 49 | * Build the SDGraph for the given root, recursively. 50 | */ 51 | private void buildAll(Structure root) { 52 | LinkedList<DataTypeNode> workList = new LinkedList<>(); 53 | HashSet<DataTypeNode> visited = new HashSet<>(); 54 | 55 | DataTypeNode rootNode = (DataTypeNode) getNode(root); 56 | 57 | workList.add(rootNode); 58 | while (!workList.isEmpty()) { 59 | DataTypeNode cur = workList.poll(); 60 | if (cur.value instanceof Structure st) { 61 | handleStructureNode(cur, st, workList, visited); 62 | } else if (cur.value instanceof Array array) { 63 | throw new UnsupportedOperationException("Array is not supported yet"); 64 | } else if (cur.value instanceof Union union) { 65 | throw new UnsupportedOperationException("Union is not supported yet"); 66 | } else { 67 | throw new UnsupportedOperationException("Unsupported data type"); 68 | } 69 | } 70 | } 71 | 72 | 73 | /** 74 | * Traverse the fields of a structure node and try to build the SDGraph.
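 * <p>
 * For example, given {@code struct A { struct B b; struct C *c; }}, field
 * {@code b} yields a NESTED edge at offset 0 and field {@code c} yields a
 * REFERENCE edge at the pointer's offset, matching the example in
 * DataTypeNode's class comment.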
75 | * @param node the object of DataTypeNode 76 | * @param st the structure DataType 77 | * @param workList the worklist for building the SDGraph 78 | */ 79 | private void handleStructureNode(DataTypeNode node, Structure st, 80 | LinkedList workList, 81 | HashSet visited) 82 | { 83 | for (var dtc : st.getDefinedComponents()) { 84 | DataType fieldDT = dtc.getDataType(); 85 | 86 | if (fieldDT instanceof BuiltInDataType) { 87 | continue; 88 | 89 | } else if (fieldDT instanceof Pointer ptr) { 90 | // TODO: consider to handle multiple pointers? especially for ** 91 | // TODO: pointer should be handled differently from other types 92 | DataType pointedDT = ptr.getDataType(); 93 | if (pointedDT instanceof Structure pointedST) { 94 | Logging.debug("SDGraph", "Reference: " + fieldDT + " offset: " + dtc.getOffset()); 95 | DataTypeNode dstNode = (DataTypeNode) getNode(pointedST); 96 | addEdge(node, dstNode, EdgeType.REFERENCE, dtc.getOffset()); 97 | if (!visited.contains(dstNode)) { 98 | workList.add(dstNode); 99 | visited.add(dstNode); 100 | } 101 | } 102 | // TODO: handle other types of pointer 103 | 104 | } else if (fieldDT instanceof Array) { 105 | continue; 106 | 107 | } else if (fieldDT instanceof Structure fst) { 108 | DataTypeNode dstNode = (DataTypeNode) getNode(fst); 109 | Logging.debug("SDGraph", "Nested: " + fst.getName() + " offset: " + dtc.getOffset()); 110 | addEdge(node, dstNode, EdgeType.NESTED, dtc.getOffset()); 111 | if (!visited.contains(dstNode)) { 112 | workList.add(dstNode); 113 | visited.add(dstNode); 114 | } 115 | 116 | } else if (fieldDT instanceof Union) { 117 | continue; 118 | 119 | } else if (fieldDT instanceof FunctionDefinition) { 120 | continue; 121 | 122 | } else if (fieldDT instanceof Enum) { 123 | continue; 124 | 125 | } else if (fieldDT instanceof TypeDef) { 126 | continue; 127 | 128 | } else if (fieldDT instanceof BitFieldDataType) { 129 | continue; 130 | 131 | } else { 132 | Logging.error("SDGraph", "Unsupported data type: " + fieldDT); 133 | } 134 | } 135 | } 136 | 137 | public enum EdgeType { 138 | /** 139 | * SDG Graph has the following types of edges: 140 | * 1. Nested Edge: If a structure A contains a structure B, then there is a nested edge from A to B. 141 | * 2. Reference Edge: If a structure A contains a pointer to a structure B, then there is a reference edge from A to B. 142 | * 3. Union Edge: If a structure A contains a union B, then there is a union edge from A to B. 143 | * 4. Array Edge: If a structure A contains an array of other type B, then there is an array edge from A to B. 144 | * 5. FuncPtr Edge: If a structure A contains a function pointer to a function B, then there is a function edge from A to B. 145 | * 6. Enum Edge: If a structure A contains an enum B, then there is an enum edge from A to B. 146 | * 7. Normal Edge: If a structure A contains a primitive type pointer which points to B, then there is a normal edge from A to B. 
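 * <p>
 * Note: handleStructureNode above currently emits only NESTED and REFERENCE
 * edges; the other kinds are declared for the field categories (unions, arrays,
 * function pointers, enums, primitives) that are skipped there for now.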
147 | */ 148 | NESTED, REFERENCE, UNION, ARRAY, FUNC_PTR, ENUM, NORMAL 149 | } 150 | 151 | public static class SDEdge { 152 | public final DataTypeNode srcNode; 153 | public final DataTypeNode dstNode; 154 | public final EdgeType edgeType; 155 | public final int offset; 156 | 157 | public SDEdge(DataTypeNode srcNode, DataTypeNode dstNode, EdgeType edgeType, int offset) { 158 | this.srcNode = srcNode; 159 | this.dstNode = dstNode; 160 | this.edgeType = edgeType; 161 | this.offset = offset; 162 | } 163 | 164 | @Override 165 | public String toString() { 166 | return "SDEdge{" + 167 | "srcNode=" + srcNode + 168 | ", dstNode=" + dstNode + 169 | ", edgeType=" + edgeType + 170 | ", offset=" + offset + 171 | '}'; 172 | } 173 | } 174 | 175 | /** 176 | * Add an edge to the SDGraph. 177 | * @param srcNode the source node 178 | * @param dstNode the destination node 179 | * @param edge_type the type of the edge 180 | * @param offset the offset of the dependency 181 | */ 182 | public void addEdge(DataTypeNode srcNode, DataTypeNode dstNode, EdgeType edge_type, int offset) { 183 | if (srcNode.offsetToEdge.get(offset) != null) { 184 | Logging.warn("SDGraph", "The offset " + offset + " already exists in the srcNode"); 185 | if (srcNode.offsetToEdge.get(offset).dstNode != dstNode) { 186 | Logging.error("SDGraph", "The offset " + offset + " already exists in the srcNode, but the dstNode is different"); 187 | } 188 | return; 189 | } 190 | 191 | SDEdge edge = new SDEdge(srcNode, dstNode, edge_type, offset); 192 | srcNode.edges.add(edge); 193 | srcNode.offsetToEdge.put(offset, edge); 194 | } 195 | 196 | /** 197 | * Collect all edges recorded on the DataTypeNodes of this graph. 198 | * @return a Set of edges 199 | */ 200 | public Set<SDEdge> getAllEdges() { 201 | Set<NodeBase<DataType>> allNodes = getAllNodes(); 202 | Set<SDEdge> allEdges = new HashSet<>(); 203 | 204 | for (NodeBase<DataType> node : allNodes) { 205 | if (node instanceof DataTypeNode dtNode) { 206 | allEdges.addAll(dtNode.edges); 207 | } 208 | } 209 | 210 | return allEdges; 211 | } 212 | 213 | @Override 214 | protected NodeBase<DataType> createNode(DataType value, int node_id) { 215 | return new DataTypeNode(value, node_id); 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/node/CallSite.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.node; 2 | 3 | import ghidra.program.model.address.Address; 4 | import ghidra.program.model.listing.Function; 5 | import ghidra.program.model.pcode.PcodeOp; 6 | import ghidra.program.model.pcode.Varnode; 7 | 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | public class CallSite { 13 | public Function caller; 14 | public Address calleeAddr; 15 | public PcodeOp callOp; 16 | public List<Varnode> arguments; 17 | public Varnode receiver; 18 | private boolean hasReceiver = false; 19 | 20 | public CallSite(Function caller, Address calleeAddr, PcodeOp callOp) { 21 | this.caller = caller; 22 | this.calleeAddr = calleeAddr; 23 | this.callOp = callOp; 24 | this.arguments = new ArrayList<>(); 25 | for (int i = 1; i < callOp.getNumInputs(); i++) { 26 | arguments.add(callOp.getInput(i)); 27 | } 28 | 29 | receiver = callOp.getOutput(); 30 | if (receiver != null) { 31 | hasReceiver = true; 32 | } 33 | } 34 | 35 | public boolean hasReceiver() { 36 | return hasReceiver; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | // It's really hard for Ghidra to get the
asm addr from pcode, so we use the BasicBlock addr instead. 42 | return String.format( 43 | "CallSite{BBAddr=%s}", 44 | callOp.getParent().getStart().toString() 45 | ); 46 | } 47 | 48 | @Override 49 | public int hashCode() { 50 | return caller.hashCode() * 31 + calleeAddr.hashCode() * 17 + callOp.hashCode(); 51 | } 52 | 53 | @Override 54 | public boolean equals(Object obj) { 55 | if (obj == this) { 56 | return true; 57 | } 58 | if (!(obj instanceof CallSite other)) { 59 | return false; 60 | } 61 | return this.caller.equals(other.caller) && 62 | this.calleeAddr.equals(other.calleeAddr) && 63 | this.callOp.equals(other.callOp); 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/node/DataTypeNode.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.node; 2 | 3 | import typeforge.base.graph.SDGraph; 4 | import ghidra.program.model.data.DataType; 5 | import ghidra.program.model.data.DataTypeComponent; 6 | import ghidra.program.model.data.Structure; 7 | 8 | import java.util.*; 9 | 10 | import typeforge.utils.Logging; 11 | 12 | /** 13 | * In Structure Dependency Graph, each node has multiple edges to other nodes. 14 | * For example: 15 | * struct A { 16 | * struct B b_1; // struct B's size is 8 bytes 17 | * struct *C c_1; 18 | * struct *C c_2; 19 | * } 20 | * The above structure A has 3 edges: 21 | * 1. A -- Nested -- offset 0 --> B 22 | * 2. A -- Reference -- offset 8 --> C 23 | * 3. A -- Reference -- offset 12 --> C 24 | */ 25 | public class DataTypeNode extends NodeBase{ 26 | 27 | /** The map from field offset to field */ 28 | public final Map fieldMap = new HashMap<>(); 29 | 30 | /** The edges of the node */ 31 | public Set edges = new HashSet<>(); 32 | 33 | /** The HashMap of offset to edge */ 34 | public Map offsetToEdge = new HashMap<>(); 35 | 36 | public DataTypeNode(DataType value, int id) { 37 | super(value, id); 38 | Logging.debug("DataTypeNode", "Creating DataTypeNode with value: " + value.getName()); 39 | 40 | if (value instanceof Structure st) { 41 | fillFieldMap(st); 42 | } 43 | 44 | } 45 | 46 | private void fillFieldMap(Structure st) { 47 | for (var dtc : st.getDefinedComponents()) { 48 | fieldMap.put(dtc.getOffset(), dtc); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/node/NodeBase.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.node; 2 | 3 | import java.util.HashSet; 4 | import java.util.Set; 5 | 6 | public abstract class NodeBase { 7 | public final T value; 8 | public int id; 9 | 10 | /** The pred of this node */ 11 | public final Set> pred = new HashSet<>(); 12 | 13 | /** The succ of this node */ 14 | public final Set> succ = new HashSet<>(); 15 | 16 | /** Create a node from the given parameter */ 17 | public NodeBase(T value, int id) { 18 | this.value = value; 19 | this.id = id; 20 | } 21 | 22 | @Override 23 | public int hashCode() { 24 | return value != null ? 
value.hashCode() : 0; 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/parallel/PrepareFunctionNodeCallback.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.parallel; 2 | 3 | import ghidra.app.decompiler.DecompileResults; 4 | import ghidra.app.decompiler.parallel.DecompileConfigurer; 5 | import ghidra.app.decompiler.parallel.DecompilerCallback; 6 | import ghidra.program.model.address.Address; 7 | import ghidra.program.model.listing.Program; 8 | import ghidra.util.task.TaskMonitor; 9 | import typeforge.base.node.FunctionNode; 10 | import typeforge.utils.Logging; 11 | 12 | import java.util.HashMap; 13 | 14 | /** 15 | * Callback for parallel decompile, used for initializing function node 16 | */ 17 | public class PrepareFunctionNodeCallback extends DecompilerCallback { 18 | 19 | public HashMap addrToFuncNode; 20 | public int decompileCount = 0; 21 | 22 | public PrepareFunctionNodeCallback(Program program, 23 | DecompileConfigurer configurer, 24 | HashMap addrToFuncNode) { 25 | super(program, configurer); 26 | this.addrToFuncNode = addrToFuncNode; 27 | } 28 | 29 | @Override 30 | public Void process(DecompileResults decompileResults, TaskMonitor taskMonitor) throws Exception { 31 | var addr = decompileResults.getFunction().getEntryPoint(); 32 | var funcNode = addrToFuncNode.get(addr); 33 | 34 | if (!decompileResults.decompileCompleted()) { 35 | Logging.error("PrepareFunctionNodeCallback", 36 | "Function %s decompiled failed".formatted(funcNode.value.getName())); 37 | funcNode.isDecompiled = false; 38 | return null; 39 | } 40 | 41 | decompileCount += 1; 42 | funcNode.isDecompiled = true; 43 | funcNode.updateDecompileResult(decompileResults); 44 | 45 | if (!funcNode.initialize()) { 46 | Logging.error("PrepareFunctionNodeCallback", 47 | "Function %s initialization failed".formatted(funcNode.value.getName())); 48 | } 49 | 50 | return null; 51 | } 52 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/passes/SlidingWindowProcessor.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.passes; 2 | 3 | import typeforge.base.dataflow.constraint.TypeConstraint; 4 | import typeforge.utils.Global; 5 | import typeforge.utils.Logging; 6 | 7 | import java.util.*; 8 | 9 | public class SlidingWindowProcessor { 10 | public final TypeConstraint constraint; 11 | public final List offsetList; 12 | 13 | private int windowCapacity; 14 | private int flattenCnt; 15 | 16 | public SlidingWindowProcessor(TypeConstraint constraint, List offsetList, int initialWindowCapacity) { 17 | this.constraint = constraint; 18 | this.offsetList = offsetList; 19 | this.windowCapacity = initialWindowCapacity; 20 | } 21 | 22 | public Optional tryMatchingFromCurrentOffset(int curOffsetIndex, final int threshold) { 23 | Optional windowOpt = getWindowAtOffset(curOffsetIndex); 24 | if (windowOpt.isEmpty()) { 25 | return Optional.empty(); 26 | } 27 | 28 | var window = windowOpt.get(); 29 | int matchCount = 1; 30 | int alignedWindowSize = window.getAlignedWindowSize(); 31 | long prevWindowStartOffset = offsetList.get(curOffsetIndex); 32 | var prevWindow = window; 33 | 34 | for (int i = curOffsetIndex + windowCapacity; i < offsetList.size(); i += windowCapacity) { 35 | Optional candidateWindowOpt = getWindowAtOffset(i); 36 | if (candidateWindowOpt.isEmpty()) { 37 | break; 38 | } 39 | 40 
| var candidateWindow = candidateWindowOpt.get(); 41 | if (window.equals(candidateWindow)) { 42 | if ((offsetList.get(i) - prevWindowStartOffset) == alignedWindowSize) { 43 | matchCount++; 44 | prevWindowStartOffset = offsetList.get(i); 45 | prevWindow = candidateWindow; 46 | } else { 47 | Logging.debug("SlidingWindowProcessor", "Window equal but not contiguous of Skeleton " + constraint); 48 | Logging.debug("SlidingWindowProcessor", 49 | String.format("Previous Window:\nStart: 0x%x\n%s", prevWindowStartOffset, prevWindow)); 50 | Logging.debug("SlidingWindowProcessor", 51 | String.format("Current Window:\nStart: 0x%x\n%s", offsetList.get(i), candidateWindow)); 52 | break; 53 | } 54 | } else { 55 | break; 56 | } 57 | } 58 | 59 | if (matchCount >= threshold) { 60 | flattenCnt = matchCount; 61 | return Optional.of(window); 62 | } else { 63 | return Optional.empty(); 64 | } 65 | } 66 | 67 | public void setWindowCapacity(int newWindowCapacity) { 68 | this.windowCapacity = newWindowCapacity; 69 | } 70 | 71 | public void resetFlattenCnt() { 72 | flattenCnt = 0; 73 | } 74 | 75 | public int getFlattenCount() { 76 | return flattenCnt; 77 | } 78 | 79 | private Optional getWindowAtOffset(int startIndex) { 80 | if (startIndex + windowCapacity > offsetList.size()) { 81 | return Optional.empty(); 82 | } 83 | 84 | var startOffset = offsetList.get(startIndex); 85 | 86 | /* We don't consider windows with only one element if the element is a pointer */ 87 | if (windowCapacity == 1 && 88 | (constraint.innerSkeleton.fieldAccess.get(startOffset).mostAccessedDT.getLength() == Global.currentProgram.getDefaultPointerSize())) { 89 | return Optional.empty(); 90 | } 91 | 92 | var window = new Window(); 93 | 94 | long prevOffset = -1; 95 | 96 | for (int i = 0; i < windowCapacity; i++) { 97 | var currentOffset = offsetList.get(startIndex + i); 98 | if (constraint.isInconsistentOffset(currentOffset)) { 99 | return Optional.empty(); 100 | } 101 | if (constraint.hasFinalNestedConstraint() && constraint.isInNestedRange(currentOffset)) { 102 | return Optional.empty(); 103 | } 104 | 105 | Object element = null; 106 | if (constraint.finalPtrReference.containsKey(currentOffset)) { 107 | element = constraint.finalPtrReference.get(currentOffset); 108 | } else { 109 | element = constraint.innerSkeleton.fieldAccess.get(currentOffset); 110 | } 111 | 112 | var relativeOffset = currentOffset.intValue() - startOffset.intValue(); 113 | window.addElement(relativeOffset, element); 114 | if (element instanceof TypeConstraint) { 115 | window.addPtrLevel(relativeOffset, constraint.ptrLevel.get(currentOffset) != null ? 
constraint.ptrLevel.get(currentOffset) : 1); 116 | } 117 | 118 | prevOffset = currentOffset; 119 | } 120 | 121 | /* Check if all the elements in the window are of the same type (excluded capacity 1) */ 122 | if (window.isHomogeneous() || (!window.isContiguous())) { 123 | return Optional.empty(); 124 | } 125 | 126 | return Optional.of(window); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/passes/Window.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.passes; 2 | 3 | import typeforge.base.dataflow.AccessPoints; 4 | import typeforge.base.dataflow.constraint.TypeConstraint; 5 | import typeforge.utils.DataTypeHelper; 6 | import typeforge.utils.Global; 7 | import ghidra.program.model.data.DataType; 8 | 9 | import java.util.Map; 10 | import java.util.TreeMap; 11 | 12 | public class Window { 13 | private final Map windowElements; 14 | private int windowSize; 15 | private final Map ptrLevel; 16 | 17 | public Window() { 18 | this.windowElements = new TreeMap<>(); 19 | this.windowSize = 0; 20 | this.ptrLevel = new TreeMap<>(); 21 | } 22 | 23 | public void addElement(int offset, Object element) { 24 | windowElements.put(offset, element); 25 | } 26 | 27 | public void addPtrLevel(int offset, int level) { 28 | ptrLevel.put(offset, level); 29 | } 30 | 31 | /** 32 | * Get the Aligned Window's Size 33 | * @return aligned window's size 34 | */ 35 | public int getAlignedWindowSize() { 36 | if (windowSize != 0) { 37 | return windowSize; 38 | } 39 | 40 | long totalSize = 0; 41 | long maxAlignSize = 1; 42 | for (var element: windowElements.values()) { 43 | long fieldSize; 44 | long fieldAlignSize = 1; 45 | if (element instanceof TypeConstraint) { 46 | fieldSize = Global.currentProgram.getDefaultPointerSize(); 47 | fieldAlignSize = fieldSize; 48 | } 49 | else { 50 | fieldSize = ((AccessPoints.APSet) element).mostAccessedDT.getLength(); 51 | fieldAlignSize = ((AccessPoints.APSet) element).mostAccessedDT.getAlignment(); 52 | } 53 | 54 | if (totalSize % fieldAlignSize != 0) { 55 | totalSize += fieldAlignSize - (totalSize % fieldAlignSize); 56 | } 57 | 58 | totalSize += fieldSize; 59 | if (fieldAlignSize > maxAlignSize) { 60 | maxAlignSize = fieldAlignSize; 61 | } 62 | } 63 | 64 | if (totalSize % maxAlignSize != 0) { 65 | totalSize += maxAlignSize - (totalSize % maxAlignSize); 66 | } 67 | 68 | windowSize = (int) totalSize; 69 | return windowSize; 70 | } 71 | 72 | public DataType getWindowDT() { 73 | if (windowElements.size() == 1) { 74 | var element = windowElements.get(0); 75 | if (element instanceof AccessPoints.APSet apSet) { 76 | return apSet.mostAccessedDT; 77 | } else { 78 | return DataTypeHelper.getDataTypeByName("void"); 79 | } 80 | } 81 | else if (windowElements.size() > 1) { 82 | return DataTypeHelper.createAnonStructureFromWindow(this); 83 | } else { 84 | return null; 85 | } 86 | } 87 | 88 | public Map getWindowElements() { 89 | return windowElements; 90 | } 91 | 92 | public Map getPtrLevel() { 93 | return ptrLevel; 94 | } 95 | 96 | public boolean isContiguous() { 97 | if (windowElements.size() == 1) { 98 | return true; 99 | } 100 | 101 | int previousEndOffset = 0; 102 | /* Check if the window is contiguous by element's aligned size */ 103 | for (var entry: windowElements.entrySet()) { 104 | int offset = entry.getKey(); 105 | Object element = entry.getValue(); 106 | int fieldSize; 107 | int fieldAlignSize = 1; 108 | 109 | if (element instanceof 
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/FunctionHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.program.model.address.Address;
4 | import ghidra.program.model.listing.Function;
5 | import ghidra.util.task.TaskMonitor;
6 |
7 | import java.util.HashSet;
8 | import java.util.Set;
9 |
10 | public class FunctionHelper {
11 |
12 | /**
13 | * Check if the function is the entry (main) function.
14 | * @param func the function to check
15 | * @return true if the function is the main function
16 | */
17 | public static boolean isMainFunction(Function func) {
18 | if (func.getName().equals("main")) {
19 | return true;
20 | }
21 | // if stripped, the caller function is _start
22 | if (isNormalFunction(func)) {
23 | var callers = func.getCallingFunctions(TaskMonitor.DUMMY);
24 | for (var caller : callers) {
25 | if (caller.getName().equals("_start")) {
26 | return true;
27 | }
28 | }
29 | }
30 | return false;
31 | }
32 |
33 | /**
34 | * Check if the function is a normal function, i.e. neither external nor a thunk.
35 | * @param func the function to check
36 | * @return true if the function is normal
37 | */
38 | public static boolean isNormalFunction(Function func) {
39 | return !func.isExternal() && !func.isThunk();
40 | }
41 |
42 | /**
43 | * Check if the function is a trivial function, which should not be seen
44 | * as a root node of a call graph.
45 | * @param func the Function to check
46 | * @return true if the Function is trivial
47 | */
48 | public static boolean isTrivialFunction(Function func) {
49 | Set<String> forbiddenName = Set.of("_init", "_start", "_fini", "__do_global_dtors_aux",
50 | "frame_dummy", "deregister_tm_clones", "register_tm_clones", "ck_assert_failed");
51 | return forbiddenName.contains(func.getName());
52 | }
53 |
54 | /**
55 | * Check if the function is a meaningful function.
56 | * @param func the function to check
57 | * @return true if the function is meaningful
58 | */
59 | public static boolean isMeaningfulFunction(Function func) {
60 | return isNormalFunction(func) && !isTrivialFunction(func);
61 | }
62 |
63 | /**
64 | * Get all meaningful functions in the current program.
65 | * A meaningful function is a normal function which is not trivial.
66 | * @return the set of meaningful functions
67 | */
68 | public static Set<Function> getMeaningfulFunctions() {
69 | Set<Function> meaningfulFunctions = new HashSet<>();
70 | for (var func : Global.currentProgram.getListing().getFunctions(true)) {
71 | if (isMeaningfulFunction(func)) {
72 | meaningfulFunctions.add(func);
73 | }
74 | }
75 | return meaningfulFunctions;
76 | }
77 |
78 |
79 | /**
80 | * This is a stupid function, but we have to do this,
81 | * because Ghidra's `getCallingFunctions()` and `getCalledFunctions()` may not work correctly.
82 | * For example:
83 | * if function B is never called by function A, but B's function pointer is used inside A,
84 | * Ghidra will still report A as a caller of B via `getCallingFunctions()`, and B as a
85 | * callee of A via `getCalledFunctions()`.
86 | * <p>
87 | * So a function can be a genuine root node yet fail the `getCallingFunctions().isEmpty()` check.
88 | * We need to re-check such functions to complete the set of root nodes.
89 | *
90 | * @return true if the function has no direct caller in the whole program
91 | */
92 | public static boolean confirmNoDirectCaller(Function func) {
93 | boolean noCaller = true;
94 |
95 | for (var caller : func.getCallingFunctions(TaskMonitor.DUMMY)) {
96 | var callerInsts = Global.currentProgram.getListing().getInstructions(caller.getBody(), true);
97 | for (var inst : callerInsts) {
98 | if (inst.getMnemonicString().equals("CALL")) {
99 | var instFlows = inst.getFlows();
100 | if (instFlows.length >= 1) {
101 | for (var flow : instFlows) {
102 | Function calledFunc = Global.currentProgram.getFunctionManager().getFunctionAt(flow);
103 | if (calledFunc != null && calledFunc.equals(func)) {
104 | noCaller = false;
105 | return noCaller;
106 | }
107 | }
108 | }
109 | }
110 | }
111 | }
112 |
113 | return noCaller;
114 | }
115 |
116 |
117 | public static Address getAddress(long offset) {
118 | return Global.currentProgram.getAddressFactory().getDefaultAddressSpace().getAddress(offset);
119 | }
120 |
121 | public static Function getFunction(long offset) {
122 | return Global.currentProgram.getFunctionManager().getFunctionAt(getAddress(offset));
123 | }
124 | }
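
A sketch of how these helpers could combine when collecting call-graph roots (hypothetical driver code, not in the repository; it assumes Global.currentProgram has been initialized):

// Hypothetical helper built on FunctionHelper; not part of the repository.
public static Set<Function> collectCallGraphRoots() {
    Set<Function> roots = new HashSet<>();
    for (Function func : FunctionHelper.getMeaningfulFunctions()) {
        // getCallingFunctions() over-approximates for address-taken functions,
        // so confirmNoDirectCaller() re-checks the actual CALL instructions.
        if (func.getCallingFunctions(TaskMonitor.DUMMY).isEmpty()
                || FunctionHelper.confirmNoDirectCaller(func)) {
            roots.add(func);
        }
    }
    return roots;
}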
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/Global.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.app.script.GhidraScript;
4 | import ghidra.program.model.listing.Program;
5 | import ghidra.program.flatapi.FlatProgramAPI;
6 | /**
7 | * The global state of the current analysis.
8 | */
9 | public class Global {
10 | public static Program currentProgram;
11 | public static FlatProgramAPI flatAPI;
12 | public static GhidraScript ghidraScript;
13 | public static String outputDirectory;
14 | public static long startAddress;
15 |
16 | public static long typeAnalysisBeginTime;
17 | public static long typeAnalysisEndTime;
18 | public static long retypingBeginTime;
19 | public static long retypingEndTime;
20 | public static long prepareAnalysisBeginTime;
21 | public static long prepareAnalysisEndTime;
22 | }
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/GraphHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import typeforge.base.node.DataTypeNode;
4 | import typeforge.base.node.NodeBase;
5 | import typeforge.base.graph.SDGraph;
6 | import ghidra.program.model.data.DataType;
7 |
8 | import java.io.BufferedWriter;
9 | import java.io.IOException;
10 | import java.nio.file.Files;
11 | import java.nio.file.Paths;
12 | import java.util.Set;
13 |
14 | public class GraphHelper {
15 | /**
16 | * Dump the SDGraph to a DOT file.
17 | */
18 | public static void dumpSDGraph(SDGraph sdg, String filename) {
19 | StringBuilder dotBuilder = new StringBuilder();
20 |
21 | Set<NodeBase<DataType>> allNodes = sdg.getAllNodes();
22 | var allEdges = sdg.getAllEdges();
23 |
24 | dotBuilder.append("digraph SDGraph {\n");
25 |
26 | // traverse all nodes
27 | for (var node : allNodes) {
28 | if (node instanceof DataTypeNode dtn) {
29 | String nodeID = "node" + dtn.id;
30 | String nodeLabel = dtn.value.getName();
31 | dotBuilder.append(
32 | String.format(
33 | "%s [label=\"%s\"];\n",
34 | nodeID,
35 | nodeLabel
36 | )
37 | );
38 | }
39 | }
40 |
41 | // traverse all edges
42 | for (var edge : allEdges) {
43 | String srcNodeID = "node" + edge.srcNode.id;
44 | String dstNodeID = "node" + edge.dstNode.id;
45 | String edgeType = edge.edgeType.toString();
46 | String edgeLabel = String.format("Offset %s: %s", Integer.toHexString(edge.offset), edgeType);
47 | dotBuilder.append(
48 | String.format(
49 | "%s -> %s [label=\"%s\"];\n",
50 | srcNodeID,
51 | dstNodeID,
52 | edgeLabel
53 | )
54 | );
55 | }
56 |
57 | dotBuilder.append("}\n");
58 |
59 | try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(filename))) {
60 | writer.write(dotBuilder.toString());
61 | } catch (IOException e) {
62 | Logging.error("GraphHelper", "Failed to write to file: " + filename);
63 | }
64 | }
65 | }
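
For reference, running dumpSDGraph on a two-node graph would emit DOT text of the following shape (the node ids, labels, and edge-type name below are hypothetical):

digraph SDGraph {
node0 [label="astruct_1"];
node1 [label="char *"];
node0 -> node1 [label="Offset 8: POINTER"];
}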
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/HighSymbolHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.program.model.address.Address;
4 | import ghidra.program.model.pcode.HighSymbol;
5 |
6 | public class HighSymbolHelper {
7 |
8 | public static Address getGlobalHighSymbolAddr(HighSymbol globalSym) {
9 | assert globalSym.isGlobal();
10 | return globalSym.getStorage().getMinAddress();
11 | }
12 | }
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/Logging.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 |
6 | import org.apache.logging.log4j.LogManager;
7 | import org.apache.logging.log4j.Logger;
8 | import org.apache.logging.log4j.core.LoggerContext;
9 | import org.apache.logging.log4j.core.config.Configuration;
10 | import org.apache.logging.log4j.core.config.ConfigurationSource;
11 | import org.apache.logging.log4j.core.config.xml.XmlConfiguration;
12 |
13 | /**
14 | * Logging class.
15 | */
16 | public class Logging {
17 |
18 | private static final String DEFAULT_LOGGER_NAME = "TypeForge";
19 | private static final String DEFAULT_CONFIG_FILE_PATH = "/log4j2_default.xml";
20 | private static Logger defaultLogger;
21 |
22 | /**
23 | * Initialize the logging module.
24 | * @return true if initialization succeeded, false otherwise.
25 | */
26 | public static boolean init() {
27 | InputStream in = Logging.class.getResourceAsStream(DEFAULT_CONFIG_FILE_PATH);
28 | try {
29 | assert in != null;
30 | Configuration configuration = new XmlConfiguration(new LoggerContext(DEFAULT_LOGGER_NAME),
31 | new ConfigurationSource(in));
32 | LoggerContext context = (LoggerContext) LogManager.getContext(true);
33 | context.stop();
34 | context.start(configuration);
35 | defaultLogger = context.getLogger(DEFAULT_LOGGER_NAME);
36 | } catch (IOException e) {
37 | System.out.println("Failed to load logging config file: " + DEFAULT_CONFIG_FILE_PATH);
38 | return false;
39 | }
40 | return true;
41 | }
42 |
43 | /**
44 | * Generate an error log.
45 | * @param msg the log message.
46 | */
47 | public static void error(String prefix, String msg) {
48 | defaultLogger.error("[{}] - {}", prefix, msg);
49 | }
50 |
51 | /**
52 | * Generate a warning log.
53 | * @param msg the log message.
54 | */
55 | public static void warn(String prefix, String msg) {
56 | defaultLogger.warn("[{}] - {}", prefix, msg);
57 | }
58 |
59 | /**
60 | * Generate an info log.
61 | * @param msg the log message.
62 | */
63 | public static void info(String prefix, String msg) {
64 | defaultLogger.info("[{}] - {}", prefix, msg);
65 | }
66 |
67 | /**
68 | * Generate a debug log.
69 | * @param msg the debug log.
70 | */
71 | public static void debug(String prefix, String msg) {
72 | defaultLogger.debug("[{}] - {}", prefix, msg);
73 | }
74 |
75 | /**
76 | * Generate a trace log.
77 | * @param msg the trace log.
78 | */
79 | public static void trace(String prefix, String msg) {
80 | defaultLogger.trace("[{}] - {}", prefix, msg);
81 | }
82 | }
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/TCHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.program.model.data.DataType;
4 | import typeforge.base.dataflow.constraint.Skeleton;
5 |
6 | import java.util.*;
7 |
8 | public class TCHelper {
9 | public static class Interval {
10 | final long start;
11 | final long end;
12 |
13 | Interval(long start, long end) {
14 | this.start = start;
15 | this.end = end;
16 | }
17 |
18 | public boolean inInterval(long offset) {
19 | return offset > start && offset < end;
20 | }
21 |
22 | @Override
23 | public boolean equals(Object obj) {
24 | if (obj instanceof Interval) {
25 | return this.start == ((Interval) obj).start && this.end == ((Interval) obj).end;
26 | }
27 | return false;
28 | }
29 |
30 | @Override
31 | public int hashCode() {
32 | return Objects.hash(start, end);
33 | }
34 | }
35 |
36 | /**
37 | * Return true if one field's start offset lies strictly inside another field's interval.
38 | * Also return true if two fields share a start offset but one field's end runs past the start of the other's next field.
39 | * @return true if the two skeletons' fields overlap
40 | */
41 | public static boolean checkFieldOverlapStrict(Skeleton a, Skeleton b) {
42 | var aIntervals = buildIntervals(a);
43 | var bIntervals = buildIntervals(b);
44 | for (var aI: aIntervals) {
45 | for (var bI: bIntervals) {
46 | if (aI.inInterval(bI.start) || bI.inInterval(aI.start)) {
47 | return true;
48 | }
49 |
50 | if (aI.start == bI.start) {
51 | var aNI = getNextLargerInterval(aI, aIntervals);
52 | if (aNI != null && bI.end > aNI.start) {
53 | return true;
54 | }
55 |
56 | var bNI = getNextLargerInterval(bI, bIntervals);
57 | if (bNI != null && aI.end > bNI.start) {
58 | return true;
59 | }
60 | }
61 | }
62 | }
63 | return false;
64 | }
65 |
66 | // TODO: not ideal, since (0,4) may really be (0,2) + (2,4), and that still overlaps
67 | public static boolean checkFieldOverlapRelax(Skeleton a, Skeleton b) {
68 | var aIntervals = buildIntervalWithMostAccessed(a);
69 | var bIntervals = buildIntervalWithMostAccessed(b);
70 | for (var aI: aIntervals) {
71 | for (var bI: bIntervals) {
72 | if (aI.inInterval(bI.start) || bI.inInterval(aI.start)) {
73 | return true;
74 | }
75 |
76 | if (aI.start == bI.start) {
77 | var aNI = getNextLargerInterval(aI, aIntervals);
78 | if (aNI != null && bI.end > aNI.start) {
79 | return true;
80 | }
81 |
82 | var bNI = getNextLargerInterval(bI, bIntervals);
83 | if (bNI != null && aI.end > bNI.start) {
84 | return true;
85 | }
86 | }
87 | }
88 | }
89 | return false;
90 | }
91 |
92 |
93 | // public static boolean checkFieldSizeInConsistent(TypeConstraint a, TypeConstraint b) {
94 | // if (a == b) {
95 | // return false;
96 | // }
97 | // Set<Interval> thisIntervals = new HashSet<>();
98 | // for (var offset : a.fieldAccess.keySet()) {
99 | // long endOffset = calcFieldEndOffset(a, offset);
100 | // thisIntervals.add(new Interval(offset, endOffset));
101 | // }
102 | //
103 | // Set<Interval> otherIntervals = new HashSet<>();
104 | // for (var offset : b.fieldAccess.keySet()) {
105 | // long endOffset = calcFieldEndOffset(b, offset);
106 | // otherIntervals.add(new Interval(offset, endOffset));
107 | // }
108 | //
109 | // Set<Interval> commonIntervals = new HashSet<>(thisIntervals);
110 | // commonIntervals.retainAll(otherIntervals);
111 | //
112 | // thisIntervals.removeAll(commonIntervals);
113 | // otherIntervals.removeAll(commonIntervals);
114 | //
115 | // if (thisIntervals.isEmpty() || otherIntervals.isEmpty()) {
116 | // return false;
117 | // }
118 | //
119 | // List<Interval> mergedIntervals = new ArrayList<>(thisIntervals);
120 | // mergedIntervals.addAll(otherIntervals);
121 | // mergedIntervals.sort(Comparator.comparingLong(interval -> interval.start));
122 | // for (int i = 0; i < mergedIntervals.size() - 1; i++) {
123 | // Interval current = mergedIntervals.get(i);
124 | // Interval next = mergedIntervals.get(i + 1);
125 | // if (current.end > next.start) {
126 | // return true;
127 | // }
128 | // }
129 | // return false;
130 | // }
131 |
132 | public static ArrayList<Interval> buildIntervals(Skeleton a) {
133 | ArrayList<Interval> intervals = new ArrayList<>();
134 | for (var offset : a.fieldAccess.keySet()) {
135 | for (var endOffset : calcFieldEndOffset(a, offset)) {
136 | intervals.add(new Interval(offset, endOffset));
137 | }
138 | }
139 | return intervals;
140 | }
141 |
142 | public static ArrayList<Interval> buildIntervalWithMostAccessed(Skeleton a) {
143 | ArrayList<Interval> intervals = new ArrayList<>();
144 |
145 | for (var offset : a.fieldAccess.keySet()) {
146 | var aps = a.fieldAccess.get(offset);
147 | if (aps == null || aps.getApSet().isEmpty()) {
148 | continue;
149 | }
150 |
151 | var typeFreq = aps.getTypeFreq();
152 | DataType mostAccessedType = null;
153 | int maxAccess = 0;
154 |
155 | for (var entry : typeFreq.entrySet()) {
156 | if (entry.getValue() > maxAccess) {
157 | maxAccess = entry.getValue();
158 | mostAccessedType = entry.getKey();
159 | }
160 | }
161 |
162 | if (mostAccessedType != null) {
163 | long endOffset = offset + mostAccessedType.getLength();
164 | intervals.add(new Interval(offset, endOffset));
165 | }
166 | }
167 |
168 | return intervals;
169 | }
170 |
171 | public static Interval getNextLargerInterval(Interval cur, List<Interval> intervals) {
172 | for (var interval: intervals) {
173 | if (interval.start >= cur.end) {
174 | return interval;
175 | }
176 | }
177 | return null;
178 | }
179 |
180 |
181 | public static Set<Long> calcFieldEndOffset(Skeleton a, Long offset) {
182 | Set<Long> ends = new TreeSet<>();
183 | var fields = a.fieldAccess.get(offset);
184 | if (fields == null) {
185 | return ends;
186 | }
187 |
188 | for (var ap : fields.getApSet()) {
189 | if (ap.dataType != null) {
190 | ends.add(offset + ap.dataType.getLength());
191 | }
192 | }
193 | return ends;
194 | }
195 | }
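
A worked example of the strict overlap test (hypothetical offsets; Interval bounds are exclusive, so inInterval is a strict check). The snippet assumes it lives in typeforge.utils, since Interval's constructor is package-private:

var a = new TCHelper.Interval(0, 4);      // field at offset 0, 4 bytes wide
var b = new TCHelper.Interval(2, 6);      // field at offset 2, 4 bytes wide
boolean overlap = a.inInterval(b.start);  // true: 2 lies strictly inside (0, 4)

// Shared start offsets: one skeleton has a 4-byte field at 0, the other has
// 2-byte fields at 0 and 2. The 4-byte field runs past the next field's start,
// which is the case getNextLargerInterval() catches.
var wide = new TCHelper.Interval(0, 4);
var next = new TCHelper.Interval(2, 4);
boolean overlap2 = wide.end > next.start; // true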
--------------------------------------------------------------------------------
/src/main/resources/log4j2_default.xml:
--------------------------------------------------------------------------------
[16 lines of XML markup lost in extraction: the default log4j2 configuration loaded by Logging.init()]
--------------------------------------------------------------------------------
/src/test/java/README.test.txt:
--------------------------------------------------------------------------------
1 | The "test" directory is intended to hold unit test cases. The package structure within
2 | this folder should correspond to that found in the "src" folder.
--------------------------------------------------------------------------------
/src/test/java/typeforge/base/dataflow/NMAETest.java:
--------------------------------------------------------------------------------
1 | package typeforge.base.dataflow;
2 |
3 | import static org.mockito.Mockito.*;
4 |
5 | import typeforge.utils.Logging;
6 | import ghidra.program.model.listing.Function;
7 | import ghidra.program.model.pcode.HighFunction;
8 | import ghidra.program.model.pcode.HighSymbol;
9 | import org.junit.jupiter.api.BeforeEach;
10 | import org.junit.jupiter.api.Test;
11 | import org.junit.jupiter.api.extension.ExtendWith;
12 | import org.mockito.Mock;
13 | import org.mockito.junit.jupiter.MockitoExtension;
14 |
15 | @ExtendWith(MockitoExtension.class)
16 | public class NMAETest {
17 | @Mock
18 | private HighSymbol mockHighSymbol1;
19 | @Mock
20 | private HighSymbol mockHighSymbol2;
21 | @Mock
22 | private HighSymbol mockHighSymbol3;
23 | @Mock
24 | private HighFunction mockHighFunc;
25 | @Mock
26 | private Function mockFunc;
27 |
28 | @BeforeEach
29 | public void setUp() {
30 | if (!Logging.init()) {
31 | return;
32 | }
33 | when(mockHighSymbol1.getName()).thenReturn("mock_1");
34 | when(mockHighSymbol2.getName()).thenReturn("mock_2");
35 | when(mockHighSymbol3.getName()).thenReturn("mock_3");
36 | when(mockHighSymbol1.getHighFunction()).thenReturn(mockHighFunc);
37 | when(mockHighSymbol2.getHighFunction()).thenReturn(mockHighFunc);
38 | when(mockHighSymbol3.getHighFunction()).thenReturn(mockHighFunc);
39 | when(mockHighFunc.getFunction()).thenReturn(mockFunc);
40 | when(mockFunc.getName()).thenReturn("mock_func");
41 | }
42 |
43 | @Test
44 | public void test() {
45 | // var expr1 = new SymbolExpr.Builder()
46 | // .rootSymbol(mockHighSymbol1)
47 | // .build();
48 | //
49 | // var expr2 = new SymbolExpr.Builder()
50 | // .rootSymbol(mockHighSymbol2)
51 | // .build();
52 | //
53 | // var expr3 = new SymbolExpr.Builder()
54 | // .rootSymbol(mockHighSymbol3)
55 | // .build();
56 | //
57 | //
58 | // assertEquals(expr1.getRepresentation(), "mock_1");
59 | // assertEquals(expr2.getRepresentation(), "mock_2");
60 | // assertEquals(expr3.getRepresentation(), "mock_3");
61 | //
62 | // var expr4 = new SymbolExpr.Builder().constant(0x8).build();
63 | // var expr5 = new SymbolExpr.Builder().constant(0x10).build();
64 | // var expr6 = new SymbolExpr.Builder().constant(0x18).build();
65 | //
66 | // assertEquals(expr4.getRepresentation(), "0x8");
67 | // assertEquals(expr5.getRepresentation(), "0x10");
68 | // assertEquals(expr6.getRepresentation(), "0x18");
69 |
70 | // var expr7 = expr1.add(expr4);
71 | // var expr8 = expr2.add(expr5);
72 | // var expr9 = expr3.add(expr6);
73 | // assertEquals(expr7.getRepresentation(), "mock_1 + 0x8");
74 | // assertEquals(expr8.getRepresentation(), "mock_2 + 0x10");
75 | // assertEquals(expr9.getRepresentation(), "mock_3 + 0x18");
76 | //
77 | // var expr10 = expr7.dereference();
78 | // var expr11 = expr8.dereference();
79 | // var expr12 = expr9.dereference();
80 | // var expr13 = expr1.dereference();
81 | // var expr14 = expr12.dereference();
82 | // assertEquals(expr10.getRepresentation(), "*(mock_1 + 0x8)");
83 | // assertEquals(expr11.getRepresentation(), "*(mock_2 + 0x10)");
84 | // assertEquals(expr12.getRepresentation(), "*(mock_3 + 0x18)");
85 | // assertEquals(expr13.getRepresentation(), "*(mock_1)");
86 | // assertEquals(expr14.getRepresentation(), "*(*(mock_3 + 0x18))");
87 | //
88 | // var expr15 = expr12.add(expr4);
89 | // var expr16 = expr14.add(expr6);
90 | // var expr17 = expr16.add(expr6);
91 | // assertEquals(expr15.getRepresentation(), "*(mock_3 + 0x18) + 0x8");
92 | // assertEquals(expr16.getRepresentation(), "*(*(mock_3 + 0x18)) + 0x18");
93 | // assertEquals(expr17.getRepresentation(), "*(*(mock_3 + 0x18)) + 0x30");
94 | //
95 | // var expr18 = expr17.add(expr3);
96 | // assertEquals(expr18.getRepresentation(), "*(*(mock_3 + 0x18)) + mock_3 + 0x30");
97 | }
98 | }
--------------------------------------------------------------------------------
/src/test/java/typeforge/base/dataflow/types/LayoutTest.java:
--------------------------------------------------------------------------------
1 | package typeforge.base.dataflow.types;
2 |
3 | import org.junit.jupiter.api.Test;
4 | import typeforge.base.dataflow.Layout;
5 |
6 | import java.util.ArrayList;
7 | import java.util.HashSet;
8 | import java.util.Set;
9 |
10 | public class LayoutTest {
11 | @Test
12 | public void test() {
13 | Set<Integer> sizes1 = new HashSet<>();
14 | sizes1.add(1);
15 | sizes1.add(2);
16 | sizes1.add(3);
17 |
18 | Set<Integer> sizes2 = new HashSet<>();
19 | sizes2.add(3);
20 | sizes2.add(2);
21 | sizes2.add(1);
22 |
23 | Set<Integer> sizes3 = new HashSet<>();
24 | sizes3.add(0x10);
25 |
26 | var interval1 = new Layout.Interval(10L, sizes1);
27 | var interval2 = new Layout.Interval(10L, sizes2);
28 | var interval3 = new Layout.Interval(0, sizes3);
29 |
30 | assert sizes1.equals(sizes2);
31 | assert interval1.equals(interval2);
32 | assert interval1.hashCode() == interval2.hashCode();
33 | assert !interval1.equals(interval3);
34 |
35 | var intervals = new ArrayList<Layout.Interval>();
36 | intervals.add(interval1);
37 | intervals.add(interval3);
38 | var layout1 = new Layout(intervals);
39 |
40 | intervals = new ArrayList<>();
41 | intervals.add(interval3);
42 | intervals.add(interval1);
43 | var layout2 = new Layout(intervals);
44 |
45 | intervals = new ArrayList<>();
46 | intervals.add(interval1);
47 | intervals.add(interval3);
48 | var layout3 = new Layout(intervals);
49 |
50 | assert !layout1.equals(layout2);
51 | assert layout1.equals(layout3);
52 | assert layout1.hashCode() == layout3.hashCode();
53 | }
54 | }
55 |
--------------------------------------------------------------------------------