├── .gitignore ├── DEVELOPING.md ├── LICENSE ├── Module.manifest ├── README.md ├── assets └── IntelliJ-Ghidra-0.5.0.zip ├── build.gradle ├── data ├── README.txt ├── buildLanguage.xml ├── languages │ ├── skel.cspec │ ├── skel.ldefs │ ├── skel.opinion │ ├── skel.pspec │ ├── skel.sinc │ └── skel.slaspec └── sleighArgs.txt ├── demo ├── README.md ├── TypeConstraint_1539724c_global_morph.json ├── TypeConstraint_746192c2_range_morph.json ├── TypeConstraint_f2b22cd2_final.json ├── TypeConstraint_f8591481_final_DI.json └── varType.json ├── extension.properties ├── ghidra_scripts ├── GroundTruth.java └── TypeForge.java ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── imgs ├── TypeForge_overview.png ├── figure_develop-1.png ├── figure_develop-2.png ├── figure_develop-3.png └── figure_develop-4.png ├── lib ├── README.txt ├── jackson-annotations-2.13.0.jar ├── jackson-core-2.13.0.jar ├── jackson-databind-2.13.0.jar ├── jgrapht-core-1.5.1.jar └── jheaps-0.13.jar ├── os ├── linux_x86_64 │ └── README.txt ├── mac_x86_64 │ └── README.txt └── win_x86_64 │ └── README.txt ├── scripts ├── .python-version ├── GraphExplorer.py ├── GroundTruthExtractor.py ├── README.md ├── TypeInference.py ├── config.yml ├── judge │ ├── README.md │ ├── double_elimination.py │ ├── llm.py │ └── main.py ├── requirements.txt └── uv.lock └── src ├── main ├── java │ └── typeforge │ │ ├── analyzer │ │ ├── Generator.java │ │ ├── ReTyper.java │ │ └── TypeAnalyzer.java │ │ ├── base │ │ ├── dataflow │ │ │ ├── AccessPoints.java │ │ │ ├── ConflictGraph.java │ │ │ ├── KSet.java │ │ │ ├── Layout.java │ │ │ ├── Range.java │ │ │ ├── TFG │ │ │ │ ├── TFGManager.java │ │ │ │ ├── TypeFlowGraph.java │ │ │ │ ├── TypeFlowPath.java │ │ │ │ └── TypeFlowPathManager.java │ │ │ ├── UnionFind.java │ │ │ ├── constraint │ │ │ │ ├── SizeSource.java │ │ │ │ ├── Skeleton.java │ │ │ │ └── TypeConstraint.java │ │ │ ├── expression │ │ │ │ ├── NMAE.java │ │ │ │ ├── NMAEManager.java │ │ │ │ └── ParsedExpr.java │ │ │ └── solver │ │ │ │ ├── ConstPropagator.java │ │ │ │ ├── ExternalHandler.java │ │ │ │ ├── InterSolver.java │ │ │ │ ├── IntraSolver.java │ │ │ │ ├── LayoutPropagator.java │ │ │ │ ├── PCodeVisitor.java │ │ │ │ └── TypeHintCollector.java │ │ ├── graph │ │ │ ├── CallGraph.java │ │ │ ├── GraphBase.java │ │ │ └── SDGraph.java │ │ ├── node │ │ │ ├── CallSite.java │ │ │ ├── DataTypeNode.java │ │ │ ├── FunctionNode.java │ │ │ └── NodeBase.java │ │ ├── parallel │ │ │ └── PrepareFunctionNodeCallback.java │ │ └── passes │ │ │ ├── SlidingWindowProcessor.java │ │ │ └── Window.java │ │ └── utils │ │ ├── DataTypeHelper.java │ │ ├── DecompilerHelper.java │ │ ├── FunctionHelper.java │ │ ├── Global.java │ │ ├── GraphHelper.java │ │ ├── HighSymbolHelper.java │ │ ├── Logging.java │ │ └── TCHelper.java └── resources │ └── log4j2_default.xml └── test └── java ├── README.test.txt └── typeforge └── base └── dataflow ├── NMAETest.java └── types └── LayoutTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle/ 3 | .idea 4 | Inferred 5 | GhidraScriptLog/ 6 | bin/ 7 | TypeForge_GroundTruth/ 8 | TypeForge_Inference/ 9 | .venv 10 | .env 11 | __pycache__/ -------------------------------------------------------------------------------- /DEVELOPING.md: -------------------------------------------------------------------------------- 1 | # How to Develop 2 | Writing a simple Ghidra Script is straightforward, but developing a complex Ghidra Extension can be 
challenging, especially when setting up the initial development environment. 3 | Ghidra officially supports **Eclipse** for Extension development. However, since **Eclipse** is not very user-friendly, TypeForge uses **IntelliJ IDEA** and the **intellij-ghidra** plugin for development. 4 | (Note: The latest version of Ghidra supports plugin development in **VSCode**, but TypeForge has not been tested in this environment.) 5 | 6 | ## Setup 7 | 1. [intellij-ghidra](https://github.com/garyttierney/intellij-ghidra) is an IDEA plugin that enables developers to work with Ghidra in an integrated environment, providing features such as API completion, compilation, and debugging. Testing has confirmed that IntelliJ IDEA (version **2024.1.7**) can run this plugin properly. 8 | 2. A version of intellij-ghidra compatible with IDEA (version 2024.1.7) has been pre-compiled and stored in the [assets](./assets/IntelliJ-Ghidra-0.5.0.zip) directory. Open IDEA, click on `File -> Settings -> Plugins`, and choose to install the plugin (the `.zip` file) from the local disk. 9 | 10 | ![install-plugin](./imgs/figure_develop-4.png) 11 | 12 | 3. Follow the [usage guide](https://github.com/garyttierney/intellij-ghidra) for initial plugin configuration. 4. Configure the **TypeForge** project, making sure to check the `headless` option. You can specify a log path to save the plugin's output. The remaining parameters are identical to the command-line usage parameters (there is no need to specify the `analyzeHeadless` path again). 14 | 15 | ![install-plugin](./imgs/figure_develop-1.png) 16 | 17 | ![install-plugin](./imgs/figure_develop-3.png) 18 | 19 | ![install-plugin](./imgs/figure_develop-2.png) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Yanzhong Wang 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | If you use this code in academic work, you must: 16 | - Cite the original paper: TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries (DOI: 10.1109/SP61157.2025.00193) 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Module.manifest: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/Module.manifest -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries 2 | 3 | [![IEEE DOI](https://img.shields.io/badge/S%26P%202025-10.1109%2FSP61157.2025.00193-00629A?logo=ieee&logoColor=00629A&labelColor=E6F2FF)](https://doi.ieeecomputersociety.org/10.1109/SP61157.2025.00193) 4 | [![CCF-A](https://img.shields.io/badge/CCF_A-Security_%26_Privacy-FFD700?logo=star&logoColor=003A5D)](https://www.ccf.org.cn/Academic_Evaluation/) 5 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](./LICENSE) 6 | [![GitHub Stars](https://img.shields.io/github/stars/noobone123/typeforge?style=social)](https://github.com/noobone123/typeforge/stargazers) 7 | [![Last Commit](https://img.shields.io/github/last-commit/noobone123/typeforge/dev?color=blue&label=last-commit)](https://github.com/noobone123/typeforge) 8 | 9 | > We are continuously maintaining and updating this project, aiming to provide more user-friendly features and higher efficiency. 10 | 11 | This is the implementation of the paper titled "TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries". For more details about TypeForge, please refer to [our S&P 2025 paper](https://www.computer.org/csdl/proceedings-article/sp/2025/223600c847/26hiVajYJwY). 12 | 13 | ## What is TypeForge? 14 |
15 | <img src="./imgs/TypeForge_overview.png" alt="overview"> 16 | 
17 | 18 | TypeForge aims to recover composite data types (such as structures, unions, etc.) in stripped binaries. Compared to existing methods, TypeForge provides higher efficiency and accuracy. 19 | - TypeForge is divided into **two phases**: a *Program Analysis phase* and an *LLM-assisted Refinement phase*. The first phase is sufficient for common reverse engineering tasks, while the second phase further improves the accuracy of the phase-one results. 20 | - TypeForge is currently implemented as a [Ghidra Extension](https://ghidra-sre.org/InstallationGuide.html#GhidraExtensionNotes). We welcome other developers to port it to platforms like [IDA Pro](https://hex-rays.com/ida-pro), [Binary Ninja](https://binary.ninja/), and [Angr](https://github.com/angr/angr). 21 | 22 | 23 | ## Project Structure 24 | 25 | ``` 26 | typeforge/ # Project root 27 | ├── ... 28 | ├── build.gradle # Gradle build configuration 29 | ├── extension.properties # Extension properties 30 | ├── src/ # Main source code of TypeForge 31 | │ ├── main/java/typeforge 32 | │ │ ├── analyzer/ # Entry functions for various program analyses 33 | │ │ ├── base/ # Underlying components for program analysis algorithms 34 | │ │ │ ├── dataflow/ # Data flow analysis (including data flow abstractions, intra/inter-procedural Solvers) 35 | │ │ │ ├── graph/ # CallGraph 36 | │ │ │ ├── node/ # Binary functions and CallSites 37 | │ │ │ ├── parallel/ # Parallel processing Callbacks 38 | │ │ │ └── passes/ # Passes used for synthesizing possible type declarations 39 | │ │ └── utils/ # Other useful functions for binary analysis 40 | │ └── test/ 41 | ├── ghidra_scripts/ # Ghidra scripts 42 | │ ├── TypeForge.java # Main TypeForge script 43 | │ └── GroundTruth.java # Ground truth extractor (from binaries with debug symbols) 44 | ├── scripts/ # Useful Python scripts 45 | │ ├── judge/ # LLM-assisted double elimination process 46 | │ ├── GraphExplorer.py # (Debugging purpose) Explore a dumped Type Flow Graph 47 | │ ├── GroundTruthExtractor.py # Ground truth extractor (wrapper; actually calls GroundTruth.java) 48 | │ └── TypeInference.py # Type Inference (wrapper; actually calls TypeForge.java) 49 | ├── lib/ 50 | └── ... 51 | ``` 52 | 53 | ## Building and Installing 54 | ### Building as a Ghidra extension 55 | 1. Clone this repo: 56 | 57 | ```bash 58 | git clone https://github.com/noobone123/TypeForge.git 59 | ``` 60 | 2. Install a JDK and Ghidra (version 11.0.3 has been tested). 61 | Download Ghidra from [here](https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.0.3_build/ghidra_11.0.3_PUBLIC_20240410.zip) and follow the Ghidra [installation instructions](https://github.com/NationalSecurityAgency/ghidra/blob/Ghidra_11.0.3_build/GhidraDocs/InstallationGuide.html). 62 | 3. Modify `ghidraInstallDir` to **YOUR Ghidra installation directory** in `build.gradle`. 63 | 4. Build the Ghidra extension: 64 | 65 | ```bash 66 | cd TypeForge 67 | gradle buildExtension 68 | # After building, you will find the extension zip file here: 69 | ls -alh ./dist/ghidra_11.0.3_PUBLIC_[your-build-time]_TypeForge.zip 70 | ``` 71 | 72 | ### Installing 73 | Please refer to the following commands to unzip and install the compiled Ghidra Extension. 
74 | 75 | ```bash 76 | cp ./dist/ghidra_11.0.3_PUBLIC_[your-build-time]_TypeForge.zip \ 77 | [YOUR-Ghidra-Installation-Directory]/Ghidra/Extensions 78 | cd [YOUR-Ghidra-Installation-Directory]/Ghidra/Extensions 79 | unzip ghidra_11.0.3_PUBLIC_[your-build-time]_TypeForge.zip 80 | ``` 81 | 82 | ## Getting Started 83 | ### Type Inference (Headless Mode) 84 | 85 | After installing TypeForge, just run the following for a single stripped binary: 86 | ```bash 87 | [YOUR-Ghidra-Installation-Directory]/support/analyzeHeadless \ 88 | [YOUR-Ghidra-Project-Directory] [YOUR-Project-Name] \ 89 | -deleteProject -import [YOUR-Stripped-Binary] \ 90 | -postScript TypeForge.java output=[Your-output-dir] 91 | ``` 92 | 93 | After a while, you will see the Type Inference results (JSON files) saved in `[Your-output-dir]`. For details about these JSON files, please refer to the [demo](./demo/README.md). These JSON files are then fed into *Phase 2* for refinement. For more information, please refer to [judge](./scripts/judge/README.md). 94 | 95 | For **batch processing**, please refer to [scripts](./scripts/README.md). 96 | 97 | > We are currently developing additional features to directly import Type Inference results into Ghidra projects. 98 | 99 | > For more information about Ghidra Headless Mode, please refer to [this guide](https://static.grumpycoder.net/pixel/support/analyzeHeadlessREADME.html). 100 | 101 | ### Extract the Ground Truth 102 | You can also extract the ground truth of composite data types from a binary with debug information (note that Ghidra currently does **NOT** support the DWARF-5 debug format, so you need to specify `-gdwarf-4` during compilation; see the example commands further below). 103 | For more details, please refer to [scripts](./scripts/README.md). 104 | 105 | ### Run in Ghidra GUI Mode 106 | In development... 107 | 108 | ## Developing and Debugging 109 | TypeForge is developed using [IntelliJ IDEA](https://www.jetbrains.com/idea/download/other.html) (version 2024.1.7) and the [intellij-ghidra](https://github.com/garyttierney/intellij-ghidra) plugin. For detailed development guidelines, please refer to [How To Develop](./DEVELOPING.md). 
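The following sketch illustrates the `-gdwarf-4` requirement from the *Extract the Ground Truth* section above. The source and binary names are hypothetical, and the `output=` argument for `GroundTruth.java` is an assumption made by analogy with `TypeForge.java`'s interface; see [scripts](./scripts/README.md) for the authoritative invocation.

```bash
# Compile a binary with DWARF-4 debug info (Ghidra cannot parse DWARF-5 yet).
# "demo.c" and "demo" are hypothetical names.
gcc -g -gdwarf-4 -o demo demo.c

# Extract the ground truth headlessly. The output= argument is assumed to
# mirror TypeForge.java; verify against scripts/README.md.
[YOUR-Ghidra-Installation-Directory]/support/analyzeHeadless \
    [YOUR-Ghidra-Project-Directory] [YOUR-Project-Name] \
    -deleteProject -import ./demo \
    -postScript GroundTruth.java output=[Your-ground-truth-dir]
```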
110 | 111 | ## Contributors 112 | TypeForge is written and maintained by: 113 | - [h1k0](https://github.com/noobone123) h1k0naka@outlook.com 114 | - [liyilin](https://github.com/li-yilin-30) liyilin2023@iie.ac.cn 115 | 116 | ## Cite 117 | 118 | If you use `TypeForge` for your academic work, please cite the following paper: 119 | ``` 120 | @inproceedings{typeforge, 121 | title = {TypeForge: Synthesizing and Selecting Best-Fit Composite Data Types for Stripped Binaries}, 122 | author = {Wang, Yanzhong and Liang, Ruigang and Li, Yilin and Hu, Peiwei and Chen, Kai and Zhang, Bolun}, 123 | booktitle = {2025 IEEE Symposium on Security and Privacy (SP)}, 124 | pages = {2847--2864}, 125 | year = {2025}, 126 | publisher = {IEEE Computer Society}, 127 | doi = {10.1109/SP61157.2025.00193}, 128 | } 129 | ``` -------------------------------------------------------------------------------- /assets/IntelliJ-Ghidra-0.5.0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/assets/IntelliJ-Ghidra-0.5.0.zip -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | /* ### 2 | * IP: GHIDRA 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // Builds a Ghidra Extension for a given Ghidra installation. 17 | // 18 | // An absolute path to the Ghidra installation directory must be supplied either by setting the 19 | // GHIDRA_INSTALL_DIR environment variable or Gradle project property: 20 | // 21 | // > export GHIDRA_INSTALL_DIR=<Absolute path to Ghidra installation> 22 | // > gradle 23 | // 24 | // or 25 | // 26 | // > gradle -PGHIDRA_INSTALL_DIR=<Absolute path to Ghidra installation> 27 | // 28 | // Gradle should be invoked from the directory of the project to build. Please see the 29 | // application.gradle.version property in <GHIDRA_INSTALL_DIR>/Ghidra/application.properties 30 | // for the correct version of Gradle to use for the Ghidra installation you specify. 
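//
// For example, with a hypothetical installation path (substitute your own):
//
//   export GHIDRA_INSTALL_DIR=/opt/ghidra_11.0.3_PUBLIC
//   gradle buildExtension
//
// or, passing the property directly:
//
//   gradle -PGHIDRA_INSTALL_DIR=/opt/ghidra_11.0.3_PUBLIC buildExtension
//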
31 | 32 | //----------------------START "DO NOT MODIFY" SECTION------------------------------ 33 | def ghidraInstallDir = "/home/h1k0/tools/ghidra_11.0.3_PUBLIC" // Modify `ghidraInstallDir` to your Ghidra installation directory 34 | 35 | if (System.env.GHIDRA_INSTALL_DIR) { 36 | ghidraInstallDir = System.env.GHIDRA_INSTALL_DIR 37 | } 38 | else if (project.hasProperty("GHIDRA_INSTALL_DIR")) { 39 | ghidraInstallDir = project.getProperty("GHIDRA_INSTALL_DIR") 40 | } 41 | 42 | if (ghidraInstallDir) { 43 | apply from: new File(ghidraInstallDir).getCanonicalPath() + "/support/buildExtension.gradle" 44 | } 45 | else { 46 | throw new GradleException("GHIDRA_INSTALL_DIR is not defined!") 47 | } 48 | //----------------------END "DO NOT MODIFY" SECTION------------------------------- 49 | 50 | sourceSets { 51 | main { 52 | java { 53 | srcDirs = ['src/main/java', 'ghidra_scripts'] 54 | } 55 | resources { 56 | srcDirs = ['src/main/resources'] 57 | } 58 | } 59 | } 60 | 61 | repositories { 62 | // Declare dependency repositories here. This is not needed if dependencies are manually 63 | // dropped into the lib/ directory. 64 | // See https://docs.gradle.org/current/userguide/declaring_repositories.html for more info. 65 | // Ex: mavenCentral() 66 | mavenCentral() 67 | } 68 | 69 | dependencies { 70 | // Any external dependencies added here will automatically be copied to the lib/ directory when 71 | // this extension is built. 72 | 73 | implementation 'com.fasterxml.jackson.core:jackson-core:2.13.0' 74 | implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.0' 75 | implementation 'com.fasterxml.jackson.core:jackson-annotations:2.13.0' 76 | implementation 'org.jgrapht:jgrapht-core:1.5.1' 77 | 78 | testImplementation('org.junit.jupiter:junit-jupiter-api:5.8.2') 79 | testRuntimeOnly('org.junit.jupiter:junit-jupiter-engine:5.8.2') 80 | 81 | testImplementation 'org.mockito:mockito-core:4.0.0' 82 | testImplementation 'org.mockito:mockito-junit-jupiter:4.0.0' 83 | } 84 | 85 | test { 86 | useJUnitPlatform() 87 | } 88 | 89 | // Exclude additional files from the built extension 90 | buildExtension.exclude '.idea/**' 91 | buildExtension.exclude '.git/**' 92 | buildExtension.exclude 'GhidraScriptLog/**' 93 | buildExtension.exclude 'TypeForge_Inference/**' 94 | buildExtension.exclude 'demo/**' 95 | buildExtension.exclude 'scripts/**' 96 | buildExtension.exclude 'imgs/**' 97 | -------------------------------------------------------------------------------- /data/README.txt: -------------------------------------------------------------------------------- 1 | The "data" directory is intended to hold data files that will be used by this module and will 2 | not end up in the .jar file, but will be present in the zip or tar file. Typically, data 3 | files are placed here rather than in the resources directory if the user may need to edit them. 4 | 5 | An optional data/languages directory can exist for the purpose of containing various Sleigh language 6 | specification files and importer opinion files. 7 | 8 | The data/buildLanguage.xml is used for building the contents of the data/languages directory. 9 | 10 | The skel language definition has been commented-out within the skel.ldefs file so that the 11 | skeleton language does not show-up within Ghidra. 12 | 13 | See the Sleigh language documentation (docs/languages/index.html) for details Sleigh language 14 | specification syntax. 
15 | -------------------------------------------------------------------------------- /data/buildLanguage.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /data/languages/skel.cspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /data/languages/skel.ldefs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 20 | 21 | -------------------------------------------------------------------------------- /data/languages/skel.opinion: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 13 | -------------------------------------------------------------------------------- /data/languages/skel.pspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /data/languages/skel.sinc: -------------------------------------------------------------------------------- 1 | # sleigh include file for Skeleton language instructions 2 | 3 | define token opbyte (8) 4 | op0_8 = (0,7) 5 | op6_2 = (6,7) 6 | 7 | dRegPair4_2 = (4,5) 8 | pRegPair4_2 = (4,5) 9 | sRegPair4_2 = (4,5) 10 | qRegPair4_2 = (4,5) 11 | qRegPair4_2a = (4,5) 12 | qRegPair4_2b = (4,5) 13 | rRegPair4_2 = (4,5) 14 | 15 | reg3_3 = (3,5) 16 | bits3_3 = (3,5) 17 | 18 | bits0_4 = (0,3) 19 | 20 | reg0_3 = (0,2) 21 | bits0_3 = (0,2) 22 | ; 23 | 24 | define token data8 (8) 25 | imm8 = (0,7) 26 | sign8 = (7,7) 27 | simm8 = (0,7) signed 28 | ; 29 | 30 | define token data16 (16) 31 | timm4 = (12,15) 32 | imm16 = (0,15) 33 | sign16 = (15,15) 34 | simm16 = (0,15) signed 35 | ; 36 | 37 | attach variables [ reg0_3 reg3_3 ] [ B C D E H L _ A ]; 38 | 39 | attach variables [ sRegPair4_2 dRegPair4_2 ] [ BC DE HL SP ]; 40 | 41 | attach variables [ qRegPair4_2 ] [ BC DE HL AF ]; 42 | attach variables [ qRegPair4_2a ] [ B D H A ]; 43 | attach variables [ qRegPair4_2b ] [ C E L F ]; 44 | 45 | attach variables [ pRegPair4_2 ] [ BC DE IX SP ]; 46 | attach variables [ rRegPair4_2 ] [ BC DE IY SP ]; 47 | 48 | ################################################################ 49 | # Macros 50 | ################################################################ 51 | 52 | macro setResultFlags(result) { 53 | $(Z_flag) = (result == 0); 54 | $(S_flag) = (result s< 0); 55 | } 56 | 57 | macro setAddCarryFlags(op1,op2) 
{ 58 | $(C_flag) = (carry(op1,zext($(C_flag))) || carry(op2,op1 + zext($(C_flag)))); 59 | } 60 | 61 | macro setAddFlags(op1,op2) { 62 | $(C_flag) = carry(op1,op2); 63 | } 64 | 65 | macro setSubtractCarryFlags(op1,op2) { 66 | notC = ~$(C_flag); 67 | $(C_flag) = ((op1 < sext(notC)) || (op2 < (op1 - sext(notC)))); 68 | } 69 | 70 | macro setSubtractFlags(op1,op2) { 71 | $(C_flag) = (op1 < op2); 72 | } 73 | 74 | macro push16(val16) { 75 | SP = SP - 2; 76 | *:2 SP = val16; 77 | } 78 | 79 | macro pop16(ret16) { 80 | ret16 = *:2 SP; 81 | SP = SP + 2; 82 | } 83 | 84 | macro push8(val8) { 85 | SP = SP - 1; 86 | ptr:2 = SP; 87 | *:1 ptr = val8; 88 | } 89 | 90 | macro pop8(ret8) { 91 | ptr:2 = SP; 92 | ret8 = *:1 ptr; 93 | SP = SP + 1; 94 | } 95 | 96 | ################################################################ 97 | 98 | ixMem8: (IX+simm8) is IX & simm8 { ptr:2 = IX + simm8; export *:1 ptr; } 99 | ixMem8: (IX-val) is IX & simm8 & sign8=1 [ val = -simm8; ] { ptr:2 = IX + simm8; export *:1 ptr; } 100 | 101 | iyMem8: (IY+simm8) is IY & simm8 { ptr:2 = IY + simm8; export *:1 ptr; } 102 | iyMem8: (IY-val) is IY & simm8 & sign8=1 [ val = -simm8; ] { ptr:2 = IY + simm8; export *:1 ptr; } 103 | 104 | Addr16: imm16 is imm16 { export *:1 imm16; } 105 | 106 | Mem16: (imm16) is imm16 { export *:2 imm16; } 107 | 108 | RelAddr8: loc is simm8 [ loc = inst_next + simm8; ] { export *:1 loc; } 109 | 110 | cc: "NZ" is bits3_3=0x0 { c:1 = ($(Z_flag) == 0); export c; } 111 | cc: "Z" is bits3_3=0x1 { c:1 = $(Z_flag); export c; } 112 | cc: "NC" is bits3_3=0x2 { c:1 = ($(C_flag) == 0); export c; } 113 | cc: "C" is bits3_3=0x3 { c:1 = $(C_flag); export c; } 114 | cc: "PO" is bits3_3=0x4 { c:1 = ($(PV_flag) == 0); export c; } 115 | cc: "PE" is bits3_3=0x5 { c:1 = $(PV_flag); export c; } 116 | cc: "P" is bits3_3=0x6 { c:1 = ($(S_flag) == 0); export c; } 117 | cc: "M" is bits3_3=0x7 { c:1 = $(S_flag); export c; } 118 | 119 | cc2: "NZ" is bits3_3=0x4 { c:1 = ($(Z_flag) == 0); export c; } 120 | cc2: "Z" is bits3_3=0x5 { c:1 = $(Z_flag); export c; } 121 | cc2: "NC" is bits3_3=0x6 { c:1 = ($(C_flag) == 0); export c; } 122 | cc2: "C" is bits3_3=0x7 { c:1 = $(C_flag); export c; } 123 | 124 | ################################################################ 125 | 126 | 127 | :LD IX,Mem16 is op0_8=0xdd & IX; op0_8=0x2a; Mem16 { 128 | IX = Mem16; 129 | } 130 | 131 | :LD IY,Mem16 is op0_8=0xfd & IY; op0_8=0x2a; Mem16 { 132 | IY = Mem16; 133 | } 134 | 135 | :LD Mem16,HL is op0_8=0x22 & HL; Mem16 { 136 | Mem16 = HL; 137 | } 138 | 139 | :LD Mem16,dRegPair4_2 is op0_8=0xed; op6_2=0x1 & dRegPair4_2 & bits0_4=0x3; Mem16 { 140 | Mem16 = dRegPair4_2; 141 | } 142 | 143 | :LD Mem16,IX is op0_8=0xdd & IX; op0_8=0x22; Mem16 { 144 | Mem16 = IX; 145 | } 146 | 147 | :LD Mem16,IY is op0_8=0xfd & IY; op0_8=0x22; Mem16 { 148 | Mem16 = IY; 149 | } 150 | 151 | :NEG is op0_8=0xed; op0_8=0x44 { 152 | $(PV_flag) = (A == 0x80); 153 | $(C_flag) = (A != 0); 154 | A = -A; 155 | setResultFlags(A); 156 | } 157 | 158 | :SET bits3_3,ixMem8 is op0_8=0xdd; op0_8=0xcb; ixMem8; op6_2=0x3 & bits3_3 & bits0_3=0x6 { 159 | mask:1 = (1 << bits3_3); 160 | val:1 = ixMem8; 161 | ixMem8 = val | mask; 162 | } 163 | 164 | :SET bits3_3,iyMem8 is op0_8=0xfd; op0_8=0xcb; iyMem8; op6_2=0x3 & bits3_3 & bits0_3=0x6 { 165 | mask:1 = (1 << bits3_3); 166 | val:1 = iyMem8; 167 | iyMem8 = val | mask; 168 | } 169 | 170 | :JP Addr16 is op0_8=0xc3; Addr16 { 171 | goto Addr16; 172 | } 173 | 174 | :JP cc,Addr16 is op6_2=0x3 & cc & bits0_3=0x2; Addr16 { 175 | if (!cc) goto Addr16; 176 | } 177 | 
178 | :JR RelAddr8 is op0_8=0x18; RelAddr8 { 179 | goto RelAddr8; 180 | } 181 | 182 | :JR cc2,RelAddr8 is op6_2=0x0 & cc2 & bits0_3=0x0; RelAddr8 { 183 | if (cc2) goto RelAddr8; 184 | } 185 | 186 | :JP (HL) is op0_8=0xe9 & HL { 187 | goto [HL]; 188 | } 189 | 190 | :JP (IX) is op0_8=0xdd & IX; op0_8=0xe9 { 191 | goto [IX]; 192 | } 193 | 194 | :JP (IY) is op0_8=0xfd & IY; op0_8=0xe9 { 195 | goto [IY]; 196 | } 197 | 198 | :CALL Addr16 is op0_8=0xcd; Addr16 { 199 | push16(&:2 inst_next); 200 | call Addr16; 201 | } 202 | 203 | :CALL cc,Addr16 is op6_2=0x3 & cc & bits0_3=0x4; Addr16 { 204 | if (!cc) goto inst_next; 205 | push16(&:2 inst_next); 206 | call Addr16; 207 | } 208 | 209 | :RET is op0_8=0xc9 { 210 | pop16(PC); 211 | ptr:2 = zext(PC); 212 | return [ptr]; 213 | } 214 | 215 | :RET cc is op6_2=0x3 & cc & bits0_3=0x0 { 216 | if (!cc) goto inst_next; 217 | pop16(PC); 218 | ptr:2 = zext(PC); 219 | return [ptr]; 220 | } 221 | -------------------------------------------------------------------------------- /data/languages/skel.slaspec: -------------------------------------------------------------------------------- 1 | # sleigh specification file for Skeleton Processor 2 | # >> see docs/languages/sleigh.htm or sleigh.pdf for Sleigh syntax 3 | # Other language modules (see Ghidra/Processors) may provide better examples 4 | # when creating a new language module. 5 | 6 | define endian=little; 7 | define alignment=1; 8 | 9 | define space ram type=ram_space size=2 default; 10 | 11 | define space io type=ram_space size=2; 12 | define space register type=register_space size=1; 13 | 14 | define register offset=0x00 size=1 [ F A C B E D L H I R ]; 15 | define register offset=0x00 size=2 [ AF BC DE HL ]; 16 | define register offset=0x20 size=1 [ A_ F_ B_ C_ D_ E_ H_ L_ ]; # Alternate registers 17 | define register offset=0x20 size=2 [ AF_ BC_ DE_ HL_ ]; # Alternate registers 18 | 19 | define register offset=0x40 size=2 [ _ PC SP IX IY ]; 20 | 21 | define register offset=0x50 size=1 [ rCBAR rCBR rBBR ]; 22 | 23 | # Define context bits (if defined, size must be multiple of 4-bytes) 24 | define register offset=0xf0 size=4 contextreg; 25 | 26 | define context contextreg 27 | assume8bitIOSpace = (0,0) 28 | ; 29 | 30 | # Flag bits (?? manual is very confusing - could be typos!) 31 | @define C_flag "F[0,1]" # C: Carry 32 | @define N_flag "F[1,1]" # N: Add/Subtract 33 | @define PV_flag "F[2,1]" # PV: Parity/Overflow 34 | @define H_flag "F[4,1]" # H: Half Carry 35 | @define Z_flag "F[6,1]" # Z: Zero 36 | @define S_flag "F[7,1]" # S: Sign 37 | 38 | # Include contents of skel.sinc file 39 | @include "skel.sinc" 40 | -------------------------------------------------------------------------------- /data/sleighArgs.txt: -------------------------------------------------------------------------------- 1 | # Add sleigh compiler options to this file (one per line) which will 2 | # be used when compiling each language within this module. 3 | # All options should start with a '-' character. 4 | # 5 | # IMPORTANT: The -a option should NOT be specified 6 | # -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Demo 2 | This directory contains several JSON files that represent type inference results from `lighttpd`. 
3 | 4 | ## varType.json 5 | This file serves as an index mapping from Ghidra-decompiled variables to TypeForge-inferred types, as shown below: 6 | ```json 7 | "0x13834f" : { // Function Entry Address 8 | "Name" : "pcre_keyvalue_burl_percent_high_UTF8", // Function Name 9 | "Parameters" : { // Function Parameters 10 | "0x13834f:param_1" : { 11 | "Name" : "param_1", 12 | "desc" : "pointer", 13 | "TypeConstraint" : "TypeConstraint_451eec67" // Corresponding TypeConstraint 14 | } 15 | }, 16 | "LocalVariables" : { // Function Local Variables 17 | "0x13834f:stack[-0x28]" : { // Stack Variable (with stack offset) 18 | "Name" : "local_28", 19 | "desc" : "pointer", 20 | "TypeConstraint" : "TypeConstraint_8792a6aa" 21 | }, 22 | "0x13834f:RegUniq[0x138371]" : { // Register Variables (with address where this varnode is defined) 23 | "Name" : "iVar2", 24 | "desc" : "pointer", 25 | "TypeConstraint" : "TypeConstraint_8792a6aa" 26 | }, 27 | "0x13834f:RegUniq[0x1383d5]" : { 28 | "Name" : "lVar3", 29 | "desc" : "pointer", 30 | "TypeConstraint" : "TypeConstraint_c076aa34" 31 | } 32 | } 33 | } 34 | ``` 35 | 36 | ## xxx_final.json 37 | Indicates that this composite data type does not need to enter the refinement stage. 38 | ```json 39 | { 40 | "ForgedStruct_213" : { 41 | "desc" : "Structure", 42 | "layout" : { // Member Layout 43 | "0x4" : { 44 | "desc" : "Primitive", 45 | "size" : 4, 46 | "type" : "int", 47 | "name" : "field_0x4" 48 | }, 49 | "0x8" : { 50 | "desc" : "Pointer", 51 | "size" : 8, 52 | "type" : "void *", 53 | "name" : "ref_0x8_TypeConstraint_4c5a3461" 54 | }, 55 | // ... 56 | }, 57 | "ptrRef" : { // Pointer Reference Relationship 58 | "0x8" : { // Reference member offset 59 | "refSkt" : "TypeConstraint_4c5a3461", // Pointee TypeConstraint 60 | "ptrLevel" : 1 // Pointer level: 1 for *, 2 for **, ... 61 | }, 62 | "0x28" : { 63 | "refSkt" : "TypeConstraint_05d81b5b", 64 | "ptrLevel" : 1 65 | }, 66 | "0x30" : { 67 | "refSkt" : "TypeConstraint_05d81b5b", 68 | "ptrLevel" : 1 69 | } 70 | }, 71 | "nest" : { // Nested Relationship 72 | "0x28" : "TypeConstraint_b1d2b2a7" // Nested member offset 73 | }, 74 | "anonTypes" : { }, 75 | "decompilerInferred" : { 76 | "composite" : [ ], 77 | "array" : [ ], 78 | "primitive" : [ ] 79 | } 80 | } 81 | } 82 | ``` 83 | 84 | ## xxx_global_morph.json 85 | Indicates that refinement is needed, and the TypeConstraint as a whole can be interpreted as two different types. The `decompiledCode` field in the JSON corresponds to different variants of decompiled code. 86 | 87 | ## xxx_range_morph.json 88 | Indicates that refinement is needed, and certain member ranges within the TypeConstraint can be interpreted as multiple types. The member range is marked with the `"startOffset"` and `"endOffset"` fields. 89 | The `decompiledCode` field in the JSON corresponds to different variants of decompiled pseudocode. 90 | 91 | ## xxx_final_DI.json 92 | Indicates that this type is Decompiler-Inferred, typically representing library-defined composite data types, such as `sockaddr`. 
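To make the formats above concrete, here is a minimal exploration session. It assumes `jq` is installed and is run from this directory; the function address and constraint names come from the sample files shipped here.

```bash
# List the parameters and local variables TypeForge typed for the function
# at 0x13834f, together with their TypeConstraint ids.
jq '."0x13834f" | {Parameters, LocalVariables}' varType.json

# Inspect the member layout synthesized for one referenced constraint.
jq '.ForgedStruct_213.layout' TypeConstraint_f2b22cd2_final.json

# For a globally morphing constraint, list the candidate interpretations.
jq '.globalMorph | keys' TypeConstraint_1539724c_global_morph.json
```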
-------------------------------------------------------------------------------- /demo/TypeConstraint_1539724c_global_morph.json: -------------------------------------------------------------------------------- 1 | { 2 | "globalMorph" : { 3 | "dword" : { 4 | "desc" : "Primitive", 5 | "type" : "dword", 6 | "decompiledCode" : { 7 | "0x13dfe0" : "\nundefined8 fdevent_pipe_cloexec(dword *param_1,uint param_2)\n\n{\n int iVar1;\n undefined8 uVar2;\n \n iVar1 = pipe2((int *)param_1,0x80000);\n if (iVar1 == 0) {\nLAB_0013e05f:\n if (0x10000 < param_2) {\n fcntl64(param_1[1],0x407,param_2);\n }\n uVar2 = 0;\n }\n else {\n iVar1 = pipe((int *)param_1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(*param_1,2,1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(param_1[1],2,1);\n if (iVar1 == 0) goto LAB_0013e05f;\n }\n }\n uVar2 = 0xffffffff;\n }\n return uVar2;\n}\n\n", 8 | "0x15dcf7" : "\nundefined8 fdlog_pipe_init(undefined8 param_1,dword *param_2,undefined4 param_3)\n\n{\n undefined8 *puVar1;\n undefined8 uVar2;\n \n if ((DAT_00179068 & 3) == 0) {\n ck_realloc_u32(&fdlog_pipes,DAT_00179068,4,0x18);\n }\n puVar1 = (undefined8 *)((ulong)DAT_00179068 * 0x18 + fdlog_pipes);\n DAT_00179068 = DAT_00179068 + 1;\n *(dword *)((long)puVar1 + 0xc) = *param_2;\n *(undefined4 *)(puVar1 + 1) = param_3;\n puVar1[2] = log_monotonic_secs;\n uVar2 = fdlog_init(param_1,param_2[1],3);\n *puVar1 = uVar2;\n return *puVar1;\n}\n\n" 9 | } 10 | }, 11 | "ForgedStruct_8" : { 12 | "desc" : "Structure", 13 | "layout" : { 14 | "0x0" : { 15 | "desc" : "Primitive", 16 | "size" : 4, 17 | "type" : "dword", 18 | "name" : "field_0x0" 19 | }, 20 | "0x4" : { 21 | "desc" : "Primitive", 22 | "size" : 4, 23 | "type" : "int", 24 | "name" : "field_0x4" 25 | } 26 | }, 27 | "ptrRef" : { }, 28 | "nest" : { }, 29 | "anonTypes" : { }, 30 | "decompilerInferred" : { 31 | "composite" : [ ], 32 | "array" : [ ], 33 | "primitive" : [ ] 34 | }, 35 | "decompiledCode" : { 36 | "0x13dfe0" : "\nundefined8 fdevent_pipe_cloexec(ForgedStruct_8 *param_1,uint param_2)\n\n{\n int iVar1;\n undefined8 uVar2;\n \n iVar1 = pipe2((int *)param_1,0x80000);\n if (iVar1 == 0) {\nLAB_0013e05f:\n if (0x10000 < param_2) {\n fcntl64(param_1->field_0x4,0x407,param_2);\n }\n uVar2 = 0;\n }\n else {\n iVar1 = pipe((int *)param_1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(param_1->field_0x0,2,1);\n if (iVar1 == 0) {\n iVar1 = fcntl64(param_1->field_0x4,2,1);\n if (iVar1 == 0) goto LAB_0013e05f;\n }\n }\n uVar2 = 0xffffffff;\n }\n return uVar2;\n}\n\n", 37 | "0x15dcf7" : "\nundefined8 fdlog_pipe_init(undefined8 param_1,ForgedStruct_8 *param_2,undefined4 param_3)\n\n{\n undefined8 *puVar1;\n undefined8 uVar2;\n \n if ((DAT_00179068 & 3) == 0) {\n ck_realloc_u32(&fdlog_pipes,DAT_00179068,4,0x18);\n }\n puVar1 = (undefined8 *)((ulong)DAT_00179068 * 0x18 + fdlog_pipes);\n DAT_00179068 = DAT_00179068 + 1;\n *(dword *)((long)puVar1 + 0xc) = param_2->field_0x0;\n *(undefined4 *)(puVar1 + 1) = param_3;\n puVar1[2] = log_monotonic_secs;\n uVar2 = fdlog_init(param_1,param_2->field_0x4,3);\n *puVar1 = uVar2;\n return *puVar1;\n}\n\n" 38 | } 39 | } 40 | } 41 | } -------------------------------------------------------------------------------- /demo/TypeConstraint_746192c2_range_morph.json: -------------------------------------------------------------------------------- 1 | { 2 | "rangeMorph" : [ { 3 | "startOffset" : "0x8", 4 | "endOffset" : "0x10", 5 | "types" : { 6 | "ForgedStruct_87" : { 7 | "desc" : "Structure", 8 | "layout" : { 9 | "0x0" : { 10 | "desc" : "Primitive", 11 | "size" : 8, 12 | "type" : 
"qword", 13 | "name" : "field_0x0" 14 | }, 15 | "0x8" : { 16 | "desc" : "Pointer", 17 | "size" : 8, 18 | "type" : "void *", 19 | "name" : "field_0x8" 20 | } 21 | }, 22 | "ptrRef" : { }, 23 | "nest" : { }, 24 | "anonTypes" : { }, 25 | "decompilerInferred" : { 26 | "composite" : [ ], 27 | "array" : [ ], 28 | "primitive" : [ ] 29 | }, 30 | "decompiledCode" : { 31 | "0x1391bf" : "\nundefined4 chunk_buffer_prepare_append(void **param_1,ulong param_2)\n\n{\n void *pvVar1;\n void *__src;\n void *pvVar2;\n uint uVar3;\n undefined4 uVar4;\n ulong uVar5;\n ForgedStruct_87 *ppvVar6;\n undefined4 local_10;\n \n uVar3 = buffer_string_space(param_1);\n if (uVar3 < param_2) {\n if (*(int *)(param_1 + 1) == 0) {\n uVar5 = 1;\n }\n else {\n uVar5 = (ulong)*(uint *)(param_1 + 1);\n }\n ppvVar6 = (ForgedStruct_87 *)chunk_buffer_acquire_sz(param_2 + uVar5);\n pvVar1 = param_1[1];\n __src = *param_1;\n pvVar2 = ppvVar6->field_0x8;\n *param_1 = (void *)ppvVar6->field_0x0;\n param_1[1] = pvVar2;\n ppvVar6->field_0x0 = (qword)__src;\n ppvVar6->field_0x8 = pvVar1;\n local_10 = SUB84(pvVar1,0);\n *(undefined4 *)(param_1 + 1) = local_10;\n if (*(int *)(param_1 + 1) != 0) {\n memcpy(*param_1,__src,(ulong)pvVar1 & 0xffffffff);\n }\n chunk_buffer_release(ppvVar6);\n }\n uVar4 = buffer_string_space(param_1);\n return uVar4;\n}\n\n", 32 | "0x1308c8" : "\nvoid buffer_clear(ForgedStruct_87 *param_1)\n\n{\n *(undefined4 *)¶m_1->field_0x8 = 0;\n return;\n}\n\n", 33 | "0x13876a" : "\nvoid buffer_append_buffer(undefined8 param_1,ForgedStruct_87 *param_2)\n\n{\n undefined4 uVar1;\n \n uVar1 = buffer_clen(param_2);\n buffer_append_string_len(param_1,param_2->field_0x0,uVar1);\n return;\n}\n\n", 34 | "0x13913a" : "\nvoid chunk_buffer_yield(qword *param_1)\n\n{\n void *pvVar1;\n qword qVar2;\n void *pvVar3;\n ForgedStruct_87 *puVar4;\n \n if ((ulong)*(uint *)((long)param_1 + 0xc) != (chunk_buf_sz | 1)) {\n puVar4 = (ForgedStruct_87 *)chunk_buffer_acquire_sz(chunk_buf_sz);\n pvVar1 = (void *)param_1[1];\n qVar2 = *param_1;\n pvVar3 = puVar4->field_0x8;\n *param_1 = puVar4->field_0x0;\n param_1[1] = (qword)pvVar3;\n puVar4->field_0x0 = qVar2;\n puVar4->field_0x8 = pvVar1;\n chunk_buffer_release(puVar4);\n }\n return;\n}\n\n", 35 | "0x1386b6" : "\nbool buffer_is_blank(ForgedStruct_87 *param_1)\n\n{\n return *(uint *)¶m_1->field_0x8 < 2;\n}\n\n", 36 | "0x130996" : "\nvoid buffer_move(ForgedStruct_87 *param_1,undefined8 *param_2)\n\n{\n void *pvVar1;\n qword qVar2;\n void *pvVar3;\n \n buffer_clear(param_1);\n pvVar1 = (void *)param_2[1];\n qVar2 = *param_2;\n pvVar3 = param_1->field_0x8;\n *param_2 = param_1->field_0x0;\n param_2[1] = pvVar3;\n param_1->field_0x0 = qVar2;\n param_1->field_0x8 = pvVar1;\n return;\n}\n\n" 37 | } 38 | }, 39 | "ForgedStruct_86" : { 40 | "desc" : "Structure", 41 | "layout" : { 42 | "0x0" : { 43 | "desc" : "Primitive", 44 | "size" : 8, 45 | "type" : "qword", 46 | "name" : "field_0x0" 47 | }, 48 | "0x8" : { 49 | "desc" : "Primitive", 50 | "size" : 4, 51 | "type" : "dword", 52 | "name" : "field_0x8" 53 | } 54 | }, 55 | "ptrRef" : { }, 56 | "nest" : { }, 57 | "anonTypes" : { }, 58 | "decompilerInferred" : { 59 | "composite" : [ ], 60 | "array" : [ ], 61 | "primitive" : [ ] 62 | }, 63 | "decompiledCode" : { 64 | "0x1391bf" : "\nundefined4 chunk_buffer_prepare_append(void **param_1,ulong param_2)\n\n{\n void *pvVar1;\n void *__src;\n void *pvVar2;\n uint uVar3;\n undefined4 uVar4;\n ulong uVar5;\n ForgedStruct_86 *ppvVar6;\n undefined4 local_10;\n \n uVar3 = buffer_string_space(param_1);\n if (uVar3 < param_2) 
{\n if (*(int *)(param_1 + 1) == 0) {\n uVar5 = 1;\n }\n else {\n uVar5 = (ulong)*(uint *)(param_1 + 1);\n }\n ppvVar6 = (ForgedStruct_86 *)chunk_buffer_acquire_sz(param_2 + uVar5);\n pvVar1 = param_1[1];\n __src = *param_1;\n pvVar2 = *(void **)&ppvVar6->field_0x8;\n *param_1 = (void *)ppvVar6->field_0x0;\n param_1[1] = pvVar2;\n ppvVar6->field_0x0 = (qword)__src;\n *(void **)&ppvVar6->field_0x8 = pvVar1;\n local_10 = SUB84(pvVar1,0);\n *(undefined4 *)(param_1 + 1) = local_10;\n if (*(int *)(param_1 + 1) != 0) {\n memcpy(*param_1,__src,(ulong)pvVar1 & 0xffffffff);\n }\n chunk_buffer_release(ppvVar6);\n }\n uVar4 = buffer_string_space(param_1);\n return uVar4;\n}\n\n", 65 | "0x1308c8" : "\nvoid buffer_clear(ForgedStruct_86 *param_1)\n\n{\n param_1->field_0x8 = 0;\n return;\n}\n\n", 66 | "0x13876a" : "\nvoid buffer_append_buffer(undefined8 param_1,ForgedStruct_86 *param_2)\n\n{\n undefined4 uVar1;\n \n uVar1 = buffer_clen(param_2);\n buffer_append_string_len(param_1,param_2->field_0x0,uVar1);\n return;\n}\n\n", 67 | "0x13913a" : "\nvoid chunk_buffer_yield(qword *param_1)\n\n{\n qword qVar1;\n qword qVar2;\n qword qVar3;\n ForgedStruct_86 *puVar4;\n \n if ((ulong)*(uint *)((long)param_1 + 0xc) != (chunk_buf_sz | 1)) {\n puVar4 = (ForgedStruct_86 *)chunk_buffer_acquire_sz(chunk_buf_sz);\n qVar1 = param_1[1];\n qVar2 = *param_1;\n qVar3 = *(qword *)&puVar4->field_0x8;\n *param_1 = puVar4->field_0x0;\n param_1[1] = qVar3;\n puVar4->field_0x0 = qVar2;\n *(qword *)&puVar4->field_0x8 = qVar1;\n chunk_buffer_release(puVar4);\n }\n return;\n}\n\n", 68 | "0x1386b6" : "\nbool buffer_is_blank(ForgedStruct_86 *param_1)\n\n{\n return param_1->field_0x8 < 2;\n}\n\n", 69 | "0x130996" : "\nvoid buffer_move(ForgedStruct_86 *param_1,undefined8 *param_2)\n\n{\n undefined8 uVar1;\n qword qVar2;\n undefined8 uVar3;\n \n buffer_clear(param_1);\n uVar1 = param_2[1];\n qVar2 = *param_2;\n uVar3 = *(undefined8 *)&param_1->field_0x8;\n *param_2 = param_1->field_0x0;\n param_2[1] = uVar3;\n param_1->field_0x0 = qVar2;\n *(undefined8 *)&param_1->field_0x8 = uVar1;\n return;\n}\n\n" 70 | } 71 | } 72 | } 73 | } ] 74 | } -------------------------------------------------------------------------------- /demo/TypeConstraint_f2b22cd2_final.json: -------------------------------------------------------------------------------- 1 | { 2 | "ForgedStruct_213" : { 3 | "desc" : "Structure", 4 | "layout" : { 5 | "0x4" : { 6 | "desc" : "Primitive", 7 | "size" : 4, 8 | "type" : "int", 9 | "name" : "field_0x4" 10 | }, 11 | "0x8" : { 12 | "desc" : "Pointer", 13 | "size" : 8, 14 | "type" : "void *", 15 | "name" : "ref_0x8_TypeConstraint_4c5a3461" 16 | }, 17 | "0x18" : { 18 | "desc" : "Primitive", 19 | "size" : 8, 20 | "type" : "qword", 21 | "name" : "field_0x18" 22 | }, 23 | "0x20" : { 24 | "desc" : "Primitive", 25 | "size" : 8, 26 | "type" : "qword", 27 | "name" : "field_0x20" 28 | }, 29 | "0x28" : { 30 | "desc" : "Pointer", 31 | "size" : 8, 32 | "type" : "void *", 33 | "name" : "ref_0x28_TypeConstraint_05d81b5b" 34 | }, 35 | "0x30" : { 36 | "desc" : "Pointer", 37 | "size" : 8, 38 | "type" : "void *", 39 | "name" : "ref_0x30_TypeConstraint_05d81b5b" 40 | } 41 | }, 42 | "ptrRef" : { 43 | "0x8" : { 44 | "refSkt" : "TypeConstraint_4c5a3461", 45 | "ptrLevel" : 1 46 | }, 47 | "0x28" : { 48 | "refSkt" : "TypeConstraint_05d81b5b", 49 | "ptrLevel" : 1 50 | }, 51 | "0x30" : { 52 | "refSkt" : "TypeConstraint_05d81b5b", 53 | "ptrLevel" : 1 54 | } 55 | }, 56 | "nest" : { 57 | "0x28" : "TypeConstraint_b1d2b2a7" 58 | }, 59 | "anonTypes" : { }, 60 | 
"decompilerInferred" : { 61 | "composite" : [ ], 62 | "array" : [ ], 63 | "primitive" : [ ] 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /demo/TypeConstraint_f8591481_final_DI.json: -------------------------------------------------------------------------------- 1 | { 2 | "desc" : "DecompilerInferred", 3 | "decompilerInferred" : { 4 | "composite" : [ "sockaddr" ], 5 | "array" : [ ], 6 | "primitive" : [ ] 7 | } 8 | } -------------------------------------------------------------------------------- /extension.properties: -------------------------------------------------------------------------------- 1 | name=@typeforge@ 2 | description=TypeForge aims to recover composite data types (such as structures, unions, etc.) in stripped binaries. Compared to existing methods, TypeForge provides higher efficiency and accuracy. 3 | author=h1k0naka 4 | createdOn=2024-11-10 5 | version=@0.1.0@ 6 | -------------------------------------------------------------------------------- /ghidra_scripts/TypeForge.java: -------------------------------------------------------------------------------- 1 | import ghidra.app.script.GhidraScript; 2 | import ghidra.program.model.lang.Language; 3 | import ghidra.program.model.listing.Function; 4 | 5 | import typeforge.analyzer.Generator; 6 | import typeforge.analyzer.ReTyper; 7 | import typeforge.analyzer.TypeAnalyzer; 8 | import typeforge.base.graph.CallGraph; 9 | import typeforge.utils.*; 10 | import org.apache.commons.io.FileUtils; 11 | 12 | import java.io.IOException; 13 | import java.util.List; 14 | import java.util.Set; 15 | import java.io.File; 16 | 17 | public class TypeForge extends GhidraScript { 18 | @Override 19 | protected void run() throws Exception { 20 | 21 | println("====================== TypeForge ======================"); 22 | 23 | if(!Logging.init()) { 24 | return; 25 | } 26 | if (!prepare()) { 27 | return; 28 | } 29 | 30 | List mainFunc = Global.currentProgram.getListing().getGlobalFunctions("main"); 31 | DataTypeHelper.prepare(); 32 | 33 | if (mainFunc.isEmpty()) { 34 | Logging.warn("TypeForge","No main function found"); 35 | return; 36 | } 37 | Logging.info("TypeForge","Number of main functions: " + mainFunc.size()); 38 | 39 | long startAnalysisTime = System.currentTimeMillis(); 40 | 41 | // Function node and CallGraph Statistics 42 | Set meaningfulFunctions = FunctionHelper.getMeaningfulFunctions(); 43 | Logging.info("TypeForge","Number of meaningful functions: " + meaningfulFunctions.size()); 44 | 45 | CallGraph cg = CallGraph.getCallGraph(); 46 | 47 | Global.typeAnalysisBeginTime = System.currentTimeMillis(); 48 | TypeAnalyzer analyzer = new TypeAnalyzer(cg); 49 | analyzer.run(); 50 | Global.typeAnalysisEndTime = System.currentTimeMillis(); 51 | 52 | Generator generator = new Generator(analyzer.interSolver.typeHintCollector, 53 | analyzer.interSolver.exprManager); 54 | generator.run(); 55 | generator.explore(); 56 | 57 | Global.retypingBeginTime = System.currentTimeMillis(); 58 | ReTyper reTyper = new ReTyper(generator.getFinalSkeletons()); 59 | reTyper.run(); 60 | Global.retypingEndTime = System.currentTimeMillis(); 61 | 62 | Logging.info("TypeForge","Type Analysis time: " + (Global.typeAnalysisEndTime - Global.typeAnalysisBeginTime) / 1000.00 + "s"); 63 | Logging.info("TypeForge","ReTyping time: " + (Global.retypingEndTime - Global.retypingBeginTime) / 1000.00 + "s"); 64 | Logging.info("TypeForge","Total time: " + (Global.retypingEndTime - Global.typeAnalysisBeginTime) / 1000.00 + "s"); 
65 | Logging.info("TypeForge", "Prepare Analysis time: " + (Global.prepareAnalysisEndTime - Global.prepareAnalysisBeginTime) / 1000.00 + "s"); 66 | } 67 | 68 | protected boolean prepare() { 69 | parseArgs(); 70 | prepareOutputDirectory(); 71 | 72 | Global.currentProgram = this.currentProgram; 73 | Global.flatAPI = this; 74 | Global.ghidraScript = this; 75 | 76 | Language language = this.currentProgram.getLanguage(); 77 | if (language == null) { 78 | Logging.error("TypeForge","Language not found"); 79 | return false; 80 | } else { 81 | Logging.info("TypeForge","Language: " + language.getLanguageID()); 82 | return true; 83 | } 84 | } 85 | 86 | protected void parseArgs() { 87 | String[] args = getScriptArgs(); 88 | for (String arg : args) { 89 | Logging.info("TypeForge", "Arg: " + arg); 90 | // split the arguments string by "=" 91 | String[] argParts = arg.split("="); 92 | if (argParts.length != 2) { 93 | Logging.error("TypeForge", "Invalid argument: " + arg); 94 | System.exit(1); 95 | } 96 | 97 | String key = argParts[0]; 98 | String value = argParts[1]; 99 | 100 | if (key.equals("output")) { 101 | Global.outputDirectory = value; 102 | } else if (key.equals("start_addr")) { 103 | Global.startAddress = Long.decode(value); 104 | } else { 105 | Logging.error("TypeForge", "Invalid argument: " + arg); 106 | System.exit(1); 107 | } 108 | } 109 | } 110 | 111 | protected void prepareOutputDirectory() { 112 | if (Global.outputDirectory == null) { 113 | Logging.error("TypeForge","Output directory not specified"); 114 | System.exit(1); 115 | } 116 | 117 | File outputDir = new File(Global.outputDirectory); 118 | // If the output directory does not exist, create it 119 | if (!outputDir.exists()) { 120 | if (!outputDir.mkdirs()) { 121 | Logging.error("TypeForge", "Failed to create output directory"); 122 | System.exit(1); 123 | } 124 | } else { 125 | try { 126 | FileUtils.cleanDirectory(outputDir); 127 | } catch (IOException e) { 128 | throw new RuntimeException(e); 129 | } 130 | } 131 | } 132 | } -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | # This is normally unused 84 | # shellcheck disable=SC2034 85 | APP_BASE_NAME=${0##*/} 86 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 87 | APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit 88 | 89 | # Use the maximum available, or set MAX_FD != -1 to use that value. 
90 | MAX_FD=maximum 91 | 92 | warn () { 93 | echo "$*" 94 | } >&2 95 | 96 | die () { 97 | echo 98 | echo "$*" 99 | echo 100 | exit 1 101 | } >&2 102 | 103 | # OS specific support (must be 'true' or 'false'). 104 | cygwin=false 105 | msys=false 106 | darwin=false 107 | nonstop=false 108 | case "$( uname )" in #( 109 | CYGWIN* ) cygwin=true ;; #( 110 | Darwin* ) darwin=true ;; #( 111 | MSYS* | MINGW* ) msys=true ;; #( 112 | NONSTOP* ) nonstop=true ;; 113 | esac 114 | 115 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 116 | 117 | 118 | # Determine the Java command to use to start the JVM. 119 | if [ -n "$JAVA_HOME" ] ; then 120 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 121 | # IBM's JDK on AIX uses strange locations for the executables 122 | JAVACMD=$JAVA_HOME/jre/sh/java 123 | else 124 | JAVACMD=$JAVA_HOME/bin/java 125 | fi 126 | if [ ! -x "$JAVACMD" ] ; then 127 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 128 | 129 | Please set the JAVA_HOME variable in your environment to match the 130 | location of your Java installation." 131 | fi 132 | else 133 | JAVACMD=java 134 | if ! command -v java >/dev/null 2>&1 135 | then 136 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | fi 142 | 143 | # Increase the maximum file descriptors if we can. 144 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 145 | case $MAX_FD in #( 146 | max*) 147 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 148 | # shellcheck disable=SC2039,SC3045 149 | MAX_FD=$( ulimit -H -n ) || 150 | warn "Could not query maximum file descriptor limit" 151 | esac 152 | case $MAX_FD in #( 153 | '' | soft) :;; #( 154 | *) 155 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 156 | # shellcheck disable=SC2039,SC3045 157 | ulimit -n "$MAX_FD" || 158 | warn "Could not set maximum file descriptor limit to $MAX_FD" 159 | esac 160 | fi 161 | 162 | # Collect all arguments for the java command, stacking in reverse order: 163 | # * args from the command line 164 | # * the main class name 165 | # * -classpath 166 | # * -D...appname settings 167 | # * --module-path (only if needed) 168 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 169 | 170 | # For Cygwin or MSYS, switch paths to Windows format before running java 171 | if "$cygwin" || "$msys" ; then 172 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 173 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 174 | 175 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 176 | 177 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 178 | for arg do 179 | if 180 | case $arg in #( 181 | -*) false ;; # don't mess with options #( 182 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 183 | [ -e "$t" ] ;; #( 184 | *) false ;; 185 | esac 186 | then 187 | arg=$( cygpath --path --ignore --mixed "$arg" ) 188 | fi 189 | # Roll the args list around exactly as many times as the number of 190 | # args, so each arg winds up back in the position where it started, but 191 | # possibly modified. 192 | # 193 | # NB: a `for` loop captures its iteration list before it begins, so 194 | # changing the positional parameters here affects neither the number of 195 | # iterations, nor the values presented in `arg`. 
196 | shift # remove old arg 197 | set -- "$@" "$arg" # push replacement arg 198 | done 199 | fi 200 | 201 | 202 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 203 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 204 | 205 | # Collect all arguments for the java command: 206 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 207 | # and any embedded shellness will be escaped. 208 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 209 | # treated as '${Hostname}' itself on the command line. 210 | 211 | set -- \ 212 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 213 | -classpath "$CLASSPATH" \ 214 | org.gradle.wrapper.GradleWrapperMain \ 215 | "$@" 216 | 217 | # Stop when "xargs" is not available. 218 | if ! command -v xargs >/dev/null 2>&1 219 | then 220 | die "xargs is not available" 221 | fi 222 | 223 | # Use "xargs" to parse quoted args. 224 | # 225 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 226 | # 227 | # In Bash we could simply go: 228 | # 229 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 230 | # set -- "${ARGS[@]}" "$@" 231 | # 232 | # but POSIX shell has neither arrays nor command substitution, so instead we 233 | # post-process each arg (as a line of input to sed) to backslash-escape any 234 | # character that might be a shell metacharacter, then use eval to reverse 235 | # that process (while maintaining the separation between arguments), and wrap 236 | # the whole thing up as a single "set" statement. 237 | # 238 | # This will of course break if any of these variables contains a newline or 239 | # an unmatched quote. 240 | # 241 | 242 | eval "set -- $( 243 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 244 | xargs -n1 | 245 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 246 | tr '\n' ' ' 247 | )" '"$@"' 248 | 249 | exec "$JAVACMD" "$@" 250 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%"=="" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%"=="" set DIRNAME=. 29 | @rem This is normally unused 30 | set APP_BASE_NAME=%~n0 31 | set APP_HOME=%DIRNAME% 32 | 33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 
34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 35 | 36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 38 | 39 | @rem Find java.exe 40 | if defined JAVA_HOME goto findJavaFromJavaHome 41 | 42 | set JAVA_EXE=java.exe 43 | %JAVA_EXE% -version >NUL 2>&1 44 | if %ERRORLEVEL% equ 0 goto execute 45 | 46 | echo. 47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 48 | echo. 49 | echo Please set the JAVA_HOME variable in your environment to match the 50 | echo location of your Java installation. 51 | 52 | goto fail 53 | 54 | :findJavaFromJavaHome 55 | set JAVA_HOME=%JAVA_HOME:"=% 56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 57 | 58 | if exist "%JAVA_EXE%" goto execute 59 | 60 | echo. 61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 62 | echo. 63 | echo Please set the JAVA_HOME variable in your environment to match the 64 | echo location of your Java installation. 65 | 66 | goto fail 67 | 68 | :execute 69 | @rem Setup the command line 70 | 71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 72 | 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if %ERRORLEVEL% equ 0 goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | set EXIT_CODE=%ERRORLEVEL% 85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 87 | exit /b %EXIT_CODE% 88 | 89 | :mainEnd 90 | if "%OS%"=="Windows_NT" endlocal 91 | 92 | :omega 93 | -------------------------------------------------------------------------------- /imgs/TypeForge_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/TypeForge_overview.png -------------------------------------------------------------------------------- /imgs/figure_develop-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-1.png -------------------------------------------------------------------------------- /imgs/figure_develop-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-2.png -------------------------------------------------------------------------------- /imgs/figure_develop-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-3.png -------------------------------------------------------------------------------- /imgs/figure_develop-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/imgs/figure_develop-4.png -------------------------------------------------------------------------------- /lib/README.txt: 
-------------------------------------------------------------------------------- 1 | The "lib" directory is intended to hold Jar files which this module is dependent upon. Jar files 2 | may be placed in this directory manually, or automatically by Gradle (resolved from Maven repositories) via the dependencies block 3 | of this module's build.gradle file. -------------------------------------------------------------------------------- /lib/jackson-annotations-2.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jackson-annotations-2.13.0.jar -------------------------------------------------------------------------------- /lib/jackson-core-2.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jackson-core-2.13.0.jar -------------------------------------------------------------------------------- /lib/jackson-databind-2.13.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jackson-databind-2.13.0.jar -------------------------------------------------------------------------------- /lib/jgrapht-core-1.5.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jgrapht-core-1.5.1.jar -------------------------------------------------------------------------------- /lib/jheaps-0.13.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/noobone123/TypeForge/d801b9d9fed2b14ba9e8d7380a02665f4ecc395c/lib/jheaps-0.13.jar -------------------------------------------------------------------------------- /os/linux_x86_64/README.txt: -------------------------------------------------------------------------------- 1 | The "os/linux_x86_64" directory is intended to hold Linux native binaries 2 | which this module is dependent upon. This directory may be eliminated for a specific 3 | module if native binaries are not provided for the corresponding platform. 4 | -------------------------------------------------------------------------------- /os/mac_x86_64/README.txt: -------------------------------------------------------------------------------- 1 | The "os/mac_x86_64" directory is intended to hold macOS (OS X) native binaries 2 | which this module is dependent upon. This directory may be eliminated for a specific 3 | module if native binaries are not provided for the corresponding platform. 4 | -------------------------------------------------------------------------------- /os/win_x86_64/README.txt: -------------------------------------------------------------------------------- 1 | The "os/win_x86_64" directory is intended to hold MS Windows native binaries (.exe) 2 | which this module is dependent upon. This directory may be eliminated for a specific 3 | module if native binaries are not provided for the corresponding platform.
4 | -------------------------------------------------------------------------------- /scripts/.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /scripts/GraphExplorer.py: -------------------------------------------------------------------------------- 1 | import pydot 2 | import argparse 3 | import os 4 | import networkx as nx 5 | 6 | def load_graph(dot_file): 7 | """ 8 | Load a graph from a DOT file and return a networkx Graph object. 9 | """ 10 | graph = pydot.graph_from_dot_file(dot_file)[0] 11 | 12 | G = nx.Graph() 13 | 14 | for edge in graph.get_edges(): 15 | src = edge.get_source() 16 | dst = edge.get_destination() 17 | label = edge.get_attributes().get('label', '') # Safely get label attribute 18 | 19 | # Remove quotes from the node names 20 | src = src.replace('"', '') 21 | dst = dst.replace('"', '') 22 | 23 | # Add edges to the networkx graph (as undirected edges) 24 | G.add_edge(src, dst, label=label) 25 | G.add_edge(dst, src, label=label) # Add the reverse edge as well for undirected graph 26 | print(f"Added edge: {src} <-> {dst} ({label})") 27 | 28 | return G 29 | 30 | def load_graphs(dot_dir): 31 | """ 32 | Load all .dot files in the specified directory and return a big networkx Graph object. 33 | """ 34 | big_G = nx.Graph() 35 | 36 | print(f"Loading graphs from {dot_dir}") 37 | for filename in os.listdir(dot_dir): 38 | if filename.endswith(".dot"): 39 | dot_file = os.path.join(dot_dir, filename) 40 | graph = pydot.graph_from_dot_file(dot_file)[0] 41 | 42 | for edge in graph.get_edges(): 43 | src = edge.get_source().replace('"', '') 44 | dst = edge.get_destination().replace('"', '') 45 | label = edge.get_attributes().get('label', '') 46 | 47 | # Add edges to the networkx graph (as undirected edges) 48 | big_G.add_edge(src, dst, label=label) 49 | big_G.add_edge(dst, src, label=label) # Add the reverse edge as well for undirected graph 50 | print(f"Added edge: {src} <-> {dst} ({label})") 51 | 52 | print(f"Loaded graph from {dot_file}") 53 | 54 | return big_G 55 | 56 | 57 | def find_shortest_path(G, src, dst): 58 | """ 59 | Find the shortest path from source node to destination node in the graph. 60 | Return the shortest path as a list of nodes and edges. 61 | """ 62 | try: 63 | shortest_path_nodes = nx.shortest_path(G, source=src, target=dst) 64 | shortest_path_edges = list(zip(shortest_path_nodes[:-1], shortest_path_nodes[1:])) 65 | return shortest_path_nodes, shortest_path_edges 66 | except nx.NetworkXNoPath: 67 | return None, None 68 | 69 | def print_colored_path(shortest_path_nodes, shortest_path_edges, G): 70 | """ 71 | Print the shortest path with edges colored and formatted. 
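    Nodes are printed in blue and edge labels in red using ANSI escape codes, so the output is best viewed in a color-capable terminal.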
72 | """ 73 | if shortest_path_nodes: 74 | for i, node in enumerate(shortest_path_nodes): 75 | if i < len(shortest_path_edges): 76 | edge = shortest_path_edges[i] 77 | edge_label = G.edges[edge]['label'] 78 | print(f"\033[34m{node}\033[0m --- (\033[31m{edge_label}\033[0m) --- \033[34m{edge[1]}\033[0m") 79 | else: 80 | print(f"\033[34m{node}\033[0m") 81 | else: 82 | print("No path found.") 83 | 84 | 85 | def main(G: nx.Graph): 86 | while True: 87 | src_node = input("Enter source node: ").strip() 88 | if src_node not in G.nodes: 89 | print("Node not found in the graph.") 90 | continue 91 | 92 | dst_node = input("Enter destination node: ").strip() 93 | if dst_node not in G.nodes: 94 | print("Node not found in the graph.") 95 | continue 96 | 97 | shortest_path_nodes, shortest_path_edges = find_shortest_path(G, src_node, dst_node) 98 | print_colored_path(shortest_path_nodes, shortest_path_edges, G) 99 | 100 | 101 | if __name__ == "__main__": 102 | # Parse command line arguments 103 | parser = argparse.ArgumentParser(description="Graph Explorer") 104 | 105 | # -p: dot file path, specify 1 dot file to load and explore 106 | # -d: dot files directory, specify a directory to load and explore all dot files in it 107 | # -p and -d are mutually exclusive 108 | group = parser.add_mutually_exclusive_group(required=True) 109 | group.add_argument("-p", "--path", help="Path to the DOT file") 110 | group.add_argument("-d", "--dir", help="Path to the directory containing DOT files") 111 | 112 | args = parser.parse_args() 113 | 114 | if args.path: 115 | G = load_graph(args.path) 116 | elif args.dir: 117 | G = load_graphs(args.dir) 118 | 119 | main(G) 120 | 121 | -------------------------------------------------------------------------------- /scripts/GroundTruthExtractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import argparse 4 | import shutil 5 | 6 | # Modify your Ghidra project name here 7 | project_name = "binaries_osprey" 8 | script_name = "GroundTruth.java" 9 | 10 | def is_elf(file_path): 11 | with open(file_path, 'rb') as f: 12 | magic_number = f.read(4) 13 | return magic_number == b'\x7fELF' 14 | 15 | def analyze_elf_files(ghidra_path, project_dir, binary_dir, output_dir): 16 | for root, dirs, files in os.walk(binary_dir): 17 | for file in files: 18 | file_path = os.path.join(root, file) 19 | if is_elf(file_path): 20 | ghidra_headless = os.path.join(ghidra_path, "support", "analyzeHeadless") 21 | output_subdir = os.path.join(output_dir, file) 22 | if not os.path.exists(output_subdir): 23 | os.makedirs(output_subdir) 24 | else: 25 | shutil.rmtree(output_subdir) 26 | os.makedirs(output_subdir) 27 | analyze_file(ghidra_headless, project_dir, file_path, output_subdir) 28 | 29 | def analyze_file(headless_path, project_dir, binary_path, output_dir): 30 | command = [ 31 | headless_path, 32 | project_dir, 33 | project_name, 34 | "-deleteProject", 35 | "-import", 36 | binary_path, 37 | "-postScript", 38 | script_name, 39 | f"output={output_dir}" 40 | ] 41 | print(f"Analyzing {binary_path}...") 42 | try: 43 | print(f"Running command: {command}") 44 | subprocess.run(command, check=True, env=os.environ.copy()) 45 | print(f"Analysis complete for {binary_path}") 46 | except subprocess.CalledProcessError as e: 47 | print(f"Failed to analyze {binary_path}: {e}") 48 | exit(1) 49 | 50 | def main(): 51 | parser = argparse.ArgumentParser(description="Analyze ELF files with Ghidra analyzeHeadless.") 52 | parser.add_argument("--ghidra", 
required=True, help="Path to the Ghidra Home.") 53 | parser.add_argument("--project_dir", required=True, help="Directory for the Ghidra project.") 54 | parser.add_argument("--binary_dir", required=True, help="Directory containing ELF binaries.") 55 | parser.add_argument("--output_dir", required=True, help="Directory to store the output.") 56 | 57 | args = parser.parse_args() 58 | 59 | # if the project already exists, delete project_name.rep and project_name.gpr 60 | if os.path.exists(os.path.join(args.project_dir, project_name + ".rep")): 61 | shutil.rmtree(os.path.join(args.project_dir, project_name + ".rep")) 62 | if os.path.exists(os.path.join(args.project_dir, project_name + ".gpr")): 63 | os.remove(os.path.join(args.project_dir, project_name + ".gpr")) 64 | 65 | # if output_dir already exists, delete it 66 | if os.path.exists(args.output_dir): 67 | shutil.rmtree(args.output_dir) 68 | os.makedirs(args.output_dir) 69 | 70 | analyze_elf_files(args.ghidra, args.project_dir, args.binary_dir, args.output_dir) 71 | 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # TypeForge (Python Scripts) 2 | 3 | ## Setup 4 | TypeForge uses `uv` to manage Python packages and dependencies. To set up, you should: 5 | 1. Install the Python package manager `uv` 6 | 2. Create a new virtual environment and install the dependencies 7 | 8 | ```bash 9 | uv venv .venv 10 | uv pip install --requirement requirements.txt 11 | ``` 12 | 3. Activate the virtual environment 13 | 14 | ```bash 15 | source .venv/bin/activate 16 | ``` 17 | 18 | If you want to add other packages, just run: 19 | ```bash 20 | uv pip install [package] 21 | uv pip freeze > requirements.txt 22 | # `uv add` needs a `pyproject.toml`, which this project does not provide. 23 | ``` 24 | 25 | ## Type Inference (Batch Mode) 26 | If a series of binaries needs to be processed, you should: 27 | 1. Update `config.yml` to specify the required metadata. 28 | 2. Prepare the dataset: 29 | The directory structure for each project in the dataset should follow this format: `dataset_root/project_name`. Each project should contain pairs of binaries: one with debug symbols (named `binary_name`, used only for Ground Truth Extraction) and one stripped binary (named `binary_name.strip`, used during Type Inference). 30 | 3. Update the `projects` field in `config.yml` to include all projects you want to process. All binaries under these projects will be processed. 31 | 4. Run the script: 32 | 33 | ```bash 34 | python3 ./TypeInference.py 35 | ``` 36 | 37 | ## Extract Ground Truth (Batch Mode) 38 | The preparation steps are the same as above.
You only need to modify the following code in `TypeInference.py`: 39 | 40 | ```python 41 | # Set `infer = False` to collect ground truth instead of performing inference 42 | run_ghidra_headless_on_project(pathlib.Path(dataset_root) / proj, infer = False) 43 | ``` -------------------------------------------------------------------------------- /scripts/TypeInference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import yaml 4 | import subprocess 5 | import re 6 | 7 | def check_directories_exist(): 8 | for proj_name in os.listdir(dataset_root): 9 | proj_path = pathlib.Path(dataset_root) / proj_name 10 | for binary_name in os.listdir(proj_path): 11 | if ".strip" not in binary_name: 12 | binary_gt_dir = pathlib.Path(gt_root) / proj_name / binary_name 13 | if not binary_gt_dir.exists(): 14 | print(f"Warning: {binary_gt_dir} does not exist") 15 | # create the directory 16 | os.makedirs(binary_gt_dir) 17 | print(f"Created {binary_gt_dir}") 18 | else: 19 | print(f"Found {binary_gt_dir}") 20 | 21 | binary_infer_dir = pathlib.Path(infer_root) / proj_name / binary_name 22 | if not binary_infer_dir.exists(): 23 | print(f"Warning: {binary_infer_dir} does not exist") 24 | # create the directory 25 | os.makedirs(binary_infer_dir) 26 | print(f"Created {binary_infer_dir}") 27 | else: 28 | print(f"Found {binary_infer_dir}") 29 | 30 | 31 | def check_consistency(): 32 | check_directories_exist() 33 | 34 | def inference_on_binary(command): 35 | try: 36 | result = subprocess.run(command, capture_output=True, text=True, check=True) 37 | print(f"STDOUT:\n{result.stdout}") 38 | print(f"STDERR:\n{result.stderr}") 39 | output = result.stdout + result.stderr 40 | 41 | analyze_time = None 42 | retype_time = None 43 | total_time = None 44 | 45 | # Don't just look at last 10 lines, search through all output 46 | for line in output.splitlines(): 47 | if 'Type Analysis time' in line: 48 | match = re.search(r"Type Analysis time:\s*([0-9.]+)s", line) 49 | if match: 50 | analyze_time = match.group(1) 51 | elif 'ReTyping time' in line: 52 | match = re.search(r"ReTyping time:\s*([0-9.]+)s", line) 53 | if match: 54 | retype_time = match.group(1) 55 | elif 'Total time' in line: 56 | match = re.search(r"Total time:\s*([0-9.]+)s", line) 57 | if match: 58 | total_time = match.group(1) 59 | 60 | print(f"Analysis time: {analyze_time if analyze_time else 'None'}s") 61 | print(f"ReType time: {retype_time if retype_time else 'None'}s") 62 | print(f"Total time: {total_time if total_time else 'None'}s") 63 | 64 | return analyze_time, retype_time, total_time 65 | 66 | except subprocess.CalledProcessError as e: 67 | print(f"Error: {e}") 68 | print(f"Output: {e.output}") 69 | return None, None, None 70 | 71 | def get_gt_on_binary(command): 72 | try: 73 | subprocess.run(command, check=True, capture_output=True, text=True) 74 | except subprocess.CalledProcessError as e: 75 | print(f"Error: {e}") 76 | print(f"Output: {e.output}") 77 | 78 | 79 | def check_infer_exists(target_dir): 80 | skt_exists = False 81 | vartype_exists = False 82 | for file in os.listdir(target_dir): 83 | if "TypeConstraint_" in file: 84 | skt_exists = True 85 | if "varType" in file: 86 | vartype_exists = True 87 | return skt_exists and vartype_exists 88 | 89 | def check_gt_exists(target_dir): 90 | typelib_exists = False 91 | vartype_exists = False 92 | for file in os.listdir(target_dir): 93 | if "typeLib" in file: 94 | typelib_exists = True 95 | if "varType" in file: 96 | vartype_exists = True 97 | 
return typelib_exists and vartype_exists 98 | 99 | def run_ghidra_headless_on_project(project_path, infer: bool = True): 100 | dataflow_time = 0 101 | retype_time = 0 102 | total_time = 0 103 | 104 | for binary_name in os.listdir(project_path): 105 | if infer: 106 | if ".strip" in binary_name: 107 | binary_path = pathlib.Path(project_path) / binary_name 108 | binary_output_dir = pathlib.Path(infer_root) / project_path.name / binary_name[:-6] 109 | command = [ghidra_headless, project_dir, project_name, "-deleteProject", 110 | "-import", binary_path.resolve(), "-postScript", "TypeForge.java", 111 | f"output={binary_output_dir.resolve()}"] 112 | 113 | print(f"Inferring on {binary_path} ...") 114 | print(f"Command: {command}") 115 | times = inference_on_binary(command) 116 | if check_infer_exists(binary_output_dir): 117 | print("Inference successful") 118 | dataflow_time += float(times[0] or 0) # "or 0" guards against timings that failed to parse 119 | retype_time += float(times[1] or 0) 120 | total_time += float(times[2] or 0) 121 | else: 122 | print(f"Inferring on {binary_path} failed") 123 | else: 124 | if ".strip" not in binary_name: 125 | binary_path = pathlib.Path(project_path) / binary_name 126 | binary_output_dir = pathlib.Path(gt_root) / project_path.name / binary_name 127 | command = [ghidra_headless, project_dir, project_name, "-deleteProject", 128 | "-import", binary_path.resolve(), "-postScript", "GroundTruth.java", 129 | f"output={binary_output_dir.resolve()}"] 130 | print(f"Collecting GT on {binary_path} ...") 131 | print(f"Command: {command}") 132 | get_gt_on_binary(command) 133 | if check_gt_exists(binary_output_dir): 134 | print("GT collection successful") 135 | else: 136 | print(f"GT collection on {binary_path} failed") 137 | 138 | if infer: 139 | print(f"Total dataflow time: {dataflow_time}s") 140 | print(f"Total retype time: {retype_time}s") 141 | print(f"Total time: {total_time}s") 142 | 143 | if __name__ == "__main__": 144 | 145 | config_yml = pathlib.Path(__file__).parent / "config.yml" 146 | with open(config_yml, "r") as f: 147 | config = yaml.safe_load(f) 148 | 149 | dataset_root = config["inference"]["dataset"] 150 | gt_root = config["inference"]["gt"] 151 | infer_root = config["inference"]["infer"] 152 | 153 | ghidra_headless = config["ghidra"]["headless"] 154 | project_dir = config["ghidra"]["project_dir"] 155 | project_name = config["ghidra"]["project_name"] 156 | 157 | check_consistency() 158 | 159 | projects_to_inference = config["inference"]["projects"] 160 | 161 | project_statistics = {} 162 | 163 | for proj in projects_to_inference: 164 | print(f"Running inference on project: {proj}") 165 | run_ghidra_headless_on_project(pathlib.Path(dataset_root) / proj, infer = True) 166 | # run_ghidra_headless_on_project(pathlib.Path(dataset_root) / proj, infer = False) -------------------------------------------------------------------------------- /scripts/config.yml: -------------------------------------------------------------------------------- 1 | inference: 2 | dataset: "./TypeForge_Dataset" # the input dataset directory 3 | gt: "./TypeForge_GT" # the extracted ground truth directory 4 | infer: "./TypeForge_Infer" # the inference output directory 5 | projects: # the projects to be analyzed 6 | - "lighttpd" 7 | 8 | ghidra: 9 | headless: "/home/h1k0/tools/ghidra_11.0.3_PUBLIC/support/analyzeHeadless" # the path to the ghidra headless analyzer 10 | project_dir: "/home/h1k0/tmp" # the directory where the ghidra projects are stored 11 | project_name: "TypeForge_headless_project" # the name of the ghidra project
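# Note: TypeInference.py resolves each entry under `projects` as <dataset>/<project_name>;
# e.g. adding a second (hypothetical) project "zlib" would also process ./TypeForge_Dataset/zlib,
# which must contain the binary pairs <name> and <name>.strip described in scripts/README.md.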
-------------------------------------------------------------------------------- /scripts/judge/README.md: -------------------------------------------------------------------------------- 1 | # LLM-Assisted Double Elimination 2 | 3 | ## Setup 4 | 1. Create a `.env` file in the current directory and fill it in as follows: 5 | 6 | ```bash 7 | LANGSMITH_TRACING="false" 8 | LANGSMITH_API_KEY="[your_langsmith_apikey]" 9 | LANGSMITH_PROJECT="typeforge" # or any other project name 10 | OPENAI_API_KEY="[your_openai_apikey]" 11 | BASE_URL="[your_url]" 12 | MODEL="gpt-4.1-mini" 13 | ``` 14 | 2. Prepare the directory containing the inferred type constraints (a series of JSON files). 15 | 16 | ## Judge 17 | 1. Run `uv run main.py [inferred_dir]` to refine the inferred results. 18 | 2. The JSON file with the suffix `_morph_final.json` contains the final inferred type. -------------------------------------------------------------------------------- /scripts/judge/llm.py: -------------------------------------------------------------------------------- 1 | import os, asyncio 2 | from typing import Tuple, List, Literal, Optional, Any 3 | from langchain.chat_models import init_chat_model 4 | from langchain_core.messages import HumanMessage, SystemMessage 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from pydantic import BaseModel, Field 7 | import random 8 | 9 | system_template = """ 10 | You are an experienced reverse engineering expert. 11 | Please assess the readability of each pair of the following decompiled code snippets, where differences originate from some variables being assigned different types. 12 | You should disregard differences in variable and type names, and instead focus on both: 13 | 1. The syntactic clarity of the code, and 14 | 2. The logical rationality of its contextual semantics. 15 | 16 | Please return 0 if decompiled_code_0 has better readability, or 1 if decompiled_code_1 has better readability. 17 | """ 18 | 19 | prompt_template = ChatPromptTemplate.from_messages( 20 | [ 21 | ("system", system_template), 22 | ("user", "decompiled_code_0:\n{code1}\n\ndecompiled_code_1:\n{code2}\n") 23 | ] 24 | ) 25 | 26 | class ReadabilityJudgment(BaseModel): 27 | choice: Literal[0, 1] = Field( 28 | description = "0 if decompiled_code_0 has better readability, 1 if decompiled_code_1 has better readability." 29 | ) 30 | 31 | async def judge_code_pair(code_pair: Tuple[str, str], max_retries: int = 3, timeout: float = 5.0) -> int: 32 | """ 33 | Judge a pair of code snippets for readability.
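    Falls back to a uniformly random choice when the chat model cannot be initialized or all retries time out (see the retry loop below).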
34 | 35 | Args: 36 | code_pair: A tuple of two code snippets to compare 37 | max_retries: Maximum number of retries when timeout occurs 38 | timeout: Timeout in seconds for each attempt 39 | 40 | Returns: 41 | 0 if the first code is more readable, 1 if the second is more readable 42 | """ 43 | llm = None 44 | prompt = prompt_template.invoke({ 45 | "code1": code_pair[0], 46 | "code2": code_pair[1] 47 | }) 48 | 49 | retries = 0 50 | while retries < max_retries: 51 | try: 52 | # Use try/except to handle potential import errors 53 | try: 54 | if llm is None: # Only initialize if not already initialized 55 | llm = init_chat_model( 56 | model=os.environ.get("MODEL"), 57 | temperature=0.4, 58 | base_url=os.environ.get("BASE_URL"), 59 | ) 60 | except (ImportError, AttributeError) as e: 61 | print(f"Failed to initialize chat model: {e}") 62 | return random.choice([0, 1]) 63 | 64 | structured_llm = llm.with_structured_output(ReadabilityJudgment) 65 | try: 66 | result = await asyncio.wait_for(structured_llm.ainvoke(prompt), timeout=timeout) 67 | print(f"Judge result: {result.choice}") 68 | return result.choice 69 | except asyncio.TimeoutError: 70 | retries += 1 71 | print(f"Timeout occurred, attempt {retries}/{max_retries}") 72 | if retries == max_retries: 73 | print("Max retries reached, returning random choice") 74 | return random.choice([0, 1]) 75 | continue 76 | 77 | except Exception as e: 78 | print(f"Exception occurred in judge_code_pair: {e}") 79 | # Return a random choice in case of error instead of crashing the entire process 80 | return random.choice([0, 1]) 81 | finally: 82 | # Ensure resources are cleaned up 83 | if llm and hasattr(llm, 'aclose') and callable(llm.aclose): 84 | try: 85 | await llm.aclose() 86 | except Exception as e: 87 | print(f"Error closing LLM: {e}") 88 | llm = None # Reset llm for next retry 89 | 90 | async def judge_readability(decompiled_code_pairs: List[Tuple[str, str]]) -> List[int]: 91 | """ 92 | Judge readability of decompiled code pairs concurrently. 93 | 94 | Args: 95 | decompiled_code_pairs: List of tuples of decompiled code pairs 96 | 97 | Returns: 98 | List of judgments (0 or 1) for each pair 99 | """ 100 | if not decompiled_code_pairs: 101 | print("No code pairs to judge") 102 | return [] 103 | 104 | # If the number of code pairs is too large, we need to sample them. 
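    # (The cap of 10 pairs below is the script's fixed sampling budget; it bounds per-round LLM cost and latency.)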
105 | original_len = len(decompiled_code_pairs) 106 | if len(decompiled_code_pairs) > 10: 107 | decompiled_code_pairs = random.sample(decompiled_code_pairs, 10) 108 | print(f"Randomly sample {len(decompiled_code_pairs)} code pairs from {original_len}") 109 | 110 | print(f"Judging {len(decompiled_code_pairs)} code pairs") 111 | 112 | tasks = [] 113 | # Create task for each code pair 114 | for i, code_pair in enumerate(decompiled_code_pairs): 115 | tasks.append(judge_code_pair(code_pair)) 116 | 117 | # Process all pairs concurrently with proper error handling 118 | try: 119 | results = await asyncio.gather(*tasks, return_exceptions=True) 120 | # Handle any exceptions in results 121 | final_results = [] 122 | for i, result in enumerate(results): 123 | if isinstance(result, Exception): 124 | print(f"Error in pair {i}: {result}") 125 | final_results.append(random.choice([0, 1])) 126 | else: 127 | final_results.append(result) 128 | return final_results 129 | except Exception as e: 130 | print(f"Error in judge_readability: {e}") 131 | # Return fallback results if needed 132 | return [random.choice([0, 1]) for _ in range(len(decompiled_code_pairs))] 133 | 134 | if __name__ == "__main__": 135 | pass -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==24.1.0 2 | annotated-types==0.7.0 3 | anyio==4.9.0 4 | certifi==2025.1.31 5 | charset-normalizer==3.4.1 6 | distro==1.9.0 7 | dotenv==0.9.9 8 | greenlet==3.2.0 9 | h11==0.14.0 10 | httpcore==1.0.8 11 | httpx==0.28.1 12 | idna==3.10 13 | iniconfig==2.1.0 14 | jiter==0.9.0 15 | jsonpatch==1.33 16 | jsonpointer==3.0.0 17 | langchain==0.3.23 18 | langchain-core==0.3.54 19 | langchain-openai==0.3.14 20 | langchain-text-splitters==0.3.8 21 | langsmith==0.3.32 22 | networkx==3.4.2 23 | openai==1.75.0 24 | orjson==3.10.16 25 | packaging==24.2 26 | pluggy==1.5.0 27 | pydantic==2.11.3 28 | pydantic-core==2.33.1 29 | pydot==4.0.0 30 | pyparsing==3.2.3 31 | pytest==8.3.5 32 | python-dotenv==1.1.0 33 | pyyaml==6.0.2 34 | regex==2024.11.6 35 | requests==2.32.3 36 | requests-toolbelt==1.0.0 37 | sniffio==1.3.1 38 | sqlalchemy==2.0.40 39 | tenacity==9.1.2 40 | tiktoken==0.9.0 41 | tqdm==4.67.1 42 | typing-extensions==4.13.2 43 | typing-inspection==0.4.0 44 | urllib3==2.4.0 45 | zstandard==0.23.0 46 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/AccessPoints.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import typeforge.base.dataflow.expression.NMAE; 4 | import typeforge.utils.DataTypeHelper; 5 | import typeforge.utils.Logging; 6 | import ghidra.program.model.data.*; 7 | import ghidra.program.model.listing.Function; 8 | import ghidra.program.model.pcode.PcodeOp; 9 | 10 | import java.util.HashSet; 11 | import java.util.Map; 12 | import java.util.HashMap; 13 | import java.util.Objects; 14 | import java.util.Set; 15 | 16 | public class AccessPoints { 17 | 18 | public enum AccessType { 19 | LOAD, 20 | STORE, 21 | ARGUMENT, 22 | RETURN_VALUE, 23 | INDIRECT 24 | } 25 | 26 | /** 27 | * AP records an access point of a symbolExpr 28 | * access type can be load, store or ... 29 | * an access point always associates with a TypeDescriptor. 
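 * For example (illustrative): a 4-byte load from the expression *(param_1 + 8) is recorded as an AP holding that LOAD PcodeOp, the enclosing Function, dataType int, and AccessType.LOAD.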
30 | */ 31 | public static class AP { 32 | public final PcodeOp pcodeOp; 33 | public final Function func; 34 | public DataType dataType; 35 | 36 | /** accessType: one of: 37 | * 0: load 38 | * 1: store 39 | * 2: argument 40 | * 3: return value; 4: indirect 41 | */ 42 | public AccessType accessType; 43 | 44 | public AP(PcodeOp pcodeOp, DataType type, AccessType accessType, Function func) { 45 | this.pcodeOp = pcodeOp; 46 | if (accessType != AccessType.ARGUMENT) { 47 | assert type != null; 48 | this.dataType = type; 49 | } 50 | else { 51 | this.dataType = null; 52 | } 53 | this.accessType = accessType; 54 | this.func = func; 55 | } 56 | 57 | @Override 58 | public int hashCode() { 59 | return Objects.hash(pcodeOp, accessType); 60 | } 61 | 62 | @Override 63 | public boolean equals(Object obj) { 64 | if (obj instanceof AP other) { 65 | if (accessType != AccessType.ARGUMENT) { 66 | if (!pcodeOp.equals(other.pcodeOp)) return false; 67 | return accessType == other.accessType; 68 | } else { 69 | return pcodeOp.equals(other.pcodeOp) && accessType == other.accessType; 70 | } 71 | } 72 | return false; 73 | } 74 | } 75 | 76 | public static class APSet { 77 | public final Set<AP> apSet; 78 | public boolean isSameSizeType = true; 79 | public int maxDTSize = -1; 80 | public int minDTSize = -1; 81 | public int DTSize = -1; 82 | public DataType mostAccessedDT = null; 83 | public Set<DataType> allDTs = new HashSet<>(); 84 | 85 | public APSet() { 86 | this.apSet = new HashSet<>(); 87 | } 88 | 89 | public APSet(APSet other) { 90 | this.apSet = new HashSet<>(other.apSet); 91 | this.isSameSizeType = other.isSameSizeType; 92 | this.maxDTSize = other.maxDTSize; 93 | this.minDTSize = other.minDTSize; 94 | this.DTSize = other.DTSize; 95 | this.mostAccessedDT = other.mostAccessedDT; 96 | this.allDTs = new HashSet<>(other.allDTs); 97 | } 98 | 99 | public void addAll(Set<AP> apSet) { 100 | this.apSet.addAll(apSet); 101 | } 102 | 103 | public boolean addAP(AP ap) { 104 | return apSet.add(ap); 105 | } 106 | 107 | public Set<AP> getApSet() { 108 | return apSet; 109 | } 110 | 111 | public int getAPCount() { 112 | return apSet.size(); 113 | } 114 | 115 | public Map<DataType, Integer> getTypeFreq() { 116 | Map<DataType, Integer> typeFreq = new HashMap<>(); 117 | for (var ap: apSet) { 118 | typeFreq.putIfAbsent(ap.dataType, 0); 119 | typeFreq.put(ap.dataType, typeFreq.get(ap.dataType) + 1); 120 | } 121 | return typeFreq; 122 | } 123 | 124 | public void postHandle() { 125 | /* Avoid using undefined data type */ 126 | for (var ap: apSet) { 127 | if (ap.dataType instanceof Undefined || ap.dataType instanceof DefaultDataType) { 128 | ap.dataType = DataTypeHelper.getDataTypeInSize(ap.dataType.getLength()); 129 | } else if (ap.dataType instanceof Pointer && ((Pointer) ap.dataType).getDataType() instanceof Undefined) { 130 | var dt = DataTypeHelper.getDataTypeInSize(ap.dataType.getLength()); 131 | ap.dataType = DataTypeHelper.getPointerDT(dt, 1); 132 | } 133 | } 134 | 135 | isSameSizeType = AccessPoints.ifAPSetHoldsSameSizeType(apSet); 136 | if (isSameSizeType) { 137 | DTSize = AccessPoints.getDataTypeSize(apSet); 138 | maxDTSize = DTSize; 139 | minDTSize = DTSize; 140 | } else { 141 | maxDTSize = AccessPoints.getMaxSizeInAPSet(apSet); 142 | minDTSize = AccessPoints.getMinSizeInAPSet(apSet); 143 | } 144 | 145 | mostAccessedDT = AccessPoints.getMostAccessedDT(apSet); 146 | allDTs = AccessPoints.getDataTypes(apSet); 147 | } 148 | } 149 | 150 | 151 | /** 152 | * Each SymbolExpr in a function may be accessed by multiple PcodeOps with different types.
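 * (e.g., the same field expression may be read as a 4-byte int by one PcodeOp and stored as an 8-byte pointer by another; sizes here are illustrative.)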
153 | * So we need to record all the access points of each SymbolExpr. 154 | */ 155 | 156 | /** Expressions in memAccessMap: (param + 1) means there is a load/store into (param + 1), loaded value can be represented as *(param + 1) */ 157 | private final Map<NMAE, Set<AP>> fieldExprToAccessMap; 158 | 159 | public AccessPoints() { 160 | fieldExprToAccessMap = new HashMap<>(); 161 | } 162 | 163 | public void addFieldAccessPoint(NMAE symExpr, PcodeOp op, DataType type, AccessType accessType, Function func) { 164 | fieldExprToAccessMap.putIfAbsent(symExpr, new HashSet<>()); 165 | fieldExprToAccessMap.get(symExpr).add(new AP(op, type, accessType, func)); 166 | Logging.trace("AccessPoints", String.format("Add Field Access %s for [%s] with type [%s]", accessType, symExpr, type.getName())); 167 | } 168 | 169 | public Set<AP> getFieldAccessPoints(NMAE symExpr) { 170 | return fieldExprToAccessMap.get(symExpr); 171 | } 172 | 173 | public static boolean ifAPSetHoldsSameSizeType(Set<AP> apSet) { 174 | if (apSet.isEmpty()) { 175 | return false; 176 | } 177 | var firstAP = apSet.iterator().next(); 178 | var firstDT = firstAP.dataType; 179 | for (var ap : apSet) { 180 | if (!(firstDT.getLength() == ap.dataType.getLength())) { 181 | return false; 182 | } 183 | } 184 | return true; 185 | } 186 | 187 | public static int getMaxSizeInAPSet(Set<AP> apSet) { 188 | if (apSet.isEmpty()) { 189 | return 0; 190 | } 191 | var maxSize = 0; 192 | for (var ap : apSet) { 193 | if (ap.dataType.getLength() > maxSize) { 194 | maxSize = ap.dataType.getLength(); 195 | } 196 | } 197 | return maxSize; 198 | } 199 | 200 | public static int getMinSizeInAPSet(Set<AP> apSet) { 201 | if (apSet.isEmpty()) { 202 | return 0; 203 | } 204 | var minSize = Integer.MAX_VALUE; 205 | for (var ap : apSet) { 206 | if (ap.dataType.getLength() < minSize) { 207 | minSize = ap.dataType.getLength(); 208 | } 209 | } 210 | return minSize; 211 | } 212 | 213 | public static DataType getMostAccessedDT(Set<AP> apSet) { 214 | Map<DataType, Integer> apCount = new HashMap<>(); 215 | apSet.forEach(ap -> { 216 | apCount.putIfAbsent(ap.dataType, 0); 217 | apCount.put(ap.dataType, apCount.get(ap.dataType) + 1); 218 | }); 219 | 220 | /* Find DataType with Max access count */ 221 | var maxCount = 0; 222 | DataType maxDT = null; 223 | for (var entry: apCount.entrySet()) { 224 | if (entry.getValue() > maxCount) { 225 | maxCount = entry.getValue(); 226 | maxDT = entry.getKey(); 227 | } 228 | } 229 | return maxDT; 230 | } 231 | 232 | public static Set<DataType> getDataTypes(Set<AP> apSet) { 233 | Set<DataType> dataTypes = new HashSet<>(); 234 | for (var ap: apSet) { 235 | dataTypes.add(ap.dataType); 236 | } 237 | return dataTypes; 238 | } 239 | 240 | public static int getDataTypeSize(Set<AP> apSet) { 241 | if (apSet.isEmpty()) { 242 | return 0; 243 | } 244 | return apSet.iterator().next().dataType.getLength(); 245 | } 246 | } 247 | 248 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/ConflictGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import org.jgrapht.Graph; 4 | import org.jgrapht.graph.SimpleGraph; 5 | import typeforge.utils.Logging; 6 | 7 | import java.util.Comparator; 8 | import java.util.NoSuchElementException; 9 | import java.util.Set; 10 | 11 | public class ConflictGraph<T> { 12 | // Edge type enum 13 | public enum EdgeType { 14 | INTERSEC, 15 | NOINTERSEC 16 | } 17 | 18 | // Custom edge class 19 | public static class ConflictEdge { 20 | private final EdgeType type; 21 | 22 |
public ConflictEdge(EdgeType type) { 23 | this.type = type; 24 | } 25 | 26 | public EdgeType getType() { 27 | return type; 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "ConflictEdge[" + type + "]"; 33 | } 34 | } 35 | 36 | private final Graph<T, ConflictEdge> graph; 37 | 38 | public ConflictGraph() { 39 | // Create an undirected graph 40 | this.graph = new SimpleGraph<>(ConflictEdge.class); 41 | } 42 | 43 | // Add a vertex to the graph 44 | public boolean addVertex(T vertex) { 45 | return graph.addVertex(vertex); 46 | } 47 | 48 | // Add an edge of type INTERSEC between two vertices 49 | public void addIntersecEdge(T source, T target) { 50 | addVertex(source); 51 | addVertex(target); 52 | graph.addEdge(source, target, new ConflictEdge(EdgeType.INTERSEC)); 53 | Logging.debug("ConflictGraph", String.format("Add Intersection Conflict Graph edge: %s ---%s---> %s", source, EdgeType.INTERSEC, target)); 54 | } 55 | 56 | // Add an edge of type NOINTERSEC between two vertices 57 | public void addNoIntersecEdge(T source, T target) { 58 | addVertex(source); 59 | addVertex(target); 60 | graph.addEdge(source, target, new ConflictEdge(EdgeType.NOINTERSEC)); 61 | Logging.debug("ConflictGraph", String.format("Add No Intersection Conflict Graph edge: %s ---%s---> %s", source, EdgeType.NOINTERSEC, target)); 62 | } 63 | 64 | // Get all vertices 65 | public Set<T> getVertices() { 66 | return graph.vertexSet(); 67 | } 68 | 69 | // Get all edges 70 | public Set<ConflictEdge> getEdges() { 71 | return graph.edgeSet(); 72 | } 73 | 74 | // Find the node with the most NOINTERSEC connections (highest NOINTERSEC degree) 75 | public T findNodeWithMostNoIntersecConnections() { 76 | Set<T> vertices = graph.vertexSet(); 77 | return vertices.stream() 78 | .max(Comparator.comparingInt(vertex -> { 79 | // Count only NOINTERSEC edges for this vertex 80 | return (int) graph.edgesOf(vertex).stream() 81 | .filter(edge -> edge.getType() == EdgeType.NOINTERSEC) 82 | .count(); 83 | })) 84 | .orElseThrow(() -> new NoSuchElementException("No vertex found with NOINTERSEC connections")); 85 | } 86 | 87 | public boolean hasIntersecConnections() { 88 | return graph.edgeSet().stream() 89 | .anyMatch(edge -> edge.getType() == EdgeType.INTERSEC); 90 | } 91 | 92 | public boolean hasNoIntersecConnections() { 93 | return graph.edgeSet().stream() 94 | .anyMatch(edge -> edge.getType() == EdgeType.NOINTERSEC); 95 | } 96 | 97 | public void removeAllNoIntersecEdgesOfNode(T vertex) { 98 | // Create a copy to avoid concurrent modification 99 | Set<ConflictEdge> edgesToRemove = graph.edgesOf(vertex).stream() 100 | .filter(edge -> edge.getType() == EdgeType.NOINTERSEC) 101 | .collect(java.util.stream.Collectors.toSet()); 102 | 103 | // Remove each edge 104 | for (ConflictEdge edge : edgesToRemove) { 105 | graph.removeEdge(edge); 106 | } 107 | } 108 | 109 | // Get the number of edges for a specific vertex 110 | public int getConnectionCount(T vertex) { 111 | return graph.degreeOf(vertex); 112 | } 113 | 114 | // Get the underlying graph 115 | public Graph<T, ConflictEdge> getGraph() { 116 | return graph; 117 | } 118 | 119 | // Get edges of a specific type 120 | public Set<ConflictEdge> getEdgesOfType(EdgeType type) { 121 | return graph.edgeSet().stream() 122 | .filter(edge -> edge.getType() == type) 123 | .collect(java.util.stream.Collectors.toSet()); 124 | } 125 | 126 | public int getEdgesCountOfType(EdgeType type) { 127 | return (int) graph.edgeSet().stream() 128 | .filter(edge -> edge.getType() == type) 129 | .count(); 130 | } 131 | 132 | @Override 133 | public String toString() { 134 | return "ConflictGraph{vertices=" +
graph.vertexSet().size() + 135 | ", edges=" + graph.edgeSet().size() + "}"; 136 | } 137 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/KSet.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import typeforge.utils.Logging; 4 | 5 | import java.util.HashSet; 6 | import java.util.Iterator; 7 | 8 | /** 9 | * KSet is a set with a maximum size. 10 | * If the set is full, then the add operation will return false. 11 | * @param <E> the element type 12 | */ 13 | public class KSet<E> implements Iterable<E> { 14 | private final HashSet<E> set; 15 | private final int maxSize; 16 | 17 | public KSet(int maxSize) { 18 | this.maxSize = maxSize; 19 | this.set = new HashSet<>(); 20 | } 21 | 22 | public boolean add(E element) { 23 | if (set.size() >= maxSize) { 24 | Logging.warn("KSet", "Set is full, cannot add element: " + element); 25 | return false; 26 | } 27 | return set.add(element); 28 | } 29 | 30 | public boolean isEmpty() { 31 | return set.isEmpty(); 32 | } 33 | 34 | public boolean contains(E element) { 35 | return set.contains(element); 36 | } 37 | 38 | public void merge(KSet<E> other) { 39 | for (E element : other.set) { 40 | if (this.set.size() >= this.maxSize) { 41 | break; 42 | } 43 | this.add(element); 44 | } 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return set.toString(); 50 | } 51 | 52 | @Override 53 | public Iterator<E> iterator() { 54 | return set.iterator(); 55 | } 56 | 57 | public void clear() { 58 | set.clear(); 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/Layout.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import typeforge.base.dataflow.constraint.Skeleton; 4 | 5 | import java.util.*; 6 | 7 | /** 8 | * Layout class is used to describe the layout of a composite data type. 9 | * Actually, layout is just a List of intervals, where each interval is a pair of offset and size.
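 * For example (illustrative): fields accessed at offset 0 with size 8 and at offset 8 with size 4 produce the intervals [(0, {8}), (8, {4})].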
10 | */ 11 | public class Layout { 12 | 13 | public static class Interval { 14 | public long offset; 15 | public Set<Integer> sizes; 16 | 17 | public Interval(long offset, Set<Integer> sizes) { 18 | this.offset = offset; 19 | this.sizes = sizes; 20 | } 21 | 22 | @Override 23 | public boolean equals(Object o) { 24 | if (this == o) return true; 25 | if (o == null || getClass() != o.getClass()) return false; 26 | Interval interval = (Interval) o; 27 | return offset == interval.offset && sizes.equals(interval.sizes); 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | return Objects.hash(offset, sizes); 33 | } 34 | } 35 | 36 | public List<Interval> intervals; 37 | 38 | public Layout(Skeleton constraint) { 39 | intervals = new ArrayList<>(); 40 | constraint.fieldAccess.forEach((offset, aps) -> { 41 | Set<Integer> sizes = new HashSet<>(); 42 | for (var ap: aps.getApSet()) { 43 | sizes.add(ap.dataType.getLength()); 44 | } 45 | intervals.add(new Interval(offset, sizes)); 46 | }); 47 | } 48 | 49 | public Layout(List<Interval> intervals) { 50 | this.intervals = intervals; 51 | } 52 | 53 | @Override 54 | public int hashCode() { 55 | return Objects.hash( 56 | intervals.stream().map(interval -> interval.offset).toArray() 57 | ); 58 | } 59 | 60 | @Override 61 | public boolean equals(Object o) { 62 | if (this == o) return true; 63 | if (o == null || getClass() != o.getClass()) return false; 64 | Layout layout = (Layout) o; 65 | 66 | if (this.intervals.size() != layout.intervals.size()) return false; 67 | for (int i = 0; i < this.intervals.size(); i++) { 68 | if (this.intervals.get(i).offset != layout.intervals.get(i).offset) { 69 | return false; 70 | } 71 | } 72 | return true; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/Range.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import java.util.Objects; 4 | import java.util.Set; 5 | 6 | public class Range { 7 | private final long start; 8 | private final long end; 9 | 10 | public Range(Long start, Long end) { 11 | this.start = start; 12 | this.end = end; 13 | } 14 | 15 | public long getStart() { 16 | return start; 17 | } 18 | 19 | public long getEnd() { 20 | return end; 21 | } 22 | 23 | static public boolean ifRangeInRanges(Range range, Set<Range> existRanges) { 24 | for (var r: existRanges) { 25 | if (range.getStart() >= r.getStart() && range.getEnd() <= r.getEnd()) { 26 | return true; 27 | } 28 | } 29 | return false; 30 | } 31 | 32 | @Override 33 | public int hashCode() { 34 | return Objects.hash(start, end); 35 | } 36 | 37 | @Override 38 | public boolean equals(Object o) { 39 | if (this == o) return true; 40 | if (o == null || getClass() != o.getClass()) return false; 41 | Range range = (Range) o; 42 | return Objects.equals(start, range.start) && 43 | Objects.equals(end, range.end); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/TFG/TypeFlowGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.TFG; 2 | 3 | import org.jgrapht.Graph; 4 | import org.jgrapht.alg.connectivity.ConnectivityInspector; 5 | import org.jgrapht.alg.connectivity.KosarajuStrongConnectivityInspector; 6 | import org.jgrapht.alg.interfaces.StrongConnectivityAlgorithm; 7 | import org.jgrapht.graph.DefaultDirectedGraph; 8 | import org.jgrapht.graph.DefaultEdge; 9 | import org.jgrapht.Graphs; 10 |
11 | import typeforge.base.dataflow.constraint.Skeleton; 12 | import typeforge.utils.Logging; 13 | 14 | import java.util.*; 15 | 16 | public class TypeFlowGraph<T> { 17 | public enum EdgeType { 18 | CALL, 19 | RETURN, 20 | DATAFLOW, 21 | ALIAS, 22 | } 23 | 24 | public static class TypeFlowEdge extends DefaultEdge { 25 | private final EdgeType type; 26 | 27 | public TypeFlowEdge(EdgeType type) { 28 | this.type = type; 29 | } 30 | 31 | public EdgeType getType() { 32 | return type; 33 | } 34 | 35 | @Override 36 | public String toString() { 37 | var source = this.getSource(); 38 | var target = this.getTarget(); 39 | return String.format("%s ---%s---> %s", source, type, target); 40 | } 41 | } 42 | 43 | private final Graph<T, TypeFlowEdge> graph; 44 | private final UUID uuid; 45 | private final String shortUUID; 46 | 47 | public TypeFlowPathManager<T> pathManager; 48 | public Skeleton finalSkeleton; 49 | 50 | public TypeFlowGraph() { 51 | graph = new DefaultDirectedGraph<>(TypeFlowEdge.class); 52 | uuid = UUID.randomUUID(); 53 | shortUUID = uuid.toString().substring(0, 8); 54 | 55 | pathManager = new TypeFlowPathManager<>(this); 56 | 57 | Logging.trace("TypeFlowGraph", String.format("Create TypeFlowGraph_%s", shortUUID)); 58 | } 59 | 60 | public String getShortUUID() { 61 | return shortUUID; 62 | } 63 | 64 | public void addEdge(T src, T dst, EdgeType edgeType) { 65 | graph.addVertex(src); 66 | graph.addVertex(dst); 67 | graph.addEdge(src, dst, new TypeFlowEdge(edgeType)); 68 | Logging.trace("TypeFlowGraph", String.format("TypeFlowGraph_%s Add edge: %s ---%s---> %s", shortUUID, src, edgeType, dst)); 69 | } 70 | 71 | public void removeEdge(T src, T dst) { 72 | graph.removeEdge(src, dst); 73 | Logging.debug("TypeFlowGraph", String.format("TypeFlowGraph_%s Remove edge: %s ---> %s", shortUUID, src, dst)); 74 | } 75 | 76 | public void removeNode(T node) { 77 | graph.removeVertex(node); 78 | Logging.trace("TypeFlowGraph", String.format("TypeFlowGraph_%s Remove node: %s", shortUUID, node)); 79 | } 80 | 81 | /** 82 | * A graph is invalid if it contains isolated nodes (nodes with no in- or out-edges). 83 | */ 84 | public boolean isValid() { 85 | // If there is only one node, it is valid 86 | if (graph.vertexSet().size() == 1) { 87 | return true; 88 | } else if (graph.vertexSet().isEmpty()) { 89 | Logging.error("TypeFlowGraph", 90 | String.format("Unexpected empty graph: %s", this)); 91 | return false; 92 | } else { 93 | boolean isValid = true; 94 | for (var node: getNodes()) { 95 | if (graph.inDegreeOf(node) == 0 && graph.outDegreeOf(node) == 0) { 96 | isValid = false; 97 | break; 98 | } 99 | } 100 | 101 | return isValid; 102 | } 103 | } 104 | 105 | public int getNumNodes() { 106 | return graph.vertexSet().size(); 107 | } 108 | 109 | public Set<T> getNodes() { 110 | return graph.vertexSet(); 111 | } 112 | 113 | public Set<TypeFlowEdge> getEdges() { 114 | return graph.edgeSet(); 115 | } 116 | 117 | public Graph<T, TypeFlowEdge> getGraph() { 118 | return graph; 119 | } 120 | 121 | public Set<T> getForwardNeighbors(T node) { 122 | var result = new HashSet<T>(); 123 | for (var edge: graph.outgoingEdgesOf(node)) { 124 | var target = graph.getEdgeTarget(edge); 125 | result.add(target); 126 | } 127 | return result; 128 | } 129 | 130 | public void mergeGraph(TypeFlowGraph<T> other) { 131 | for (T vertex: other.getNodes()) { 132 | graph.addVertex(vertex); 133 | } 134 | 135 | Set<TypeFlowEdge> edges = other.getGraph().edgeSet(); 136 | for (TypeFlowEdge edge: edges) { 137 | T src = other.getGraph().getEdgeSource(edge); 138 | T dst = other.getGraph().getEdgeTarget(edge); 139 | var edgeType = edge.getType(); 140 | 141 |
TypeFlowEdge existingEdge = graph.getEdge(src, dst); 142 | if (existingEdge == null) { 143 | graph.addEdge(src, dst, new TypeFlowEdge(edgeType)); 144 | } else if (existingEdge.getType() != edgeType) { 145 | Logging.warn("TypeFlowGraph", String.format("%s Merge conflict: %s ---> %s", other, src, dst)); 146 | } else { 147 | continue; 148 | } 149 | } 150 | 151 | Logging.trace("TypeFlowGraph", String.format("TypeFlowGraph_%s Merge with %s", shortUUID, other)); 152 | } 153 | 154 | 155 | public List<Set<T>> getConnectedComponents() { 156 | ConnectivityInspector<T, TypeFlowEdge> inspector = new ConnectivityInspector<>(graph); 157 | return inspector.connectedSets(); 158 | } 159 | 160 | public StrongConnectivityAlgorithm<T, TypeFlowEdge> getStrongConnectedComponentsAlg() { 161 | return new KosarajuStrongConnectivityInspector<>(graph); 162 | } 163 | 164 | public boolean rebuildPathManager() { 165 | if (getNumNodes() <= 1) { 166 | return false; 167 | } 168 | this.pathManager = new TypeFlowPathManager<>(this); 169 | this.pathManager.initialize(); 170 | return true; 171 | } 172 | 173 | public String toGraphviz() { 174 | StringBuilder builder = new StringBuilder(); 175 | builder.append("digraph TypeFlowGraph_").append(shortUUID).append(" {\n"); 176 | for (TypeFlowEdge edge : graph.edgeSet()) { 177 | T src = graph.getEdgeSource(edge); 178 | T dst = graph.getEdgeTarget(edge); 179 | builder.append(" \"").append(src).append("\" -> \"").append(dst) 180 | .append("\" [label=\"").append(edge.getType()).append("\"];\n"); 181 | } 182 | builder.append("}"); 183 | return builder.toString(); 184 | } 185 | 186 | /** 187 | * Write the partial TFG for a given NMAE node into one graphviz file. 188 | * @param node The node to dump the TFG for 189 | * @param maxDepth Max graph edge depth around the node 190 | */ 191 | public String toPartialGraphviz(T node, int maxDepth) { 192 | if (!graph.containsVertex(node)) { 193 | return "digraph Empty {\n}"; 194 | } 195 | 196 | Set<TypeFlowEdge> includedEdges = new HashSet<>(); 197 | 198 | // BFS to find nodes within maxDepth 199 | Map<T, Integer> distanceMap = new HashMap<>(); 200 | Queue<T> queue = new LinkedList<>(); 201 | 202 | // Start with the given node 203 | queue.add(node); 204 | distanceMap.put(node, 0); 205 | 206 | // Process outgoing edges (forward direction) 207 | while (!queue.isEmpty()) { 208 | T current = queue.poll(); 209 | int currentDistance = distanceMap.get(current); 210 | 211 | if (currentDistance < maxDepth) { 212 | // Process outgoing edges 213 | for (TypeFlowEdge edge : graph.outgoingEdgesOf(current)) { 214 | T target = graph.getEdgeTarget(edge); 215 | if (!distanceMap.containsKey(target) || distanceMap.get(target) > currentDistance + 1) { 216 | distanceMap.put(target, currentDistance + 1); 217 | includedEdges.add(edge); 218 | queue.add(target); 219 | } else { 220 | includedEdges.add(edge); 221 | } 222 | } 223 | } 224 | } 225 | 226 | // Reset for backward traversal 227 | queue.clear(); 228 | queue.add(node); 229 | Map<T, Integer> reverseDistanceMap = new HashMap<>(); 230 | reverseDistanceMap.put(node, 0); 231 | 232 | // Process incoming edges (backward direction) 233 | while (!queue.isEmpty()) { 234 | T current = queue.poll(); 235 | int currentDistance = reverseDistanceMap.get(current); 236 | 237 | if (currentDistance < maxDepth) { 238 | // Process incoming edges 239 | for (TypeFlowEdge edge : graph.incomingEdgesOf(current)) { 240 | T source = graph.getEdgeSource(edge); 241 | if (!reverseDistanceMap.containsKey(source) || reverseDistanceMap.get(source) > currentDistance + 1) { 242 | reverseDistanceMap.put(source, currentDistance + 1); 243
| includedEdges.add(edge); 244 | queue.add(source); 245 | } else { 246 | includedEdges.add(edge); 247 | } 248 | } 249 | } 250 | } 251 | 252 | // Generate graphviz representation 253 | StringBuilder builder = new StringBuilder(); 254 | builder.append("digraph Partial_TypeFlowGraph_").append(shortUUID).append(" {\n"); 255 | 256 | // Highlight the center node 257 | builder.append(" \"").append(node).append("\" [style=filled, fillcolor=lightblue];\n"); 258 | 259 | // Add all edges 260 | for (TypeFlowEdge edge : includedEdges) { 261 | T src = graph.getEdgeSource(edge); 262 | T dst = graph.getEdgeTarget(edge); 263 | builder.append(" \"").append(src).append("\" -> \"").append(dst) 264 | .append("\" [label=\"").append(edge.getType()).append("\"];\n"); 265 | } 266 | 267 | builder.append("}"); 268 | return builder.toString(); 269 | } 270 | 271 | public TypeFlowGraph<T> createCopy() { 272 | Logging.trace("TypeFlowGraph", "Create copy of " + this); 273 | TypeFlowGraph<T> copy = new TypeFlowGraph<>(); 274 | Graphs.addGraph(copy.graph, this.graph); 275 | return copy; 276 | } 277 | 278 | @Override 279 | public String toString() { 280 | return "TypeFlowGraph_" + shortUUID; 281 | } 282 | } 283 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/TFG/TypeFlowPath.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.TFG; 2 | import generic.stl.Pair; 3 | import typeforge.base.dataflow.expression.NMAE; 4 | import typeforge.base.dataflow.expression.NMAEManager; 5 | import typeforge.base.dataflow.constraint.Skeleton; 6 | import typeforge.utils.Logging; 7 | import org.jgrapht.GraphPath; 8 | 9 | import java.util.*; 10 | 11 | public class TypeFlowPath<T> { 12 | TypeFlowGraph<T> graph; 13 | public final UUID uuid = UUID.randomUUID(); 14 | public final String shortUUID = uuid.toString().substring(0, 8); 15 | public List<T> nodes; 16 | public List<TypeFlowGraph.TypeFlowEdge> edges; 17 | public Skeleton finalSkeletonOnPath = null; 18 | public boolean conflict = false; 19 | public TypeFlowGraph.TypeFlowEdge conflictEdge = null; 20 | public T start; 21 | public T end; 22 | public Set<TypeFlowGraph.TypeFlowEdge> evilEdges; 23 | 24 | /** 25 | * Map[SUB_PATH_LENGTH, Map[HASH_CODE, SUB_PATH_NODES]] 26 | */ 27 | public Map<Integer, Map<Integer, List<T>>> subPathsOfLengthWithHash = new HashMap<>(); 28 | 29 | public TypeFlowPath(TypeFlowGraph<T> graph, GraphPath<T, TypeFlowGraph.TypeFlowEdge> path) { 30 | this.graph = graph; 31 | // update nodes; 32 | this.nodes = path.getVertexList(); 33 | this.edges = path.getEdgeList(); 34 | 35 | this.start = nodes.get(0); 36 | this.end = nodes.get(nodes.size() - 1); 37 | this.evilEdges = new HashSet<>(); 38 | } 39 | 40 | public TypeFlowPath(TypeFlowGraph<T> graph, List<T> nodes, List<TypeFlowGraph.TypeFlowEdge> edges) { 41 | this.graph = graph; 42 | this.nodes = nodes; 43 | this.edges = edges; 44 | 45 | this.start = nodes.get(0); 46 | this.end = nodes.get(nodes.size() - 1); 47 | this.evilEdges = new HashSet<>(); 48 | } 49 | 50 | /** 51 | * Try to merge the skeletons of each node on the path, in the forward direction. 52 | * If all merges succeed without any conflict, return true and update finalSkeletonOnPath. 53 | * If any merge fails, return false and update evilEdges.
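 * <p>
 * Illustrative call pattern (a sketch; the driver code around it is hypothetical,
 * but the method and fields shown are the real ones declared in this class):
 * <pre>{@code
 * TypeFlowPath<NMAE> path = ...; // some path extracted from a TFG
 * if (!path.tryMergeLayoutForwardOnPath(exprManager)) {
 *     // path.conflict is now true, and path.conflictEdge (when non-null)
 *     // marks the edge where the layouts stopped being mergeable
 * } else {
 *     Skeleton merged = path.finalSkeletonOnPath;
 * }
 * }</pre>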
54 | * @param exprManager NMAE Manager 55 | * @return true if no conflict, false if conflict 56 | */ 57 | public boolean tryMergeLayoutForwardOnPath(NMAEManager exprManager) { 58 | Logging.debug("TypeFlowPath", String.format("Try merge by path: %s", this)); 59 | Skeleton mergedSkt = new Skeleton(); 60 | for (var i = 0; i < nodes.size(); i++) { 61 | var curNode = nodes.get(i); 62 | NMAE curExpr = (NMAE) curNode; 63 | // TODO: should we also merge current node's alias? 64 | var curExprSkt = exprManager.getSkeleton(curExpr); 65 | if (curExprSkt == null) { 66 | continue; 67 | } 68 | var success = mergedSkt.tryMergeLayoutStrict(curExprSkt); 69 | if (!success) { 70 | Logging.warn("TypeFlowPath", 71 | String.format("Layout Conflict when forward merging Skeletons on path for %s", curExpr)); 72 | Logging.warn("TypeFlowPath", 73 | String.format("Merged Skeleton: %s", mergedSkt.dumpLayout(2))); 74 | Logging.warn("TypeFlowPath", 75 | String.format("Current Skeleton: %s", curExprSkt.dumpLayout(2))); 76 | conflict = true; 77 | if (i > 0) { 78 | var prevNode = nodes.get(i - 1); 79 | conflictEdge = this.graph.getGraph().getEdge( 80 | prevNode, 81 | curNode 82 | ); 83 | Logging.warn("TypeFlowPath", 84 | String.format("Marked Layout Conflict Edge: %s", conflictEdge)); 85 | } 86 | return false; 87 | } 88 | } 89 | finalSkeletonOnPath = mergedSkt; 90 | return true; 91 | } 92 | 93 | 94 | public void findEvilEdges(int rightBoundIndex, int leftBoundIndex) { 95 | if (leftBoundIndex == -1) { 96 | Logging.warn("TypeAliasPath", "Cannot find leftBoundIndex when finding evil edges"); 97 | evilEdges.add(edges.get(rightBoundIndex - 1)); 98 | } 99 | else if (leftBoundIndex == rightBoundIndex) { 100 | Logging.debug("TypeAliasPath", "LB == RB"); 101 | evilEdges.add(edges.get(rightBoundIndex)); 102 | evilEdges.add(edges.get(rightBoundIndex - 1)); 103 | } 104 | else if (leftBoundIndex > rightBoundIndex) { 105 | Logging.debug("TypeAliasPath", "LB > RB"); 106 | evilEdges.add(edges.get(leftBoundIndex)); 107 | evilEdges.add(edges.get(rightBoundIndex - 1)); 108 | for (int i = rightBoundIndex; i < leftBoundIndex; i++) { 109 | evilEdges.add(edges.get(i)); 110 | } 111 | } 112 | /* leftBoundIndex < rightBoundIndex, this is what we expect */ 113 | else { 114 | Logging.debug("TypeAliasPath", "LB < RB"); 115 | for (int i = leftBoundIndex; i < rightBoundIndex; i++) { 116 | evilEdges.add(edges.get(i)); 117 | } 118 | } 119 | 120 | for (var edge: evilEdges) { 121 | Logging.debug("TypeAliasPath", String.format("Found Evil Edge: %s", edge)); 122 | } 123 | } 124 | 125 | 126 | public Set getConnectedEdges(T node) { 127 | var result = new HashSet(); 128 | var nodeIdx = nodes.indexOf(node); 129 | if (nodeIdx != -1) { 130 | if (nodeIdx > 0) { 131 | result.add(edges.get(nodeIdx - 1)); 132 | } 133 | if (nodeIdx < nodes.size() - 1) { 134 | result.add(edges.get(nodeIdx)); 135 | } 136 | } 137 | return result; 138 | } 139 | 140 | public void createSubPathsOfLength(int length) { 141 | if (length < 1) { 142 | return; 143 | } 144 | for (int i = 0; i < nodes.size() - length + 1; i++) { 145 | var subPathNodes = nodes.subList(i, i + length); 146 | var hash = getPathsHashCode(subPathNodes); 147 | if (!subPathsOfLengthWithHash.containsKey(length)) { 148 | subPathsOfLengthWithHash.put(length, new HashMap<>()); 149 | } 150 | if (!subPathsOfLengthWithHash.get(length).containsKey(hash)) { 151 | subPathsOfLengthWithHash.get(length).put(hash, subPathNodes); 152 | } 153 | } 154 | } 155 | 156 | public int getPathsHashCode(List path) { 157 | int hash = 0; 158 | for (var 
t : path) { 159 | hash = 31 * hash + t.hashCode(); 160 | } 161 | return hash; 162 | } 163 | 164 | @Override 165 | public int hashCode() { 166 | return edges.hashCode() + nodes.hashCode(); 167 | } 168 | 169 | @Override 170 | public boolean equals(Object obj) { 171 | if (this == obj) { 172 | return true; 173 | } 174 | if (obj == null || getClass() != obj.getClass()) { 175 | return false; 176 | } 177 | TypeFlowPath<?> other = (TypeFlowPath<?>) obj; 178 | return this.nodes.equals(other.nodes) && this.edges.equals(other.edges); // compare contents, not hash codes 179 | } 180 | 181 | @Override 182 | public String toString() { 183 | StringBuilder builder = new StringBuilder(); 184 | builder.append(String.format("Path-%s: ", shortUUID)); 185 | builder.append(nodes.get(0)); 186 | for (int i = 0; i < edges.size(); i++) { 187 | builder.append(String.format(" --- %s ---> ", edges.get(i).getType())); 188 | if (i + 1 < nodes.size()) { 189 | builder.append(nodes.get(i + 1)); 190 | } 191 | } 192 | return builder.toString(); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/UnionFind.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow; 2 | 3 | import java.util.HashMap; 4 | import java.util.HashSet; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | public class UnionFind<T> { 9 | private final Map<T, T> parent; 10 | private final Map<T, Integer> rank; 11 | 12 | public UnionFind() { 13 | this.parent = new HashMap<>(); 14 | this.rank = new HashMap<>(); 15 | } 16 | 17 | // Add a new element to the union-find structure 18 | public void add(T element) { 19 | if (!parent.containsKey(element)) { 20 | parent.put(element, element); 21 | rank.put(element, 0); 22 | } 23 | } 24 | 25 | // Find the root of the element with path compression 26 | public T find(T element) { 27 | if (!parent.containsKey(element)) { 28 | throw new IllegalArgumentException("Element not found in UnionFind structure"); 29 | } 30 | 31 | if (!parent.get(element).equals(element)) { 32 | parent.put(element, find(parent.get(element))); // Path compression 33 | } 34 | return parent.get(element); 35 | } 36 | 37 | // Union two elements by rank 38 | public void union(T element1, T element2) { 39 | T root1 = find(element1); 40 | T root2 = find(element2); 41 | 42 | if (!root1.equals(root2)) { 43 | int rank1 = rank.get(root1); 44 | int rank2 = rank.get(root2); 45 | 46 | if (rank1 > rank2) { 47 | parent.put(root2, root1); 48 | } else if (rank1 < rank2) { 49 | parent.put(root1, root2); 50 | } else { 51 | parent.put(root2, root1); 52 | rank.put(root1, rank1 + 1); 53 | } 54 | } 55 | } 56 | 57 | // Check if two elements are in the same set 58 | public boolean connected(T element1, T element2) { 59 | return find(element1).equals(find(element2)); 60 | } 61 | 62 | public boolean contains(T element) { 63 | return parent.containsKey(element); 64 | } 65 | 66 | public Set<T> getCluster(T element) { 67 | Set<T> cluster = new HashSet<>(); 68 | T root = find(element); 69 | for (T key : parent.keySet()) { 70 | if (find(key).equals(root)) { 71 | cluster.add(key); 72 | } 73 | } 74 | return cluster; 75 | } 76 | 77 | public Set<Set<T>> getClusters() { 78 | Map<T, Set<T>> clusters = new HashMap<>(); 79 | for (T element : parent.keySet()) { 80 | T root = find(element); 81 | clusters.computeIfAbsent(root, k -> new HashSet<>()).add(element); 82 | } 83 | return new HashSet<>(clusters.values()); 84 | } 85 | 86 | public void initializeWithCluster(Set<T> cluster) { 87 | T first = null; // remember the first element seen; every other element is unioned with it 88 | for (T element : cluster) { 89 | 
add(element); 90 | if (first == null) { 91 | first = element; 92 | } else { 93 | union(first, element); 94 | } 95 | } 96 | } 97 | } 98 | 99 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/constraint/SizeSource.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.constraint; 2 | 3 | import typeforge.base.dataflow.expression.NMAE; 4 | import typeforge.base.node.CallSite; 5 | import typeforge.utils.Logging; 6 | 7 | /** 8 | * Tracks the source of size information for a Skeleton. 9 | */ 10 | public class SizeSource { 11 | 12 | public enum SourceType { 13 | CALLSITE, // Size determined from a function call 14 | EXPRESSION // Size determined from an expression within a function 15 | } 16 | 17 | private final long size; 18 | private final SourceType sourceType; 19 | private final NMAE expression; // For expression sources only 20 | private final CallSite callSite; // For call site sources only 21 | 22 | /** 23 | * Creates a SizeSource from a function call 24 | */ 25 | public SizeSource(long size, CallSite callSite) { 26 | this.size = size; 27 | this.sourceType = SourceType.CALLSITE; 28 | this.callSite = callSite; 29 | this.expression = null; 30 | } 31 | 32 | /** 33 | * Creates a SizeSource from an expression 34 | */ 35 | public SizeSource(long size, NMAE expression) { 36 | this.size = size; 37 | this.sourceType = SourceType.EXPRESSION; 38 | this.expression = expression; 39 | this.callSite = null; 40 | } 41 | 42 | public long getSize() { 43 | return size; 44 | } 45 | 46 | public SourceType getSourceType() { 47 | return sourceType; 48 | } 49 | 50 | @Override 51 | public String toString() { 52 | if (sourceType == SourceType.CALLSITE) { 53 | return String.format("SizeSource{size=0x%x, callsite=%s}", 54 | size, callSite); 55 | } else { 56 | return String.format("SizeSource{size=0x%x, expr=%s}", 57 | size, expression); 58 | } 59 | } 60 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/expression/ParsedExpr.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.expression; 2 | 3 | import typeforge.utils.Logging; 4 | 5 | import java.util.Optional; 6 | 7 | public class ParsedExpr { 8 | public NMAE base = null; 9 | public NMAE offset = null; 10 | public NMAE index = null; 11 | public NMAE scale = null; 12 | public long offsetValue = 0; 13 | 14 | public static Optional parseFieldAccessExpr(NMAE expr) { 15 | ParsedExpr parsedExpr = new ParsedExpr(); 16 | 17 | if (expr.getNestedExpr().isDereference()) { 18 | parsedExpr.base = expr.getNestedExpr(); 19 | parsedExpr.offsetValue = 0L; 20 | } 21 | else if (expr.getNestedExpr().isRootSymExpr()) { 22 | parsedExpr.base = expr.getNestedExpr(); 23 | parsedExpr.offsetValue = 0L; 24 | } 25 | else if (expr.getNestedExpr().isReference()) { 26 | parsedExpr.base = expr.getNestedExpr(); 27 | parsedExpr.offsetValue = 0L; 28 | } 29 | else { 30 | parsedExpr.base = expr.getNestedExpr().getBase(); 31 | parsedExpr.offset = expr.getNestedExpr().getOffset(); 32 | parsedExpr.index = expr.getNestedExpr().getIndex(); 33 | parsedExpr.scale = expr.getNestedExpr().getScale(); 34 | 35 | if (parsedExpr.offset != null) { 36 | if (!parsedExpr.offset.isNormalConst()) { 37 | Logging.warn("ParsedExpr", String.format("Offset is not a constant: %s, Skipping...", expr)); 38 | return Optional.empty(); 39 | } else { 40 | 
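                // The offset expression is a plain constant: fold it into offsetValue
                // so downstream layout code can work with a concrete byte offset.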
parsedExpr.offsetValue = parsedExpr.offset.getConstant(); 41 | } 42 | } else { 43 | parsedExpr.offsetValue = 0L; 44 | } 45 | } 46 | 47 | return Optional.of(parsedExpr); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/solver/ExternalHandler.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.solver; 2 | 3 | import typeforge.base.dataflow.expression.NMAEManager; 4 | import typeforge.base.node.CallSite; 5 | import typeforge.utils.Logging; 6 | 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | 10 | /** 11 | * Handler for external function calls. 12 | */ 13 | public class ExternalHandler { 14 | 15 | /** 16 | * Base handler class for external functions 17 | */ 18 | public static abstract class Handler { 19 | /** 20 | * Process an external function call 21 | */ 22 | public abstract void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager); 23 | } 24 | 25 | public static class Malloc extends Handler { 26 | @Override 27 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 28 | var ptrExprs = intraSolver.getOrCreateDataFlowFacts(callSite.receiver); 29 | for (var expr: ptrExprs) { 30 | Logging.debug("ExternalHandler.Malloc", 31 | String.format("Set composite of skeleton: %s to true", expr)); 32 | var skeleton = exprManager.getOrCreateSkeleton(expr); 33 | skeleton.setComposite(true); 34 | 35 | var mallocSize = callSite.arguments.get(0); 36 | if (mallocSize.isConstant()) { 37 | skeleton.setSizeFromCallSite(mallocSize.getOffset(), callSite); 38 | Logging.debug("ExternalHandler.Malloc", 39 | String.format("(malloc) Set size of skeleton : %s to 0x%x", expr, callSite.arguments.get(0).getOffset())); 40 | } 41 | } 42 | } 43 | } 44 | 45 | public static class Calloc extends Handler { 46 | @Override 47 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 48 | var ptrExprs = intraSolver.getOrCreateDataFlowFacts(callSite.receiver); 49 | for (var expr: ptrExprs) { 50 | Logging.debug("ExternalHandler.Calloc", 51 | String.format("Set composite of skeleton: %s to true", expr)); 52 | var skeleton = exprManager.getOrCreateSkeleton(expr); 53 | skeleton.setComposite(true); 54 | 55 | var nmemblock = callSite.arguments.get(0); 56 | var memsize = callSite.arguments.get(1); 57 | if (nmemblock.isConstant() && memsize.isConstant()) { 58 | skeleton.setSizeFromCallSite(nmemblock.getOffset() * memsize.getOffset(), callSite); 59 | Logging.debug("ExternalHandler.Calloc", 60 | String.format("(calloc) Set size of skeleton: %s to 0x%x", expr, nmemblock.getOffset() * memsize.getOffset())); 61 | } 62 | } 63 | } 64 | } 65 | 66 | /** 67 | * Handler for memset function. 68 | * For memset-like functions, the first pointer argument is treated as a composite type. 69 | * Because in the vast majority of scenarios, memset is used to initialize composite types, 70 | * regardless of whether their length is a constant. 
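 * <p>
 * Dispatch sketch (the caller here is hypothetical; {@code handle} is the real
 * entry point defined at the bottom of this class):
 * <pre>{@code
 * // e.g. for a resolved call: memset(p, 0, 0x40)
 * ExternalHandler.handle(callSite, "memset", intraSolver, exprManager);
 * // every expression that may flow into `p` is now marked composite,
 * // and 0x40 is recorded as a size hint from this call site
 * }</pre>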
71 | */ 72 | public static class Memset extends Handler { 73 | @Override 74 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 75 | var lengthArg = callSite.arguments.get(2); 76 | 77 | var ptrExprs = intraSolver.getOrCreateDataFlowFacts(callSite.arguments.get(0)); 78 | for (var expr: ptrExprs) { 79 | Logging.debug("ExternalHandler.Memset", 80 | String.format("(memset) Set composite of skeleton: %s to true", expr)); 81 | var skeleton = exprManager.getOrCreateSkeleton(expr); 82 | skeleton.setComposite(true); 83 | 84 | if (lengthArg.isConstant()) { 85 | skeleton.setSizeFromCallSite(lengthArg.getOffset(), callSite); 86 | Logging.debug("ExternalHandler.Memset", 87 | String.format("(memset) Set size of skeleton: %s to %d", expr, lengthArg.getOffset())); 88 | } 89 | } 90 | } 91 | } 92 | 93 | /** 94 | * Handler for memcpy function. 95 | * For memcpy-like functions, the dst and src pointer arguments are treated as composite types 96 | * only if the length argument is a constant. 97 | * Because in other cases, the memcpy function is used to copy data from *char[] 98 | */ 99 | public static class Memcpy extends Handler { 100 | @Override 101 | public void handle(CallSite callSite, IntraSolver intraSolver, NMAEManager exprManager) { 102 | var dstVn = callSite.arguments.get(0); 103 | var srcVn = callSite.arguments.get(1); 104 | var lengthVn = callSite.arguments.get(2); 105 | if (!intraSolver.isTracedVn(dstVn) || !intraSolver.isTracedVn(srcVn)) { 106 | return; 107 | } 108 | var dstExprs = intraSolver.getOrCreateDataFlowFacts(dstVn); 109 | var srcExprs = intraSolver.getOrCreateDataFlowFacts(srcVn); 110 | for (var dstExpr : dstExprs) { 111 | for (var srcExpr : srcExprs) { 112 | var dstSkt = exprManager.getOrCreateSkeleton(dstExpr); 113 | var srcSkt = exprManager.getOrCreateSkeleton(srcExpr); 114 | 115 | if (lengthVn.isConstant()) { 116 | dstSkt.setComposite(true); 117 | dstSkt.setSizeFromCallSite(lengthVn.getOffset(), callSite); 118 | srcSkt.setComposite(true); 119 | srcSkt.setSizeFromCallSite(lengthVn.getOffset(), callSite); 120 | Logging.debug("ExternalHandler.Memcpy", 121 | String.format("(memcpy) Set size and composite from %s -> %s with size %d", srcExpr, dstExpr, lengthVn.getOffset())); 122 | } 123 | } 124 | } 125 | } 126 | } 127 | 128 | // Map of function names to their handlers 129 | private static final Map HANDLERS = new HashMap<>(); 130 | 131 | static { 132 | HANDLERS.put("memset", new Memset()); 133 | HANDLERS.put("memcpy", new Memcpy()); 134 | HANDLERS.put("mempcpy", new Memcpy()); 135 | HANDLERS.put("malloc", new Malloc()); 136 | HANDLERS.put("calloc", new Calloc()); 137 | // `calloc` and `malloc` are always used for allocating heap buffer for composite types 138 | // while `realloc` is used for reallocating heap buffer for `char*` 139 | } 140 | 141 | /** 142 | * Handle an external function call 143 | */ 144 | public static void handle(CallSite callSite, String funcName, IntraSolver intraSolver, NMAEManager exprManager) { 145 | Handler handler = HANDLERS.get(funcName); 146 | if (handler != null) { 147 | handler.handle(callSite, intraSolver, exprManager); 148 | } 149 | } 150 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/dataflow/solver/LayoutPropagator.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.dataflow.solver; 2 | 3 | import typeforge.base.dataflow.TFG.TFGManager; 4 | import 
typeforge.base.dataflow.TFG.TypeFlowGraph; 5 | import typeforge.base.dataflow.constraint.Skeleton; 6 | import typeforge.base.dataflow.expression.NMAE; 7 | import typeforge.base.dataflow.expression.NMAEManager; 8 | import typeforge.utils.Logging; 9 | 10 | import java.util.LinkedList; 11 | import java.util.Queue; 12 | import java.util.Set; 13 | 14 | /** 15 | * Propagates layout information through the whole-program TFG and 16 | * uses the result to find the evil edges. 17 | */ 18 | public class LayoutPropagator { 19 | 20 | InterSolver interSolver; 21 | NMAEManager exprManager; 22 | TFGManager graphManager; 23 | 24 | public LayoutPropagator(InterSolver interSolver) { 25 | this.interSolver = interSolver; 26 | this.exprManager = interSolver.exprManager; 27 | this.graphManager = interSolver.graphManager; 28 | } 29 | 30 | public void run() { 31 | // Step 1: process all the TFGs in a first pass 32 | processAllGraphsFirstPass(); 33 | 34 | // Reorganize the TFGs 35 | graphManager.reOrganize(); 36 | 37 | // Step 2: process the conflicting graphs in the work list 38 | processConflictGraphs(); 39 | } 40 | 41 | private void processConflictGraphs() { 42 | // Step 2: iteratively process the conflicting graphs in the work list 43 | Queue<TypeFlowGraph<NMAE>> workList = new LinkedList<>(); 44 | 45 | for (var graph: graphManager.getGraphs()) { 46 | if (!graphManager.isProcessableGraph(graph)) { 47 | continue; 48 | } 49 | addToWorkListIfConflict(workList, graph); 50 | } 51 | 52 | while (!workList.isEmpty()) { 53 | TypeFlowGraph<NMAE> graph = workList.poll(); 54 | 55 | graph.pathManager.initialize(); 56 | var hasPathMergeConflict = graph.pathManager.tryMergeLayoutFormSamePathsForward(exprManager); 57 | var hasSourceMergeConflict = graph.pathManager.tryMergeLayoutFromSameSourceForward(exprManager); 58 | if (hasPathMergeConflict || hasSourceMergeConflict) { 59 | Logging.error("LayoutPropagator", 60 | "Should not have any merge conflict after the first pass in theory, please check the code."); 61 | } 62 | // The following propagation should, in theory, be redundant at this point 63 | var hasBFSConflict = graph.pathManager.propagateLayoutFromSourcesBFS(); 64 | if (hasBFSConflict) { 65 | Logging.error("LayoutPropagator", 66 | "Should not have any BFS conflict after the first pass in theory, please check the code."); 67 | } 68 | 69 | graph.pathManager.resolveMultiSourceConflicts(); 70 | /* remember to remove the evil edges related to multi-source conflicts */ 71 | for (var edge: graph.pathManager.evilEdgesInMultiSourceResolving) { 72 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 73 | } 74 | 75 | var newGraphs = graphManager.reOrganizeTFG(graph); 76 | for (var newGraph: newGraphs) { 77 | if (!graphManager.isProcessableGraph(newGraph)) { 78 | continue; 79 | } 80 | addToWorkListIfConflict(workList, newGraph); 81 | } 82 | } 83 | } 84 | 85 | private void processAllGraphsFirstPass() { 86 | // Step 1 87 | for (var graph: graphManager.getGraphs()) { 88 | Logging.debug("LayoutPropagator", String.format("*********************** Handle Graph %s ***********************", graph)); 89 | 90 | if (!graphManager.isProcessableGraph(graph)) { 91 | continue; 92 | } 93 | 94 | graph.pathManager.initialize(); 95 | graph.pathManager.tryMergeLayoutFormSamePathsForward(exprManager); 96 | graph.pathManager.tryMergeLayoutFromSameSourceForward(exprManager); 97 | 98 | // Remove evil edges found while aggregating layout information. 99 | // These edges include alias edges.
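                // For example, if two aliased expressions disagree on the field layout at
                // some offset, the alias edge between them was marked evil by the merges
                // above and is dropped here so it cannot pollute later propagation.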
100 | for (var edge: graph.pathManager.evilEdgesInPerPath) { 101 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 102 | } 103 | for (var edge: graph.pathManager.evilEdgesInSourceAggregate) { 104 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 105 | } 106 | /* Backward edges must be removed before BFS, 107 | as the previous merge was based on TFGPath, 108 | and the subsequent BFS will not involve path. */ 109 | for (var edge: graph.pathManager.backwardEdges) { 110 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 111 | } 112 | 113 | graph.pathManager.propagateLayoutFromSourcesBFS(); 114 | 115 | /* remember to remove the evil edges related to BFS */ 116 | for (var edge: graph.pathManager.evilEdgesInPropagateBFS) { 117 | graph.removeEdge(graph.getGraph().getEdgeSource(edge), graph.getGraph().getEdgeTarget(edge)); 118 | } 119 | } 120 | } 121 | 122 | private void addToWorkListIfConflict(Queue> workList, TypeFlowGraph graph) { 123 | var connectedComponents = graph.getConnectedComponents(); 124 | if (connectedComponents.size() > 1) { 125 | Logging.error("LayoutPropagator", 126 | String.format("Now Each Graph should have only one connected component, but %d", connectedComponents.size())); 127 | System.exit(1); 128 | } 129 | 130 | var connects = connectedComponents.get(0); 131 | var success = graphManager.tryToMergeAllNodesSkeleton(graph, connects, exprManager); 132 | // IMPORTANT: If not success in merging, means some conflict nodes are not detected by previous propagateLayoutFromSourcesBFS. 133 | // This is because if the mergedSkeleton from different source has no intersection in their path, their conflicts will not be detected. 134 | // So we need to rebuild the path Manager there and detect them. 135 | if (!success) { 136 | workList.add(graph); 137 | Logging.info("LayoutPropagator", 138 | String.format("Graph: %s (%d) has been added into work list ...", graph, connects.size())); 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/graph/CallGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.graph; 2 | 3 | import typeforge.base.node.FunctionNode; 4 | import typeforge.base.node.NodeBase; 5 | import typeforge.utils.DecompilerHelper; 6 | import typeforge.utils.Global; 7 | import typeforge.utils.FunctionHelper; 8 | import typeforge.utils.Logging; 9 | 10 | import java.util.*; 11 | 12 | import ghidra.app.decompiler.DecompInterface; 13 | import ghidra.app.decompiler.DecompileResults; 14 | import ghidra.program.model.address.Address; 15 | import ghidra.program.model.listing.Function; 16 | import ghidra.program.model.pcode.HighFunction; 17 | import ghidra.util.task.TaskMonitor; 18 | 19 | public class CallGraph extends GraphBase { 20 | /** The cache of function nodes */ 21 | public final Set functionNodes = new HashSet<>(); 22 | 23 | /** The cache of root nodes to nodes */ 24 | public final Map> rootToNodes = new HashMap<>(); 25 | 26 | /** The cache of address to node */ 27 | public final Map addrToNode = new HashMap<>(); 28 | 29 | /** Possible root nodes of the call graph */ 30 | public Set roots; 31 | 32 | public Set leafNodes = new HashSet<>(); 33 | 34 | /** 35 | * Get the Whole Program's call graph. 
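 * <p>
 * Usage sketch (assumes a program has already been loaded into
 * {@code Global.currentProgram}):
 * <pre>{@code
 * CallGraph cg = CallGraph.getCallGraph();
 * cg.decompileAllFunctions();
 * for (FunctionNode leaf : cg.leafNodes) {
 *     // bottom-up analyses can start from the leaf functions
 * }
 * }</pre>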
36 | * We do not resolve indirect calls here, and we consider each function 37 | * without a caller as a root node of the call graph. So the whole program may 38 | * contain multiple root nodes in the call graph. 39 | * @return the whole-program CallGraph 40 | */ 41 | public static CallGraph getCallGraph() { 42 | Set<Function> possibleRoots = new HashSet<>(); 43 | 44 | for (var func : Global.currentProgram.getListing().getFunctions(true)) { 45 | // These functions should not be seen as nodes of a call graph 46 | if (!FunctionHelper.isMeaningfulFunction(func)) { 47 | continue; 48 | } 49 | 50 | // If the function does not have a caller, or it is the 'main' function, 51 | // it is a root node of the whole program's call graph. 52 | // WARNING: Ghidra's getCallingFunctions() may not work correctly, so we need to 53 | // check and complete the call graph manually. 54 | if (func.getCallingFunctions(TaskMonitor.DUMMY).isEmpty() || FunctionHelper.isMainFunction(func)) { 55 | possibleRoots.add(func); 56 | } else if (FunctionHelper.confirmNoDirectCaller(func)) { 57 | possibleRoots.add(func); 58 | } 59 | } 60 | 61 | Logging.info("CallGraph", String.format( 62 | "Found %d possible root nodes of the call graph", 63 | possibleRoots.size() 64 | )); 65 | Logging.info("CallGraph", possibleRoots.toString()); 66 | 67 | return new CallGraph(possibleRoots); 68 | } 69 | 70 | /** 71 | * Decompile each function in the CallGraph. 72 | * Failures are logged; successful results are discarded by this pass. 73 | */ 74 | public void decompileAllFunctions() { 75 | DecompInterface ifc = DecompilerHelper.setUpDecompiler(null); 76 | try { 77 | if (!ifc.openProgram(Global.currentProgram)) { 78 | Logging.error("CallGraph", "Failed to use the decompiler"); 79 | return; 80 | } 81 | 82 | for (var funcNode : functionNodes) { 83 | Function func = funcNode.value; 84 | HighFunction highFunc = null; 85 | DecompileResults decompileRes = ifc.decompileFunction(func, 30, TaskMonitor.DUMMY); 86 | if (!decompileRes.decompileCompleted()) { 87 | Logging.error("CallGraph", "Decompile failed for function " + func.getName()); 88 | continue; 89 | } else { 90 | Logging.debug("CallGraph", "Decompile function " + func.getName()); 91 | } 92 | } 93 | 94 | } finally { 95 | ifc.dispose(); 96 | } 97 | } 98 | 99 | 100 | /** 101 | * Create a call graph with the given root functions. 102 | * We do not use Ghidra's `getCalledFunctions()` API here to build the call graph, 103 | * because it may not work correctly. 104 | * @param possibleRoots the possible root nodes of the call graph 105 | */ 106 | private CallGraph(Set<Function> possibleRoots) { 107 | roots = new HashSet<>(possibleRoots); 108 | 109 | for (Function root : roots) { 110 | buildCallGraph(root); 111 | } 112 | 113 | 114 | // Update each FunctionNode's properties 115 | for (var funcNode : functionNodes) { 116 | if (funcNode.succ.isEmpty()) { 117 | funcNode.isLeaf = true; 118 | leafNodes.add(funcNode); 119 | } 120 | 121 | if (FunctionHelper.isMeaningfulFunction(funcNode.value)) { 122 | funcNode.isMeaningful = true; 123 | } 124 | 125 | if (FunctionHelper.isNormalFunction(funcNode.value)) { 126 | funcNode.isNormal = true; 127 | } 128 | 129 | if (funcNode.value.isExternal() || funcNode.value.isThunk()) { 130 | funcNode.isExternal = true; 131 | } 132 | 133 | addrToNode.put(funcNode.value.getEntryPoint(), funcNode); 134 | } 135 | } 136 | 137 | 138 | /** 139 | * Build the call graph with the given root function.
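 * <p>
 * Implementation note: instead of Ghidra's {@code getCalledFunctions()}, the body
 * below scans the CALL instructions of each visited function and follows their
 * resolved flow targets; indirect calls whose targets cannot be resolved produce
 * no flows and are skipped.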
140 | * @param root the root function of the call graph 141 | */ 142 | public void buildCallGraph(Function root) { 143 | LinkedList workList = new LinkedList<>(); 144 | Set visited = new HashSet<>(); 145 | var currentProgram = Global.currentProgram; 146 | 147 | workList.add(root); 148 | visited.add(root); 149 | 150 | while (!workList.isEmpty()) { 151 | Function cur = workList.remove(); 152 | var funcInsts = currentProgram.getListing().getInstructions(cur.getBody(), true); 153 | boolean hasIndirectCallee = false; 154 | for (var inst : funcInsts) { 155 | if (inst.getMnemonicString().equals("CALL")) { 156 | // If Call instruction is indirect that can't be resolved, flows will be empty 157 | var instFlows = inst.getFlows(); 158 | if (instFlows.length >= 1) { 159 | hasIndirectCallee = true; 160 | for (var flow : instFlows) { 161 | Function calledFunc = currentProgram.getFunctionManager().getFunctionAt(flow); 162 | if (calledFunc != null) { 163 | addEdge(cur, calledFunc); 164 | if (!visited.contains(calledFunc)) { 165 | visited.add(calledFunc); 166 | if (FunctionHelper.isMeaningfulFunction(calledFunc)) { 167 | workList.add(calledFunc); 168 | } 169 | } 170 | } else { 171 | Logging.error("CallGraph", "Function not found at " + flow); 172 | } 173 | } 174 | } else { 175 | Logging.trace("CallGraph", "Indirect call at " + inst.getAddress()); 176 | } 177 | } 178 | } 179 | 180 | if (!hasIndirectCallee) { 181 | getNode(cur); 182 | visited.add(cur); 183 | } 184 | } 185 | rootToNodes.put(root, visited); 186 | } 187 | 188 | 189 | @Override 190 | protected NodeBase createNode(Function value, int node_id) { 191 | FunctionNode funcNode = new FunctionNode(value, node_id); 192 | functionNodes.add(funcNode); 193 | return funcNode; 194 | } 195 | 196 | @Override 197 | public FunctionNode getNode(Function value) { 198 | return (FunctionNode) super.getNode(value); 199 | } 200 | 201 | public FunctionNode getNodebyAddr(Address addr) { 202 | return addrToNode.get(addr); 203 | } 204 | 205 | public Set getCallees(FunctionNode caller) { 206 | Set res = new HashSet<>(); 207 | for (var callee : caller.succ) { 208 | res.add((FunctionNode)callee); 209 | } 210 | return res; 211 | } 212 | 213 | public Set getCallers(FunctionNode callee) { 214 | Set res = new HashSet<>(); 215 | for (var caller : callee.pred) { 216 | res.add((FunctionNode)caller); 217 | } 218 | return res; 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/graph/GraphBase.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.graph; 2 | 3 | import typeforge.base.node.NodeBase; 4 | 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.LinkedList; 8 | import java.util.Map; 9 | import java.util.Set; 10 | 11 | public abstract class GraphBase { 12 | 13 | /** Graph's id */ 14 | protected int id = -1; 15 | 16 | /** Whether the graph has been changed */ 17 | protected boolean changed = false; 18 | 19 | /** Map from node's value to node */ 20 | private final Map> valueToNode = new HashMap<>(); 21 | 22 | /** Map from node's id to node's value */ 23 | protected final Map idToValueMap = new HashMap<>(); 24 | 25 | /** Map from node's value to node's id */ 26 | protected final Map valueToIdMap = new HashMap<>(); 27 | 28 | /** Node id in the graph */ 29 | public int node_id = 0; 30 | 31 | /** 32 | * An array of integers, where the indexes represent the id of each node and 33 | * the values are the depth-first 
numbering. 34 | */ 35 | protected int[] depthFirstNums = null; 36 | 37 | /** 38 | * Get a Node for the given value from the graph. 39 | * This may create a new node if needed. 40 | * @param value The node's value 41 | * @return the graph node. 42 | */ 43 | public NodeBase getNode(T value) { 44 | if (valueToNode.containsKey(value)) { 45 | return valueToNode.get(value); 46 | } 47 | 48 | NodeBase res = createNode(value, node_id); 49 | 50 | valueToNode.put(value, res); 51 | idToValueMap.put(node_id, value); 52 | valueToIdMap.put(value, node_id); 53 | node_id++; 54 | changed = true; 55 | return res; 56 | } 57 | 58 | /** 59 | * Create a graph edge with source and destination. 60 | * This also creates the graph node of the given parameters if needed. 61 | * @param from the source node's value 62 | * @param to the destination node's value 63 | */ 64 | public void addEdge(T from, T to) { 65 | NodeBase src = getNode(from); 66 | NodeBase dst = getNode(to); 67 | if (src.succ.contains(dst)) { 68 | changed = false; 69 | return; 70 | } 71 | src.succ.add(dst); 72 | dst.pred.add(src); 73 | changed = true; 74 | } 75 | 76 | /** 77 | * Delete a graph edge with source and destination. 78 | * @param from the source node's value 79 | * @param to the destination node's value 80 | */ 81 | public void deleteEdge(T from, T to) { 82 | NodeBase src = getNode(from); 83 | NodeBase dst = getNode(to); 84 | 85 | if (src.succ.remove(dst)) { 86 | changed = true; 87 | } 88 | if (dst.pred.remove(src)) { 89 | changed = true; 90 | } 91 | } 92 | 93 | /** 94 | * Return a list of the value's successors 95 | * @param value the node value 96 | * @return Return a list of the value's successors 97 | */ 98 | public Set getSuccs(T value) { 99 | NodeBase tmp = getNode(value); 100 | Set res = new HashSet<>(); 101 | for (NodeBase node : tmp.succ) { 102 | res.add(node.value); 103 | } 104 | return res; 105 | } 106 | 107 | /** 108 | * Return a list of the node's successors 109 | * @param node the node 110 | * @return Return a list of the node's successors 111 | */ 112 | public Set> getSuccNodes(NodeBase node) { 113 | return node.succ; 114 | } 115 | 116 | /** 117 | * Return a list of the value's predecessors 118 | * @param value the node value 119 | * @return Return a list of the value's predecessors 120 | */ 121 | public Set getPreds(T value) { 122 | NodeBase tmp = getNode(value); 123 | Set res = new HashSet<>(); 124 | for (NodeBase node : tmp.pred) { 125 | res.add(node.value); 126 | } 127 | return res; 128 | } 129 | 130 | /** 131 | * Return a list of the node's predecessors 132 | * @param node the node 133 | * @return Return a list of the node's predecessors 134 | */ 135 | public Set> getPredNodes(NodeBase node) { 136 | return node.pred; 137 | } 138 | 139 | 140 | /** 141 | * Get all nodes in the graph 142 | * @return a set of all nodes in the graph 143 | */ 144 | public Set> getAllNodes() { 145 | Set> res = new HashSet<>(); 146 | for (var entry : valueToNode.entrySet()) { 147 | res.add(entry.getValue()); 148 | } 149 | return res; 150 | } 151 | 152 | /** 153 | * Check if the graph has a path from src to dst 154 | * @param from The src node 155 | * @param to The dst node 156 | * @return True if it has a path from src to dst 157 | */ 158 | public boolean hasPath(T from, T to) { 159 | NodeBase src = getNode(from); 160 | NodeBase dst = getNode(to); 161 | if (src == null || dst == null) { 162 | return false; 163 | } 164 | 165 | LinkedList> workList = new LinkedList<>(); 166 | Set> visited = new HashSet<>(); 167 | workList.add(src); 168 | 
visited.add(src); 169 | while (!workList.isEmpty()) { 170 | var cur = workList.remove(); 171 | for (var succ : getSuccNodes(cur)) { 172 | if (succ == dst) { // reached the destination node 173 | return true; 174 | } 175 | if (visited.contains(succ)) { 176 | continue; 177 | } 178 | visited.add(succ); 179 | workList.add(succ); 180 | } 181 | } 182 | return false; 183 | } 184 | 185 | 186 | public int getNodeCount() { 187 | return node_id; 188 | } 189 | 190 | 191 | /** 192 | * Create a graph node with the given value. 193 | * @param value the node's value 194 | * @return the graph node 195 | */ 196 | protected abstract NodeBase<T> createNode(T value, int node_id); 197 | } 198 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/graph/SDGraph.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.graph; 2 | 3 | import java.util.*; 4 | 5 | import typeforge.base.node.DataTypeNode; 6 | import typeforge.base.node.NodeBase; 7 | import typeforge.utils.Logging; 8 | import ghidra.program.model.data.*; 9 | import ghidra.program.model.data.Enum; 10 | 11 | /** 12 | * Structure Dependency Graph 13 | */ 14 | public class SDGraph extends GraphBase<DataType> { 15 | 16 | /** The cache of SDGraphs */ 17 | private static final Map<DataType, SDGraph> sdGraphCache = new HashMap<>(); 18 | 19 | /** 20 | * Get the SDGraph of the given data type. If the SDGraph does 21 | * not exist, a new one will be created. 22 | * @param root the root data type of the SDGraph 23 | * @return the SDGraph 24 | */ 25 | public static SDGraph getSDGraph(DataType root) { 26 | if (sdGraphCache.containsKey(root)) { 27 | return sdGraphCache.get(root); 28 | } 29 | SDGraph sdg = new SDGraph(root); 30 | sdGraphCache.put(root, sdg); 31 | return sdg; 32 | } 33 | 34 | /** 35 | * Create an SDGraph with the given root data type. 36 | * @param root the root data type 37 | */ 38 | private SDGraph(DataType root) { 39 | Logging.debug("SDGraph", root.toString()); 40 | if (!(root instanceof Structure st)) { 41 | Logging.error("SDGraph", "The root data type is not a structure"); 42 | return; 43 | } 44 | 45 | buildAll(st); 46 | } 47 | 48 | /** 49 | * Build the SDGraph for the given root, recursively. 50 | */ 51 | private void buildAll(Structure root) { 52 | LinkedList<DataTypeNode> workList = new LinkedList<>(); 53 | HashSet<DataTypeNode> visited = new HashSet<>(); 54 | 55 | DataTypeNode rootNode = (DataTypeNode) getNode(root); 56 | 57 | workList.add(rootNode); 58 | while (!workList.isEmpty()) { 59 | DataTypeNode cur = workList.poll(); 60 | if (cur.value instanceof Structure st) { 61 | handleStructureNode(cur, st, workList, visited); 62 | } else if (cur.value instanceof Array array) { 63 | throw new UnsupportedOperationException("Array is not supported yet"); 64 | } else if (cur.value instanceof Union union) { 65 | throw new UnsupportedOperationException("Union is not supported yet"); 66 | } else { 67 | throw new UnsupportedOperationException("Unsupported data type"); 68 | } 69 | } 70 | } 71 | 72 | 73 | /** 74 | * Traverse the fields of a structure node and try to build the SDGraph.
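 * <p>
 * For example, given {@code struct A { struct B b; struct C *c; }}, field
 * {@code b} yields a NESTED edge at offset 0 and field {@code c} yields a
 * REFERENCE edge at the pointer's offset, matching the example in
 * DataTypeNode's class comment.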
75 | * @param node the object of DataTypeNode 76 | * @param st the structure DataType 77 | * @param workList the worklist for building the SDGraph 78 | */ 79 | private void handleStructureNode(DataTypeNode node, Structure st, 80 | LinkedList workList, 81 | HashSet visited) 82 | { 83 | for (var dtc : st.getDefinedComponents()) { 84 | DataType fieldDT = dtc.getDataType(); 85 | 86 | if (fieldDT instanceof BuiltInDataType) { 87 | continue; 88 | 89 | } else if (fieldDT instanceof Pointer ptr) { 90 | // TODO: consider to handle multiple pointers? especially for ** 91 | // TODO: pointer should be handled differently from other types 92 | DataType pointedDT = ptr.getDataType(); 93 | if (pointedDT instanceof Structure pointedST) { 94 | Logging.debug("SDGraph", "Reference: " + fieldDT + " offset: " + dtc.getOffset()); 95 | DataTypeNode dstNode = (DataTypeNode) getNode(pointedST); 96 | addEdge(node, dstNode, EdgeType.REFERENCE, dtc.getOffset()); 97 | if (!visited.contains(dstNode)) { 98 | workList.add(dstNode); 99 | visited.add(dstNode); 100 | } 101 | } 102 | // TODO: handle other types of pointer 103 | 104 | } else if (fieldDT instanceof Array) { 105 | continue; 106 | 107 | } else if (fieldDT instanceof Structure fst) { 108 | DataTypeNode dstNode = (DataTypeNode) getNode(fst); 109 | Logging.debug("SDGraph", "Nested: " + fst.getName() + " offset: " + dtc.getOffset()); 110 | addEdge(node, dstNode, EdgeType.NESTED, dtc.getOffset()); 111 | if (!visited.contains(dstNode)) { 112 | workList.add(dstNode); 113 | visited.add(dstNode); 114 | } 115 | 116 | } else if (fieldDT instanceof Union) { 117 | continue; 118 | 119 | } else if (fieldDT instanceof FunctionDefinition) { 120 | continue; 121 | 122 | } else if (fieldDT instanceof Enum) { 123 | continue; 124 | 125 | } else if (fieldDT instanceof TypeDef) { 126 | continue; 127 | 128 | } else if (fieldDT instanceof BitFieldDataType) { 129 | continue; 130 | 131 | } else { 132 | Logging.error("SDGraph", "Unsupported data type: " + fieldDT); 133 | } 134 | } 135 | } 136 | 137 | public enum EdgeType { 138 | /** 139 | * SDG Graph has the following types of edges: 140 | * 1. Nested Edge: If a structure A contains a structure B, then there is a nested edge from A to B. 141 | * 2. Reference Edge: If a structure A contains a pointer to a structure B, then there is a reference edge from A to B. 142 | * 3. Union Edge: If a structure A contains a union B, then there is a union edge from A to B. 143 | * 4. Array Edge: If a structure A contains an array of other type B, then there is an array edge from A to B. 144 | * 5. FuncPtr Edge: If a structure A contains a function pointer to a function B, then there is a function edge from A to B. 145 | * 6. Enum Edge: If a structure A contains an enum B, then there is an enum edge from A to B. 146 | * 7. Normal Edge: If a structure A contains a primitive type pointer which points to B, then there is a normal edge from A to B. 
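 * <p>
 * Note: handleStructureNode above currently emits only NESTED and REFERENCE
 * edges; the other kinds are declared for the field categories (unions, arrays,
 * function pointers, enums, primitives) that are skipped there for now.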
147 | */ 148 | NESTED, REFERENCE, UNION, ARRAY, FUNC_PTR, ENUM, NORMAL 149 | } 150 | 151 | public static class SDEdge { 152 | public final DataTypeNode srcNode; 153 | public final DataTypeNode dstNode; 154 | public final EdgeType edgeType; 155 | public final int offset; 156 | 157 | public SDEdge(DataTypeNode srcNode, DataTypeNode dstNode, EdgeType edgeType, int offset) { 158 | this.srcNode = srcNode; 159 | this.dstNode = dstNode; 160 | this.edgeType = edgeType; 161 | this.offset = offset; 162 | } 163 | 164 | @Override 165 | public String toString() { 166 | return "SDEdge{" + 167 | "srcNode=" + srcNode + 168 | ", dstNode=" + dstNode + 169 | ", edgeType=" + edgeType + 170 | ", offset=" + offset + 171 | '}'; 172 | } 173 | } 174 | 175 | /** 176 | * Add an edge to the SDGraph. 177 | * @param srcNode the source node 178 | * @param dstNode the destination node 179 | * @param edge_type the type of the edge 180 | * @param offset the offset of the dependency 181 | */ 182 | public void addEdge(DataTypeNode srcNode, DataTypeNode dstNode, EdgeType edge_type, int offset) { 183 | if (srcNode.offsetToEdge.get(offset) != null) { 184 | Logging.warn("SDGraph", "The offset " + offset + " already exists in the srcNode"); 185 | if (srcNode.offsetToEdge.get(offset).dstNode != dstNode) { 186 | Logging.error("SDGraph", "The offset " + offset + " already exists in the srcNode, but the dstNode is different"); 187 | } 188 | return; 189 | } 190 | 191 | SDEdge edge = new SDEdge(srcNode, dstNode, edge_type, offset); 192 | srcNode.edges.add(edge); 193 | srcNode.offsetToEdge.put(offset, edge); 194 | } 195 | 196 | /** 197 | * Collect all edges recorded on the DataTypeNodes of this graph. 198 | * @return a Set of edges 199 | */ 200 | public Set<SDEdge> getAllEdges() { 201 | Set<NodeBase<DataType>> allNodes = getAllNodes(); 202 | Set<SDEdge> allEdges = new HashSet<>(); 203 | 204 | for (NodeBase<DataType> node : allNodes) { 205 | if (node instanceof DataTypeNode dtNode) { 206 | allEdges.addAll(dtNode.edges); 207 | } 208 | } 209 | 210 | return allEdges; 211 | } 212 | 213 | @Override 214 | protected NodeBase<DataType> createNode(DataType value, int node_id) { 215 | return new DataTypeNode(value, node_id); 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/node/CallSite.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.node; 2 | 3 | import ghidra.program.model.address.Address; 4 | import ghidra.program.model.listing.Function; 5 | import ghidra.program.model.pcode.PcodeOp; 6 | import ghidra.program.model.pcode.Varnode; 7 | 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | public class CallSite { 13 | public Function caller; 14 | public Address calleeAddr; 15 | public PcodeOp callOp; 16 | public List<Varnode> arguments; 17 | public Varnode receiver; 18 | private boolean hasReceiver = false; 19 | 20 | public CallSite(Function caller, Address calleeAddr, PcodeOp callOp) { 21 | this.caller = caller; 22 | this.calleeAddr = calleeAddr; 23 | this.callOp = callOp; 24 | this.arguments = new ArrayList<>(); 25 | for (int i = 1; i < callOp.getNumInputs(); i++) { 26 | arguments.add(callOp.getInput(i)); 27 | } 28 | 29 | receiver = callOp.getOutput(); 30 | if (receiver != null) { 31 | hasReceiver = true; 32 | } 33 | } 34 | 35 | public boolean hasReceiver() { 36 | return hasReceiver; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | // It's really hard for Ghidra to get the
asm addr from pcode, so we use the BasicBlock addr instead. 42 | return String.format( 43 | "CallSite{BBAddr=%s}", 44 | callOp.getParent().getStart().toString() 45 | ); 46 | } 47 | 48 | @Override 49 | public int hashCode() { 50 | return caller.hashCode() * 31 + calleeAddr.hashCode() * 17 + callOp.hashCode(); 51 | } 52 | 53 | @Override 54 | public boolean equals(Object obj) { 55 | if (obj == this) { 56 | return true; 57 | } 58 | if (!(obj instanceof CallSite other)) { 59 | return false; 60 | } 61 | return this.caller.equals(other.caller) && 62 | this.calleeAddr.equals(other.calleeAddr) && 63 | this.callOp.equals(other.callOp); 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/node/DataTypeNode.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.node; 2 | 3 | import typeforge.base.graph.SDGraph; 4 | import ghidra.program.model.data.DataType; 5 | import ghidra.program.model.data.DataTypeComponent; 6 | import ghidra.program.model.data.Structure; 7 | 8 | import java.util.*; 9 | 10 | import typeforge.utils.Logging; 11 | 12 | /** 13 | * In Structure Dependency Graph, each node has multiple edges to other nodes. 14 | * For example: 15 | * struct A { 16 | * struct B b_1; // struct B's size is 8 bytes 17 | * struct *C c_1; 18 | * struct *C c_2; 19 | * } 20 | * The above structure A has 3 edges: 21 | * 1. A -- Nested -- offset 0 --> B 22 | * 2. A -- Reference -- offset 8 --> C 23 | * 3. A -- Reference -- offset 12 --> C 24 | */ 25 | public class DataTypeNode extends NodeBase{ 26 | 27 | /** The map from field offset to field */ 28 | public final Map fieldMap = new HashMap<>(); 29 | 30 | /** The edges of the node */ 31 | public Set edges = new HashSet<>(); 32 | 33 | /** The HashMap of offset to edge */ 34 | public Map offsetToEdge = new HashMap<>(); 35 | 36 | public DataTypeNode(DataType value, int id) { 37 | super(value, id); 38 | Logging.debug("DataTypeNode", "Creating DataTypeNode with value: " + value.getName()); 39 | 40 | if (value instanceof Structure st) { 41 | fillFieldMap(st); 42 | } 43 | 44 | } 45 | 46 | private void fillFieldMap(Structure st) { 47 | for (var dtc : st.getDefinedComponents()) { 48 | fieldMap.put(dtc.getOffset(), dtc); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/node/NodeBase.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.node; 2 | 3 | import java.util.HashSet; 4 | import java.util.Set; 5 | 6 | public abstract class NodeBase { 7 | public final T value; 8 | public int id; 9 | 10 | /** The pred of this node */ 11 | public final Set> pred = new HashSet<>(); 12 | 13 | /** The succ of this node */ 14 | public final Set> succ = new HashSet<>(); 15 | 16 | /** Create a node from the given parameter */ 17 | public NodeBase(T value, int id) { 18 | this.value = value; 19 | this.id = id; 20 | } 21 | 22 | @Override 23 | public int hashCode() { 24 | return value != null ? 
value.hashCode() : 0; 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/parallel/PrepareFunctionNodeCallback.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.parallel; 2 | 3 | import ghidra.app.decompiler.DecompileResults; 4 | import ghidra.app.decompiler.parallel.DecompileConfigurer; 5 | import ghidra.app.decompiler.parallel.DecompilerCallback; 6 | import ghidra.program.model.address.Address; 7 | import ghidra.program.model.listing.Program; 8 | import ghidra.util.task.TaskMonitor; 9 | import typeforge.base.node.FunctionNode; 10 | import typeforge.utils.Logging; 11 | 12 | import java.util.HashMap; 13 | 14 | /** 15 | * Callback for parallel decompile, used for initializing function node 16 | */ 17 | public class PrepareFunctionNodeCallback extends DecompilerCallback { 18 | 19 | public HashMap addrToFuncNode; 20 | public int decompileCount = 0; 21 | 22 | public PrepareFunctionNodeCallback(Program program, 23 | DecompileConfigurer configurer, 24 | HashMap addrToFuncNode) { 25 | super(program, configurer); 26 | this.addrToFuncNode = addrToFuncNode; 27 | } 28 | 29 | @Override 30 | public Void process(DecompileResults decompileResults, TaskMonitor taskMonitor) throws Exception { 31 | var addr = decompileResults.getFunction().getEntryPoint(); 32 | var funcNode = addrToFuncNode.get(addr); 33 | 34 | if (!decompileResults.decompileCompleted()) { 35 | Logging.error("PrepareFunctionNodeCallback", 36 | "Function %s decompiled failed".formatted(funcNode.value.getName())); 37 | funcNode.isDecompiled = false; 38 | return null; 39 | } 40 | 41 | decompileCount += 1; 42 | funcNode.isDecompiled = true; 43 | funcNode.updateDecompileResult(decompileResults); 44 | 45 | if (!funcNode.initialize()) { 46 | Logging.error("PrepareFunctionNodeCallback", 47 | "Function %s initialization failed".formatted(funcNode.value.getName())); 48 | } 49 | 50 | return null; 51 | } 52 | } -------------------------------------------------------------------------------- /src/main/java/typeforge/base/passes/SlidingWindowProcessor.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.passes; 2 | 3 | import typeforge.base.dataflow.constraint.TypeConstraint; 4 | import typeforge.utils.Global; 5 | import typeforge.utils.Logging; 6 | 7 | import java.util.*; 8 | 9 | public class SlidingWindowProcessor { 10 | public final TypeConstraint constraint; 11 | public final List offsetList; 12 | 13 | private int windowCapacity; 14 | private int flattenCnt; 15 | 16 | public SlidingWindowProcessor(TypeConstraint constraint, List offsetList, int initialWindowCapacity) { 17 | this.constraint = constraint; 18 | this.offsetList = offsetList; 19 | this.windowCapacity = initialWindowCapacity; 20 | } 21 | 22 | public Optional tryMatchingFromCurrentOffset(int curOffsetIndex, final int threshold) { 23 | Optional windowOpt = getWindowAtOffset(curOffsetIndex); 24 | if (windowOpt.isEmpty()) { 25 | return Optional.empty(); 26 | } 27 | 28 | var window = windowOpt.get(); 29 | int matchCount = 1; 30 | int alignedWindowSize = window.getAlignedWindowSize(); 31 | long prevWindowStartOffset = offsetList.get(curOffsetIndex); 32 | var prevWindow = window; 33 | 34 | for (int i = curOffsetIndex + windowCapacity; i < offsetList.size(); i += windowCapacity) { 35 | Optional candidateWindowOpt = getWindowAtOffset(i); 36 | if (candidateWindowOpt.isEmpty()) { 37 | break; 38 | } 39 | 40 
| var candidateWindow = candidateWindowOpt.get(); 41 | if (window.equals(candidateWindow)) { 42 | if ((offsetList.get(i) - prevWindowStartOffset) == alignedWindowSize) { 43 | matchCount++; 44 | prevWindowStartOffset = offsetList.get(i); 45 | prevWindow = candidateWindow; 46 | } else { 47 | Logging.debug("SlidingWindowProcessor", "Window equal but not contiguous of Skeleton " + constraint); 48 | Logging.debug("SlidingWindowProcessor", 49 | String.format("Previous Window:\nStart: 0x%x\n%s", prevWindowStartOffset, prevWindow)); 50 | Logging.debug("SlidingWindowProcessor", 51 | String.format("Current Window:\nStart: 0x%x\n%s", offsetList.get(i), candidateWindow)); 52 | break; 53 | } 54 | } else { 55 | break; 56 | } 57 | } 58 | 59 | if (matchCount >= threshold) { 60 | flattenCnt = matchCount; 61 | return Optional.of(window); 62 | } else { 63 | return Optional.empty(); 64 | } 65 | } 66 | 67 | public void setWindowCapacity(int newWindowCapacity) { 68 | this.windowCapacity = newWindowCapacity; 69 | } 70 | 71 | public void resetFlattenCnt() { 72 | flattenCnt = 0; 73 | } 74 | 75 | public int getFlattenCount() { 76 | return flattenCnt; 77 | } 78 | 79 | private Optional getWindowAtOffset(int startIndex) { 80 | if (startIndex + windowCapacity > offsetList.size()) { 81 | return Optional.empty(); 82 | } 83 | 84 | var startOffset = offsetList.get(startIndex); 85 | 86 | /* We don't consider windows with only one element if the element is a pointer */ 87 | if (windowCapacity == 1 && 88 | (constraint.innerSkeleton.fieldAccess.get(startOffset).mostAccessedDT.getLength() == Global.currentProgram.getDefaultPointerSize())) { 89 | return Optional.empty(); 90 | } 91 | 92 | var window = new Window(); 93 | 94 | long prevOffset = -1; 95 | 96 | for (int i = 0; i < windowCapacity; i++) { 97 | var currentOffset = offsetList.get(startIndex + i); 98 | if (constraint.isInconsistentOffset(currentOffset)) { 99 | return Optional.empty(); 100 | } 101 | if (constraint.hasFinalNestedConstraint() && constraint.isInNestedRange(currentOffset)) { 102 | return Optional.empty(); 103 | } 104 | 105 | Object element = null; 106 | if (constraint.finalPtrReference.containsKey(currentOffset)) { 107 | element = constraint.finalPtrReference.get(currentOffset); 108 | } else { 109 | element = constraint.innerSkeleton.fieldAccess.get(currentOffset); 110 | } 111 | 112 | var relativeOffset = currentOffset.intValue() - startOffset.intValue(); 113 | window.addElement(relativeOffset, element); 114 | if (element instanceof TypeConstraint) { 115 | window.addPtrLevel(relativeOffset, constraint.ptrLevel.get(currentOffset) != null ? 
constraint.ptrLevel.get(currentOffset) : 1); 116 | } 117 | 118 | prevOffset = currentOffset; 119 | } 120 | 121 | /* Check if all the elements in the window are of the same type (excluded capacity 1) */ 122 | if (window.isHomogeneous() || (!window.isContiguous())) { 123 | return Optional.empty(); 124 | } 125 | 126 | return Optional.of(window); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/typeforge/base/passes/Window.java: -------------------------------------------------------------------------------- 1 | package typeforge.base.passes; 2 | 3 | import typeforge.base.dataflow.AccessPoints; 4 | import typeforge.base.dataflow.constraint.TypeConstraint; 5 | import typeforge.utils.DataTypeHelper; 6 | import typeforge.utils.Global; 7 | import ghidra.program.model.data.DataType; 8 | 9 | import java.util.Map; 10 | import java.util.TreeMap; 11 | 12 | public class Window { 13 | private final Map windowElements; 14 | private int windowSize; 15 | private final Map ptrLevel; 16 | 17 | public Window() { 18 | this.windowElements = new TreeMap<>(); 19 | this.windowSize = 0; 20 | this.ptrLevel = new TreeMap<>(); 21 | } 22 | 23 | public void addElement(int offset, Object element) { 24 | windowElements.put(offset, element); 25 | } 26 | 27 | public void addPtrLevel(int offset, int level) { 28 | ptrLevel.put(offset, level); 29 | } 30 | 31 | /** 32 | * Get the Aligned Window's Size 33 | * @return aligned window's size 34 | */ 35 | public int getAlignedWindowSize() { 36 | if (windowSize != 0) { 37 | return windowSize; 38 | } 39 | 40 | long totalSize = 0; 41 | long maxAlignSize = 1; 42 | for (var element: windowElements.values()) { 43 | long fieldSize; 44 | long fieldAlignSize = 1; 45 | if (element instanceof TypeConstraint) { 46 | fieldSize = Global.currentProgram.getDefaultPointerSize(); 47 | fieldAlignSize = fieldSize; 48 | } 49 | else { 50 | fieldSize = ((AccessPoints.APSet) element).mostAccessedDT.getLength(); 51 | fieldAlignSize = ((AccessPoints.APSet) element).mostAccessedDT.getAlignment(); 52 | } 53 | 54 | if (totalSize % fieldAlignSize != 0) { 55 | totalSize += fieldAlignSize - (totalSize % fieldAlignSize); 56 | } 57 | 58 | totalSize += fieldSize; 59 | if (fieldAlignSize > maxAlignSize) { 60 | maxAlignSize = fieldAlignSize; 61 | } 62 | } 63 | 64 | if (totalSize % maxAlignSize != 0) { 65 | totalSize += maxAlignSize - (totalSize % maxAlignSize); 66 | } 67 | 68 | windowSize = (int) totalSize; 69 | return windowSize; 70 | } 71 | 72 | public DataType getWindowDT() { 73 | if (windowElements.size() == 1) { 74 | var element = windowElements.get(0); 75 | if (element instanceof AccessPoints.APSet apSet) { 76 | return apSet.mostAccessedDT; 77 | } else { 78 | return DataTypeHelper.getDataTypeByName("void"); 79 | } 80 | } 81 | else if (windowElements.size() > 1) { 82 | return DataTypeHelper.createAnonStructureFromWindow(this); 83 | } else { 84 | return null; 85 | } 86 | } 87 | 88 | public Map getWindowElements() { 89 | return windowElements; 90 | } 91 | 92 | public Map getPtrLevel() { 93 | return ptrLevel; 94 | } 95 | 96 | public boolean isContiguous() { 97 | if (windowElements.size() == 1) { 98 | return true; 99 | } 100 | 101 | int previousEndOffset = 0; 102 | /* Check if the window is contiguous by element's aligned size */ 103 | for (var entry: windowElements.entrySet()) { 104 | int offset = entry.getKey(); 105 | Object element = entry.getValue(); 106 | int fieldSize; 107 | int fieldAlignSize = 1; 108 | 109 | if (element instanceof 
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/FunctionHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.program.model.address.Address;
4 | import ghidra.program.model.listing.Function;
5 | import ghidra.util.task.TaskMonitor;
6 |
7 | import java.util.HashSet;
8 | import java.util.Set;
9 |
10 | public class FunctionHelper {
11 |
12 | /**
13 | * Check if the function is the entry (main) function.
14 | * @param func the function to check
15 | * @return true if the function is the main function
16 | */
17 | public static boolean isMainFunction(Function func) {
18 | if (func.getName().equals("main")) {
19 | return true;
20 | }
21 | // if stripped, the caller function is _start
22 | if (isNormalFunction(func)) {
23 | var callers = func.getCallingFunctions(TaskMonitor.DUMMY);
24 | for (var caller : callers) {
25 | if (caller.getName().equals("_start")) {
26 | return true;
27 | }
28 | }
29 | }
30 | return false;
31 | }
32 |
33 | /**
34 | * Check if the function is a normal function, i.e. neither external nor a thunk.
35 | * @param func the function to check
36 | * @return true if the function is normal
37 | */
38 | public static boolean isNormalFunction(Function func) {
39 | return !func.isExternal() && !func.isThunk();
40 | }
41 |
42 | /**
43 | * Check if the function is a trivial function, which should not be seen
44 | * as a root node of a call graph.
45 | * @param func the Function to check
46 | * @return true if the Function is trivial
47 | */
48 | public static boolean isTrivialFunction(Function func) {
49 | Set<String> forbiddenName = Set.of("_init", "_start", "_fini", "__do_global_dtors_aux",
50 | "frame_dummy", "deregister_tm_clones", "register_tm_clones", "ck_assert_failed");
51 | return forbiddenName.contains(func.getName());
52 | }
53 |
54 | /**
55 | * Check if the function is a meaningful function.
56 | * @param func the function to check
57 | * @return true if the function is meaningful
58 | */
59 | public static boolean isMeaningfulFunction(Function func) {
60 | return isNormalFunction(func) && !isTrivialFunction(func);
61 | }
62 |
63 | /**
64 | * Get all meaningful functions in the current program.
65 | * A meaningful function is a normal function which is not trivial.
66 | * @return the set of meaningful functions
67 | */
68 | public static Set<Function> getMeaningfulFunctions() {
69 | Set<Function> meaningfulFunctions = new HashSet<>();
70 | for (var func : Global.currentProgram.getListing().getFunctions(true)) {
71 | if (isMeaningfulFunction(func)) {
72 | meaningfulFunctions.add(func);
73 | }
74 | }
75 | return meaningfulFunctions;
76 | }
77 |
78 |
79 | /**
80 | * This is a stupid function, but we have to do this,
81 | * because Ghidra's `getCallingFunctions()` and `getCalledFunctions()` may not work correctly.
82 | * For example:
83 | * if function B is never called by function A, but B's function pointer is used inside A,
84 | * Ghidra will still report A as a caller of B via `getCallingFunctions()`, and B as a
85 | * callee of A via `getCalledFunctions()`.
86 | * <p>
87 | * So a function can be a genuine root node yet fail the `getCallingFunctions().isEmpty()` check.
88 | * We need to re-check such functions to complete the set of root nodes.
89 | *
90 | * @return true if the function has no direct caller in the whole program
91 | */
92 | public static boolean confirmNoDirectCaller(Function func) {
93 | boolean noCaller = true;
94 |
95 | for (var caller : func.getCallingFunctions(TaskMonitor.DUMMY)) {
96 | var callerInsts = Global.currentProgram.getListing().getInstructions(caller.getBody(), true);
97 | for (var inst : callerInsts) {
98 | if (inst.getMnemonicString().equals("CALL")) {
99 | var instFlows = inst.getFlows();
100 | if (instFlows.length >= 1) {
101 | for (var flow : instFlows) {
102 | Function calledFunc = Global.currentProgram.getFunctionManager().getFunctionAt(flow);
103 | if (calledFunc != null && calledFunc.equals(func)) {
104 | noCaller = false;
105 | return noCaller;
106 | }
107 | }
108 | }
109 | }
110 | }
111 | }
112 |
113 | return noCaller;
114 | }
115 |
116 |
117 | public static Address getAddress(long offset) {
118 | return Global.currentProgram.getAddressFactory().getDefaultAddressSpace().getAddress(offset);
119 | }
120 |
121 | public static Function getFunction(long offset) {
122 | return Global.currentProgram.getFunctionManager().getFunctionAt(getAddress(offset));
123 | }
124 | }
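
A sketch of how these helpers could combine when collecting call-graph roots (hypothetical driver code, not in the repository; it assumes Global.currentProgram has been initialized):

// Hypothetical helper built on FunctionHelper; not part of the repository.
public static Set<Function> collectCallGraphRoots() {
    Set<Function> roots = new HashSet<>();
    for (Function func : FunctionHelper.getMeaningfulFunctions()) {
        // getCallingFunctions() over-approximates for address-taken functions,
        // so confirmNoDirectCaller() re-checks the actual CALL instructions.
        if (func.getCallingFunctions(TaskMonitor.DUMMY).isEmpty()
                || FunctionHelper.confirmNoDirectCaller(func)) {
            roots.add(func);
        }
    }
    return roots;
}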
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/Global.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.app.script.GhidraScript;
4 | import ghidra.program.model.listing.Program;
5 | import ghidra.program.flatapi.FlatProgramAPI;
6 | /**
7 | * The global state of the current analysis.
8 | */
9 | public class Global {
10 | public static Program currentProgram;
11 | public static FlatProgramAPI flatAPI;
12 | public static GhidraScript ghidraScript;
13 | public static String outputDirectory;
14 | public static long startAddress;
15 |
16 | public static long typeAnalysisBeginTime;
17 | public static long typeAnalysisEndTime;
18 | public static long retypingBeginTime;
19 | public static long retypingEndTime;
20 | public static long prepareAnalysisBeginTime;
21 | public static long prepareAnalysisEndTime;
22 | }
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/GraphHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import typeforge.base.node.DataTypeNode;
4 | import typeforge.base.node.NodeBase;
5 | import typeforge.base.graph.SDGraph;
6 | import ghidra.program.model.data.DataType;
7 |
8 | import java.io.BufferedWriter;
9 | import java.io.IOException;
10 | import java.nio.file.Files;
11 | import java.nio.file.Paths;
12 | import java.util.Set;
13 |
14 | public class GraphHelper {
15 | /**
16 | * Dump the SDGraph to a DOT file.
17 | */
18 | public static void dumpSDGraph(SDGraph sdg, String filename) {
19 | StringBuilder dotBuilder = new StringBuilder();
20 |
21 | Set<NodeBase<DataType>> allNodes = sdg.getAllNodes();
22 | var allEdges = sdg.getAllEdges();
23 |
24 | dotBuilder.append("digraph SDGraph {\n");
25 |
26 | // traverse all nodes
27 | for (var node : allNodes) {
28 | if (node instanceof DataTypeNode dtn) {
29 | String nodeID = "node" + dtn.id;
30 | String nodeLabel = dtn.value.getName();
31 | dotBuilder.append(
32 | String.format(
33 | "%s [label=\"%s\"];\n",
34 | nodeID,
35 | nodeLabel
36 | )
37 | );
38 | }
39 | }
40 |
41 | // traverse all edges
42 | for (var edge : allEdges) {
43 | String srcNodeID = "node" + edge.srcNode.id;
44 | String dstNodeID = "node" + edge.dstNode.id;
45 | String edgeType = edge.edgeType.toString();
46 | String edgeLabel = String.format("Offset %s: %s", Integer.toHexString(edge.offset), edgeType);
47 | dotBuilder.append(
48 | String.format(
49 | "%s -> %s [label=\"%s\"];\n",
50 | srcNodeID,
51 | dstNodeID,
52 | edgeLabel
53 | )
54 | );
55 | }
56 |
57 | dotBuilder.append("}\n");
58 |
59 | try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(filename))) {
60 | writer.write(dotBuilder.toString());
61 | } catch (IOException e) {
62 | Logging.error("GraphHelper", "Failed to write to file: " + filename);
63 | }
64 | }
65 | }
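
For reference, running dumpSDGraph on a two-node graph would emit DOT text of the following shape (the node ids, labels, and edge-type name below are hypothetical):

digraph SDGraph {
node0 [label="astruct_1"];
node1 [label="char *"];
node0 -> node1 [label="Offset 8: POINTER"];
}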
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/HighSymbolHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.program.model.address.Address;
4 | import ghidra.program.model.pcode.HighSymbol;
5 |
6 | public class HighSymbolHelper {
7 |
8 | public static Address getGlobalHighSymbolAddr(HighSymbol globalSym) {
9 | assert globalSym.isGlobal();
10 | return globalSym.getStorage().getMinAddress();
11 | }
12 | }
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/Logging.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 |
6 | import org.apache.logging.log4j.LogManager;
7 | import org.apache.logging.log4j.Logger;
8 | import org.apache.logging.log4j.core.LoggerContext;
9 | import org.apache.logging.log4j.core.config.Configuration;
10 | import org.apache.logging.log4j.core.config.ConfigurationSource;
11 | import org.apache.logging.log4j.core.config.xml.XmlConfiguration;
12 |
13 | /**
14 | * Logging class.
15 | */
16 | public class Logging {
17 |
18 | private static final String DEFAULT_LOGGER_NAME = "TypeForge";
19 | private static final String DEFAULT_CONFIG_FILE_PATH = "/log4j2_default.xml";
20 | private static Logger defaultLogger;
21 |
22 | /**
23 | * Initialize the logging module.
24 | * @return true if initialization succeeded, false otherwise.
25 | */
26 | public static boolean init() {
27 | InputStream in = Logging.class.getResourceAsStream(DEFAULT_CONFIG_FILE_PATH);
28 | try {
29 | assert in != null;
30 | Configuration configuration = new XmlConfiguration(new LoggerContext(DEFAULT_LOGGER_NAME),
31 | new ConfigurationSource(in));
32 | LoggerContext context = (LoggerContext) LogManager.getContext(true);
33 | context.stop();
34 | context.start(configuration);
35 | defaultLogger = context.getLogger(DEFAULT_LOGGER_NAME);
36 | } catch (IOException e) {
37 | System.out.println("Failed to load logging config file: " + DEFAULT_CONFIG_FILE_PATH);
38 | return false;
39 | }
40 | return true;
41 | }
42 |
43 | /**
44 | * Generate an error log.
45 | * @param msg the log message.
46 | */
47 | public static void error(String prefix, String msg) {
48 | defaultLogger.error("[{}] - {}", prefix, msg);
49 | }
50 |
51 | /**
52 | * Generate a warning log.
53 | * @param msg the log message.
54 | */
55 | public static void warn(String prefix, String msg) {
56 | defaultLogger.warn("[{}] - {}", prefix, msg);
57 | }
58 |
59 | /**
60 | * Generate an info log.
61 | * @param msg the log message.
62 | */
63 | public static void info(String prefix, String msg) {
64 | defaultLogger.info("[{}] - {}", prefix, msg);
65 | }
66 |
67 | /**
68 | * Generate a debug log.
69 | * @param msg the debug log.
70 | */
71 | public static void debug(String prefix, String msg) {
72 | defaultLogger.debug("[{}] - {}", prefix, msg);
73 | }
74 |
75 | /**
76 | * Generate a trace log.
77 | * @param msg the trace log.
78 | */
79 | public static void trace(String prefix, String msg) {
80 | defaultLogger.trace("[{}] - {}", prefix, msg);
81 | }
82 | }
--------------------------------------------------------------------------------
/src/main/java/typeforge/utils/TCHelper.java:
--------------------------------------------------------------------------------
1 | package typeforge.utils;
2 |
3 | import ghidra.program.model.data.DataType;
4 | import typeforge.base.dataflow.constraint.Skeleton;
5 |
6 | import java.util.*;
7 |
8 | public class TCHelper {
9 | public static class Interval {
10 | final long start;
11 | final long end;
12 |
13 | Interval(long start, long end) {
14 | this.start = start;
15 | this.end = end;
16 | }
17 |
18 | public boolean inInterval(long offset) {
19 | return offset > start && offset < end;
20 | }
21 |
22 | @Override
23 | public boolean equals(Object obj) {
24 | if (obj instanceof Interval) {
25 | return this.start == ((Interval) obj).start && this.end == ((Interval) obj).end;
26 | }
27 | return false;
28 | }
29 |
30 | @Override
31 | public int hashCode() {
32 | return Objects.hash(start, end);
33 | }
34 | }
35 |
36 | /**
37 | * Return true if one field's start offset lies strictly inside another field's interval.
38 | * Also return true if two fields share a start offset but one field's end runs past the start of the other's next field.
39 | * @return true if the two skeletons' fields overlap
40 | */
41 | public static boolean checkFieldOverlapStrict(Skeleton a, Skeleton b) {
42 | var aIntervals = buildIntervals(a);
43 | var bIntervals = buildIntervals(b);
44 | for (var aI: aIntervals) {
45 | for (var bI: bIntervals) {
46 | if (aI.inInterval(bI.start) || bI.inInterval(aI.start)) {
47 | return true;
48 | }
49 |
50 | if (aI.start == bI.start) {
51 | var aNI = getNextLargerInterval(aI, aIntervals);
52 | if (aNI != null && bI.end > aNI.start) {
53 | return true;
54 | }
55 |
56 | var bNI = getNextLargerInterval(bI, bIntervals);
57 | if (bNI != null && aI.end > bNI.start) {
58 | return true;
59 | }
60 | }
61 | }
62 | }
63 | return false;
64 | }
65 |
66 | // TODO: not ideal, since (0,4) may really be (0,2) + (2,4), and that still overlaps
67 | public static boolean checkFieldOverlapRelax(Skeleton a, Skeleton b) {
68 | var aIntervals = buildIntervalWithMostAccessed(a);
69 | var bIntervals = buildIntervalWithMostAccessed(b);
70 | for (var aI: aIntervals) {
71 | for (var bI: bIntervals) {
72 | if (aI.inInterval(bI.start) || bI.inInterval(aI.start)) {
73 | return true;
74 | }
75 |
76 | if (aI.start == bI.start) {
77 | var aNI = getNextLargerInterval(aI, aIntervals);
78 | if (aNI != null && bI.end > aNI.start) {
79 | return true;
80 | }
81 |
82 | var bNI = getNextLargerInterval(bI, bIntervals);
83 | if (bNI != null && aI.end > bNI.start) {
84 | return true;
85 | }
86 | }
87 | }
88 | }
89 | return false;
90 | }
91 |
92 |
93 | // public static boolean checkFieldSizeInConsistent(TypeConstraint a, TypeConstraint b) {
94 | // if (a == b) {
95 | // return false;
96 | // }
97 | // Set<Interval> thisIntervals = new HashSet<>();
98 | // for (var offset : a.fieldAccess.keySet()) {
99 | // long endOffset = calcFieldEndOffset(a, offset);
100 | // thisIntervals.add(new Interval(offset, endOffset));
101 | // }
102 | //
103 | // Set<Interval> otherIntervals = new HashSet<>();
104 | // for (var offset : b.fieldAccess.keySet()) {
105 | // long endOffset = calcFieldEndOffset(b, offset);
106 | // otherIntervals.add(new Interval(offset, endOffset));
107 | // }
108 | //
109 | // Set<Interval> commonIntervals = new HashSet<>(thisIntervals);
110 | // commonIntervals.retainAll(otherIntervals);
111 | //
112 | // thisIntervals.removeAll(commonIntervals);
113 | // otherIntervals.removeAll(commonIntervals);
114 | //
115 | // if (thisIntervals.isEmpty() || otherIntervals.isEmpty()) {
116 | // return false;
117 | // }
118 | //
119 | // List<Interval> mergedIntervals = new ArrayList<>(thisIntervals);
120 | // mergedIntervals.addAll(otherIntervals);
121 | // mergedIntervals.sort(Comparator.comparingLong(interval -> interval.start));
122 | // for (int i = 0; i < mergedIntervals.size() - 1; i++) {
123 | // Interval current = mergedIntervals.get(i);
124 | // Interval next = mergedIntervals.get(i + 1);
125 | // if (current.end > next.start) {
126 | // return true;
127 | // }
128 | // }
129 | // return false;
130 | // }
131 |
132 | public static ArrayList<Interval> buildIntervals(Skeleton a) {
133 | ArrayList<Interval> intervals = new ArrayList<>();
134 | for (var offset : a.fieldAccess.keySet()) {
135 | for (var endOffset : calcFieldEndOffset(a, offset)) {
136 | intervals.add(new Interval(offset, endOffset));
137 | }
138 | }
139 | return intervals;
140 | }
141 |
142 | public static ArrayList<Interval> buildIntervalWithMostAccessed(Skeleton a) {
143 | ArrayList<Interval> intervals = new ArrayList<>();
144 |
145 | for (var offset : a.fieldAccess.keySet()) {
146 | var aps = a.fieldAccess.get(offset);
147 | if (aps == null || aps.getApSet().isEmpty()) {
148 | continue;
149 | }
150 |
151 | var typeFreq = aps.getTypeFreq();
152 | DataType mostAccessedType = null;
153 | int maxAccess = 0;
154 |
155 | for (var entry : typeFreq.entrySet()) {
156 | if (entry.getValue() > maxAccess) {
157 | maxAccess = entry.getValue();
158 | mostAccessedType = entry.getKey();
159 | }
160 | }
161 |
162 | if (mostAccessedType != null) {
163 | long endOffset = offset + mostAccessedType.getLength();
164 | intervals.add(new Interval(offset, endOffset));
165 | }
166 | }
167 |
168 | return intervals;
169 | }
170 |
171 | public static Interval getNextLargerInterval(Interval cur, List<Interval> intervals) {
172 | for (var interval: intervals) {
173 | if (interval.start >= cur.end) {
174 | return interval;
175 | }
176 | }
177 | return null;
178 | }
179 |
180 |
181 | public static Set<Long> calcFieldEndOffset(Skeleton a, Long offset) {
182 | Set<Long> ends = new TreeSet<>();
183 | var fields = a.fieldAccess.get(offset);
184 | if (fields == null) {
185 | return ends;
186 | }
187 |
188 | for (var ap : fields.getApSet()) {
189 | if (ap.dataType != null) {
190 | ends.add(offset + ap.dataType.getLength());
191 | }
192 | }
193 | return ends;
194 | }
195 | }
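
A worked example of the strict overlap test (hypothetical offsets; Interval bounds are exclusive, so inInterval is a strict check). The snippet assumes it lives in typeforge.utils, since Interval's constructor is package-private:

var a = new TCHelper.Interval(0, 4);      // field at offset 0, 4 bytes wide
var b = new TCHelper.Interval(2, 6);      // field at offset 2, 4 bytes wide
boolean overlap = a.inInterval(b.start);  // true: 2 lies strictly inside (0, 4)

// Shared start offsets: one skeleton has a 4-byte field at 0, the other has
// 2-byte fields at 0 and 2. The 4-byte field runs past the next field's start,
// which is the case getNextLargerInterval() catches.
var wide = new TCHelper.Interval(0, 4);
var next = new TCHelper.Interval(2, 4);
boolean overlap2 = wide.end > next.start; // true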
--------------------------------------------------------------------------------
/src/main/resources/log4j2_default.xml:
--------------------------------------------------------------------------------
[16 lines of XML markup lost in extraction: the default log4j2 configuration loaded by Logging.init()]
--------------------------------------------------------------------------------
/src/test/java/README.test.txt:
--------------------------------------------------------------------------------
1 | The "test" directory is intended to hold unit test cases. The package structure within
2 | this folder should correspond to that found in the "src" folder.
--------------------------------------------------------------------------------
/src/test/java/typeforge/base/dataflow/NMAETest.java:
--------------------------------------------------------------------------------
1 | package typeforge.base.dataflow;
2 |
3 | import static org.mockito.Mockito.*;
4 |
5 | import typeforge.utils.Logging;
6 | import ghidra.program.model.listing.Function;
7 | import ghidra.program.model.pcode.HighFunction;
8 | import ghidra.program.model.pcode.HighSymbol;
9 | import org.junit.jupiter.api.BeforeEach;
10 | import org.junit.jupiter.api.Test;
11 | import org.junit.jupiter.api.extension.ExtendWith;
12 | import org.mockito.Mock;
13 | import org.mockito.junit.jupiter.MockitoExtension;
14 |
15 | @ExtendWith(MockitoExtension.class)
16 | public class NMAETest {
17 | @Mock
18 | private HighSymbol mockHighSymbol1;
19 | @Mock
20 | private HighSymbol mockHighSymbol2;
21 | @Mock
22 | private HighSymbol mockHighSymbol3;
23 | @Mock
24 | private HighFunction mockHighFunc;
25 | @Mock
26 | private Function mockFunc;
27 |
28 | @BeforeEach
29 | public void setUp() {
30 | if (!Logging.init()) {
31 | return;
32 | }
33 | when(mockHighSymbol1.getName()).thenReturn("mock_1");
34 | when(mockHighSymbol2.getName()).thenReturn("mock_2");
35 | when(mockHighSymbol3.getName()).thenReturn("mock_3");
36 | when(mockHighSymbol1.getHighFunction()).thenReturn(mockHighFunc);
37 | when(mockHighSymbol2.getHighFunction()).thenReturn(mockHighFunc);
38 | when(mockHighSymbol3.getHighFunction()).thenReturn(mockHighFunc);
39 | when(mockHighFunc.getFunction()).thenReturn(mockFunc);
40 | when(mockFunc.getName()).thenReturn("mock_func");
41 | }
42 |
43 | @Test
44 | public void test() {
45 | // var expr1 = new SymbolExpr.Builder()
46 | // .rootSymbol(mockHighSymbol1)
47 | // .build();
48 | //
49 | // var expr2 = new SymbolExpr.Builder()
50 | // .rootSymbol(mockHighSymbol2)
51 | // .build();
52 | //
53 | // var expr3 = new SymbolExpr.Builder()
54 | // .rootSymbol(mockHighSymbol3)
55 | // .build();
56 | //
57 | //
58 | // assertEquals(expr1.getRepresentation(), "mock_1");
59 | // assertEquals(expr2.getRepresentation(), "mock_2");
60 | // assertEquals(expr3.getRepresentation(), "mock_3");
61 | //
62 | // var expr4 = new SymbolExpr.Builder().constant(0x8).build();
63 | // var expr5 = new SymbolExpr.Builder().constant(0x10).build();
64 | // var expr6 = new SymbolExpr.Builder().constant(0x18).build();
65 | //
66 | // assertEquals(expr4.getRepresentation(), "0x8");
67 | // assertEquals(expr5.getRepresentation(), "0x10");
68 | // assertEquals(expr6.getRepresentation(), "0x18");
69 |
70 | // var expr7 = expr1.add(expr4);
71 | // var expr8 = expr2.add(expr5);
72 | // var expr9 = expr3.add(expr6);
73 | // assertEquals(expr7.getRepresentation(), "mock_1 + 0x8");
74 | // assertEquals(expr8.getRepresentation(), "mock_2 + 0x10");
75 | // assertEquals(expr9.getRepresentation(), "mock_3 + 0x18");
76 | //
77 | // var expr10 = expr7.dereference();
78 | // var expr11 = expr8.dereference();
79 | // var expr12 = expr9.dereference();
80 | // var expr13 = expr1.dereference();
81 | // var expr14 = expr12.dereference();
82 | // assertEquals(expr10.getRepresentation(), "*(mock_1 + 0x8)");
83 | // assertEquals(expr11.getRepresentation(), "*(mock_2 + 0x10)");
84 | // assertEquals(expr12.getRepresentation(), "*(mock_3 + 0x18)");
85 | // assertEquals(expr13.getRepresentation(), "*(mock_1)");
86 | // assertEquals(expr14.getRepresentation(), "*(*(mock_3 + 0x18))");
87 | //
88 | // var expr15 = expr12.add(expr4);
89 | // var expr16 = expr14.add(expr6);
90 | // var expr17 = expr16.add(expr6);
91 | // assertEquals(expr15.getRepresentation(), "*(mock_3 + 0x18) + 0x8");
92 | // assertEquals(expr16.getRepresentation(), "*(*(mock_3 + 0x18)) + 0x18");
93 | // assertEquals(expr17.getRepresentation(), "*(*(mock_3 + 0x18)) + 0x30");
94 | //
95 | // var expr18 = expr17.add(expr3);
96 | // assertEquals(expr18.getRepresentation(), "*(*(mock_3 + 0x18)) + mock_3 + 0x30");
97 | }
98 | }
--------------------------------------------------------------------------------
/src/test/java/typeforge/base/dataflow/types/LayoutTest.java:
--------------------------------------------------------------------------------
1 | package typeforge.base.dataflow.types;
2 |
3 | import org.junit.jupiter.api.Test;
4 | import typeforge.base.dataflow.Layout;
5 |
6 | import java.util.ArrayList;
7 | import java.util.HashSet;
8 | import java.util.Set;
9 |
10 | public class LayoutTest {
11 | @Test
12 | public void test() {
13 | Set<Integer> sizes1 = new HashSet<>();
14 | sizes1.add(1);
15 | sizes1.add(2);
16 | sizes1.add(3);
17 |
18 | Set<Integer> sizes2 = new HashSet<>();
19 | sizes2.add(3);
20 | sizes2.add(2);
21 | sizes2.add(1);
22 |
23 | Set<Integer> sizes3 = new HashSet<>();
24 | sizes3.add(0x10);
25 |
26 | var interval1 = new Layout.Interval(10L, sizes1);
27 | var interval2 = new Layout.Interval(10L, sizes2);
28 | var interval3 = new Layout.Interval(0, sizes3);
29 |
30 | assert sizes1.equals(sizes2);
31 | assert interval1.equals(interval2);
32 | assert interval1.hashCode() == interval2.hashCode();
33 | assert !interval1.equals(interval3);
34 |
35 | var intervals = new ArrayList<Layout.Interval>();
36 | intervals.add(interval1);
37 | intervals.add(interval3);
38 | var layout1 = new Layout(intervals);
39 |
40 | intervals = new ArrayList<>();
41 | intervals.add(interval3);
42 | intervals.add(interval1);
43 | var layout2 = new Layout(intervals);
44 |
45 | intervals = new ArrayList<>();
46 | intervals.add(interval1);
47 | intervals.add(interval3);
48 | var layout3 = new Layout(intervals);
49 |
50 | assert !layout1.equals(layout2);
51 | assert layout1.equals(layout3);
52 | assert layout1.hashCode() == layout3.hashCode();
53 | }
54 | }
55 |
--------------------------------------------------------------------------------