├── Module.manifest
├── settings.gradle
├── requirements.txt
├── .gitignore
├── .gitmodules
├── extension.properties
├── LICENSE
├── data
    ├── sig
    │   ├── util.py
    │   └── x86.py
    ├── entry.py
    ├── ida-calc-all-metadata.py
    ├── type.py
    ├── test.py
    ├── client.py
    └── parsing.py
├── README.md
└── src
    └── main
        └── java
            └── org
                └── maplebacon
                    └── lumina
                        ├── PythonExecutor.java
                        └── LuminaPlugin.java


/Module.manifest:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | include ':Ghidrathon'


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | capstone>=4.0.2
2 | git+https://github.com/ubcctf/lumina_structs


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__/
2 | /build/
3 | /bin/
4 | /dist/
5 | .gradle/
6 | .settings/
7 | .project
8 | .classpath


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Ghidrathon"]
2 | 	path = Ghidrathon
3 | 	url = https://github.com/mandiant/Ghidrathon.git
4 | 


--------------------------------------------------------------------------------
/extension.properties:
--------------------------------------------------------------------------------
1 | name=Lumina
2 | description=Lumina implementation for Ghidra
3 | author=Maple Bacon
4 | createdOn=04/12/2022
5 | version=@extversion@
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 CTF @ UBC
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/data/sig/util.py:
--------------------------------------------------------------------------------
 1 | from ghidra.program.database.function import FunctionDB
 2 | from ghidra.program.flatapi import FlatProgramAPI
 3 | 
 4 | def hexdump(block):
 5 |     print("\n".join([" ".join([row.hex()[i:i + 2] for i in range(0, len(row.hex()), 2)]) for row in [block[i:i + 16] for i in range(0, len(block), 16)]]))
 6 | 
 7 | 
 8 | #base class for all architectures' signature generation functions
 9 | class Sig:
10 |     def __init__(self, prog):
11 |         self.prog = FlatProgramAPI(prog)
12 |         self.mem = prog.getMemory()
13 | 
14 |     def calc_func_metadata(self, func: FunctionDB) -> tuple[str, bytes, bytes]:
15 |         raise NotImplementedError()
16 | 
17 | 
#metaclass like how binja implements its subscriptable Architecture class
#have to do this to lazy load; otherwise we end up with circular import
class _mapping(type):
    """Metaclass that lets a class be iterated and subscripted by architecture name."""

    def __init__(cls, name, bases, namespace) -> None:
        super().__init__(name, bases, namespace)
        #imported here rather than at module level - see the circular import note above
        from .x86 import X86
        cls.map = {'x86_64': X86, 'x86': X86}

    def __iter__(cls):
        #iterates the supported architecture names
        return iter(cls.map)

    def __getitem__(cls, name: str) -> Sig:
        #KeyError propagates for names that are not registered
        return cls.map[name]
31 | 
class ARCH_MAPPING(metaclass=_mapping):
    """Architecture name -> signature class lookup; all behaviour lives in the _mapping metaclass."""


--------------------------------------------------------------------------------
/data/entry.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | #hotfix for win32com SetupEnvironment NoneType is not callable; we are effectively running a bundled python installation anyway
 4 | sys.frozen = 1
 5 | 
 6 | from .client import LuminaClient
 7 | 
 8 | from ghidra.framework.options import OptionType
 9 | 
10 | 
#ALWAYS register to prevent weird behaviours of resetting config - see removeUnusedOptions() implementation in ToolOptions
#Also apparently storing as INT_TYPE might trigger !isCompatibleOption - the value retrieved from the XML is returned as a Long whereas they expect an int
#seems like that's coz jep stores python integers as longs, so whatever we can just make it a string
settings = plugin.getTool().getOptions("Lumina")     #already creates the category if doesnt exist for us

#one row per option, in registration order: (name, type, default value, help location, description)
#file options need None instead of an empty string as default since that would be incompatible with java.io.File
_lumina_options = (
    ('Host Address', OptionType.STRING_TYPE, '', None, 'Host address for the Lumina server'),
    ('Port', OptionType.STRING_TYPE, '', None, 'Port for the Lumina server'),
    ('Key File', OptionType.FILE_TYPE, None, None, 'Path to the Key file to connect to the Lumina server with, if any'),
    ('TLS Certificate File', OptionType.FILE_TYPE, None, None, 'Path to the TLS Certificate for the Lumina server, if any'),
)
for _option in _lumina_options:
    settings.registerOption(*_option)
del _lumina_options, _option   #dont leave the temporaries behind in the interpreter namespace

#try logging in with configured params
plugin.setClient(LuminaClient(plugin))


--------------------------------------------------------------------------------
/data/ida-calc-all-metadata.py:
--------------------------------------------------------------------------------
 1 | import ida_kernwin, ida_pro, ida_funcs, ida_auto, ida_segment, ida_idp, ida_registry, idc
 2 | 
 3 | #this script writes each function address into a file, and triggers lumina sequentially (intended to use with frida to obtain hash)
 4 | #note: idat is *pretty* fragile so things might break if you reorder code (might even be nondeterministic if its related to lumina initialization)
 5 | #also idat will utilize 100% of a core if the script throws an exception
 6 | 
 7 | 
 8 | #open disasm window well in advance to prepare for lumina
 9 | idaview = ida_kernwin.open_disasm_window('IDA View-A')
10 | ida_kernwin.display_widget(idaview, 0)
11 | 
12 | #we need to disable lumina pull all on autoanalysis finish or else we get junk that we dont want
13 | orig = ida_registry.reg_read_int('AutoUseLumina', 1)
14 | ida_registry.reg_write_int('AutoUseLumina', 0)
15 | 
16 | #wait until autoanalysis is finished to get full function list; this also gives time for frida to attach
17 | ida_auto.auto_wait()
18 | 
19 | #restore option
20 | ida_registry.reg_write_int('AutoUseLumina', orig)
21 | 
22 | #use local server to speed up processing - we dont need actual responses anyway
23 | #self note: if local server is not running idat's gonna make a ton of windows noise
24 | ida_idp.process_config_directive('LUMINA_HOST="127.0.0.1"')
25 | ida_idp.process_config_directive('LUMINA_PORT=4443')
26 | ida_idp.process_config_directive('LUMINA_TLS=NO')
27 | 
28 | 
class Run(ida_kernwin.UI_Hooks):
    """UI hook that triggers a lumina pull on every function once IDA reports it is ready, then exits."""

    def __init__(self) -> None:
        ida_kernwin.UI_Hooks.__init__(self)

    #wait until ready
    def ready_to_run(self):
        """Jump to each function in turn, fire the lumina pull action on it, then quit IDA."""
        #spinning seems to be necessary when we are connecting to remote but makes things worse when we are connecting to local server
        # #spin until lumina finishes initializing - have to use idc.qsleep not time.sleep since we cannot occupy the thread
        # #somehow sometimes it deadlocks though i have no idea how to fix
        # while not ida_kernwin.is_action_enabled(ida_kernwin.get_action_state('LuminaIDAViewPullMd')[1]):
        #     with open('test.log', 'a') as ww:
        #         ww.write('spinning\n')
        #     idc.qsleep(100)

        func = ida_funcs.get_next_func(0)
        while func:
            ea = func.start_ea
            if not ida_segment.is_spec_ea(ea):  #ignore extern symbols and the likes
                ida_kernwin.jumpto(ea)
                ida_kernwin.process_ui_action('LuminaIDAViewPullMd')
            #move onto the next func regardless
            func = ida_funcs.get_next_func(ea)

        #discard the database and leave with exit code 0
        ida_idp.process_config_directive('ABANDON_DATABASE=YES')
        ida_pro.qexit(0)


#keep the hook object referenced at module level while it is installed
uihook = Run()
uihook.hook()
56 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # lumina-ghidra
 2 | Ghidra port for [lumina-binja](https://github.com/ubcctf/lumina-binja), a reimplementation of IDA's [Lumina](https://hex-rays.com/products/ida/lumina/) feature in Binary Ninja
 3 | 
 4 | The features provided by this port are on par with the Binary Ninja plugin - see the [repo](https://github.com/ubcctf/lumina-binja) for more info!
 5 | 
 6 | **CURRENTLY IN ACTIVE DEVELOPMENT - NOTHING IS FULLY STABLE YET**
 7 | 
 8 | ## Building and Installation
 9 |  - include `--recurse-submodules` to get the Ghidrathon repo when cloning this repo
10 |  - `cd Ghidrathon && git checkout -b pre-10.2 0a54fa1cef41869582eb3614a86a9475ecf5c67a` if you are running Ghidra < v10.2, otherwise `cd Ghidrathon && git checkout main`
11 |  - `gradle -PGHIDRA_INSTALL_DIR=<absolute path to Ghidra install>` should compile both Ghidrathon (in `Ghidrathon/dist/`) and this plugin (in `dist/`)
12 |  - Alternatively, if you are using `GhidraDev` in Eclipse, import the project, right click the project: `GhidraDev -> Link Ghidra...`, follow the prompts, and then `GhidraDev -> Export -> Ghidra Module Extension...` which will do the same thing as the command above
13 |  - Go into Ghidra, `File -> Install Extensions`, click the green arrow and select both of the zip files
14 |  - Check both of the new extensions and restart Ghidra
15 |  - Since the plugin is still marked unstable currently, you will have to go to `File -> Configure -> Experimental` and check `LuminaPlugin` manually to enable it
16 |  - Configure Lumina through `Edit -> Tool Options -> Lumina` in disassembler view; Most logs will be viewable in the main Ghidra tool -> `Help -> Show Log`
17 | 
18 | ## Running tests
19 | The `test.py` requires more setup than the Binary Ninja counterpart, mainly because of the way headless mode works for Ghidra:
20 |  - Make sure Ghidrathon is set up, and requirements are installed (along with `frida`)
21 |  - Run `analyzeHeadless <project path> <project name> -import <name> -scriptPath <repo root dir> -postScript test.py` - This would require you to erase the `<project name>.gpr` file every single time since Ghidra does not allow reimporting
22 |  - Alternatively you can run `analyzeHeadless <project path> <project name> -import <name>` once, and then `analyzeHeadless <project path> <project name> -process <name> -scriptPath <repo root dir> -postScript test.py` to use the cached analysis (faster, but things might persist in the project that is unideal for testing)
23 |  - You'll need to input the filepath and verbosity arguments through stdin, along with manually finding IDA addresses for `calc_func_metadata`, `MD5Update` and `MD5Final` - `postScript` argument passing doesn't seem to work that well, and we cannot really analyze multiple binaries needed to get the addresses automatically without prior setup with `analyzeHeadless` either
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/src/main/java/org/maplebacon/lumina/PythonExecutor.java:
--------------------------------------------------------------------------------
  1 | package org.maplebacon.lumina;
  2 | 
  3 | import java.io.PrintWriter;
  4 | import java.util.concurrent.ExecutionException;
  5 | import java.util.concurrent.ExecutorService;
  6 | import java.util.concurrent.Executors;
  7 | 
  8 | import org.apache.commons.lang3.concurrent.BasicThreadFactory;
  9 | 
 10 | import generic.jar.ResourceFile;
 11 | import ghidrathon.interpreter.GhidrathonInterpreter;
 12 | 
 13 | /**
 14 |  * The class responsible for executing python code,
 15 |  * ensuring consistency of the thread state for jep. <br>
 16 |  * <br>
 17 |  * This class abstracts with a single thread executor, which means commands submitted are executed sequentially,
 18 |  * ensuring the ordering is as expected.
 19 |  * @author despawningbone
 20 |  */
 21 | public class PythonExecutor {
 22 | 	private GhidrathonInterpreter python;
 23 | 	private ExecutorService pyThread;
 24 | 	
 25 | 	//TODO figure out whether this will introduces noticable race conditions due to modifying program state (are transactions thread safe?)
 26 | 	
 27 | 	/**
 28 | 	 * Instantiates the environment for the python interpreter.
 29 | 	 */
 30 | 	public PythonExecutor() {
 31 | 		BasicThreadFactory factory = new BasicThreadFactory.Builder()
 32 | 				.namingPattern("Lumina-JEP-thread-%d")
 33 | 				.priority(7)   //higher than norm, lower than critical
 34 | 				.build();
 35 | 		
 36 | 		pyThread = Executors.newSingleThreadExecutor(factory);
 37 | 		
 38 | 		try {  //wait until it finishes
 39 | 			pyThread.submit(() -> python = GhidrathonInterpreter.get()).get();
 40 | 		} catch (InterruptedException | ExecutionException e) {
 41 | 			python = null;   //disable on error
 42 | 		}
 43 | 	}
 44 | 	
 45 | 	
 46 | 	/**
 47 | 	 * Evaluates a python statement, blocking until it finishes.
 48 | 	 * @apiNote Assumes isEnabled == true
 49 | 	 * @param line python statement to execute
 50 | 	 */
 51 | 	public void evalSync(String line) {
 52 | 		try {
 53 | 			pyThread.submit(() -> python.eval(line)).get();
 54 | 		} catch (InterruptedException | ExecutionException e) {
 55 | 			throw new RuntimeException(e);   //pass exception to our own thread in unchecked fashion, as would happen without executor
 56 | 		}	
 57 | 	}
 58 | 
 59 | 	/**
 60 | 	 * Evaluates a python statement asynchronously.
 61 | 	 * @apiNote Assumes isEnabled == true
 62 | 	 * @param line python statement to execute
 63 | 	 */
 64 | 	public void eval(String line) {
 65 | 		pyThread.execute(() -> python.eval(line));
 66 | 	}
 67 | 
 68 | 	
 69 | 	/**
 70 | 	 * Evaluates a python script, blocking until it finishes.
 71 | 	 * @apiNote Assumes isEnabled == true
 72 | 	 * @param file python script to execute
 73 | 	 */
 74 | 	public void runScriptSync(ResourceFile file) {
 75 | 		try {
 76 | 			pyThread.submit(() -> python.runScript(file)).get();
 77 | 		} catch (InterruptedException | ExecutionException e) {
 78 | 			throw new RuntimeException(e);   //pass exception to our own thread in unchecked fashion, as would happen without executor
 79 | 		}	
 80 | 	}
 81 | 
 82 | 	/**
 83 | 	 * Evaluates a python script asynchronously.
 84 | 	 * @apiNote Assumes isEnabled == true
 85 | 	 * @param file python script to execute
 86 | 	 */
 87 | 	public void runScript(ResourceFile line) {
 88 | 		pyThread.execute(() -> python.runScript(line));
 89 | 	}	
 90 | 	
 91 | 	
 92 | 	/**
 93 | 	 * Passes a value to the python interpreter.
 94 | 	 */
 95 | 	public void set(String name, Object obj) {  //we dont really need a sync method for this since we dont really care about when it finishes and the ordering is already guaranteed
 96 | 		pyThread.execute(() -> python.set(name, obj));
 97 | 	}
 98 | 		
 99 | 	/**
100 | 	 * Redirects the python interpreter output to the streams provided
101 | 	 * @param out stream to redirect stdout to
102 | 	 * @param err stream to redirect stderr to
103 | 	 */
104 | 	public void setStreams(PrintWriter out, PrintWriter err) {  //we dont really need a sync method for this since we dont really care about when it finishes and the ordering is already guaranteed
105 | 		pyThread.execute(() -> python.setStreams(out, err));
106 | 	}	
107 | 	
108 | 	
109 | 	/**
110 | 	 * Checks whether the python environment is available.
111 | 	 * @return whether the python interpreter is ready
112 | 	 */
113 | 	public boolean isEnabled() {
114 | 		return python != null;
115 | 	}
116 | 	
117 | 	
118 | 	/**
119 | 	 * Cleans up the python environment and disables it.
120 | 	 */
121 | 	public void close() {
122 | 		try {
123 | 			pyThread.submit(() -> python.close()).get();
124 | 			pyThread.shutdown();
125 | 			python = null;
126 | 		} catch (InterruptedException | ExecutionException e) {
127 | 			;  //its whatever we can just ignore it we are leaving anyway; a new PythonExecutor will have a new thread for the python state
128 | 		}
129 | 	}
130 | }
131 | 


--------------------------------------------------------------------------------
/data/sig/x86.py:
--------------------------------------------------------------------------------
 1 | from ghidra.program.database.function import FunctionDB
 2 | from ghidra.program.model.address import Address
 3 | from ghidra.program.model.symbol import SymbolType
 4 | import jep
 5 | 
 6 | from capstone import Cs, CsInsn, CS_ARCH_X86, CS_MODE_64
 7 | from capstone.x86 import X86_REG_FS, X86_REG_GS, X86_REG_RIP, X86_OP_MEM, X86_OP_IMM
 8 | 
 9 | import io, ctypes
10 | 
11 | from .util import Sig
12 | 
13 | 
class X86(Sig):
    """Signature generator for x86 / x86_64 functions.

    Disassembles the function bytes with capstone, masks out the displacement and
    immediate fields that refer to other program locations, and hashes the result.
    """

    def valid_loc(self, offset: Address, f: FunctionDB):
        """Tell whether a reference to `offset` made from function `f` should be masked.

        Returns a truthy value (the matched object, not a bool) when `offset` is
        either the start of an instruction inside a function other than `f`, or
        is covered by defined or undefined data; returns a falsy value otherwise,
        including when `offset` itself is falsy.
        """
        #include all data variables that has a code ref
        #also include if is pointing to start of instruction, but never mask same function jumps
        #sometimes there might be multiple functions at the same address even on the same architecture it seems like - we check all of them to see if any is the same function then reject
        #ghidra cant have multiple functions at the same location
        #if a data var exists (undefined or not) at offset there has to be a reference in ghidra
        #jep jarrays are correctly falsey
        return offset and (
            (
                (o := self.prog.getFunctionContaining(offset))
                and o.getEntryPoint() != f.getEntryPoint()
                and self.prog.getInstructionAt(offset)
            )
            or self.prog.getDataContaining(offset)
            or self.prog.getUndefinedDataAt(offset)
        )

    def _to_address(self, value: int):
        """Look `value` up through the program's address factory, using its hex string form."""
        return self.prog.getAddressFactory().getAddress(hex(value))

    def calcrel(self, d: CsInsn, f: FunctionDB):
        """Build the mask for one disassembled instruction.

        d: the capstone instruction (detail must be enabled)
        f: the function being hashed, forwarded to valid_loc

        Returns bytes of length d.size, with 0xFF over the displacement and/or
        immediate field when that field counts as a reference, and 0 elsewhere.
        """
        mask = bytes(d.size)

        #<opcode, disp_offset, imm_offset> - offsets are optional and can not exist
        #afaik x86 imm is always at the end

        if d.disp_offset: #consider references - any fs address, any relative memory accesses that's valid in program scope (see valid_loc def)
            volatile = any(
                op.type == X86_OP_MEM and (
                    op.reg in [X86_REG_FS, X86_REG_GS]
                    or (
                        op.value.mem.base == X86_REG_RIP
                        and self.valid_loc(self._to_address(op.value.mem.disp + d.address + d.size), f)
                    )
                )
                for op in d.operands
            )
            m = b'\xFF' if volatile else b'\0'
            #the displacement runs up to the immediate if there is one, otherwise to the end of the instruction
            size = (d.imm_offset - d.disp_offset if d.imm_offset else d.size - d.disp_offset)
            mask = mask[:d.disp_offset] + m*size + mask[d.disp_offset+size:]

        #imm always later than disp
        if d.imm_offset: #references in imm just points directly to addresses
            volatile = any(
                op.type == X86_OP_IMM and self.valid_loc(self._to_address(op.imm), f)
                for op in d.operands
            )
            m = b'\xFF' if volatile else b'\0'
            size = d.size - d.imm_offset
            mask = mask[:d.imm_offset] + m*size + mask[d.imm_offset+size:]

        return mask

    def calc_func_metadata(self, func: FunctionDB) -> "tuple[bytes, bytes, bytes] | None":
        """Compute the signature data for `func`.

        Returns (digest, block, maskblock): `block` is the function bytes with
        masked positions zeroed, `maskblock` is the mask of the same length and
        `digest` is the raw MD5 digest (bytes) of block + maskblock.
        Returns None for thunks to external functions, which are skipped.
        """
        if func.isThunk() and func.getThunkedFunction(False).isExternal():   #special functions, ignore
            return None

        ranges = func.getBody()

        #dont check the portions of the function above func.start (aka no min([r.start for r in ranges])); seems like IDA doesnt care either and this speeds things up by a ton in binaries with exception handlers
        func_start = func.getEntryPoint().getOffset()
        func_end = ranges.getMaxAddress().getOffset()  #get max of the entire address space

        cap = Cs(CS_ARCH_X86, CS_MODE_64)  #seems like 64bit mode can still disassemble 32 bit completely fine
        cap.detail = True

        #take the entire block of data including alignment into account (use size if disassembly is not available)
        #pass by reference workaround - we cant directly use b'' coz jep will just copy the array into java and then discard the changed result
        jblock = jep.jarray(func_end - func_start + 1, jep.JBYTE_ID)  #func_end inclusive
        self.mem.getBytes(func.getEntryPoint(), jblock)
        #java bytes are signed; masking with 0xFF gives the unsigned value without building a ctypes object per byte
        block = bytes(b & 0xFF for b in jblock)

        #linearly disassemble the entire block of bytes that the function encompasses (IDA does that instead of checking whether the bytes are accessible to the function or not)
        dis = cap.disasm(block, func_start)

        maskblock = io.BytesIO(bytes(len(block)))
        block = io.BytesIO(block)
        #if its in the valid proc address space then it counts as volatile
        for d in dis:
            pos = d.address - func_start
            maskblock.seek(pos)
            block.seek(pos)

            mask = self.calcrel(d, func)
            #zero every byte that is fully masked, keep the rest as is
            data = bytes([b if m != 0xFF else 0 for m, b in zip(mask, block.read(len(mask)))])

            maskblock.write(mask)

            #rewind over the bytes just read so the masked copy overwrites them
            block.seek(pos)
            block.write(data)
        block = block.getvalue()
        maskblock = maskblock.getvalue()

        #compute MD5
        import hashlib

        digest = hashlib.md5(block + maskblock).digest()
        return digest, block, maskblock


--------------------------------------------------------------------------------
/src/main/java/org/maplebacon/lumina/LuminaPlugin.java:
--------------------------------------------------------------------------------
  1 | package org.maplebacon.lumina;
  2 | 
  3 | import ghidra.app.plugin.ProgramPlugin;
  4 | import ghidra.app.services.ConsoleService;
  5 | 
  6 | import java.io.File;
  7 | import java.io.IOException;
  8 | import java.util.Arrays;
  9 | import java.util.NoSuchElementException;
 10 | 
 11 | import docking.ActionContext;
 12 | import docking.action.DockingAction;
 13 | import docking.action.MenuData;
 14 | import generic.jar.ResourceFile;
 15 | import ghidra.app.events.ProgramLocationPluginEvent;
 16 | import ghidra.app.plugin.PluginCategoryNames;
 17 | import ghidra.framework.Application;
 18 | import ghidra.framework.plugintool.PluginInfo;
 19 | import ghidra.framework.plugintool.PluginTool;
 20 | import ghidra.framework.plugintool.util.PluginStatus;
 21 | import ghidra.util.Msg;
 22 | 
 23 | @PluginInfo(
 24 | 	status = PluginStatus.UNSTABLE,
 25 | 	packageName = "org.maplebacon.lumina",
 26 | 	category = PluginCategoryNames.ANALYSIS,
 27 | 	shortDescription = "Lumina implementation for Ghidra",
 28 | 	description = "This plugin adds support for IDA's Lumina feature in Ghidra.",
 29 | 	servicesRequired = { ConsoleService.class },    //needed to ensure console initiates first for python logging
 30 | 	eventsConsumed = { ProgramLocationPluginEvent.class }  //needed to get currentLocation updates
 31 | )
 32 | public class LuminaPlugin extends ProgramPlugin {
 33 | 	private PythonExecutor python;
 34 | 	private File pyScripts;
 35 | 	
 36 | 	//temporary storage for the LuminaClient pyObject - apparently client could go out of scope for some reason for some installations after entry.py
 37 | 	private Object client;
 38 | 	
 39 | 	//expose for entry.py to be able to persist the client object here
 40 | 	public void setClient(Object client) {
 41 | 		this.client = client;
 42 | 	}
 43 | 
 44 | 	public LuminaPlugin(PluginTool tool) throws IOException {
 45 | 		super(tool, false, false);
 46 | 		
 47 | 		//unzip the python files into extension directory if not done yet so python can read it properly
 48 | 		//getFile implicitly copies to the application directory that we want
 49 | 		pyScripts = Application.getModuleDataSubDirectory(".").getFile(true);
 50 | 	}
 51 | 	
 52 | 	@Override
 53 | 	protected void init() {
 54 | 		//start the lumina client; DONE move to a separate thread in case other plugins want to make a jep interpreter on the GUI thread too? (also for running background tasks)
 55 | 		try {
 56 | 			python = new PythonExecutor();
 57 | 			ResourceFile entry = Arrays.asList(pyScripts.listFiles()).stream().filter(f -> f.getName().equals("entry.py")).map(f -> new ResourceFile(f)).findFirst().get();
 58 | 			
 59 | 			//set any errors to print to the console; it is expected that all communciations should be done through Msg logger but for debugging purposes this would be much more visible
 60 | 			ConsoleService console = tool.getService(ConsoleService.class);
 61 | 			python.setStreams(console.getStdOut(), console.getStdErr());
 62 | 			
 63 | 			python.set("plugin", this);   //pass everything we need to do the plugin in python; getTool will give us the rest we need
 64 | 			
 65 | 			//hotfix for relative imports
 66 | 			python.eval("import sys; sys.path.append(r'" +  pyScripts.getParentFile().getParent() +  "'); __package__ = 'data'");
 67 | 			
 68 | 			python.runScript(entry);		
 69 | 		} catch(NoSuchElementException e) {
 70 | 			Msg.error(this, "Lumina python scripts not found:", e);
 71 | 		}
 72 | 		
 73 | 		createActions();
 74 | 	}
 75 | 	
 76 | 	private DockingAction getLuminaAction(String name, String exec, boolean checkValid, boolean funcSpecific) {
 77 | 		MenuData tb = new MenuData(new String[] {"Lumina", name});
 78 | 		DockingAction action = new DockingAction(name, "Lumina") {
 79 | 			@Override
 80 | 			public void actionPerformed(ActionContext context) {
 81 | 				if(python.isEnabled()) {					
 82 | 					python.set("ctx", currentProgram);
 83 | 					//pass client back into scope before evaluating
 84 | 					python.set("client", LuminaPlugin.this.client);
 85 | 					
 86 | 					if(funcSpecific)   //only set if its function specific - can be null otherwise
 87 | 						python.set("func", currentProgram.getFunctionManager().getFunctionContaining(currentLocation.getAddress()));
 88 | 					
 89 | 					//Msg is probably not in scope, so we import
 90 | 					python.eval(exec + (checkValid ? " if client.is_valid(ctx) else __import__('ghidra.util').util.Msg.showWarn(plugin, None, 'Lumina - Unavailable', 'This function is not available in this context. (Either the client is not connected, or the architecture is currently unsupported.)')" : ""));
 91 | 				} else {
 92 | 					Msg.showWarn(LuminaPlugin.this, null, "Lumina - inconsistent state", "The python interpreter is not available right now. Please restart the plugin.");
 93 | 				}
 94 | 			}
 95 | 			
 96 | 			@Override
 97 | 			public boolean isEnabledForContext(ActionContext context) {
 98 | 				return !checkValid || (currentProgram != null && currentLocation != null && !(funcSpecific && currentProgram.getFunctionManager().getFunctionContaining(currentLocation.getAddress()) == null));
 99 | 			}
100 | 		};
101 | 		action.setMenuBarData(tb);
102 | 		action.setEnabled(true);
103 | 		action.markHelpUnnecessary();
104 | 		return action;
105 | 	}
106 | 	
107 | 	
108 | 	private void createActions() {
109 | 		if(tool.getDockingActionsByOwnerName("Lumina").size() == 0) {   //only add if not added already
110 | 			tool.addAction(getLuminaAction("Pull current function metadata", "client.pull_function_md(ctx, func)", true, true));
111 | 			tool.addAction(getLuminaAction("Push current function metadata", "client.push_function_md(ctx, func)", true, true));
112 | 			tool.addAction(getLuminaAction("Pull all function metadata", "client.pull_all_mds(ctx)", true, false));
113 | 			tool.addAction(getLuminaAction("Push all function metadata", "client.push_all_mds(ctx)", true, false));
114 | 			tool.addAction(getLuminaAction("Reconnect", "client.reconnect()", false, false));
115 | 			//TODO option for reverting applied metadata
116 | 		}
117 | 	}
118 | 	
119 | 	@Override
120 | 	protected void dispose() {
121 | 		if(python.isEnabled()) 
122 | 			python.close();      //need to close it at the end in case we need to turn lumina back on (which is likely in the same thread as before aka jep is gonna die)
123 | 	}
124 | }
125 | 


--------------------------------------------------------------------------------
/data/type.py:
--------------------------------------------------------------------------------
  1 | from lumina_structs.tinfo import *
  2 | from construct import Container
  3 | from ghidra.program.model.data import (DataType, VoidDataType,
  4 |     CharDataType, SignedByteDataType, ByteDataType,
  5 |     SignedWordDataType, WordDataType, SignedDWordDataType, DWordDataType,
  6 |     SignedQWordDataType, QWordDataType, Integer16DataType, UnsignedInteger16DataType,
  7 |     IntegerDataType, UnsignedIntegerDataType, BooleanDataType,
  8 |     FloatDataType, DoubleDataType, LongDoubleDataType, Float2DataType,
  9 |     PointerDataType, ArrayDataType, FunctionDefinitionDataType,
 10 |     TypedefDataType, Undefined, BitFieldDataType)
 11 | from ghidra.program.model.data import ParameterDefinitionImpl, GenericCallingConvention
 12 | from ghidra.framework.plugintool import PluginTool
 13 | from ghidra.app.services import DataTypeManagerService
 14 | from java.util import ArrayList
 15 | from typing import List, Optional
 16 | 
 17 | #
 18 | # handles mapping from generic lumina tinfo definitions to ghidra-specific data
 19 | #
 20 | 
 21 | def construct_ptr(tinfo: Container, tool: PluginTool, names: Optional[List[str]], *_):
 22 |     #ghidra seem to only have the basics in ptr types (no const/volatile, no closures, no near/far etc)
 23 |     if tinfo.data.ptrsize:
 24 |         return PointerDataType(construct_type(tool, tinfo.data.type, names), tinfo.data.ptrsize)
 25 |     else:
 26 |         #assuming dynamic pointer size means itll eventually be the right one
 27 |         return PointerDataType(construct_type(tool, tinfo.data.type, names))
 28 | 
 29 | def construct_arr(tinfo: Container, tool: PluginTool, names: Optional[List[str]], *_):
 30 |     #ghidra have no "base of array" concepts, assume zero always
 31 |     t = construct_type(tool, tinfo.data.type, names)
 32 |     return ArrayDataType(t, tinfo.data.num_elems, t.getLength())
 33 | 
 34 | 
#lookup from lumina calling convention constants to ghidra's GenericCallingConvention values
#conventions that are not listed here get no calling convention set - construct_func only applies an entry when the key is present
#NOTE(review): CallingConvention presumably comes from the lumina_structs.tinfo star import - confirm
cc_mapping = {
    CallingConvention.CM_CC_CDECL: GenericCallingConvention.cdecl,
    CallingConvention.CM_CC_ELLIPSIS: GenericCallingConvention.cdecl,    #shares the cdecl entry
    CallingConvention.CM_CC_STDCALL: GenericCallingConvention.stdcall,
    CallingConvention.CM_CC_PASCAL: GenericCallingConvention.stdcall,    #but reversed order of args
    CallingConvention.CM_CC_FASTCALL: GenericCallingConvention.fastcall,
    CallingConvention.CM_CC_THISCALL: GenericCallingConvention.thiscall,
}
 43 | 
 44 | def construct_func(tinfo: Container, tool: PluginTool, names: Optional[List[str]], *_):
 45 |     #again seems like the data types are pretty basic, aka no arglocs, near/far, spoiled regs etcetc
 46 |     funcdef = FunctionDefinitionDataType("lumina_function")  #temp name for generating
 47 | 
 48 |     funcdef.setReturnType(construct_type(tool, tinfo.data.rettype, names))
 49 |     funcdef.setArguments([ParameterDefinitionImpl(names.pop(0) if names else "", construct_type(tool, param.type, names), None) for param in tinfo.data.params])
 50 |     if tinfo.data.cc.convention in cc_mapping:
 51 |         funcdef.setGenericCallingConvention(cc_mapping[tinfo.data.cc.convention])
 52 | 
 53 |     funcdef.setName(funcdef.getPrototypeString())
 54 | 
 55 |     return funcdef
 56 | 
 57 | def construct_cmplx(tinfo: Container, tool: PluginTool, names: Optional[List[str]], nbytes: int):
 58 |     #lumina only pushes typedef, so not much we can do if it doesnt already exist in type libraries
 59 |     if tinfo.typedef.flags == ComplexFlags.BTMT_TYPEDEF:   #just to be sure we are dealing with typedefs before we search the name up
 60 |         #we either just use the state in the global scope to get every type library or we have to pass it through a whole chain of things which is not ideal
 61 |         for lib in tool.getService(DataTypeManagerService).getDataTypeManagers():    
 62 |             l = ArrayList()
 63 |             lib.findDataTypes(tinfo.data.name, l)
 64 |             if l:
 65 |                 #if nbytes is defined and the type we got is very likely defined by lumina (typedef -> undefined), trust that
 66 |                 if hasattr(l[0], 'getBaseDataType') and Undefined.isUndefined(l[0].getBaseDataType()) and nbytes:
 67 |                     l[0].replaceWith(TypedefDataType(tinfo.data.name, Undefined.getUndefinedDataType(nbytes)))
 68 |                 return l[0]
 69 |         return TypedefDataType(tinfo.data.name, Undefined.getUndefinedDataType(nbytes))
 70 |     
 71 |     #TODO properly parse the complex types once ive figured out ways to force lumina to push full struct info (or extend it to do that)
 72 |     #this should basically never be reached before then
 73 |     return Undefined.getUndefinedDataType(nbytes)
 74 | 
 75 | bitfield_mapping = {
 76 |     BitFieldFlags.BTMT_BFLDI8: (ByteDataType, SignedByteDataType),
 77 |     BitFieldFlags.BTMT_BFLDI16: (WordDataType, SignedWordDataType),
 78 |     BitFieldFlags.BTMT_BFLDI32: (DWordDataType, SignedDWordDataType),
 79 |     BitFieldFlags.BTMT_BFLDI64: (QWordDataType, SignedQWordDataType),
 80 | }
 81 | 
 82 | def construct_bitfield(tinfo: Container, *_):  #ive never seen this in use - see lumina_structs.tinfo for more info
 83 |     #BitFieldDataType is technically an internal class, but we should be fine since BT_BITFIELD is also only used in structs in IDA
 84 |     return BitFieldDataType(bitfield_mapping[tinfo.typedef.flags][int(not tinfo.data.unsigned)], tinfo.data.bitsize)
 85 | 
 86 | 
 87 | 
 88 | float_mapping = {
 89 |     FloatFlags.BTMT_FLOAT: FloatDataType.dataType,
 90 |     FloatFlags.BTMT_DOUBLE: DoubleDataType.dataType,
 91 |     FloatFlags.BTMT_LNGDBL: LongDoubleDataType.dataType,
 92 |     FloatFlags.BTMT_SPECFLT: Float2DataType.dataType, #depends on use_tbyte() in IDA otherwise 2 - likely not used for lumina
 93 | }
 94 | 
 95 | basetype_mapping = {
 96 |     BaseTypes.BT_VOID: lambda *_: VoidDataType.dataType,
 97 |     BaseTypes.BT_INT8: lambda tinfo, *_: CharDataType.dataType if tinfo.typedef.flags == IntFlags.BTMT_CHAR else (SignedByteDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else ByteDataType.dataType),  #default to signed unless unsigned is specified
 98 |     BaseTypes.BT_INT16: lambda tinfo, *_: SignedWordDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else WordDataType.dataType,
 99 |     BaseTypes.BT_INT32: lambda tinfo, *_: SignedDWordDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else DWordDataType.dataType,
100 |     BaseTypes.BT_INT64: lambda tinfo, *_: SignedQWordDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else QWordDataType.dataType,
101 |     BaseTypes.BT_INT128: lambda tinfo, *_: Integer16DataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else UnsignedInteger16DataType.dataType,
102 |     BaseTypes.BT_INT: lambda tinfo, *_: IntegerDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else UnsignedIntegerDataType.dataType,
103 |     BaseTypes.BT_BOOL: lambda *_: BooleanDataType.dataType,
104 |     BaseTypes.BT_FLOAT: lambda tinfo, *_: float_mapping[tinfo.typedef.flags],
105 |     #complex types
106 |     BaseTypes.BT_PTR: construct_ptr,
107 |     BaseTypes.BT_ARRAY: construct_arr,
108 |     BaseTypes.BT_FUNC: construct_func,
109 |     BaseTypes.BT_COMPLEX: construct_cmplx,
110 |     BaseTypes.BT_BITFIELD: construct_bitfield,
111 | }
112 | 
113 | 
114 | def construct_type(tool: PluginTool, tinfo: Container, names: Optional[List[str]] = None, nbytes: int = 0) -> DataType:
115 |     #trust nbytes more than type info coz sometimes its missing width (especially typedefs)
116 |     #though thats only on the first layer - we reuse types from libraries which ends up having the wrong nbytes size if a typedef is indirectly referred, so dont propagate
117 |     return basetype_mapping[tinfo.typedef.basetype](tinfo, tool, names, nbytes)
118 | 


--------------------------------------------------------------------------------
/data/test.py:
--------------------------------------------------------------------------------
  1 | import frida
  2 | 
  3 | from ghidra.program.flatapi import FlatProgramAPI
  4 | 
  5 | from capstone import *
  6 | 
  7 | from sig.util import ARCH_MAPPING
  8 | 
  9 | import time
 10 | 
 11 | import os, subprocess
 12 | 
 13 | cs_mapping = {'x86': CS_ARCH_X86, 'x86_64': CS_ARCH_X86}
 14 | 
 15 | IDA_PATH = os.path.realpath(os.environ['IDADIR']) + os.path.sep
 16 | 
 17 | 
 18 | def check_against_ida(binary: str, verbosity: int, arch: str):
 19 |     #it's a hassle to automatically get ida offsets from ghidra so manual input it is for now
 20 |     metadata, update, final = input(), input(), input()
 21 | 
 22 |     cwd = input()  #manually enter the path to ida-calc-all-metadata coz __file__ doesnt exist
 23 | 
 24 |     #fetch the expected function signatures first
 25 | 
 26 |     #we rely on widget loading in ida-calc-all-metadata.py since we cant trigger calc_func_metadata other than running lumina which is why we need to run idat
 27 |     #with some tricks we can actually get idat to run headless and have widgets loaded like what we expect with gui mode
 28 |     #-c removes the old database so it doesnt affect operations between runs
 29 |     p = subprocess.Popen(IDA_PATH + 'idat64.exe -c -A -S' + cwd + 'ida-calc-all-metadata.py ' + binary, stdout=subprocess.PIPE)
 30 | 
 31 |     session = frida.attach(p.pid)
 32 | 
 33 |     script = session.create_script("""
 34 |     const baseAddr = Module.findBaseAddress('ida64.dll');"""
 35 | f"\n    const metadata  = resolveAddress('{metadata}');\n"
 36 | f"\n    const MD5Update = resolveAddress('{update}');\n"
 37 | f"\n    const MD5Final  = resolveAddress('{final}');\n"
 38 |     """
 39 |     var bytes = '';
 40 |     var funcptr = null;
 41 | 
 42 |     function buf2hex(buffer) { // buffer is an ArrayBuffer
 43 |     return [...new Uint8Array(buffer)]
 44 |         .map(x => x.toString(16).padStart(2, '0'))
 45 |         .join('');
 46 |     }
 47 | 
 48 |     Interceptor.attach(metadata, {
 49 |         onEnter(args) {
 50 |             funcptr = args[2].readU64();   //first param of func_t is start_ea, as shown in how calc_func_metadata uses get_ea_name
 51 |             bytes = '';  //prime bytes for writing
 52 |         },
 53 |     });
 54 | 
 55 |     Interceptor.attach(MD5Update, {
 56 |         onEnter(args) {
 57 |             bytes += ' ' + buf2hex(args[1].readByteArray(args[2].toInt32()));
 58 |         },
 59 |     });
 60 | 
 61 |     Interceptor.attach(MD5Final, {
 62 |         onEnter(args) {
 63 |             this.hashAddr = args[0];
 64 |             this.objAddr = args[1];
 65 |         },
 66 |         onLeave(retval) {
 67 |             if(funcptr !== null) {  //lumina functions always only have 2 as count; also ensure calc_func_metadata runs
 68 |                 const hash = this.hashAddr.readByteArray(16);
 69 |                 send(funcptr.toString(16) + ' ' + buf2hex(hash) + bytes);
 70 | 
 71 |                 //reset
 72 |                 funcptr = null;
 73 |             }
 74 |         },
 75 |     });
 76 | 
 77 |     function resolveAddress(addr) {
 78 |         const idaBase = ptr('0x10000000');
 79 |         const offset = ptr(addr).sub(idaBase);
 80 |         const result = baseAddr.add(offset);
 81 |         return result;
 82 |     }
 83 | 
 84 |     """)
 85 | 
 86 |     expected = []
 87 | 
 88 |     script.on('message', lambda msg, _: expected.append((int((pl:=msg['payload'].split(' '))[0], 16), pl[1], pl[2], pl[3])))
 89 |     script.load()
 90 |     
 91 | 
 92 |     #ghidra already loaded currentProgram for us
 93 |     start = time.time()  #ignore open_view overhead in our timing
 94 |     
 95 |     gen = ARCH_MAPPING[arch](currentProgram)
 96 |     actual = {}
 97 |     for f in currentProgram.getFunctionManager().getFunctions(True):
 98 |         if calcrel:=gen.calc_func_metadata(f):
 99 |             actual[f.getEntryPoint().getOffset()] = calcrel
100 | 
101 |     end = time.time()
102 | 
103 |     p.communicate()
104 | 
105 |     if not len(expected):
106 |         print('Failed to obtain results from IDA, aborting...')
107 |         return
108 | 
109 |     #check results
110 | 
111 |     missing = 0
112 | 
113 |     base = FlatProgramAPI(currentProgram).getFirstData().getAddress().getOffset()
114 |     for addr, hash, buf, mask in list(expected):
115 |         if addr not in actual:
116 |             if (addr + base) in actual:   #ghidra sometimes have a different base (doesnt matter in actual lumina operations, since addresses are never checked)
117 |                 expected[expected.index((addr, hash, buf, mask))] = (addr+base, hash, buf, mask)
118 |             else:
119 |                 if verbosity > 0:
120 |                     print('Function missing from ghidra: ' + hex(addr))
121 |                 expected.remove((addr, hash, buf, mask))
122 |                 missing+=1
123 | 
124 |     if verbosity > 0:
125 |         print()
126 | 
127 |     miss = 0
128 |     cap = Cs(cs_mapping[arch], CS_MODE_64)
129 |     for addr, hash, buf, mask in expected:  #we dont really care about ghidra exclusive functions i guess
130 |         if actual[addr][0].hex() != hash:
131 |             if verbosity > 0:
132 |                 print('\nFunction', hex(addr), 'mismatch:')
133 |                 print('Expected:', hash)
134 |                 print('Actual:', actual[addr][0].hex())
135 | 
136 |             if verbosity > 1:
137 |                 #print('\n' + actual[addr][1].hex() + '\n' + actual[addr][2].hex() + '\n' + buf + '\n\n')
138 | 
139 |                 us = {d.address:str(d) for d in cap.disasm(actual[addr][1], addr)}
140 |                 ida = {d.address:str(d) for d in cap.disasm(bytes.fromhex(buf), addr)}
141 | 
142 |                 #only compare the str form of the disassembly since the disasm objects themselves are different
143 |                 diff = set(ida.values()) ^ set(us.values())
144 | 
145 |                 differing, excl_us, excl_ida = [], [], []
146 |                 for d in diff:
147 |                     diff_addr = int(d.split(' ')[1], 16)
148 |                     if diff_addr in us and diff_addr in ida:
149 |                         if hex(diff_addr) + ': ' + ida[diff_addr] + ' vs ' + us[diff_addr] not in differing:
150 |                             differing.append(hex(diff_addr) + ': ' + ida[diff_addr] + ' vs ' + us[diff_addr])
151 |                     elif diff_addr in us:
152 |                         excl_us.append(hex(diff_addr) + ': ' + us[diff_addr])
153 |                     elif diff_addr in ida:
154 |                         excl_ida.append(hex(diff_addr) + ': ' + ida[diff_addr])
155 | 
156 |                 if not differing and not excl_ida and not excl_us:
157 |                     print()
158 |                     print('Function matches, but mask mismatched:')
159 |                     print('Expected Mask:', mask)
160 |                     print('Actual Mask  :', actual[addr][2].hex())
161 | 
162 |                 print()
163 | 
164 |                 if differing:
165 |                     print('Differing instructions (expected vs actual):')
166 |                     print('\n'.join(sorted(differing, key=lambda s: int(s.split(':')[0], 16))))
167 |                 if excl_ida:
168 |                     print('Only on IDA:')
169 |                     print('\n'.join(sorted(excl_ida, key=lambda s: int(s.split(':')[0], 16))))
170 |                 if excl_us:
171 |                     print('Only on ghidra:')
172 |                     print('\n'.join(sorted(excl_us, key=lambda s: int(s.split(':')[0], 16))))
173 | 
174 |                 print()
175 |             
176 |             miss+=1
177 | 
178 |     print('Checked', len(expected), 'functions in', end - start,'seconds (' + str(missing) + ' missing), Mismatch:', str(miss) + '; Accuracy:', (len(expected)-miss)/len(expected))
179 | 
180 | 
if __name__ == "__main__":
    #read from stdin, in order: the binary to analyse, then the verbosity (an empty line means 2)
    target_binary = input()
    raw_verbosity = input()
    if raw_verbosity:
        level = int(raw_verbosity)
    else:
        level = 2
    check_against_ida(target_binary, level, 'x86') #only x86 is supported atm


--------------------------------------------------------------------------------
/data/client.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from lumina_structs import *
  3 | from ghidra.util import Msg
  4 | from ghidra.program.flatapi import FlatProgramAPI
  5 | from ghidra.program.database.function import FunctionDB
  6 | from ghidra.program.database import ProgramDB
  7 | 
  8 | import socket, ssl, threading
  9 | 
 10 | from .sig.util import ARCH_MAPPING
 11 | from .parsing import apply_md, craft_push_md, craft_pull_md
 12 | 
 13 | 
 14 | class LuminaClient:
 15 |     def __init__(self, plugin) -> None:
 16 |         self.socket = None
 17 |         self.lock = threading.RLock() #we need RLock to be able to enter critical sections holding a lock already
 18 |         self.plugin = plugin
 19 |         self.reconnect()
 20 | 
 21 |     def is_valid(self, ctx: ProgramDB):
 22 |         #ghidra doesnt allow multi arch disassembly so no function specific context needed
 23 |         return self.socket and ctx.getLanguage().getProcessor().toString() in ARCH_MAPPING
 24 |     
 25 |     def send_and_recv_rpc(self, code: RPC_TYPE, noretry: bool = False, **kwargs):
 26 |         try: 
 27 |             with self.lock: #only lock if not already in critical section (see reconnect())
 28 |                 payload = rpc_message_build(code, **kwargs)
 29 |                 Msg.debug(self.plugin, 'Sending ' + str(code) + ' command (' + str(payload) + ')')
 30 |                 self.socket.send(payload)
 31 | 
 32 |                 packet, message = rpc_message_parse(self.socket)
 33 |                 Msg.debug(self.plugin, 'Received ' + str(packet) + 'Message: ' + str(message) + '')
 34 |                 return packet, message
 35 |         except (ConnectionError, con.StreamError) as e:
 36 |             Msg.warn(self.plugin, 'Disconnected from the Lumina server.' + ('' if noretry else ' Reconnecting...'))
 37 |             if not noretry:
 38 |                 self.reconnect()
 39 |                 return self.send_and_recv_rpc(code, **kwargs)  #retry
 40 |             return (None, None)
 41 |         except Exception as e:
 42 |             Msg.error(self.plugin, 'Something went wrong: ' + str(type(e)) + ': ' + str(e))
 43 |             return (None, None)
 44 | 
 45 | 
 46 |     def reconnect(self, *_):  #ignore additional args
 47 |         with self.lock:  #lock until handshakes over to avoid other reqs go faster than we do
 48 |             try:
 49 |                 if self.socket:  #reset connection
 50 |                     self.socket.close()
 51 | 
 52 |                 settings = self.plugin.getTool().getOptions("Lumina")   #refresh settings
 53 | 
 54 |                 host = settings.getString('Host Address', ''), int(settings.getString('Port', ''))
 55 | 
 56 |                 self.socket = socket.socket()
 57 |                 self.socket.connect(host)
 58 | 
 59 |                 cert = settings.getFile('TLS Certificate File', None)
 60 |                 if cert:
 61 |                     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 62 |                     context.load_verify_locations(cert.getPath())
 63 |                     self.socket = context.wrap_socket(self.socket, server_hostname=host[0])
 64 | 
 65 |                 key, id = b'', bytes(6)
 66 |                 try:
 67 |                     keyfile = settings.getFile('Key File', None)
 68 |                     if keyfile:
 69 |                         with open(keyfile.getPath(), 'rb') as kf:
 70 |                             key = kf.read()
 71 |                             if key.startswith(b'HEXRAYS_LICENSE'):    #looks like genuine license, parse id
 72 |                                 #id is from the line with IDAPRO*W in it
 73 |                                 id = bytes.fromhex(key.split(b' IDAPRO')[0].split(b'\n')[-1].replace(b'-', b'').decode())
 74 |                                 if len(id) != 6:   #must be 6 bytes long, if not something went wrong
 75 |                                     id = bytes(6)  #reset into empty bytes
 76 |                                     raise ValueError()
 77 |                 except OSError:
 78 |                     Msg.warn(self.plugin, 'Lumina key file path is invalid, ignoring...')
 79 |                 except ValueError:
 80 |                     Msg.warn(self.plugin, 'Given Hexrays license file seems malformed, skipping parsing...')
 81 | 
 82 |                 #dont retry for this query to prevent infinite mutual recursion
 83 |                 resp, msg = self.send_and_recv_rpc(RPC_TYPE.RPC_HELO, noretry=True, protocol=2, hexrays_license=key, hexrays_id=id, field_0x36=0)
 84 |                 if not resp or resp.code != RPC_TYPE.RPC_OK:
 85 |                     raise ConnectionError('Handshake failed ' + (f'({msg.message})' if resp and resp.code == RPC_TYPE.RPC_FAIL else '(Connection failure)'))
 86 | 
 87 |                 Msg.info(self.plugin, 'Connection to Lumina server ' +  host[0] + ':' + str(host[1]) + ' (TLS: ' + str(bool(cert)) + ') succeeded.')
 88 |             except Exception as e:
 89 |                 if self.socket:  #if we got an error after opening the socket, close it; also needs to be locked
 90 |                     self.socket.close()
 91 |                 self.socket = None
 92 | 
 93 |                 Msg.showWarn(self.plugin, None, 'Lumina connection failed', 'Connection to Lumina server failed (' + (str(e) if type(e) != ValueError else 'invalid port') + '). Please check your configuration.')
 94 | 
 95 |     
 96 |     #
 97 |     # All functions commands
 98 |     #
 99 | 
100 |     def pull_all_mds(self, ctx: ProgramDB):
101 |         #background in this context is in the pythread - all commands get queued into that thread
102 |         Msg.info(self.plugin, "Pulling all function metadata in the background...")
103 | 
104 |         #just in case functions changed while we were waiting, make a copy since we rely on ordering heavily
105 |         #also coz otherwise it returns a java array which is hard to use lol
106 |         copy = list(ctx.getFunctionManager().getFunctions(True))
107 | 
108 |         tool = self.plugin.getTool()
109 | 
110 |         pull = craft_pull_md(ctx, copy, tool)
111 | 
112 |         #TODO use Command class so we get a nicer status update panel
113 |         #(if possible; we aren't using their task queue so im not sure)
114 |         tool.setStatusInfo('[Lumina] Sending pull request...')
115 | 
116 |         msg = self.send_and_recv_rpc(RPC_TYPE.PULL_MD, **pull)[1]
117 | 
118 |         tool.setStatusInfo('[Lumina] Applying metadata...')
119 | 
120 |         if msg:
121 |             it = iter(msg.results) #also results only have valid mds so its easier to model with iterator
122 |             for i, found in enumerate(msg.found):
123 |                 if found == ResultType.RES_OK:
124 |                     apply_md(ctx, copy[i], tool, next(it))
125 |             log = 'Pulled ' + str(sum([d == ResultType.RES_OK for d in msg.found])) + '/' + str(len(msg.found)) + ' functions successfully.'
126 |             Msg.info(self.plugin, log)
127 |             tool.setStatusInfo('[Lumina] ' + log)
128 |         else:
129 |             #it doesnt matter if the status is always there its better than not being able to see it at all
130 |             tool.setStatusInfo('[Lumina] Pull request for all functions failed.')
131 | 
132 | 
133 |     def push_all_mds(self, ctx: ProgramDB):
134 |         Msg.info(self.plugin, "Pushing all function metadata in the background...")
135 | 
136 |         tool = self.plugin.getTool()
137 | 
138 |         kwargs = craft_push_md(ctx, list(ctx.getFunctionManager().getFunctions(True)), tool)
139 |         
140 |         tool.setStatusInfo('[Lumina] Sending push request...')
141 | 
142 |         msg = self.send_and_recv_rpc(RPC_TYPE.PUSH_MD, **kwargs)[1]
143 | 
144 |         if msg:
145 |             log = 'Pushed ' + str(sum([d == ResultType.RES_ADDED for d in msg.resultsFlags])) + '/' + str(len(msg.resultsFlags)) + ' functions successfully.'
146 |             Msg.info(self.plugin, log)
147 |             tool.setStatusInfo('[Lumina] ' + log)
148 |         else:
149 |             tool.setStatusInfo('[Lumina] Push request for all functions failed.')
150 | 
151 | 
152 |     #
153 |     # Function specific commands
154 |     #
155 | 
156 |     def pull_function_md(self, ctx: ProgramDB, func: FunctionDB):
157 |         Msg.debug(self.plugin, 'Pulling metadata for func ' + func.getName() + '...')
158 | 
159 |         msg = self.send_and_recv_rpc(RPC_TYPE.PULL_MD, **craft_pull_md(ctx, [func]))[1]
160 | 
161 |         #status info kinda nice for displaying subtle msgs to the user that's not lost in the logs
162 |         #so lets do it even for the function specific commands
163 |         tool = self.plugin.getTool()
164 | 
165 |         if msg and msg.results:
166 |             apply_md(ctx, func, tool, msg.results[0])
167 |             log = 'Pulled metadata for function "' + func.getName() + '" successfully.'
168 |             Msg.info(self.plugin, log)
169 |             tool.setStatusInfo('[Lumina] ' + log)
170 |         else:
171 |             tool.setStatusInfo('[Lumina] Pull request for the function failed.')
172 |                 
173 | 
174 |     def push_function_md(self, ctx: ProgramDB, func: FunctionDB):
175 |         Msg.debug(self.plugin, 'Pushing metadata for func ' + func.getName() + '...')
176 | 
177 |         msg = self.send_and_recv_rpc(RPC_TYPE.PUSH_MD, **craft_push_md(ctx, [func]))[1]
178 | 
179 |         tool = self.plugin.getTool()
180 | 
181 |         if msg:
182 |             log = 'Pushed metadata for function "' + func.getName() + '" successfully.'
183 |             Msg.info(self.plugin, log)
184 |             tool.setStatusInfo('[Lumina] ' + log)
185 |         else:
186 |             tool.setStatusInfo('[Lumina] Push request for the function failed.')


--------------------------------------------------------------------------------
/data/parsing.py:
--------------------------------------------------------------------------------
  1 | from ghidra.util import Msg
  2 | from ghidra.program.database.function import FunctionDB
  3 | from ghidra.program.database import ProgramDB
  4 | from ghidra.program.model.symbol import SourceType
  5 | from ghidra.program.flatapi import FlatProgramAPI
  6 | from ghidra.program.model.listing import CodeUnit, Function, ParameterImpl, LocalVariableImpl, VariableUtilities, VariableStorage
  7 | from ghidra.framework.plugintool import PluginTool
  8 | from ghidra.program.model.symbol import SourceType
  9 | from ghidra.program.model.data import Undefined
 10 | from java.util import Arrays
 11 | 
 12 | import socket, itertools
 13 | 
 14 | from construct import *
 15 | from lumina_structs import *
 16 | from lumina_structs.metadata import *
 17 | 
 18 | from .sig.util import Sig, ARCH_MAPPING
 19 | from .type import construct_type, cc_mapping
 20 | 
 21 | #
 22 | # Push Functions
 23 | #
 24 | 
 25 | def extract_md(ctx: ProgramDB, func: FunctionDB, gen: Sig) -> dict:
 26 |     chunks = []
 27 | 
 28 |     #turns out func.getComment and getRepeatableComment are just plate comments and repeatable comments at the entry point address
 29 |     if func.getComment():
 30 |         chunks.append({
 31 |             'type': MetadataType.MD_FUNC_CMT,
 32 |             'data': {'text': func.getComment()}})
 33 |     
 34 |     if func.getRepeatableComment():
 35 |         chunks.append({
 36 |             'type': MetadataType.MD_FUNC_REPCMT,
 37 |             'data': {'text': func.getRepeatableComment()}})
 38 | 
 39 |     prog = FlatProgramAPI(ctx)
 40 | 
 41 |     func_start = func.getEntryPoint().getOffset()
 42 | 
 43 |     #EOL comments are always instruction comments
 44 |     eol = [{'offset': addr.getOffset() - func_start,
 45 |             'text': prog.getEOLComment(addr)} 
 46 |             for addr in ctx.getCodeManager().getCommentAddressIterator(CodeUnit.EOL_COMMENT, func.getBody(), True)]
 47 |     if eol:
 48 |         chunks.append({
 49 |             'type': MetadataType.MD_INSN_CMT,
 50 |             'data': eol})
 51 | 
 52 |     #repeatable comments are instruction comments, aside from the entry point one, which we need to check for that case
 53 |     rep = [{'offset': addr.getOffset() - func_start,
 54 |             'text': prog.getRepeatableComment(addr)} 
 55 |             for addr in ctx.getCodeManager().getCommentAddressIterator(CodeUnit.REPEATABLE_COMMENT, func.getBody(), True)
 56 |             if addr.getOffset() != func_start]
 57 |     if rep:
 58 |         chunks.append({
 59 |             'type': MetadataType.MD_INSN_REPCMT,
 60 |             'data': rep})
 61 | 
 62 | 
 63 |     #do both pre and post at the same time; pre and post comments will never be related to function comments so we are good
 64 |     extra = [{'offset': addr.getOffset() - func_start,
 65 |             'anterior': pre if pre else '',
 66 |             'posterior': post if post else ''} 
 67 |             for addr in ctx.getCodeManager().getCommentAddressIterator(func.getBody(), True)
 68 |             if any([(pre:=prog.getPreComment(addr)), (post:=prog.getPostComment(addr))])]  #either one of them exists then we can add; prevent short circuit evaluation
 69 |     if extra:
 70 |         chunks.append({
 71 |             'type': MetadataType.MD_EXTRA_CMT,
 72 |             'data': extra})
 73 | 
 74 |     #TODO frame info and tinfo
 75 |     #OPREPRS as a concept doesnt really exist in Ghidra either(??)
 76 |     #but might be helpful in defining data vars so parsing might be good
 77 | 
 78 |     if chunks: #only compute signature and returns something if has data
 79 |         data = gen.calc_func_metadata(func)
 80 |         if not data:
 81 |             return None
 82 |         
 83 |         sig, block, mask = data
 84 |         return {
 85 |             "metadata": {
 86 |                 "func_name": func.getName(),  #func name is automatically whatever it should be
 87 |                 "func_size": len(block),
 88 |                 "serialized_data": {
 89 |                     "chunks": chunks}},
 90 |             "signature": {
 91 |                 "version": 1, 
 92 |                 "signature": sig}}
 93 |     else:
 94 |         return None
 95 | 
 96 | 
 97 | 
 98 | def craft_push_md(ctx: ProgramDB, funcs: list[FunctionDB], tool: PluginTool = None) -> dict:
 99 |     arch = ARCH_MAPPING[ctx.getLanguage().getProcessor().toString()](ctx)  #again, Ghidra only allows one arch at a time
100 | 
101 |     progress = "[Lumina] Extracting function metadata ({count}/" + str(len(funcs)) + " functions)"
102 |     push, eas = [], []
103 |     for i, f in enumerate(funcs):
104 |         md = extract_md(ctx, f, arch)
105 |         if md: #only apply if extracted useful data
106 |             push.append(md)
107 |             eas.append(f.getEntryPoint().getOffset())
108 |         if tool:
109 |             tool.setStatusInfo(progress.format(count=i))
110 | 
111 |     return {
112 |         "type": PushMdOpt.PUSH_OVERRIDE_IF_BETTER,  #protocol 2 default
113 |         "idb_filepath": ctx.getDomainFile().getProjectLocator().getProjectDir().getPath(), 
114 |         "input_filepath": ctx.getExecutablePath(), 
115 |         "input_md5": bytes.fromhex(ctx.getExecutableMD5()),   #Ghidra actually has a function for this so we dont need to reread the file ourselves
116 |         "hostname": socket.gethostname(),
117 |         "funcInfos": push,
118 |         "funcEas": eas}  #seems like ida is offset by one???
119 | 
120 | 
121 | #
122 | # Pull Functions
123 | #
124 | 
125 | 
126 | #again, ghidra support only one arch at a time so no more lists
def craft_pull_md(ctx: ProgramDB, fs: list[FunctionDB], tool: PluginTool = None) -> dict:
    """Build the keyword arguments of a PULL_MD request for the functions in ``fs``.

    Functions for which no signature can be calculated are left out of the
    request. Because the caller has to match replies to functions by position,
    such functions are also removed from ``fs`` *in place*, so that afterwards
    ``fs[i]`` is the function behind ``funcInfos[i]``. ``fs`` is left untouched
    when every function has a signature.

    When ``tool`` is given, its status line is updated before every function.
    """
    arch = ARCH_MAPPING[ctx.getLanguage().getProcessor().toString()](ctx)

    progress = "[Lumina] Calculating function signatures ({count}/" + str(len(fs)) + " functions)"
    sigs, signed = [], []
    for i, func in enumerate(fs):
        if tool:
            tool.setStatusInfo(progress.format(count=i))

        sig = arch.calc_func_metadata(func)
        if sig:
            sigs.append({'signature':sig[0]})
            signed.append(func)

    if len(signed) != len(fs):
        #keep the caller's list aligned with what is actually being requested
        fs[:] = signed

    return {'flags': 1,  #protocol 2 default
        'types':[],
        'funcInfos':sigs}
146 | 
147 | 
148 | #we need tool for passing DataTypeManagerService to construct_cmplx unfortunately 
def apply_md(ctx: ProgramDB, func: FunctionDB, tool: PluginTool, info: Container) -> None:
    """Apply the Lumina metadata in ``info`` onto ``func``.

    Renames the function, then walks ``info.metadata.serialized_data.chunks`` and applies
    instruction/function/extra comments, the function prototype and the stack frame
    variables. Chunk types that are not handled are logged and skipped.

    All changes are made inside one transaction on ``ctx``: it is committed once every
    chunk has been applied, and rolled back (with the exception propagated to the caller)
    if any step raises, so a failed pull never leaves a transaction open.

    Args:
        ctx: program that owns ``func``.
        func: function that receives the metadata.
        tool: forwarded to ``construct_type``.
        info: parsed function info; ``info.metadata`` supplies ``func_name`` and the chunks.
            The ``names`` list of a type info chunk is consumed in place while parameter
            names are assigned.
    """
    #we don't really care about popularity atm, but it might be useful server side for sorting

    #IDA (at least on 7.5) hardcoded no-override flag into apply_metadata, so tinfo and frame desc effectively never gets applied even if existing data is entirely auto-generated
    #we won't follow that - manually clearing the data on every lumina pull is very annoying and there is undo anyway
    #instead we will default to resetting metadata to what lumina provides on conflict
    prog = FlatProgramAPI(ctx)

    prog.start() #start a transaction
    committed = False
    try:
        func.setName(info.metadata.func_name, SourceType.IMPORTED)
        #func size should be the same to be able to get the same signature, so no need to set
        for md in info.metadata.serialized_data.chunks:
            if md.type in (MetadataType.MD_INSN_CMT, MetadataType.MD_INSN_REPCMT):
                #the setter only depends on the chunk type, so pick it once per chunk
                set_comment = prog.setEOLComment if md.type == MetadataType.MD_INSN_CMT else prog.setRepeatableComment
                for c in md.data:
                    set_comment(func.getEntryPoint().add(c.offset), c.text)
            elif md.type in (MetadataType.MD_FUNC_CMT, MetadataType.MD_FUNC_REPCMT):
                #ghidra actually has repeatable comments, treat them separately
                set_comment = func.setComment if md.type == MetadataType.MD_FUNC_CMT else func.setRepeatableComment
                set_comment(md.data.text)
            elif md.type == MetadataType.MD_EXTRA_CMT:
                #Ghidra actually has anterior and posterior comments, treat them separately
                for c in md.data:
                    addr = func.getEntryPoint().add(c.offset)
                    if c.anterior:
                        prog.setPreComment(addr, c.anterior)
                    if c.posterior:
                        prog.setPostComment(addr, c.posterior)
            elif md.type == MetadataType.MD_TYPE_INFO:
                #cannot reuse FunctionDefinitionDataType unfortunately since we have to directly set in the current func
                #md.data.tinfo should always be BT_FUNC
                params = []
                for param in md.data.tinfo.data.params:
                    #names are consumed front to back; the remaining ones are handed to construct_type
                    name = md.data.names.pop(0) if md.data.names else ""
                    params.append(ParameterImpl(name, construct_type(tool, param.type, md.data.names), ctx))
                params = Arrays.asList(params)

                func.replaceParameters(params, Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, True, SourceType.IMPORTED)
                func.setReturnType(construct_type(tool, md.data.tinfo.data.rettype, md.data.names), SourceType.IMPORTED)
                if md.data.tinfo.data.cc.convention in cc_mapping:
                    cc = cc_mapping[md.data.tinfo.data.cc.convention].getDeclarationName()
                    func.setCallingConvention(cc if cc != '__cdecl' else '__stdcall')  #somehow cdecl doesnt exist (at least on ghidra 10.1.4) and is mapped to __stdcall according to the DWARF parser

                #ghidra doesnt have near/far calls(?)
                #TODO custom calling conventions since ghidra actually supports reordering with argloc (VariableStorage)
                #i dont think spoiled registers can be defined though
            elif md.type == MetadataType.MD_FRAME_DESC:
                #ghidra doesnt have the variable definition section for comments storage, so discard for now; also oprepr as a concept doesnt exist in ghidra either
                for var in md.data.vars:
                    #sometimes type == None if its default so just treat it as undefined type of nbytes
                    #diagnostics go through Msg like the rest of this function instead of a bare print
                    Msg.debug("Lumina", f'{var.name} {var.type.tinfo if var.type else None}')
                    if var.type:
                        t = construct_type(tool, var.type.tinfo, var.type.names, var.nbytes)
                    else:
                        t = Undefined.getUndefinedDataType(var.nbytes)
                    name = var.name if var.name else f'lumina_{hex(var.off)}'

                    #TODO check if this still matches in architectures with stack growing up (also are we sure frregs is for this purpose)
                    #ghidra also uses rbp instead of rsp (offset goes down instead of up)
                    v = LocalVariableImpl(name, t, -(md.data.frsize - var.off + md.data.frregs), ctx)

                    #TODO argloc
                    VariableUtilities.checkVariableConflict(func, v, v.getVariableStorage(), True)
                    func.addLocalVariable(v, SourceType.IMPORTED)
            else:
                Msg.debug("Lumina", 'Unimplemented metadata type ' + str(md.type) + ', skipping for now...')

        committed = True
    finally:
        #always end the transaction: commit when everything applied, roll back if something raised
        prog.end(committed)
216 | 


--------------------------------------------------------------------------------