├── Module.manifest ├── settings.gradle ├── requirements.txt ├── .gitignore ├── .gitmodules ├── extension.properties ├── LICENSE ├── data ├── sig │ ├── util.py │ └── x86.py ├── entry.py ├── ida-calc-all-metadata.py ├── type.py ├── test.py ├── client.py └── parsing.py ├── README.md └── src └── main └── java └── org └── maplebacon └── lumina ├── PythonExecutor.java └── LuminaPlugin.java /Module.manifest: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | include ':Ghidrathon' -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | capstone>=4.0.2 2 | git+https://github.com/ubcctf/lumina_structs -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/ 2 | /build/ 3 | /bin/ 4 | /dist/ 5 | .gradle/ 6 | .settings/ 7 | .project 8 | .classpath -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Ghidrathon"] 2 | path = Ghidrathon 3 | url = https://github.com/mandiant/Ghidrathon.git 4 | -------------------------------------------------------------------------------- /extension.properties: -------------------------------------------------------------------------------- 1 | name=Lumina 2 | description=Lumina implementation for Ghidra 3 | author=Maple Bacon 4 | createdOn=04/12/2022 5 | version=@extversion@ 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 CTF @ UBC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /data/sig/util.py: -------------------------------------------------------------------------------- 1 | from ghidra.program.database.function import FunctionDB 2 | from ghidra.program.flatapi import FlatProgramAPI 3 | 4 | def hexdump(block): 5 | print("\n".join([" ".join([row.hex()[i:i + 2] for i in range(0, len(row.hex()), 2)]) for row in [block[i:i + 16] for i in range(0, len(block), 16)]])) 6 | 7 | 8 | #base class for all architectures' signature generation functions 9 | class Sig: 10 | def __init__(self, prog): 11 | self.prog = FlatProgramAPI(prog) 12 | self.mem = prog.getMemory() 13 | 14 | def calc_func_metadata(self, func: FunctionDB) -> tuple[str, bytes, bytes]: 15 | raise NotImplementedError() 16 | 17 | 18 | #metaclass like how binja implements its subscriptable Architecture class 19 | #have to do this to lazy load; otherwise we end up with circular import 20 | class _mapping(type): 21 | def __init__(self, name, bases, dict) -> None: 22 | super().__init__(name, bases, dict) 23 | from .x86 import X86 24 | self.map = {'x86_64': X86, 'x86': X86} 25 | 26 | def __iter__(self): 27 | return self.map.__iter__() 28 | 29 | def __getitem__(self, name: str) -> Sig: 30 | return self.map[name] 31 | 32 | class ARCH_MAPPING(metaclass=_mapping): ... -------------------------------------------------------------------------------- /data/entry.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | #hotfix for win32com SetupEnvironment NoneType is not callable; we are effectively running a bundled python installation anyway 4 | sys.frozen = 1 5 | 6 | from .client import LuminaClient 7 | 8 | from ghidra.framework.options import OptionType 9 | 10 | 11 | #ALWAYS register to prevent weird behaviours of resetting config - see removeUnusedOptions() implementation in ToolOptions 12 | #Also apparently storing as INT_TYPE might trigger !isCompatibleOption - the value retrieved from the XML is returned as a Long whereas they expect an int 13 | #seems like that's coz jep stores python integers as longs, so whatever we can just make it a string 14 | settings = plugin.getTool().getOptions("Lumina") #already creates the category if doesnt exist for us 15 | settings.registerOption('Host Address', OptionType.STRING_TYPE, '', None, 'Host address for the Lumina server') 16 | settings.registerOption('Port', OptionType.STRING_TYPE, '', None, 'Port for the Lumina server') 17 | #also needs to use None instead of an empty string as path here since that would be incompatible with java.io.File 18 | settings.registerOption('Key File', OptionType.FILE_TYPE, None, None, 'Path to the Key file to connect to the Lumina server with, if any') 19 | settings.registerOption('TLS Certificate File', OptionType.FILE_TYPE, None, None, 'Path to the TLS Certificate for the Lumina server, if any') 20 | 21 | #try logging in with configured params 22 | plugin.setClient(LuminaClient(plugin)) -------------------------------------------------------------------------------- /data/ida-calc-all-metadata.py: -------------------------------------------------------------------------------- 1 | import ida_kernwin, ida_pro, ida_funcs, ida_auto, ida_segment, ida_idp, ida_registry, idc 2 | 3 | #this script writes each function address into a file, and triggers lumina sequentially (intended to use with frida to obtain hash) 4 | #note: idat is *pretty* fragile so things might break if you reorder code (might even be nondeterministic if its related to lumina initialization) 5 | #also idat will utilize 100% of a core if the script throws an exception 6 | 7 | 8 | #open disasm window well in advance to prepare for lumina 9 | idaview = ida_kernwin.open_disasm_window('IDA View-A') 10 | ida_kernwin.display_widget(idaview, 0) 11 | 12 | #we need to disable lumina pull all on autoanalysis finish or else we get junk that we dont want 13 | orig = ida_registry.reg_read_int('AutoUseLumina', 1) 14 | ida_registry.reg_write_int('AutoUseLumina', 0) 15 | 16 | #wait until autoanalysis is finished to get full function list; this also gives time for frida to attach 17 | ida_auto.auto_wait() 18 | 19 | #restore option 20 | ida_registry.reg_write_int('AutoUseLumina', orig) 21 | 22 | #use local server to speed up processing - we dont need actual responses anyway 23 | #self note: if local server is not running idat's gonna make a ton of windows noise 24 | ida_idp.process_config_directive('LUMINA_HOST="127.0.0.1"') 25 | ida_idp.process_config_directive('LUMINA_PORT=4443') 26 | ida_idp.process_config_directive('LUMINA_TLS=NO') 27 | 28 | 29 | class Run(ida_kernwin.UI_Hooks): 30 | def __init__(self) -> None: 31 | ida_kernwin.UI_Hooks.__init__(self) 32 | 33 | #wait until ready 34 | def ready_to_run(self): 35 | #spinning seems to be necessary when we are connecting to remote but makes things worse when we are connecting to local server 36 | # #spin until lumina finishes initializing - have to use idc.qsleep not time.sleep since we cannot occupy the thread 37 | # #somehow sometimes it deadlocks though i have no idea how to fix 38 | # while not ida_kernwin.is_action_enabled(ida_kernwin.get_action_state('LuminaIDAViewPullMd')[1]): 39 | # with open('test.log', 'a') as ww: 40 | # ww.write('spinning\n') 41 | # idc.qsleep(100) 42 | 43 | ea = 0 44 | while (f:=ida_funcs.get_next_func(ea)): 45 | ea = f.start_ea #move onto the next func regardless 46 | if not ida_segment.is_spec_ea(ea): #ignore extern symbols and the likes 47 | ida_kernwin.jumpto(ea) 48 | ida_kernwin.process_ui_action('LuminaIDAViewPullMd') 49 | 50 | ida_idp.process_config_directive('ABANDON_DATABASE=YES') 51 | ida_pro.qexit(0) 52 | 53 | 54 | uihook = Run() 55 | uihook.hook() 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lumina-ghidra 2 | Ghidra port for [lumina-binja](https://github.com/ubcctf/lumina-binja), a reimplmentation of IDA's [Lumina](https://hex-rays.com/products/ida/lumina/) feature in Binary Ninja 3 | 4 | The features provided by this port is on par with the Binary Ninja plugin - see the [repo](https://github.com/ubcctf/lumina-binja) for more info! 5 | 6 | **CURRENTLY IN ACTIVE DEVELOPMENT - NOTHING IS FULLY STABLE YET** 7 | 8 | ## Building and Installation 9 | - include `--recurse-submodules` to get the Ghidrathon repo when cloning this repo 10 | - `cd Ghidrathon && git checkout -b pre-10.2 0a54fa1cef41869582eb3614a86a9475ecf5c67a` if you are running Ghidra < v10.2, otherwise `cd Ghidrathon && git checkout main` 11 | - `gradle -PGHIDRA_INSTALL_DIR=` should compile both Ghidrathon (in `Ghidrathon/dist/`) and this plugin (in `dist/`) 12 | - Alternatively, if you are using `GhidraDev` in Eclipse, import the project, right click the project: `GhidraDev -> Link Ghidra...`, follow the prompts, and then `GhidraDev -> Export -> Ghidra Module Extension...` which will do the same thing as the command above 13 | - Go into Ghidra, `File -> Install Extensions`, click the green arrow and select both of the zip file 14 | - Check both of the new extensions and restart Ghidra 15 | - Since the plugin is still marked unstable currently, you will have to go to `File -> Configure -> Experimental` and check `LuminaPlugin` manually to enable it 16 | - Configure Lumina through `Edit -> Tool Options -> Lumina` in disassembler view; Most logs will be viewable in the main Ghidra tool -> `Help -> Show Log` 17 | 18 | ## Running tests 19 | The `test.py` requires more setup than the Binary Ninja counterpart, mainly because of the way headless mode works for Ghidra: 20 | - Make sure Ghidrathon is set up, and requirements are installed (along with `frida`) 21 | - Run `analyzeHeadless -import -scriptPath -postScript test.py` - This would require you to erase the `.gpr` file every single time since Ghidra does not allow reimporting 22 | - Alternatively you can run `analyzeHeadless -import ` once, and then `analyzeHeadless -process -scriptPath -postScript test.py` to use the cached analysis (faster, but things might persist in the project that is unideal for testing) 23 | - You'll need to input the filepath and verbosity arguments through stdin, along with manually finding IDA addresses for `calc_func_metadata`, `MD5Update` and `MD5Final` - `postScript` argument passing doesn't seem to work that well, and we cannot really analyze multiple binaries needed to get the addresses automatically without prior setup with `analyzeHeadless` either 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/java/org/maplebacon/lumina/PythonExecutor.java: -------------------------------------------------------------------------------- 1 | package org.maplebacon.lumina; 2 | 3 | import java.io.PrintWriter; 4 | import java.util.concurrent.ExecutionException; 5 | import java.util.concurrent.ExecutorService; 6 | import java.util.concurrent.Executors; 7 | 8 | import org.apache.commons.lang3.concurrent.BasicThreadFactory; 9 | 10 | import generic.jar.ResourceFile; 11 | import ghidrathon.interpreter.GhidrathonInterpreter; 12 | 13 | /** 14 | * The class responsible for executing python code, 15 | * ensuring consistency of the thread state for jep.
16 | *
17 | * This class abstracts with a single thread executor, which means commands submitted are executed sequentially, 18 | * ensuring the ordering is as expected. 19 | * @author despawningbone 20 | */ 21 | public class PythonExecutor { 22 | private GhidrathonInterpreter python; 23 | private ExecutorService pyThread; 24 | 25 | //TODO figure out whether this will introduces noticable race conditions due to modifying program state (are transactions thread safe?) 26 | 27 | /** 28 | * Instantiates the environment for the python interpreter. 29 | */ 30 | public PythonExecutor() { 31 | BasicThreadFactory factory = new BasicThreadFactory.Builder() 32 | .namingPattern("Lumina-JEP-thread-%d") 33 | .priority(7) //higher than norm, lower than critical 34 | .build(); 35 | 36 | pyThread = Executors.newSingleThreadExecutor(factory); 37 | 38 | try { //wait until it finishes 39 | pyThread.submit(() -> python = GhidrathonInterpreter.get()).get(); 40 | } catch (InterruptedException | ExecutionException e) { 41 | python = null; //disable on error 42 | } 43 | } 44 | 45 | 46 | /** 47 | * Evaluates a python statement, blocking until it finishes. 48 | * @apiNote Assumes isEnabled == true 49 | * @param line python statement to execute 50 | */ 51 | public void evalSync(String line) { 52 | try { 53 | pyThread.submit(() -> python.eval(line)).get(); 54 | } catch (InterruptedException | ExecutionException e) { 55 | throw new RuntimeException(e); //pass exception to our own thread in unchecked fashion, as would happen without executor 56 | } 57 | } 58 | 59 | /** 60 | * Evaluates a python statement asynchronously. 61 | * @apiNote Assumes isEnabled == true 62 | * @param line python statement to execute 63 | */ 64 | public void eval(String line) { 65 | pyThread.execute(() -> python.eval(line)); 66 | } 67 | 68 | 69 | /** 70 | * Evaluates a python script, blocking until it finishes. 71 | * @apiNote Assumes isEnabled == true 72 | * @param file python script to execute 73 | */ 74 | public void runScriptSync(ResourceFile file) { 75 | try { 76 | pyThread.submit(() -> python.runScript(file)).get(); 77 | } catch (InterruptedException | ExecutionException e) { 78 | throw new RuntimeException(e); //pass exception to our own thread in unchecked fashion, as would happen without executor 79 | } 80 | } 81 | 82 | /** 83 | * Evaluates a python script asynchronously. 84 | * @apiNote Assumes isEnabled == true 85 | * @param file python script to execute 86 | */ 87 | public void runScript(ResourceFile line) { 88 | pyThread.execute(() -> python.runScript(line)); 89 | } 90 | 91 | 92 | /** 93 | * Passes a value to the python interpreter. 94 | */ 95 | public void set(String name, Object obj) { //we dont really need a sync method for this since we dont really care about when it finishes and the ordering is already guaranteed 96 | pyThread.execute(() -> python.set(name, obj)); 97 | } 98 | 99 | /** 100 | * Redirects the python interpreter output to the streams provided 101 | * @param out stream to redirect stdout to 102 | * @param err stream to redirect stderr to 103 | */ 104 | public void setStreams(PrintWriter out, PrintWriter err) { //we dont really need a sync method for this since we dont really care about when it finishes and the ordering is already guaranteed 105 | pyThread.execute(() -> python.setStreams(out, err)); 106 | } 107 | 108 | 109 | /** 110 | * Checks whether the python environment is available. 111 | * @return whether the python interpreter is ready 112 | */ 113 | public boolean isEnabled() { 114 | return python != null; 115 | } 116 | 117 | 118 | /** 119 | * Cleans up the python environment and disables it. 120 | */ 121 | public void close() { 122 | try { 123 | pyThread.submit(() -> python.close()).get(); 124 | pyThread.shutdown(); 125 | python = null; 126 | } catch (InterruptedException | ExecutionException e) { 127 | ; //its whatever we can just ignore it we are leaving anyway; a new PythonExecutor will have a new thread for the python state 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /data/sig/x86.py: -------------------------------------------------------------------------------- 1 | from ghidra.program.database.function import FunctionDB 2 | from ghidra.program.model.address import Address 3 | from ghidra.program.model.symbol import SymbolType 4 | import jep 5 | 6 | from capstone import Cs, CsInsn, CS_ARCH_X86, CS_MODE_64 7 | from capstone.x86 import X86_REG_FS, X86_REG_GS, X86_REG_RIP, X86_OP_MEM, X86_OP_IMM 8 | 9 | import io, ctypes 10 | 11 | from .util import Sig 12 | 13 | 14 | class X86(Sig): 15 | 16 | def valid_loc(self, offset: Address, f: FunctionDB): 17 | #include all data variables that has a code ref 18 | #also include if is pointing to start of instruction, but never mask same function jumps 19 | #sometimes there might be multiple functions at the same address even on the same architecture it seems like - we check all of them to see if any is the same function then reject 20 | #ghidra cant have multiple functions at the same location 21 | #if a data var exists (undefined or not) at offset there has to be a reference in ghidra 22 | return offset and (((o:=self.prog.getFunctionContaining(offset)) and o.getEntryPoint() != f.getEntryPoint() and self.prog.getInstructionAt(offset)) or self.prog.getDataContaining(offset) or self.prog.getUndefinedDataAt(offset)) 23 | #jep jarrays are correctly falsey 24 | 25 | 26 | def calcrel(self, d: CsInsn, f: FunctionDB): 27 | mask = bytes(d.size) 28 | 29 | # - offsets are optional and can not exist 30 | #afaik x86 imm is always at the end 31 | 32 | if d.disp_offset: #consider references - any fs address, any relative memory accesses that's valid in program scope (see valid_loc def) 33 | m = b'\xFF' if any(op.type == X86_OP_MEM and (op.reg in [X86_REG_FS, X86_REG_GS] or (op.value.mem.base == X86_REG_RIP and self.valid_loc(self.prog.getAddressFactory().getAddress(hex(op.value.mem.disp + d.address + d.size)), f))) for op in d.operands) else b'\0' 34 | size = (d.imm_offset - d.disp_offset if d.imm_offset else d.size - d.disp_offset) 35 | mask = mask[:d.disp_offset] + m*size + mask[d.disp_offset+size:] 36 | 37 | #imm always later than disp 38 | if d.imm_offset: #references in imm just points directly to addresses 39 | m = b'\xFF' if any(op.type == X86_OP_IMM and self.valid_loc(self.prog.getAddressFactory().getAddress(hex(op.imm)), f) for op in d.operands) else b'\0' 40 | size = d.size - d.imm_offset 41 | mask = mask[:d.imm_offset] + m*size + mask[d.imm_offset+size:] 42 | 43 | return mask 44 | 45 | def calc_func_metadata(self, func: FunctionDB) -> tuple[str, bytes, bytes]: 46 | 47 | if func.isThunk() and func.getThunkedFunction(False).isExternal(): #special functions, ignore 48 | return 49 | 50 | ranges = func.getBody() 51 | 52 | #dont check the portions of the function above func.start (aka no min([r.start for r in ranges])); seems like IDA doesnt care either and this speeds things up by a ton in binaries with exception handlers 53 | func_start = func.getEntryPoint().getOffset() 54 | func_end = ranges.getMaxAddress().getOffset() #get max of the entire address space 55 | 56 | cap = Cs(CS_ARCH_X86, CS_MODE_64) #seems like 64bit mode can still disassemble 32 bit completely fine 57 | cap.detail = True 58 | 59 | #take the entire block of data including alignment into account (use size if disassembly is not available) 60 | #pass by reference workaround - we cant directly use b'' coz jep will just copy the array into java and then discard the changed result 61 | jblock = jep.jarray(func_end - func_start + 1, jep.JBYTE_ID) #func_end inclusive 62 | self.mem.getBytes(func.getEntryPoint(), jblock) 63 | block = bytes([ctypes.c_ubyte(b).value for b in jblock]) #java bytes are signed 64 | 65 | #linearly disassemble the entire block of bytes that the function encompasses (IDA does that instead of checking whether the bytes are accessible to the function or not) 66 | dis = cap.disasm(block, func_start) 67 | 68 | maskblock = io.BytesIO(bytes(len(block))) 69 | block = io.BytesIO(block) 70 | #if its in the valid proc address space then it counts as volatile 71 | for d in dis: 72 | maskblock.seek(d.address - func_start) 73 | block.seek(d.address - func_start) 74 | 75 | mask = (self.calcrel(d, func)) 76 | data = bytes([b if m != 0xFF else 0 for m, b in zip(mask, block.read(len(mask)))]) 77 | 78 | maskblock.write(mask) 79 | 80 | block.seek(d.address - func_start) 81 | block.write(data) 82 | block = block.getvalue() 83 | maskblock = maskblock.getvalue() 84 | 85 | #compute MD5 86 | import hashlib 87 | 88 | hash = hashlib.md5(block + maskblock).digest() 89 | return hash, block, maskblock -------------------------------------------------------------------------------- /src/main/java/org/maplebacon/lumina/LuminaPlugin.java: -------------------------------------------------------------------------------- 1 | package org.maplebacon.lumina; 2 | 3 | import ghidra.app.plugin.ProgramPlugin; 4 | import ghidra.app.services.ConsoleService; 5 | 6 | import java.io.File; 7 | import java.io.IOException; 8 | import java.util.Arrays; 9 | import java.util.NoSuchElementException; 10 | 11 | import docking.ActionContext; 12 | import docking.action.DockingAction; 13 | import docking.action.MenuData; 14 | import generic.jar.ResourceFile; 15 | import ghidra.app.events.ProgramLocationPluginEvent; 16 | import ghidra.app.plugin.PluginCategoryNames; 17 | import ghidra.framework.Application; 18 | import ghidra.framework.plugintool.PluginInfo; 19 | import ghidra.framework.plugintool.PluginTool; 20 | import ghidra.framework.plugintool.util.PluginStatus; 21 | import ghidra.util.Msg; 22 | 23 | @PluginInfo( 24 | status = PluginStatus.UNSTABLE, 25 | packageName = "org.maplebacon.lumina", 26 | category = PluginCategoryNames.ANALYSIS, 27 | shortDescription = "Lumina implementation for Ghidra", 28 | description = "This plugin adds support for IDA's Lumina feature in Ghidra.", 29 | servicesRequired = { ConsoleService.class }, //needed to ensure console initiates first for python logging 30 | eventsConsumed = { ProgramLocationPluginEvent.class } //needed to get currentLocation updates 31 | ) 32 | public class LuminaPlugin extends ProgramPlugin { 33 | private PythonExecutor python; 34 | private File pyScripts; 35 | 36 | //temporary storage for the LuminaClient pyObject - apparently client could go out of scope for some reason for some installations after entry.py 37 | private Object client; 38 | 39 | //expose for entry.py to be able to persist the client object here 40 | public void setClient(Object client) { 41 | this.client = client; 42 | } 43 | 44 | public LuminaPlugin(PluginTool tool) throws IOException { 45 | super(tool, false, false); 46 | 47 | //unzip the python files into extension directory if not done yet so python can read it properly 48 | //getFile implicitly copies to the application directory that we want 49 | pyScripts = Application.getModuleDataSubDirectory(".").getFile(true); 50 | } 51 | 52 | @Override 53 | protected void init() { 54 | //start the lumina client; DONE move to a separate thread in case other plugins want to make a jep interpreter on the GUI thread too? (also for running background tasks) 55 | try { 56 | python = new PythonExecutor(); 57 | ResourceFile entry = Arrays.asList(pyScripts.listFiles()).stream().filter(f -> f.getName().equals("entry.py")).map(f -> new ResourceFile(f)).findFirst().get(); 58 | 59 | //set any errors to print to the console; it is expected that all communciations should be done through Msg logger but for debugging purposes this would be much more visible 60 | ConsoleService console = tool.getService(ConsoleService.class); 61 | python.setStreams(console.getStdOut(), console.getStdErr()); 62 | 63 | python.set("plugin", this); //pass everything we need to do the plugin in python; getTool will give us the rest we need 64 | 65 | //hotfix for relative imports 66 | python.eval("import sys; sys.path.append(r'" + pyScripts.getParentFile().getParent() + "'); __package__ = 'data'"); 67 | 68 | python.runScript(entry); 69 | } catch(NoSuchElementException e) { 70 | Msg.error(this, "Lumina python scripts not found:", e); 71 | } 72 | 73 | createActions(); 74 | } 75 | 76 | private DockingAction getLuminaAction(String name, String exec, boolean checkValid, boolean funcSpecific) { 77 | MenuData tb = new MenuData(new String[] {"Lumina", name}); 78 | DockingAction action = new DockingAction(name, "Lumina") { 79 | @Override 80 | public void actionPerformed(ActionContext context) { 81 | if(python.isEnabled()) { 82 | python.set("ctx", currentProgram); 83 | //pass client back into scope before evaluating 84 | python.set("client", LuminaPlugin.this.client); 85 | 86 | if(funcSpecific) //only set if its function specific - can be null otherwise 87 | python.set("func", currentProgram.getFunctionManager().getFunctionContaining(currentLocation.getAddress())); 88 | 89 | //Msg is probably not in scope, so we import 90 | python.eval(exec + (checkValid ? " if client.is_valid(ctx) else __import__('ghidra.util').util.Msg.showWarn(plugin, None, 'Lumina - Unavailable', 'This function is not available in this context. (Either the client is not connected, or the architecture is currently unsupported.)')" : "")); 91 | } else { 92 | Msg.showWarn(LuminaPlugin.this, null, "Lumina - inconsistent state", "The python interpreter is not available right now. Please restart the plugin."); 93 | } 94 | } 95 | 96 | @Override 97 | public boolean isEnabledForContext(ActionContext context) { 98 | return !checkValid || (currentProgram != null && currentLocation != null && !(funcSpecific && currentProgram.getFunctionManager().getFunctionContaining(currentLocation.getAddress()) == null)); 99 | } 100 | }; 101 | action.setMenuBarData(tb); 102 | action.setEnabled(true); 103 | action.markHelpUnnecessary(); 104 | return action; 105 | } 106 | 107 | 108 | private void createActions() { 109 | if(tool.getDockingActionsByOwnerName("Lumina").size() == 0) { //only add if not added already 110 | tool.addAction(getLuminaAction("Pull current function metadata", "client.pull_function_md(ctx, func)", true, true)); 111 | tool.addAction(getLuminaAction("Push current function metadata", "client.push_function_md(ctx, func)", true, true)); 112 | tool.addAction(getLuminaAction("Pull all function metadata", "client.pull_all_mds(ctx)", true, false)); 113 | tool.addAction(getLuminaAction("Push all function metadata", "client.push_all_mds(ctx)", true, false)); 114 | tool.addAction(getLuminaAction("Reconnect", "client.reconnect()", false, false)); 115 | //TODO option for reverting applied metadata 116 | } 117 | } 118 | 119 | @Override 120 | protected void dispose() { 121 | if(python.isEnabled()) 122 | python.close(); //need to close it at the end in case we need to turn lumina back on (which is likely in the same thread as before aka jep is gonna die) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /data/type.py: -------------------------------------------------------------------------------- 1 | from lumina_structs.tinfo import * 2 | from construct import Container 3 | from ghidra.program.model.data import (DataType, VoidDataType, 4 | CharDataType, SignedByteDataType, ByteDataType, 5 | SignedWordDataType, WordDataType, SignedDWordDataType, DWordDataType, 6 | SignedQWordDataType, QWordDataType, Integer16DataType, UnsignedInteger16DataType, 7 | IntegerDataType, UnsignedIntegerDataType, BooleanDataType, 8 | FloatDataType, DoubleDataType, LongDoubleDataType, Float2DataType, 9 | PointerDataType, ArrayDataType, FunctionDefinitionDataType, 10 | TypedefDataType, Undefined, BitFieldDataType) 11 | from ghidra.program.model.data import ParameterDefinitionImpl, GenericCallingConvention 12 | from ghidra.framework.plugintool import PluginTool 13 | from ghidra.app.services import DataTypeManagerService 14 | from java.util import ArrayList 15 | from typing import List, Optional 16 | 17 | # 18 | # handles mapping from generic lumina tinfo definitions to ghidra-specific data 19 | # 20 | 21 | def construct_ptr(tinfo: Container, tool: PluginTool, names: Optional[List[str]], *_): 22 | #ghidra seem to only have the basics in ptr types (no const/volatile, no closures, no near/far etc) 23 | if tinfo.data.ptrsize: 24 | return PointerDataType(construct_type(tool, tinfo.data.type, names), tinfo.data.ptrsize) 25 | else: 26 | #assuming dynamic pointer size means itll eventually be the right one 27 | return PointerDataType(construct_type(tool, tinfo.data.type, names)) 28 | 29 | def construct_arr(tinfo: Container, tool: PluginTool, names: Optional[List[str]], *_): 30 | #ghidra have no "base of array" concepts, assume zero always 31 | t = construct_type(tool, tinfo.data.type, names) 32 | return ArrayDataType(t, tinfo.data.num_elems, t.getLength()) 33 | 34 | 35 | cc_mapping = { 36 | CallingConvention.CM_CC_CDECL: GenericCallingConvention.cdecl, 37 | CallingConvention.CM_CC_ELLIPSIS: GenericCallingConvention.cdecl, 38 | CallingConvention.CM_CC_STDCALL: GenericCallingConvention.stdcall, 39 | CallingConvention.CM_CC_PASCAL: GenericCallingConvention.stdcall, #but reversed order of args 40 | CallingConvention.CM_CC_FASTCALL: GenericCallingConvention.fastcall, 41 | CallingConvention.CM_CC_THISCALL: GenericCallingConvention.thiscall, 42 | } 43 | 44 | def construct_func(tinfo: Container, tool: PluginTool, names: Optional[List[str]], *_): 45 | #again seems like the data types are pretty basic, aka no arglocs, near/far, spoiled regs etcetc 46 | funcdef = FunctionDefinitionDataType("lumina_function") #temp name for generating 47 | 48 | funcdef.setReturnType(construct_type(tool, tinfo.data.rettype, names)) 49 | funcdef.setArguments([ParameterDefinitionImpl(names.pop(0) if names else "", construct_type(tool, param.type, names), None) for param in tinfo.data.params]) 50 | if tinfo.data.cc.convention in cc_mapping: 51 | funcdef.setGenericCallingConvention(cc_mapping[tinfo.data.cc.convention]) 52 | 53 | funcdef.setName(funcdef.getPrototypeString()) 54 | 55 | return funcdef 56 | 57 | def construct_cmplx(tinfo: Container, tool: PluginTool, names: Optional[List[str]], nbytes: int): 58 | #lumina only pushes typedef, so not much we can do if it doesnt already exist in type libraries 59 | if tinfo.typedef.flags == ComplexFlags.BTMT_TYPEDEF: #just to be sure we are dealing with typedefs before we search the name up 60 | #we either just use the state in the global scope to get every type library or we have to pass it through a whole chain of things which is not ideal 61 | for lib in tool.getService(DataTypeManagerService).getDataTypeManagers(): 62 | l = ArrayList() 63 | lib.findDataTypes(tinfo.data.name, l) 64 | if l: 65 | #if nbytes is defined and the type we got is very likely defined by lumina (typedef -> undefined), trust that 66 | if hasattr(l[0], 'getBaseDataType') and Undefined.isUndefined(l[0].getBaseDataType()) and nbytes: 67 | l[0].replaceWith(TypedefDataType(tinfo.data.name, Undefined.getUndefinedDataType(nbytes))) 68 | return l[0] 69 | return TypedefDataType(tinfo.data.name, Undefined.getUndefinedDataType(nbytes)) 70 | 71 | #TODO properly parse the complex types once ive figured out ways to force lumina to push full struct info (or extend it to do that) 72 | #this should basically never be reached before then 73 | return Undefined.getUndefinedDataType(nbytes) 74 | 75 | bitfield_mapping = { 76 | BitFieldFlags.BTMT_BFLDI8: (ByteDataType, SignedByteDataType), 77 | BitFieldFlags.BTMT_BFLDI16: (WordDataType, SignedWordDataType), 78 | BitFieldFlags.BTMT_BFLDI32: (DWordDataType, SignedDWordDataType), 79 | BitFieldFlags.BTMT_BFLDI64: (QWordDataType, SignedQWordDataType), 80 | } 81 | 82 | def construct_bitfield(tinfo: Container, *_): #ive never seen this in use - see lumina_structs.tinfo for more info 83 | #BitFieldDataType is technically an internal class, but we should be fine since BT_BITFIELD is also only used in structs in IDA 84 | return BitFieldDataType(bitfield_mapping[tinfo.typedef.flags][int(not tinfo.data.unsigned)], tinfo.data.bitsize) 85 | 86 | 87 | 88 | float_mapping = { 89 | FloatFlags.BTMT_FLOAT: FloatDataType.dataType, 90 | FloatFlags.BTMT_DOUBLE: DoubleDataType.dataType, 91 | FloatFlags.BTMT_LNGDBL: LongDoubleDataType.dataType, 92 | FloatFlags.BTMT_SPECFLT: Float2DataType.dataType, #depends on use_tbyte() in IDA otherwise 2 - likely not used for lumina 93 | } 94 | 95 | basetype_mapping = { 96 | BaseTypes.BT_VOID: lambda *_: VoidDataType.dataType, 97 | BaseTypes.BT_INT8: lambda tinfo, *_: CharDataType.dataType if tinfo.typedef.flags == IntFlags.BTMT_CHAR else (SignedByteDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else ByteDataType.dataType), #default to signed unless unsigned is specified 98 | BaseTypes.BT_INT16: lambda tinfo, *_: SignedWordDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else WordDataType.dataType, 99 | BaseTypes.BT_INT32: lambda tinfo, *_: SignedDWordDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else DWordDataType.dataType, 100 | BaseTypes.BT_INT64: lambda tinfo, *_: SignedQWordDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else QWordDataType.dataType, 101 | BaseTypes.BT_INT128: lambda tinfo, *_: Integer16DataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else UnsignedInteger16DataType.dataType, 102 | BaseTypes.BT_INT: lambda tinfo, *_: IntegerDataType.dataType if not tinfo.typedef.flags == IntFlags.BTMT_USIGNED else UnsignedIntegerDataType.dataType, 103 | BaseTypes.BT_BOOL: lambda *_: BooleanDataType.dataType, 104 | BaseTypes.BT_FLOAT: lambda tinfo, *_: float_mapping[tinfo.typedef.flags], 105 | #complex types 106 | BaseTypes.BT_PTR: construct_ptr, 107 | BaseTypes.BT_ARRAY: construct_arr, 108 | BaseTypes.BT_FUNC: construct_func, 109 | BaseTypes.BT_COMPLEX: construct_cmplx, 110 | BaseTypes.BT_BITFIELD: construct_bitfield, 111 | } 112 | 113 | 114 | def construct_type(tool: PluginTool, tinfo: Container, names: Optional[List[str]] = None, nbytes: int = 0) -> DataType: 115 | #trust nbytes more than type info coz sometimes its missing width (especially typedefs) 116 | #though thats only on the first layer - we reuse types from libraries which ends up having the wrong nbytes size if a typedef is indirectly referred, so dont propagate 117 | return basetype_mapping[tinfo.typedef.basetype](tinfo, tool, names, nbytes) 118 | -------------------------------------------------------------------------------- /data/test.py: -------------------------------------------------------------------------------- 1 | import frida 2 | 3 | from ghidra.program.flatapi import FlatProgramAPI 4 | 5 | from capstone import * 6 | 7 | from sig.util import ARCH_MAPPING 8 | 9 | import time 10 | 11 | import os, subprocess 12 | 13 | cs_mapping = {'x86': CS_ARCH_X86, 'x86_64': CS_ARCH_X86} 14 | 15 | IDA_PATH = os.path.realpath(os.environ['IDADIR']) + os.path.sep 16 | 17 | 18 | def check_against_ida(binary: str, verbosity: int, arch: str): 19 | #it's a hassle to automatically get ida offsets from ghidra so manual input it is for now 20 | metadata, update, final = input(), input(), input() 21 | 22 | cwd = input() #manually enter the path to ida-calc-all-metadata coz __file__ doesnt exist 23 | 24 | #fetch the expected function signatures first 25 | 26 | #we rely on widget loading in ida-calc-all-metadata.py since we cant trigger calc_func_metadata other than running lumina which is why we need to run idat 27 | #with some tricks we can actually get idat to run headless and have widgets loaded like what we expect with gui mode 28 | #-c removes the old database so it doesnt affect operations between runs 29 | p = subprocess.Popen(IDA_PATH + 'idat64.exe -c -A -S' + cwd + 'ida-calc-all-metadata.py ' + binary, stdout=subprocess.PIPE) 30 | 31 | session = frida.attach(p.pid) 32 | 33 | script = session.create_script(""" 34 | const baseAddr = Module.findBaseAddress('ida64.dll');""" 35 | f"\n const metadata = resolveAddress('{metadata}');\n" 36 | f"\n const MD5Update = resolveAddress('{update}');\n" 37 | f"\n const MD5Final = resolveAddress('{final}');\n" 38 | """ 39 | var bytes = ''; 40 | var funcptr = null; 41 | 42 | function buf2hex(buffer) { // buffer is an ArrayBuffer 43 | return [...new Uint8Array(buffer)] 44 | .map(x => x.toString(16).padStart(2, '0')) 45 | .join(''); 46 | } 47 | 48 | Interceptor.attach(metadata, { 49 | onEnter(args) { 50 | funcptr = args[2].readU64(); //first param of func_t is start_ea, as shown in how calc_func_metadata uses get_ea_name 51 | bytes = ''; //prime bytes for writing 52 | }, 53 | }); 54 | 55 | Interceptor.attach(MD5Update, { 56 | onEnter(args) { 57 | bytes += ' ' + buf2hex(args[1].readByteArray(args[2].toInt32())); 58 | }, 59 | }); 60 | 61 | Interceptor.attach(MD5Final, { 62 | onEnter(args) { 63 | this.hashAddr = args[0]; 64 | this.objAddr = args[1]; 65 | }, 66 | onLeave(retval) { 67 | if(funcptr !== null) { //lumina functions always only have 2 as count; also ensure calc_func_metadata runs 68 | const hash = this.hashAddr.readByteArray(16); 69 | send(funcptr.toString(16) + ' ' + buf2hex(hash) + bytes); 70 | 71 | //reset 72 | funcptr = null; 73 | } 74 | }, 75 | }); 76 | 77 | function resolveAddress(addr) { 78 | const idaBase = ptr('0x10000000'); 79 | const offset = ptr(addr).sub(idaBase); 80 | const result = baseAddr.add(offset); 81 | return result; 82 | } 83 | 84 | """) 85 | 86 | expected = [] 87 | 88 | script.on('message', lambda msg, _: expected.append((int((pl:=msg['payload'].split(' '))[0], 16), pl[1], pl[2], pl[3]))) 89 | script.load() 90 | 91 | 92 | #ghidra already loaded currentProgram for us 93 | start = time.time() #ignore open_view overhead in our timing 94 | 95 | gen = ARCH_MAPPING[arch](currentProgram) 96 | actual = {} 97 | for f in currentProgram.getFunctionManager().getFunctions(True): 98 | if calcrel:=gen.calc_func_metadata(f): 99 | actual[f.getEntryPoint().getOffset()] = calcrel 100 | 101 | end = time.time() 102 | 103 | p.communicate() 104 | 105 | if not len(expected): 106 | print('Failed to obtain results from IDA, aborting...') 107 | return 108 | 109 | #check results 110 | 111 | missing = 0 112 | 113 | base = FlatProgramAPI(currentProgram).getFirstData().getAddress().getOffset() 114 | for addr, hash, buf, mask in list(expected): 115 | if addr not in actual: 116 | if (addr + base) in actual: #ghidra sometimes have a different base (doesnt matter in actual lumina operations, since addresses are never checked) 117 | expected[expected.index((addr, hash, buf, mask))] = (addr+base, hash, buf, mask) 118 | else: 119 | if verbosity > 0: 120 | print('Function missing from ghidra: ' + hex(addr)) 121 | expected.remove((addr, hash, buf, mask)) 122 | missing+=1 123 | 124 | if verbosity > 0: 125 | print() 126 | 127 | miss = 0 128 | cap = Cs(cs_mapping[arch], CS_MODE_64) 129 | for addr, hash, buf, mask in expected: #we dont really care about ghidra exclusive functions i guess 130 | if actual[addr][0].hex() != hash: 131 | if verbosity > 0: 132 | print('\nFunction', hex(addr), 'mismatch:') 133 | print('Expected:', hash) 134 | print('Actual:', actual[addr][0].hex()) 135 | 136 | if verbosity > 1: 137 | #print('\n' + actual[addr][1].hex() + '\n' + actual[addr][2].hex() + '\n' + buf + '\n\n') 138 | 139 | us = {d.address:str(d) for d in cap.disasm(actual[addr][1], addr)} 140 | ida = {d.address:str(d) for d in cap.disasm(bytes.fromhex(buf), addr)} 141 | 142 | #only compare the str form of the disassembly since the disasm objects themselves are different 143 | diff = set(ida.values()) ^ set(us.values()) 144 | 145 | differing, excl_us, excl_ida = [], [], [] 146 | for d in diff: 147 | diff_addr = int(d.split(' ')[1], 16) 148 | if diff_addr in us and diff_addr in ida: 149 | if hex(diff_addr) + ': ' + ida[diff_addr] + ' vs ' + us[diff_addr] not in differing: 150 | differing.append(hex(diff_addr) + ': ' + ida[diff_addr] + ' vs ' + us[diff_addr]) 151 | elif diff_addr in us: 152 | excl_us.append(hex(diff_addr) + ': ' + us[diff_addr]) 153 | elif diff_addr in ida: 154 | excl_ida.append(hex(diff_addr) + ': ' + ida[diff_addr]) 155 | 156 | if not differing and not excl_ida and not excl_us: 157 | print() 158 | print('Function matches, but mask mismatched:') 159 | print('Expected Mask:', mask) 160 | print('Actual Mask :', actual[addr][2].hex()) 161 | 162 | print() 163 | 164 | if differing: 165 | print('Differing instructions (expected vs actual):') 166 | print('\n'.join(sorted(differing, key=lambda s: int(s.split(':')[0], 16)))) 167 | if excl_ida: 168 | print('Only on IDA:') 169 | print('\n'.join(sorted(excl_ida, key=lambda s: int(s.split(':')[0], 16)))) 170 | if excl_us: 171 | print('Only on ghidra:') 172 | print('\n'.join(sorted(excl_us, key=lambda s: int(s.split(':')[0], 16)))) 173 | 174 | print() 175 | 176 | miss+=1 177 | 178 | print('Checked', len(expected), 'functions in', end - start,'seconds (' + str(missing) + ' missing), Mismatch:', str(miss) + '; Accuracy:', (len(expected)-miss)/len(expected)) 179 | 180 | 181 | if __name__ == "__main__": 182 | check_against_ida(input(), int(val) if (val:=input()) else 2, 'x86') #only x86 is supported atm -------------------------------------------------------------------------------- /data/client.py: -------------------------------------------------------------------------------- 1 | 2 | from lumina_structs import * 3 | from ghidra.util import Msg 4 | from ghidra.program.flatapi import FlatProgramAPI 5 | from ghidra.program.database.function import FunctionDB 6 | from ghidra.program.database import ProgramDB 7 | 8 | import socket, ssl, threading 9 | 10 | from .sig.util import ARCH_MAPPING 11 | from .parsing import apply_md, craft_push_md, craft_pull_md 12 | 13 | 14 | class LuminaClient: 15 | def __init__(self, plugin) -> None: 16 | self.socket = None 17 | self.lock = threading.RLock() #we need RLock to be able to enter critical sections holding a lock already 18 | self.plugin = plugin 19 | self.reconnect() 20 | 21 | def is_valid(self, ctx: ProgramDB): 22 | #ghidra doesnt allow multi arch disassembly so no function specific context needed 23 | return self.socket and ctx.getLanguage().getProcessor().toString() in ARCH_MAPPING 24 | 25 | def send_and_recv_rpc(self, code: RPC_TYPE, noretry: bool = False, **kwargs): 26 | try: 27 | with self.lock: #only lock if not already in critical section (see reconnect()) 28 | payload = rpc_message_build(code, **kwargs) 29 | Msg.debug(self.plugin, 'Sending ' + str(code) + ' command (' + str(payload) + ')') 30 | self.socket.send(payload) 31 | 32 | packet, message = rpc_message_parse(self.socket) 33 | Msg.debug(self.plugin, 'Received ' + str(packet) + 'Message: ' + str(message) + '') 34 | return packet, message 35 | except (ConnectionError, con.StreamError) as e: 36 | Msg.warn(self.plugin, 'Disconnected from the Lumina server.' + ('' if noretry else ' Reconnecting...')) 37 | if not noretry: 38 | self.reconnect() 39 | return self.send_and_recv_rpc(code, **kwargs) #retry 40 | return (None, None) 41 | except Exception as e: 42 | Msg.error(self.plugin, 'Something went wrong: ' + str(type(e)) + ': ' + str(e)) 43 | return (None, None) 44 | 45 | 46 | def reconnect(self, *_): #ignore additional args 47 | with self.lock: #lock until handshakes over to avoid other reqs go faster than we do 48 | try: 49 | if self.socket: #reset connection 50 | self.socket.close() 51 | 52 | settings = self.plugin.getTool().getOptions("Lumina") #refresh settings 53 | 54 | host = settings.getString('Host Address', ''), int(settings.getString('Port', '')) 55 | 56 | self.socket = socket.socket() 57 | self.socket.connect(host) 58 | 59 | cert = settings.getFile('TLS Certificate File', None) 60 | if cert: 61 | context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) 62 | context.load_verify_locations(cert.getPath()) 63 | self.socket = context.wrap_socket(self.socket, server_hostname=host[0]) 64 | 65 | key, id = b'', bytes(6) 66 | try: 67 | keyfile = settings.getFile('Key File', None) 68 | if keyfile: 69 | with open(keyfile.getPath(), 'rb') as kf: 70 | key = kf.read() 71 | if key.startswith(b'HEXRAYS_LICENSE'): #looks like genuine license, parse id 72 | #id is from the line with IDAPRO*W in it 73 | id = bytes.fromhex(key.split(b' IDAPRO')[0].split(b'\n')[-1].replace(b'-', b'').decode()) 74 | if len(id) != 6: #must be 6 bytes long, if not something went wrong 75 | id = bytes(6) #reset into empty bytes 76 | raise ValueError() 77 | except OSError: 78 | Msg.warn(self.plugin, 'Lumina key file path is invalid, ignoring...') 79 | except ValueError: 80 | Msg.warn(self.plugin, 'Given Hexrays license file seems malformed, skipping parsing...') 81 | 82 | #dont retry for this query to prevent infinite mutual recursion 83 | resp, msg = self.send_and_recv_rpc(RPC_TYPE.RPC_HELO, noretry=True, protocol=2, hexrays_license=key, hexrays_id=id, field_0x36=0) 84 | if not resp or resp.code != RPC_TYPE.RPC_OK: 85 | raise ConnectionError('Handshake failed ' + (f'({msg.message})' if resp and resp.code == RPC_TYPE.RPC_FAIL else '(Connection failure)')) 86 | 87 | Msg.info(self.plugin, 'Connection to Lumina server ' + host[0] + ':' + str(host[1]) + ' (TLS: ' + str(bool(cert)) + ') succeeded.') 88 | except Exception as e: 89 | if self.socket: #if we got an error after opening the socket, close it; also needs to be locked 90 | self.socket.close() 91 | self.socket = None 92 | 93 | Msg.showWarn(self.plugin, None, 'Lumina connection failed', 'Connection to Lumina server failed (' + (str(e) if type(e) != ValueError else 'invalid port') + '). Please check your configuration.') 94 | 95 | 96 | # 97 | # All functions commands 98 | # 99 | 100 | def pull_all_mds(self, ctx: ProgramDB): 101 | #background in this context is in the pythread - all commands get queued into that thread 102 | Msg.info(self.plugin, "Pulling all function metadata in the background...") 103 | 104 | #just in case functions changed while we were waiting, make a copy since we rely on ordering heavily 105 | #also coz otherwise it returns a java array which is hard to use lol 106 | copy = list(ctx.getFunctionManager().getFunctions(True)) 107 | 108 | tool = self.plugin.getTool() 109 | 110 | pull = craft_pull_md(ctx, copy, tool) 111 | 112 | #TODO use Command class so we get a nicer status update panel 113 | #(if possible; we aren't using their task queue so im not sure) 114 | tool.setStatusInfo('[Lumina] Sending pull request...') 115 | 116 | msg = self.send_and_recv_rpc(RPC_TYPE.PULL_MD, **pull)[1] 117 | 118 | tool.setStatusInfo('[Lumina] Applying metadata...') 119 | 120 | if msg: 121 | it = iter(msg.results) #also results only have valid mds so its easier to model with iterator 122 | for i, found in enumerate(msg.found): 123 | if found == ResultType.RES_OK: 124 | apply_md(ctx, copy[i], tool, next(it)) 125 | log = 'Pulled ' + str(sum([d == ResultType.RES_OK for d in msg.found])) + '/' + str(len(msg.found)) + ' functions successfully.' 126 | Msg.info(self.plugin, log) 127 | tool.setStatusInfo('[Lumina] ' + log) 128 | else: 129 | #it doesnt matter if the status is always there its better than not being able to see it at all 130 | tool.setStatusInfo('[Lumina] Pull request for all functions failed.') 131 | 132 | 133 | def push_all_mds(self, ctx: ProgramDB): 134 | Msg.info(self.plugin, "Pushing all function metadata in the background...") 135 | 136 | tool = self.plugin.getTool() 137 | 138 | kwargs = craft_push_md(ctx, list(ctx.getFunctionManager().getFunctions(True)), tool) 139 | 140 | tool.setStatusInfo('[Lumina] Sending push request...') 141 | 142 | msg = self.send_and_recv_rpc(RPC_TYPE.PUSH_MD, **kwargs)[1] 143 | 144 | if msg: 145 | log = 'Pushed ' + str(sum([d == ResultType.RES_ADDED for d in msg.resultsFlags])) + '/' + str(len(msg.resultsFlags)) + ' functions successfully.' 146 | Msg.info(self.plugin, log) 147 | tool.setStatusInfo('[Lumina] ' + log) 148 | else: 149 | tool.setStatusInfo('[Lumina] Push request for all functions failed.') 150 | 151 | 152 | # 153 | # Function specific commands 154 | # 155 | 156 | def pull_function_md(self, ctx: ProgramDB, func: FunctionDB): 157 | Msg.debug(self.plugin, 'Pulling metadata for func ' + func.getName() + '...') 158 | 159 | msg = self.send_and_recv_rpc(RPC_TYPE.PULL_MD, **craft_pull_md(ctx, [func]))[1] 160 | 161 | #status info kinda nice for displaying subtle msgs to the user that's not lost in the logs 162 | #so lets do it even for the function specific commands 163 | tool = self.plugin.getTool() 164 | 165 | if msg and msg.results: 166 | apply_md(ctx, func, tool, msg.results[0]) 167 | log = 'Pulled metadata for function "' + func.getName() + '" successfully.' 168 | Msg.info(self.plugin, log) 169 | tool.setStatusInfo('[Lumina] ' + log) 170 | else: 171 | tool.setStatusInfo('[Lumina] Pull request for the function failed.') 172 | 173 | 174 | def push_function_md(self, ctx: ProgramDB, func: FunctionDB): 175 | Msg.debug(self.plugin, 'Pushing metadata for func ' + func.getName() + '...') 176 | 177 | msg = self.send_and_recv_rpc(RPC_TYPE.PUSH_MD, **craft_push_md(ctx, [func]))[1] 178 | 179 | tool = self.plugin.getTool() 180 | 181 | if msg: 182 | log = 'Pushed metadata for function "' + func.getName() + '" successfully.' 183 | Msg.info(self.plugin, log) 184 | tool.setStatusInfo('[Lumina] ' + log) 185 | else: 186 | tool.setStatusInfo('[Lumina] Push request for the function failed.') -------------------------------------------------------------------------------- /data/parsing.py: -------------------------------------------------------------------------------- 1 | from ghidra.util import Msg 2 | from ghidra.program.database.function import FunctionDB 3 | from ghidra.program.database import ProgramDB 4 | from ghidra.program.model.symbol import SourceType 5 | from ghidra.program.flatapi import FlatProgramAPI 6 | from ghidra.program.model.listing import CodeUnit, Function, ParameterImpl, LocalVariableImpl, VariableUtilities, VariableStorage 7 | from ghidra.framework.plugintool import PluginTool 8 | from ghidra.program.model.symbol import SourceType 9 | from ghidra.program.model.data import Undefined 10 | from java.util import Arrays 11 | 12 | import socket, itertools 13 | 14 | from construct import * 15 | from lumina_structs import * 16 | from lumina_structs.metadata import * 17 | 18 | from .sig.util import Sig, ARCH_MAPPING 19 | from .type import construct_type, cc_mapping 20 | 21 | # 22 | # Push Functions 23 | # 24 | 25 | def extract_md(ctx: ProgramDB, func: FunctionDB, gen: Sig) -> dict: 26 | chunks = [] 27 | 28 | #turns out func.getComment and getRepeatableComment are just plate comments and repeatable comments at the entry point address 29 | if func.getComment(): 30 | chunks.append({ 31 | 'type': MetadataType.MD_FUNC_CMT, 32 | 'data': {'text': func.getComment()}}) 33 | 34 | if func.getRepeatableComment(): 35 | chunks.append({ 36 | 'type': MetadataType.MD_FUNC_REPCMT, 37 | 'data': {'text': func.getRepeatableComment()}}) 38 | 39 | prog = FlatProgramAPI(ctx) 40 | 41 | func_start = func.getEntryPoint().getOffset() 42 | 43 | #EOL comments are always instruction comments 44 | eol = [{'offset': addr.getOffset() - func_start, 45 | 'text': prog.getEOLComment(addr)} 46 | for addr in ctx.getCodeManager().getCommentAddressIterator(CodeUnit.EOL_COMMENT, func.getBody(), True)] 47 | if eol: 48 | chunks.append({ 49 | 'type': MetadataType.MD_INSN_CMT, 50 | 'data': eol}) 51 | 52 | #repeatable comments are instruction comments, aside from the entry point one, which we need to check for that case 53 | rep = [{'offset': addr.getOffset() - func_start, 54 | 'text': prog.getRepeatableComment(addr)} 55 | for addr in ctx.getCodeManager().getCommentAddressIterator(CodeUnit.REPEATABLE_COMMENT, func.getBody(), True) 56 | if addr.getOffset() != func_start] 57 | if rep: 58 | chunks.append({ 59 | 'type': MetadataType.MD_INSN_REPCMT, 60 | 'data': rep}) 61 | 62 | 63 | #do both pre and post at the same time; pre and post comments will never be related to function comments so we are good 64 | extra = [{'offset': addr.getOffset() - func_start, 65 | 'anterior': pre if pre else '', 66 | 'posterior': post if post else ''} 67 | for addr in ctx.getCodeManager().getCommentAddressIterator(func.getBody(), True) 68 | if any([(pre:=prog.getPreComment(addr)), (post:=prog.getPostComment(addr))])] #either one of them exists then we can add; prevent short circuit evaluation 69 | if extra: 70 | chunks.append({ 71 | 'type': MetadataType.MD_EXTRA_CMT, 72 | 'data': extra}) 73 | 74 | #TODO frame info and tinfo 75 | #OPREPRS as a concept doesnt really exist in Ghidra either(??) 76 | #but might be helpful in defining data vars so parsing might be good 77 | 78 | if chunks: #only compute signature and returns something if has data 79 | data = gen.calc_func_metadata(func) 80 | if not data: 81 | return None 82 | 83 | sig, block, mask = data 84 | return { 85 | "metadata": { 86 | "func_name": func.getName(), #func name is automatically whatever it should be 87 | "func_size": len(block), 88 | "serialized_data": { 89 | "chunks": chunks}}, 90 | "signature": { 91 | "version": 1, 92 | "signature": sig}} 93 | else: 94 | return None 95 | 96 | 97 | 98 | def craft_push_md(ctx: ProgramDB, funcs: list[FunctionDB], tool: PluginTool = None) -> dict: 99 | arch = ARCH_MAPPING[ctx.getLanguage().getProcessor().toString()](ctx) #again, Ghidra only allows one arch at a time 100 | 101 | progress = "[Lumina] Extracting function metadata ({count}/" + str(len(funcs)) + " functions)" 102 | push, eas = [], [] 103 | for i, f in enumerate(funcs): 104 | md = extract_md(ctx, f, arch) 105 | if md: #only apply if extracted useful data 106 | push.append(md) 107 | eas.append(f.getEntryPoint().getOffset()) 108 | if tool: 109 | tool.setStatusInfo(progress.format(count=i)) 110 | 111 | return { 112 | "type": PushMdOpt.PUSH_OVERRIDE_IF_BETTER, #protocol 2 default 113 | "idb_filepath": ctx.getDomainFile().getProjectLocator().getProjectDir().getPath(), 114 | "input_filepath": ctx.getExecutablePath(), 115 | "input_md5": bytes.fromhex(ctx.getExecutableMD5()), #Ghidra actually has a function for this so we dont need to reread the file ourselves 116 | "hostname": socket.gethostname(), 117 | "funcInfos": push, 118 | "funcEas": eas} #seems like ida is offset by one??? 119 | 120 | 121 | # 122 | # Pull Functions 123 | # 124 | 125 | 126 | #again, ghidra support only one arch at a time so no more lists 127 | def craft_pull_md(ctx: ProgramDB, fs: list[FunctionDB], tool: PluginTool = None) -> dict: 128 | arch = ARCH_MAPPING[ctx.getLanguage().getProcessor().toString()](ctx) 129 | 130 | sigs = [] 131 | i = 0 132 | progress = "[Lumina] Calculating function signatures ({count}/" + str(len(fs)) + " functions)" 133 | for func in fs: 134 | if tool: 135 | tool.setStatusInfo(progress.format(count=i)) 136 | 137 | sig = arch.calc_func_metadata(func) 138 | if sig: 139 | sigs.append({'signature':sig[0]}) 140 | 141 | i+=1 142 | 143 | return {'flags': 1, #protocol 2 default 144 | 'types':[], 145 | 'funcInfos':sigs} 146 | 147 | 148 | #we need tool for passing DataTypeManagerService to construct_cmplx unfortunately 149 | def apply_md(ctx: ProgramDB, func: FunctionDB, tool: PluginTool, info: Container): 150 | #we don't really care about popularity atm, but it might be useful server side for sorting 151 | 152 | #IDA (at least on 7.5) hardcoded no-override flag into apply_metadata, so tinfo and frame desc effectively never gets applied even if existing data is entirely auto-generated 153 | #we won't follow that - manually clearing the data on every lumina pull is very annoying and there is undo anyway 154 | #instead we will default to resetting metadata to what lumina provides on conflict 155 | prog = FlatProgramAPI(ctx) 156 | 157 | prog.start() #start a transaction 158 | 159 | func.setName(info.metadata.func_name, SourceType.IMPORTED) 160 | #func size should be the same to be able to get the same signature, so no need to set 161 | for md in info.metadata.serialized_data.chunks: 162 | if md.type in [MetadataType.MD_INSN_CMT, MetadataType.MD_INSN_REPCMT]: 163 | for c in md.data: 164 | addr = func.getEntryPoint().add(c.offset) 165 | setComment = prog.setEOLComment if md.type == MetadataType.MD_INSN_CMT else prog.setRepeatableComment 166 | setComment(addr, c.text) 167 | elif md.type in [MetadataType.MD_FUNC_CMT, MetadataType.MD_FUNC_REPCMT]: 168 | #ghidra actually has repeatable comments, treat them separately 169 | setComment = func.setComment if md.type == MetadataType.MD_FUNC_CMT else func.setRepeatableComment 170 | setComment(md.data.text) 171 | elif md.type == MetadataType.MD_EXTRA_CMT: 172 | #Ghidra actually has anterior and posterior comments, treat them separately 173 | for c in md.data: 174 | addr = func.getEntryPoint().add(c.offset) 175 | if c.anterior: 176 | prog.setPreComment(addr, c.anterior) 177 | if c.posterior: 178 | prog.setPostComment(addr, c.posterior) 179 | elif md.type == MetadataType.MD_TYPE_INFO: 180 | #cannot reuse FunctionDefinitionDataType unfortunately since we have to directly set in the current func 181 | #md.data.tinfo should always be BT_FUNC 182 | params = [] 183 | for param in md.data.tinfo.data.params: 184 | name = md.data.names.pop(0) if md.data.names else "" 185 | params.append(ParameterImpl(name, construct_type(tool, param.type, md.data.names), ctx)) 186 | params = Arrays.asList(params) 187 | 188 | func.replaceParameters(params, Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, True, SourceType.IMPORTED) 189 | func.setReturnType(construct_type(tool, md.data.tinfo.data.rettype, md.data.names), SourceType.IMPORTED) 190 | if md.data.tinfo.data.cc.convention in cc_mapping: 191 | cc = cc_mapping[md.data.tinfo.data.cc.convention].getDeclarationName() 192 | func.setCallingConvention(cc if cc != '__cdecl' else '__stdcall') #somehow cdecl doesnt exist (at least on ghidra 10.1.4) and is mapped to __stdcall according to the DWARF parser 193 | 194 | #ghidra doesnt have near/far calls(?) 195 | #TODO custom calling conventions since ghidra actually supports reordering with argloc (VariableStorage) 196 | #i dont think spoiled registers can be defined though 197 | elif md.type == MetadataType.MD_FRAME_DESC: 198 | #ghidra doesnt have the variable definition section for comments storage, so discard for now; also oprepr as a concept doesnt exist in ghidra either 199 | for var in md.data.vars: 200 | #sometimes type == None if its default so just treat it as undefined type of nbytes 201 | print(var.name, var.type.tinfo if var.type else None) 202 | t = construct_type(tool, var.type.tinfo, var.type.names, var.nbytes) if var.type else Undefined.getUndefinedDataType(var.nbytes) 203 | name = var.name if var.name else f'lumina_{hex(var.off)}' 204 | 205 | #TODO check if this still matches in architectures with stack growing up (also are we sure frregs is for this purpose) 206 | #ghidra also uses rbp instead of rsp (offset goes down instead of up) 207 | v = LocalVariableImpl(name, t, -(md.data.frsize - var.off + md.data.frregs), ctx) 208 | 209 | #TODO argloc 210 | VariableUtilities.checkVariableConflict(func, v, v.getVariableStorage(), True) 211 | func.addLocalVariable(v, SourceType.IMPORTED) 212 | else: 213 | Msg.debug("Lumina", 'Unimplemented metadata type ' + str(md.type) + ', skipping for now...') 214 | 215 | prog.end(True) #end the transaction 216 | --------------------------------------------------------------------------------