├── .devcontainer ├── dep │ └── requirements.txt └── devcontainer.json ├── screenshots ├── viewer-UI.png ├── interactive.gif ├── default-roman.png ├── selected-function.png └── fully-defined-main.png ├── tests ├── bin │ └── romannumeral.out ├── idb │ ├── romannumeral.i64 │ └── demo-control-flow-flatten.elf64.i64 └── src │ └── romannumeral.cpp ├── .gitignore ├── ida2llvm ├── __init__.py ├── _utils.py ├── type.py ├── address.py ├── function.py └── insn.py ├── run.sh ├── docker_entrypoint.py ├── Dockerfile ├── README.md └── ida2llvm_plugin.py /.devcontainer/dep/requirements.txt: -------------------------------------------------------------------------------- 1 | llvmlite==0.39.1 2 | PyQt5==5.15.10 -------------------------------------------------------------------------------- /screenshots/viewer-UI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/screenshots/viewer-UI.png -------------------------------------------------------------------------------- /tests/bin/romannumeral.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/tests/bin/romannumeral.out -------------------------------------------------------------------------------- /tests/idb/romannumeral.i64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/tests/idb/romannumeral.i64 -------------------------------------------------------------------------------- /screenshots/interactive.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/screenshots/interactive.gif -------------------------------------------------------------------------------- /screenshots/default-roman.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/screenshots/default-roman.png -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "IDA2LLVM", 3 | "build": { 4 | "dockerfile": "../Dockerfile" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /screenshots/selected-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/screenshots/selected-function.png -------------------------------------------------------------------------------- /screenshots/fully-defined-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/screenshots/fully-defined-main.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .pytest_cache/ 3 | *.tar 4 | *.id0 5 | *.id1 6 | *.id2 7 | *.nam 8 | *.til 9 | tests/ll 10 | tests/log -------------------------------------------------------------------------------- /tests/idb/demo-control-flow-flatten.elf64.i64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loyaltypollution/ida2llvm/HEAD/tests/idb/demo-control-flow-flatten.elf64.i64 -------------------------------------------------------------------------------- /ida2llvm/__init__.py: -------------------------------------------------------------------------------- 1 | from . import _utils 2 | from . import address 3 | from . import function 4 | from . import insn 5 | from . 
#!/bin/bash

# Present the databases found in ./tests/idb as a numbered menu and lift the
# selected one with headless IDA (idat64.exe running under Wine).

# Collect the candidates with a glob instead of word-splitting `ls` output, so
# that file names containing whitespace stay in one piece.
#   dotglob  - include hidden files, like the previous `ls -A`
#   nullglob - an empty directory yields an empty menu, not a literal pattern
shopt -s nullglob dotglob
idbs=(./tests/idb/*)
shopt -u nullglob dotglob

echo "Select an valid idb:"

# Use the select statement to present the user with a numbered list of files;
# "${idbs[@]##*/}" strips the directory prefix from every entry.
select filename in "${idbs[@]##*/}"; do
    # select leaves $filename empty when the reply is not one of the numbers
    if [[ -n "$filename" ]]; then
        # Make directory for script output
        mkdir -p ./tests/ll ./tests/log

        wine ~/.wine/ida/idat64.exe -A -S"./docker_entrypoint.py" "./tests/idb/$filename" -t 2>/dev/null
    else
        echo "Invalid selection"
    fi
    # one prompt only: leave the menu after the first answer
    break
done

# Display results
echo 'results written to ./tests/ll'
def dedereference(arg: ir.Value) -> ir.Value:
    """
    Recover the memory address behind a value ("de-dereference" it).

    A memory address is dereferenced when the memory at that address is
    loaded; in LLVM this is a load instruction. De-dereferencing goes the
    other way and hands back the address the load reads from.
    - this is needed as IDA microcode treats all LVARS as registers
    - whereas during lifting all LVARS are treated as stack variables

    :param arg: value to de-dereference
    :type arg: ir.Value
    :raises NotImplementedError: arg is neither a LoadInstr nor pointer-typed
    :return: pointer operand of the load, or arg itself if it already is a pointer
    :rtype: ir.Value
    """
    # a load's first operand is the pointer it reads through
    if isinstance(arg, ir.LoadInstr):
        return arg.operands[0]

    # already an address: nothing to undo
    if isinstance(arg.type, ir.PointerType):
        return arg

    raise NotImplementedError(f"not implemented: get reference for object {arg} of type {arg.type}")
--no-install-recommends \ 30 | winehq-stable \ 31 | xvfb && \ 32 | rm -rf /var/lib/apt/lists/* 33 | 34 | # Prevents the popup to install mono 35 | # https://superuser.com/questions/948508/how-to-disable-gecko-package-popup-for-wine 36 | ENV WINEDLLOVERRIDES="mscoree,mshtml=" 37 | 38 | # Ensure that registry change is flushed from registry onto disk 39 | # https://serverfault.com/questions/1082578/wine-in-docker-reg-add-only-keeps-effects-temporarily 40 | RUN wine reg add "HKEY_CURRENT_USER\Software\Wine" /v Version /t REG_SZ /d win10 /f && \ 41 | while [ ! -f ~/.wine/user.reg ]; do sleep 1; done 42 | 43 | # Install python 44 | # Note that Python 3.10 requires wine to emulate Windows 10 45 | ARG PYTHON_URL=https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe 46 | ARG PYTHON_DIR="C:\\Program Files\\Python310" 47 | RUN Xvfb :0 -screen 0 1024x768x16 & \ 48 | wget --quiet --no-check-certificate "${PYTHON_URL}" --output-document \ 49 | python_installer.exe && \ 50 | DISPLAY=:0 wine python_installer.exe \ 51 | /quiet \ 52 | InstallAllUsers=1 \ 53 | PrependPath=1 && \ 54 | rm python_installer.exe 55 | 56 | # Download python dependencies on Wine-Python 57 | COPY .devcontainer/dep/requirements.txt ./requirements.txt 58 | RUN WINEPATH="${PYTHON_DIR}" wine python -m pip install -r requirements.txt && \ 59 | rm -rf requirements.txt 60 | 61 | # Copy IDA over 62 | ADD .devcontainer/dep/ida.tar ~/.wine/ida 63 | 64 | # Accept IDA EUA and set Python's version via registry 65 | RUN wine reg add "HKEY_CURRENT_USER\Software\Hex-Rays\IDA" /v Python3TargetDLL /t REG_SZ /d "${PYTHON_DIR}/python3.dll" /f && \ 66 | wine reg add "HKEY_CURRENT_USER\Software\Hex-Rays\IDA" /v "License Think-Cell Operations GmbH" /t REG_DWORD /d 1 /f && \ 67 | while inotifywait -e modify ~/.wine/user.reg; do sleep 1; done 68 | 69 | # Install LLVM using pre-built apt.llvm.org packages 70 | RUN wget -O- https://apt.llvm.org/llvm-snapshot.gpg.key | gpg --dearmor | sudo tee /usr/share/keyrings/llvm.gpg \ 
71 | && echo """deb [signed-by=/usr/share/keyrings/llvm.gpg] http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-16 main \ 72 | deb-src http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-16 main""" >> /etc/apt/sources.list 73 | 74 | RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 75 | && apt-get -y install --no-install-recommends clang-16 libclang-16-dev llvm-16-dev graphviz \ 76 | && apt-get -y autoremove \ 77 | && rm -rf /var/lib/apt/lists/* 78 | 79 | RUN mkdir /home/bin2llvm 80 | WORKDIR /home/bin2llvm 81 | 82 | COPY ida2llvm /home/bin2llvm/ida2llvm/ 83 | COPY run.sh /home/bin2llvm/run.sh 84 | COPY docker_entrypoint.py /home/bin2llvm/docker_entrypoint.py 85 | ENTRYPOINT ["/home/bin2llvm/run.sh"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IDA2LLVM - Dynamic Binary Lifting IDA code to LLVM IR 2 | 3 | Because I was curious, "can Hexrays decompilation be hijacked for LLVM lifting"? 4 | 5 | 6 | ## Features 7 | 8 | 1. Lifts all IDA-decompilable binaries to LLVM bitcode, including executables and shared libraries for each platform. 9 | 2. **guarantees CFG preservation** during lifting 10 | 3. enable **interactive lifting**, for reverse-engineers most familiar with state-of-the-art IDA 11 | 12 | ## Dependencies 13 | | Name | Version | 14 | | ---- | ------- | 15 | | [Python](https://www.python.org/) | 3.10* | 16 | | [llvmlite](https://pypi.org/project/llvmlite/) | 0.39.1* | 17 | | [headless-ida](https://pypi.org/project/headless-ida/)** | 0.5.2 | 18 | | [pytest](https://pypi.org/project/pytest/)** | 7.4.3 | 19 | | [IDA Pro](https://www.hex-rays.com/products/ida) | 7.7+ | 20 | 21 | *llvmlite 0.39.1 did not have wheels for Python 3.11+ 22 | **only needed for unittests 23 | 24 | ## Using the lifter 25 | 26 | ### Run as IDA Plugin 27 | 28 | IDA2LLVM will load for any architecture with a Hex-Rays decompiler present. 
Use the hot-key (Ctrl+Alt+L) or Edit/IDA2LLVM/Lifting Viewer to open the Lifting Viewer. 29 | 30 | ![Lifting Viewer UI](screenshots/viewer-UI.png) 31 | 32 | Lifting is performed at IDA's **function** level (assuming Microcode can be emitted). The viewer will always synchronize against the user's cursor in the disassembly window, indicating whether the function under the cursor can be lifted. 33 | 34 | Functions are added **recursively**, together with the functions they call. If `main` calls the function `f` and `main` is defined, both `main` and `f` will be defined. 35 | 36 | ![Function selection](screenshots/selected-function.png) 37 | 38 | Having added a function, we can select it in the panel. Here we can: 39 | 1. mark function as declare-only 40 | 2. redefine the function 41 | - lifting is performed on demand. 42 | - if the decompiler output has changed, we should redefine the function. 43 | 3. delete function, with `Del` key 44 | 45 | An example of a fully defined main function looks like this: 46 | 47 | ![Fully defined main](screenshots/fully-defined-main.png) 48 | 49 | ### Interactive lifting 50 | 51 | > More often than not, lifting will not work out of the box. A key feature of IDA2LLVM is interactive lifting. 52 | 53 | Let's look at `tests/bin/romannumeral.out`. Variable names have been renamed appropriately: 54 | 55 | ![Incorrect stack variables](screenshots/default-roman.png) 56 | 57 | Pay attention to the `arr_eleX` variables. 58 | - The Hex-Rays decompiler has displayed these as individual stack variables, 59 | - however, the line `*(&arr_ele0 + i)` betrays the fact that it is an array of integers `{1, 4, 123, 5123...}` of size 11. 60 | 61 | Our lifted output will be faithful to the Hex-Rays decompiler. Each integer variable will be an independent stack variable, and the line `*(&arr_ele0 + i)` will result in a memory access error. 62 | 63 | ![Interactive change](screenshots/interactive.gif) 64 | 65 | We can fix this issue by appropriately defining the types in the decompiler, and redefining the function.
The finished result has been saved in `tests/idb/romannumeral.i64`. 66 | 67 | ### Run in Docker 68 | 69 | > This works mostly as a convenience tool. We sacrifice **the entire capability for a reverse engineer to interactively lift**, hence accuracy is severely diminished. 70 | 71 | This requires an IDA Pro Windows installation. 72 | 73 | Our Dockerfile runs Windows IDA Pro in a Linux container, emulating it in Wine. 74 | 75 | #### Step 1: Clone the repository 76 | 77 | ```pwsh 78 | git clone https://github.com/loyaltypollution/ida2llvm 79 | ``` 80 | 81 | #### Step 2: Add `ida.tar` to `.devcontainer/dep` 82 | 83 | Create a tar archive of the entire IDA Pro folder: 84 | ```pwsh 85 | tar cvf ida.tar "$(dirname "$(which ida64)")" 86 | ``` 87 | 88 | #### Step 3: Build & Run Dockerfile 89 | 90 | Place the idb in `tests/idb` (the `tests` directory is mounted into the container below), then build and run: 91 | 92 | ```pwsh 93 | docker build . -t bin2llvm 94 | docker run --rm -it -v .\tests:/home/bin2llvm/tests bin2llvm 95 | ``` 96 | 97 | ## Linking Notes 98 | 99 | Suppose the user lifted all functions in the idb. This potentially includes symbols from the C standard library, such as `_start`. 100 | 101 | Naively compiling with `clang` will likely result in link issues. Common issues include: 102 | - duplicate symbols 103 | - undefined symbols 104 | 105 | In general, link issues are not our concern. Our lifter has already done its work and it's up to the user to fix linking issues *(good luck)*.
def lift_tif(tif: ida_typeinf.tinfo_t) -> ir.Type:
    """
    Lifts the given IDA type to corresponding LLVM type.
    If IDA type is an array/struct/tif, type lifting is performed recursively.

    The caller's `tif` is left unmodified.

    :param tif: the type to lift, in IDA
    :type tif: ida_typeinf.tinfo_t
    :raises NotImplementedError: variadic structs, or an array of unknown
        length that cannot be converted to a pointer
    :return: lifted LLVM type
    :rtype: ir.Type
    """
    if tif.is_func():
        # a function type is composed of:
        ## return type, argument types, variadic
        llvm_rettype = lift_tif(tif.get_rettype())
        llvm_args = [lift_tif(tif.get_nth_arg(i)) for i in range(tif.get_nargs())]
        return ir.FunctionType(llvm_rettype, llvm_args, var_arg=tif.is_vararg_cc())

    if tif.is_ptr():
        child_tif = tif.get_ptrarr_object()

        # clang compiles C void * to LLVM IR i8*
        if child_tif.is_void():
            return ir.IntType(8).as_pointer()

        return lift_tif(child_tif).as_pointer()

    if tif.is_array():
        element = lift_tif(tif.get_ptrarr_object())

        count = tif.get_array_nelems()
        if count == 0:
            # an array with an indeterminate number of elements = type pointer.
            # Convert a copy: convert_array_to_ptr() rewrites the tinfo_t in
            # place and the caller's type object must stay an array.
            ptr_tif = ida_typeinf.tinfo_t(tif)
            if not ptr_tif.convert_array_to_ptr():
                # without this guard a failed conversion would recurse forever
                raise NotImplementedError(f"cannot convert array of unknown length to pointer: {tif}")
            return lift_tif(ptr_tif)

        return ir.ArrayType(element, count)

    if tif.is_void():
        return ir.VoidType()

    if tif.is_udt():
        udt_data = ida_typeinf.udt_type_data_t()
        tif.get_udt_details(udt_data)
        type_name = tif.get_type_name()
        context = ir.context.global_context

        if type_name not in context.identified_types:
            # reject before registering anything: otherwise the name would stay
            # registered as an opaque struct and the next call would silently
            # return it instead of failing again
            if tif.is_varstruct():
                raise NotImplementedError(f"variadic structs not implemented: {tif}")

            # the struct is registered (body still unset) before its members
            # are lifted, so a member that refers back to this struct by name
            # takes the "already known" path instead of recursing endlessly
            struct_t = context.get_identified_type(type_name)
            element_types = [lift_tif(udt_data.at(idx).type) for idx in range(udt_data.size())]
            struct_t.set_body(*element_types)
        return context.get_identified_type(type_name)

    if tif.is_bool():
        return ir.IntType(1)

    if tif.is_float():
        return ir.FloatType()

    if tif.is_double():
        return ir.DoubleType()

    # everything else is lifted as an integer of the same byte size
    byte_size = tif.get_size()
    # naive assumption that system is either 32 bit or 64 bit
    bitness = 64 if ida_idaapi.get_inf_structure().is_64bit() else 32
    # an all-ones size is presumably IDA's "size unknown" marker (TODO confirm);
    # such types are lifted as a single byte
    if byte_size == (1 << bitness) - 1:
        byte_size = 1
    return ir.IntType(byte_size * 8)


def typecast(src: ir.Value, dst_type: ir.Type, builder: ir.IRBuilder, signed: bool = False) -> ir.Value:
    """
    Given some `src`, convert it to type `dst_type`.
    Instructions are emitted into `builder`.

    :param src: value to convert
    :type src: ir.Value
    :param dst_type: destination type
    :type dst_type: ir.Type
    :param builder: builds instructions
    :type builder: ir.IRBuilder
    :param signed: whether to preserve signedness (sign-extend integers and
        use signed int<->float conversions), defaults to False
    :type signed: bool, optional
    :raises NotImplementedError: type conversion not supported
    :return: value after typecast, or `src` itself if it already has `dst_type`
    :rtype: ir.Value
    """
    src_type = src.type
    if src_type == dst_type:
        return src

    # the floating point types lift_tif can produce
    fp_types = (ir.FloatType, ir.DoubleType)

    src_ptr = isinstance(src_type, ir.PointerType)
    dst_ptr = isinstance(dst_type, ir.PointerType)
    src_int = isinstance(src_type, ir.IntType)
    dst_int = isinstance(dst_type, ir.IntType)
    src_fp = isinstance(src_type, fp_types)
    dst_fp = isinstance(dst_type, fp_types)

    # pointer <-> pointer / integer
    if src_ptr and dst_ptr:
        return builder.bitcast(src, dst_type)
    if src_ptr and dst_int:
        return builder.ptrtoint(src, dst_type)
    if src_int and dst_ptr:
        return builder.inttoptr(src, dst_type)

    # integer <-> floating point, honouring the requested signedness
    if src_int and dst_fp:
        return builder.sitofp(src, dst_type) if signed else builder.uitofp(src, dst_type)
    if src_fp and dst_int:
        return builder.fptosi(src, dst_type) if signed else builder.fptoui(src, dst_type)

    # float <-> double: the types differ (checked above), so exactly one of
    # them is the double
    if src_fp and dst_fp:
        if isinstance(dst_type, ir.DoubleType):
            return builder.fpext(src, dst_type)
        return builder.fptrunc(src, dst_type)

    # integer widening / narrowing
    if src_int and dst_int:
        if src_type.width < dst_type.width:
            return builder.sext(src, dst_type) if signed else builder.zext(src, dst_type)
        if src_type.width > dst_type.width:
            return builder.trunc(src, dst_type)

    raise NotImplementedError(f"cannot convert {src} of type {src.type} into {dst_type}")
-------------------------------------------------------------------------------- /ida2llvm/address.py: -------------------------------------------------------------------------------- 1 | import ida_bytes 2 | import ida_hexrays 3 | import ida_segment 4 | import ida_typeinf 5 | import ida_nalt 6 | import ida_name 7 | import ida_funcs 8 | import ida_idaapi 9 | import logging 10 | import struct 11 | import llvmlite.binding as llvm 12 | 13 | from llvmlite import ir 14 | import ida2llvm 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | def lift_type_from_address(ea: int): 19 | if ida_segment.segtype(ea) & ida_segment.SEG_XTRN: 20 | # let's assume its a function that returns ONE register and takes in variadic arguments 21 | ida_func_details = ida_typeinf.func_type_data_t() 22 | void = ida_typeinf.tinfo_t() 23 | void.create_simple_type(ida_typeinf.BTF_VOID) 24 | ida_func_details.rettype = void 25 | ida_func_details.cc = ida_typeinf.CM_CC_ELLIPSIS | ida_typeinf.CC_CDECL_OK 26 | 27 | function_tinfo = ida_typeinf.tinfo_t() 28 | function_tinfo.create_func(ida_func_details) 29 | return function_tinfo 30 | 31 | if (func := ida_funcs.get_func(ea)) is not None: 32 | ida_hf = ida_hexrays.hexrays_failure_t() 33 | tif = ida_hexrays.decompile(func, ida_hf).type 34 | ida_nalt.set_tinfo(ea, tif) 35 | 36 | tif = ida_typeinf.tinfo_t() 37 | ida_nalt.get_tinfo(tif, ea) 38 | if tif.empty(): 39 | raise NotImplementedError(f"not implemented: type inference for object at {hex(ea)}") 40 | return tif 41 | 42 | def lift_from_address(module: ir.Module, ea: int, typ: ir.Type = None): 43 | if typ is None: 44 | tif = lift_type_from_address(ea) 45 | typ = ida2llvm.type.lift_tif(tif) 46 | return _lift_from_address(module, ea, typ) 47 | 48 | def _lift_from_address(module: ir.Module, ea: int, typ: ir.Type): 49 | match typ: 50 | case typ if isinstance(typ, ir.FunctionType): 51 | ida_funcs.add_func(ea, ida_idaapi.BADADDR) 52 | func = ida_funcs.get_func(ea) 53 | func_name = ida_name.get_name(ea) 54 | 
res = module.get_global(func_name) 55 | res.lvars = dict() 56 | 57 | ida_hf = ida_hexrays.hexrays_failure_t() 58 | ida_mbr = ida_hexrays.mba_ranges_t() 59 | ida_mbr.ranges.push_back(func) 60 | 61 | lvars = ida_hexrays.decompile(func, ida_hf).lvars 62 | mba = ida_hexrays.gen_microcode(ida_mbr, ida_hf, None, 63 | ida_hexrays.DECOMP_ALL_BLKS, 64 | ida_hexrays.MMAT_LVARS) 65 | 66 | for index in range(mba.qty): 67 | res.append_basic_block(name = f"@{index}") 68 | 69 | ida_func_details = ida_typeinf.func_type_data_t() 70 | tif = lift_type_from_address(ea) 71 | tif.get_func_details(ida_func_details) 72 | names = [ida_func_details.at(i).name for i in range(ida_func_details.size())] 73 | 74 | builder = ir.IRBuilder(res.entry_basic_block) 75 | 76 | with builder.goto_entry_block(): 77 | # declare function arguments as stack variables 78 | for arg, arg_t, arg_n in zip(res.args, typ.args, names): 79 | res.lvars[arg_n] = builder.alloca(arg_t, name = arg_n) 80 | 81 | # declare function results as stack variable 82 | if not isinstance(typ.return_type, ir.VoidType): 83 | res.lvars["result"] = builder.alloca(typ.return_type, name = "result") 84 | 85 | # if function is variadic, declare va_start intrinsic 86 | if tif.is_vararg_cc() and typ.var_arg: 87 | ptr = builder.alloca(ir.IntType(8).as_pointer(), name = "ArgList") 88 | res.lvars["ArgList"] = ptr 89 | va_start = module.declare_intrinsic('llvm.va_start', fnty=ir.FunctionType(ir.VoidType(), [ir.IntType(8).as_pointer()])) 90 | ptr = builder.load(ptr) 91 | builder.call(va_start, (ptr, )) 92 | 93 | # store stack variables 94 | for arg, arg_n in zip(res.args, names): 95 | arg = ida2llvm.type.typecast(arg, res.lvars[arg_n].type.pointee, builder) 96 | builder.store(arg, res.lvars[arg_n]) 97 | 98 | with builder.goto_block(res.blocks[-1]): 99 | if isinstance(typ.return_type, ir.VoidType): 100 | builder.ret_void() 101 | else: 102 | builder.ret(builder.load(res.lvars["result"])) 103 | 104 | # lift each bblk in cfg 105 | for index, blk in 
enumerate(res.blocks): 106 | ida_blk = mba.get_mblock(index) 107 | 108 | ida_insn = ida_blk.head 109 | while ida_insn is not None: 110 | lifted_insn = ida2llvm.insn.lift_insn(ida_insn, blk, builder) 111 | logger.debug(f"=> {lifted_insn}") 112 | ida_insn = ida_insn.next 113 | 114 | if not blk.is_terminated and index + 1 < len(res.blocks): 115 | with builder.goto_block(blk): 116 | builder.branch(res.blocks[index + 1]) 117 | 118 | # if function is variadic, declare va_end intrinsic 119 | if tif.is_vararg_cc() and typ.var_arg: 120 | ptr = res.lvars["ArgList"] 121 | va_end = module.declare_intrinsic('llvm.va_end', fnty=ir.FunctionType(ir.VoidType(), [ir.IntType(8).as_pointer()])) 122 | with builder.goto_block(res.blocks[-1]): 123 | ptr = builder.load(ptr) 124 | builder.call(va_end, (ptr, )) 125 | 126 | return res 127 | 128 | # # define function return type 129 | # define_rettype() 130 | case typ if isinstance(typ, ir.IntType): 131 | # should probably check endianness 132 | r = ida_bytes.get_bytes(ea, typ.width // 8) 133 | return typ(int.from_bytes(r, "little")) 134 | case typ if isinstance(typ, ir.FloatType): 135 | # should probably check endianness 136 | # floats are not guaranteed to be 8 bytes long 137 | return typ(struct.unpack('f', ida_bytes.get_bytes(ea, 8))) 138 | case typ if isinstance(typ, ir.DoubleType): 139 | # should probably check endianness 140 | # doubles are not guaranteed to be 8 bytes long 141 | return typ(struct.unpack('d', ida_bytes.get_bytes(ea, 8))) 142 | case typ if isinstance(typ, ir.PointerType): 143 | # check if ea is valid 144 | if (ida_segment.segtype(ea) & ida_segment.SEG_BSS == ida_segment.SEG_BSS 145 | or isinstance(typ.pointee, ir.FunctionType)): 146 | val = ir.Constant(typ, None) 147 | else: 148 | assumedType = ir.ArrayType(typ.pointee, 1000) # we assume the pointer points to maximally 1000 elements 149 | val = lift_from_address(module, ea, assumedType) 150 | return val 151 | case typ if isinstance(typ, ir.ArrayType): 152 | td = 
llvm.create_target_data("e") 153 | subSize = typ.element.get_abi_size(td) 154 | 155 | return ir.Constant.literal_array([ lift_from_address(module, sub_ea, typ.element) 156 | for sub_ea in range(ea, ea + subSize * typ.count, subSize) 157 | ]) 158 | case typ if isinstance(typ, ir.LiteralStructType) or isinstance(typ, ir.IdentifiedStructType): 159 | td = llvm.create_target_data("e") 160 | sub_ea = ea 161 | structEles = [] 162 | for el in typ.elements: 163 | if isinstance(el, ir.PointerType): 164 | address_ea = ida_bytes.get_dword(sub_ea) 165 | if ida_idaapi.get_inf_structure().is_64bit(): 166 | address_ea = ida_bytes.get_qword(sub_ea) 167 | g_cmt = lift_from_address(module, address_ea, el) 168 | 169 | val = ir.GlobalVariable(module, g_cmt.type, f"{typ}_{hex(ea)}") 170 | val.initializer = g_cmt 171 | structEle = val.gep((ir.IntType(64)(0),)) 172 | else: 173 | structEle = lift_from_address(module, sub_ea, el) 174 | structEles.append(structEle) 175 | subSize = el.get_abi_size(td) 176 | sub_ea += subSize 177 | 178 | return ir.Constant.literal_struct(structEles) 179 | raise NotImplementedError(f"object at {hex(ea)} is of unsupported type {typ}") -------------------------------------------------------------------------------- /ida2llvm_plugin.py: -------------------------------------------------------------------------------- 1 | import ida_idaapi 2 | import ida_kernwin 3 | import idautils 4 | import ida_name 5 | import ida_funcs 6 | import ida_segment 7 | 8 | from PyQt5 import QtCore, QtWidgets, QtGui 9 | 10 | def PLUGIN_ENTRY(): 11 | return IDA2LLVMPlugin() 12 | 13 | class IDA2LLVMPlugin(ida_idaapi.plugin_t): 14 | 15 | flags = ida_idaapi.PLUGIN_PROC | ida_idaapi.PLUGIN_HIDE 16 | comment = "Microcode Lifter to LLVM" 17 | help = "" 18 | wanted_name = "IDA2LLVM" 19 | wanted_hotkey = "" 20 | 21 | def init(self): 22 | 23 | action = { 24 | 'id': 'ida2llvm:view_lifting', 25 | 'name': 'Lifting Viewer', 26 | 'hotkey': 'Ctrl-Alt-L', 27 | 'comment': 'UI for which function to lift', 
class IDA2LLVMController(ida_kernwin.action_handler_t):
    """
    The control component of BinaryLift Explorer.

    Owns the LLVM module being built (`self.m`) and three per-address maps:

    * `cache`     -- address -> textual LLVM IR of the lifted function
    * `namecache` -- address -> last known IDA name of the function
    * `config`    -- address -> True if the function is kept declare-only
    """
    def __init__(self):
        from llvmlite import ir
        ida_kernwin.action_handler_t.__init__(self)
        self.current_address = None

        # Create all state eagerly. Previously these attributes only came
        # into existence when the UI hooks below fired, so any method called
        # before `database_inited` / `screen_ea_changed` raised AttributeError.
        self.screen_ea = ida_idaapi.BADADDR
        self.cache = dict()
        self.namecache = dict()
        self.config = dict()
        self.m = ir.Module()

        class AddressHook(ida_kernwin.UI_Hooks):
            def __init__(self, controller):
                ida_kernwin.UI_Hooks.__init__(self)
                self.controller = controller
            def database_inited(self, is_new_database, idc_script):
                # a new database invalidates everything lifted so far
                self.controller.cache = dict()
                self.controller.namecache = dict()
                self.controller.config = dict()
                self.controller.m = ir.Module()
            def screen_ea_changed(self, ea, prev_ea):
                self.controller.screen_ea = ea
                self.controller.view.refresh()

        self._hook = AddressHook(self)
        self._hook.hook()
        self.view = IDA2LLVMView(self)

    def activate(self, ctx):
        """Action callback: show the viewer form."""
        self.view.Show()
        return 1

    def update(self, ctx):
        """Action callback: the action is always available."""
        return ida_kernwin.AST_ENABLE_ALWAYS

    def isScreenEaInvalid(self):
        """Return True when the cursor address is not inside any function."""
        return ida_funcs.get_func(self.screen_ea) is None

    def resolveName(self, current_address):
        """
        Return IDA's current name for `current_address`, refreshing
        `namecache` (and logging) when it differs from the cached one.
        """
        func_name = ida_name.get_name(current_address)
        if func_name != self.namecache.get(current_address, None):
            print("NAME NOT SYNCED, PROBABLY CHANGED")
            self.namecache[current_address] = func_name
        return self.namecache[current_address]

    def declareCurrentFunction(self, isDeclare):
        """
        Slot for the "declare-only" checkbox: store the new setting for the
        selected function and re-lift it.
        """
        if self.current_address is None:
            # checkbox toggled with no function selected: nothing to re-lift
            return
        self.resolveName(self.current_address)  # keeps namecache in sync
        self.config[self.current_address] = bool(isDeclare)
        self.cache[self.current_address] = self.getLiftedText()
        self.view.refresh()

    def updateFunctionSelected(self, selectName):
        """
        Slot for list selection changes. `selectName` is the item text in
        the form "<hex address>: <name>"; an empty string means no selection.
        """
        if selectName == "":
            return
        current_address, _ = selectName.split(":", maxsplit=1)
        self.current_address = int(current_address, 16)
        self.resolveName(self.current_address)  # keeps namecache in sync
        ida_kernwin.jumpto(self.current_address)
        ida_kernwin.activate_widget(self.view._twidget, True)

    def insertAllFunctions(self):
        """
        Lift every IDA-defined function except library functions, functions
        in an extern segment, and functions whose name starts with "_".
        """
        for f_ea in idautils.Functions():
            name = ida_funcs.get_func_name(f_ea)
            # Segment types are enumerated values, not bit flags, so the
            # extern test must be an equality test: `& SEG_XTRN` would also
            # match every other odd-valued segment type.
            if (ida_funcs.get_func(f_ea).flags & ida_funcs.FUNC_LIB
                or ida_segment.segtype(f_ea) == ida_segment.SEG_XTRN
                or name.startswith("_")):
                continue
            self.insertFunctionAtEa(f_ea)

    def insertFunctionAtScreenEa(self):
        """Lift (or re-lift) the function containing the cursor and select it."""
        if self.isScreenEaInvalid():
            return
        self.current_address = ida_funcs.get_func(self.screen_ea).start_ea
        self.insertFunctionAtEa(self.current_address)
        self.view.refresh()

    def insertFunctionAtEa(self, ea):
        """
        Lift the function at `ea` without changing the current selection.

        `getLiftedText` works on `self.current_address`, so it is swapped
        temporarily and restored even if lifting raises.
        """
        temp_ea = self.current_address
        self.current_address = ea
        try:
            self.resolveName(self.current_address)  # keeps namecache in sync

            if self.current_address not in self.config:
                self.config[self.current_address] = False

            self.cache[self.current_address] = self.getLiftedText()
        finally:
            self.current_address = temp_ea

    def removeFromModule(self, func_name):
        """
        Drop the global named `func_name` from the module, if present.

        This is done by rebuilding: every other global is re-parented into a
        fresh `ir.Module`, which then replaces `self.m`.
        """
        from llvmlite import ir
        if func_name not in self.m.globals:
            return
        _m = ir.Module()
        for name, gv in self.m.globals.items():
            if name != func_name:
                gv.parent = _m
                _m.add_global(gv)
        self.m = _m

    def getLiftedText(self):
        """
        Re-lift the function at `self.current_address` and return its IR text.

        Lifting may pull further functions into the module, so afterwards
        the caches and the view's function list are synchronised with every
        function currently in the module.
        """
        import ida2llvm

        func_name = self.resolveName(self.current_address)
        isDeclare = self.config[self.current_address]
        self.removeFromModule(func_name)
        llvm_f = ida2llvm.function.lift_function(self.m, func_name, isDeclare)

        for f in self.m.functions:
            f_name = f.name
            # NOTE(review): names unknown to IDA resolve to BADADDR here, so
            # such functions all share one cache slot -- confirm this is intended.
            f_ea = ida_name.get_name_ea(ida_idaapi.BADADDR, f_name)
            self.namecache[f_ea] = f_name
            self.config[f_ea] = f.is_declaration
            self.cache[f_ea] = str(f)

            name = f"{hex(f_ea)}: {f_name}"
            if not self.view.function_list.findItems(name, QtCore.Qt.MatchExactly):
                self.view.function_list.addItem(name)

        return str(llvm_f)

    def save_to_file(self):
        """Ask for a destination and write the whole module as textual IR."""
        filename, _ = QtWidgets.QFileDialog.getSaveFileName(None, 'Save Lifted LLVM IR', '', 'LLVM IR (*.ll)')
        if filename:
            with open(filename, 'w') as f:
                f.write(str(self.m))
class IDA2LLVMView(ida_kernwin.PluginForm):
    """
    The view component of BinaryLift Explorer.

    A dockable form with a read-only style code pane, the list of lifted
    functions, a per-function settings box and three action buttons.
    """
    def __init__(self, controller):
        ida_kernwin.PluginForm.__init__(self)
        self.controller = controller
        # widgets exist only after OnCreate ran; refresh() is a no-op before
        self.created = False

    def Show(self):
        """Open the form as a persistent tab."""
        return ida_kernwin.PluginForm.Show(
            self, "IDA2LLVM Viewer",
            options=(ida_kernwin.PluginForm.WOPN_PERSIST |
                     ida_kernwin.PluginForm.WCLS_SAVE |
                     ida_kernwin.PluginForm.WOPN_MENU |
                     ida_kernwin.PluginForm.WOPN_RESTORE |
                     ida_kernwin.PluginForm.WOPN_TAB))

    def refresh(self):
        """Synchronise every widget with the controller's current state."""
        if not self.created:
            return
        controller = self.controller
        self.lifting_settings.setDisabled(self.function_list.currentRow() == -1)

        screen_invalid = controller.isScreenEaInvalid()
        self.curr_ea_button.setDisabled(screen_invalid)
        if not screen_invalid:
            start_ea = ida_funcs.get_func(controller.screen_ea).start_ea
            verb = 'Redefine' if start_ea in controller.config else 'Add'
            self.curr_ea_button.setText(f"{verb} function at current address ({hex(controller.screen_ea)})")

        # `is not None` so that address 0 still counts as a selection; the
        # membership tests protect against a selection whose entry has been
        # deleted from the caches in the meantime.
        current = controller.current_address
        if current is not None and current in controller.config and current in controller.cache:
            self.isDeclare.setChecked(controller.config[current])
            self.code_view.setText(controller.cache[current])

    def create_code_view(self):
        """Create the pane that displays the lifted IR."""
        self.code_view = QtWidgets.QTextEdit(self.widget)

    def create_function_settings(self):
        """Create the "Lift Settings" group with the declare-only checkbox."""
        self.isDeclare = QtWidgets.QCheckBox("Keep function as declare-only")
        self.isDeclare.setChecked(False)
        self.isDeclare.stateChanged.connect(self.controller.declareCurrentFunction)

        self.lifting_settings = QtWidgets.QGroupBox("Lift Settings")
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.isDeclare)
        self.lifting_settings.setLayout(layout)

    def create_function_list(self):
        """
        Create the list of lifted functions. Items read
        "<hex address>: <name>"; pressing Delete removes the selected one
        from the module and from the controller's caches.
        """
        controller = self.controller
        class FunctionListWidget(QtWidgets.QListWidget):
            def __init__(self, parent, *args, **kwargs):
                super().__init__(parent, *args, **kwargs)

                for address in controller.config:
                    current_name = controller.resolveName(address)
                    self.addItem(f"{hex(address)}: {current_name}")
            def keyPressEvent(self, event):
                if event.key() != QtCore.Qt.Key_Delete:
                    super().keyPressEvent(event)
                    return
                row = self.currentRow()
                if row < 0:
                    # Delete pressed with nothing selected
                    return
                item = self.takeItem(row)
                address, name = item.text().split(": ", maxsplit=1)
                address = int(address, 16)
                controller.removeFromModule(name)
                # pop() instead of del: an entry may legitimately be missing
                controller.cache.pop(address, None)
                controller.namecache.pop(address, None)
                controller.config.pop(address, None)
                if controller.current_address == address:
                    # the list is now empty (otherwise the selection signal
                    # would already have moved current_address elsewhere)
                    controller.current_address = None
                del item
        self.function_list = FunctionListWidget(self.widget)
        self.function_list.setSortingEnabled(True)
        self.function_list.currentTextChanged.connect(self.controller.updateFunctionSelected)

    def OnCreate(self, form):
        """PluginForm callback: build the widget tree and do a first refresh."""
        self._twidget = self.GetWidget()
        self.widget = self.FormToPyQtWidget(form)
        layout = QtWidgets.QGridLayout(self.widget)

        self.curr_ea_button = QtWidgets.QPushButton("Add function at current address", self.widget)
        self.all_functions_button = QtWidgets.QPushButton("Add all IDA-defined functions", self.widget)
        self.lift_button = QtWidgets.QPushButton("Lift and save to file", self.widget)

        self.curr_ea_button.clicked.connect(self.controller.insertFunctionAtScreenEa)
        self.all_functions_button.clicked.connect(self.controller.insertAllFunctions)
        self.lift_button.clicked.connect(self.controller.save_to_file)

        self.create_code_view()
        self.create_function_settings()
        self.create_function_list()

        # arrange the widgets in a 'grid'         row  col  row span  col span
        layout.addWidget(self.code_view,            0,   0,   0,        1)
        layout.addWidget(self.function_list,        0,   1,   1,        1)
        layout.addWidget(self.lifting_settings,     1,   1,   1,        1)
        layout.addWidget(self.curr_ea_button,       2,   1,   1,        1)
        layout.addWidget(self.all_functions_button, 3,   1,   1,        1)
        layout.addWidget(self.lift_button,          4,   1,   1,        1)

        self.widget.setLayout(layout)
        self.created = True
        self.refresh()
def str2size(str_size: str):
    """
    Translate a memory-size keyword into the number of bits it denotes.

    :param str_size: one of "byte", "word", "dword" or "qword"
    :type str_size: str
    :raises AssertionError: `str_size` is none of the four keywords
    :return: width in bits (8, 16, 32 or 64)
    :rtype: int
    """
    widths = (
        ("byte", 8),
        ("word", 16),
        ("dword", 32),
        ("qword", 64),
    )
    for keyword, bits in widths:
        if str_size == keyword:
            return bits
    raise AssertionError("string size must be one of byte/word/dword/qword")
48 | 49 | :param module: _description_ 50 | :type module: ir.Module 51 | :param func_name: _description_ 52 | :type func_name: str 53 | :raises NotImplementedError: _description_ 54 | :return: _description_ 55 | :rtype: _type_ 56 | """ 57 | # retrieve intrinsic function if it already exists 58 | with suppress(KeyError): 59 | return module.get_global(func_name) 60 | 61 | match func_name: 62 | case "strcpy": 63 | typ = ir.FunctionType(ir.VoidType(), (i8ptr, i8ptr)) 64 | f = ir.Function(module, typ, "strcpy") 65 | f.append_basic_block() 66 | builder = ir.IRBuilder(f.entry_basic_block) 67 | 68 | memcpy = module.declare_intrinsic('llvm.memcpy', [i8ptr, i8ptr, ir.IntType(64)]) 69 | 70 | logger.debug("TODO: fix strcpy naieve assumptions") 71 | dest, src = f.args 72 | length = ir.Constant(ir.IntType(64), 45) 73 | volatile = ir.Constant(ir.IntType(1), True) 74 | 75 | builder.call(memcpy, (dest, src, length, volatile)) 76 | builder.ret_void() 77 | return f 78 | 79 | case "__halt": 80 | fty = ir.FunctionType(ir.VoidType(), []) 81 | f = ir.Function(module, fty, "__halt") 82 | f.append_basic_block() 83 | builder = ir.IRBuilder(f.entry_basic_block) 84 | builder.asm(fty, "hlt", "", (), True) 85 | builder.ret_void() 86 | return f 87 | 88 | case func_name if func_name.startswith("__readfs"): 89 | _, size = func_name.split("__readfs") 90 | size = str2size(size) 91 | 92 | try: 93 | fs_reg = module.get_global("virtual_fs") 94 | except KeyError: 95 | fs_reg_typ = ir.ArrayType(ir.IntType(8), 65536) 96 | fs_reg = ir.GlobalVariable(module, fs_reg_typ, "virtual_fs") 97 | fs_reg.storage_class = "thread_local" 98 | fs_reg.initializer = fs_reg_typ(None) 99 | try: 100 | threadlocal_f = module.get_global('llvm.threadlocal.address') 101 | except KeyError: 102 | f_argty = (i8ptr, ) 103 | fnty = ir.FunctionType(i8ptr, f_argty) 104 | threadlocal_f = module.declare_intrinsic('llvm.threadlocal.address', f_argty, fnty) 105 | 106 | fty = ir.FunctionType(ir.IntType(size), [ir.IntType(32),]) 107 | 108 | 
f = ir.Function(module, fty, func_name) 109 | offset, = f.args 110 | f.append_basic_block() 111 | builder = ir.IRBuilder(f.entry_basic_block) 112 | fs_reg = ida2llvm.type.typecast(fs_reg, ir.IntType(8).as_pointer(), builder) 113 | threadlocal_address = builder.call(threadlocal_f, (fs_reg, )) 114 | pointer = builder.gep(threadlocal_address, (offset,)) 115 | pointer = ida2llvm.type.typecast(pointer, ir.IntType(size).as_pointer(), builder) 116 | res = builder.load(pointer) 117 | builder.ret(res) 118 | 119 | return f 120 | 121 | case func_name if func_name.startswith("__writefs"): 122 | _, size = func_name.split("__writefs") 123 | size = str2size(size) 124 | 125 | try: 126 | fs_reg = module.get_global("virtual_fs") 127 | except KeyError: 128 | fs_reg_typ = ir.ArrayType(ir.IntType(8), 65536) 129 | fs_reg = ir.GlobalVariable(module, fs_reg_typ, "virtual_fs") 130 | fs_reg.storage_class = "thread_local" 131 | fs_reg.initializer = fs_reg_typ(None) 132 | try: 133 | threadlocal_f = module.get_global('llvm.threadlocal.address') 134 | except KeyError: 135 | f_argty = (i8ptr, ) 136 | fnty = ir.FunctionType(i8ptr, f_argty) 137 | threadlocal_f = module.declare_intrinsic('llvm.threadlocal.address', f_argty, fnty) 138 | 139 | fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32), ir.IntType(size)]) 140 | 141 | f = ir.Function(module, fty, func_name) 142 | offset, value = f.args 143 | f.append_basic_block() 144 | builder = ir.IRBuilder(f.entry_basic_block) 145 | fs_reg = ida2llvm.type.typecast(fs_reg, ir.IntType(8).as_pointer(), builder) 146 | threadlocal_address = builder.call(threadlocal_f, (fs_reg, )) 147 | pointer = builder.gep(threadlocal_address, (offset,)) 148 | pointer = ida2llvm.type.typecast(pointer, ir.IntType(size).as_pointer(), builder) 149 | builder.store(value, pointer) 150 | builder.ret_void() 151 | 152 | return f 153 | 154 | case func_name if func_name.startswith("sys_"): 155 | fty = ir.FunctionType(ir.IntType(64), [], var_arg=True) 156 | f = ir.Function(module, fty, 
func_name) 157 | return f 158 | 159 | case func_name if func_name.startswith("_InterlockedCompareExchange") or func_name.startswith("_InterlockedExchange"): 160 | fty = ir.FunctionType(ir.IntType(64), [], var_arg=True) 161 | f = ir.Function(module, fty, func_name) 162 | return f 163 | 164 | # case "memset": 165 | # # ida_pro: (dest, src, length) 166 | # # llvmintrinsic: (dest, src, length, isvolatile=True) 167 | # typ = ir.FunctionType(ir.VoidType(), (i8ptr, ir.IntType(8), ir.IntType(64))) 168 | # f = declare_function("_h_memset", typ) 169 | 170 | # if not f.is_defined(): 171 | # f.llvm_f.append_basic_block() 172 | # f.builder = ir.IRBuilder(f.llvm_f.entry_basic_block) 173 | 174 | # memset = module.declare_intrinsic('llvm.memset', [i8ptr, ir.IntType(64)]) 175 | 176 | # dest, src, length = f.llvm_f.args 177 | # volatile = ir.Constant(ir.IntType(1), True) 178 | # f.builder.call(memset, (dest, src, length, volatile)) 179 | # f.builder.ret_void() 180 | # return f.llvm_f 181 | 182 | # case "__ROL8__": 183 | # # ida_pro: (item, shiftamount) 184 | # # llvmintrinsic: (left, right, shiftamount) funnelshiftright is equal to rotateright iff left and right operands are equal. 
185 | # typ = ir.FunctionType(ir.IntType(64), (ir.IntType(64), ir.IntType(8))) 186 | # f = declare_function("_h_rol8", typ) 187 | 188 | # if not f.is_defined(): 189 | # f.llvm_f.append_basic_block() 190 | # f.builder = ir.IRBuilder(f.llvm_f.entry_basic_block) 191 | # rol_func_type = ir.FunctionType(ir.IntType(64), (ir.IntType(64), ir.IntType(64), ir.IntType(64))) 192 | # rol8 = module.declare_intrinsic('llvm.fshl.i64', [ir.IntType(64), ir.IntType(64), ir.IntType(64)], rol_func_type) 193 | 194 | # item, shiftamount = f.llvm_f.args 195 | # shiftamount = llvmgen.ida2llvm.type.change_type(f.builder, shiftamount, ir.IntType(64)) 196 | # return_val = f.builder.call(rol8, (item, item, shiftamount)) 197 | # f.builder.ret(return_val) 198 | # return f.llvm_f 199 | 200 | # case "_byteswap_uint64": 201 | # # ida_pro: (item) 202 | # # llvmintrinsic: (item) 203 | # typ = ir.FunctionType(ir.IntType(64), (ir.IntType(64),)) 204 | # f = declare_function("_h_byteswap_uint64", typ) 205 | 206 | # if not f.is_defined(): 207 | # f.llvm_f.append_basic_block() 208 | # f.builder = ir.IRBuilder(f.llvm_f.entry_basic_block) 209 | # byteswap_func_type = ir.FunctionType(ir.IntType(64), (ir.IntType(64),)) 210 | # byteswap64 = module.declare_intrinsic('llvm.bswap.i64', [ir.IntType(64),], byteswap_func_type) 211 | 212 | # item, = f.llvm_f.args 213 | # return_val = f.builder.call(byteswap64, (item,)) 214 | # f.builder.ret(return_val) 215 | # return f.llvm_f 216 | 217 | # case "_byteswap_ulong": 218 | # # ida_pro: (item) 219 | # # llvmintrinsic: (item) 220 | # typ = ir.FunctionType(ir.IntType(32), (ir.IntType(32),)) 221 | # f = declare_function("_h_byteswap_uint32", typ) 222 | 223 | # if not f.is_defined(): 224 | # f.llvm_f.append_basic_block() 225 | # f.builder = ir.IRBuilder(f.llvm_f.entry_basic_block) 226 | # byteswap_func_type = ir.FunctionType(ir.IntType(32), (ir.IntType(32),)) 227 | # byteswap32 = module.declare_intrinsic('llvm.bswap.i32', [ir.IntType(32),], byteswap_func_type) 228 | 229 | # 
item, = f.llvm_f.args 230 | # return_val = f.builder.call(byteswap32, (item,)) 231 | # f.builder.ret(return_val) 232 | # return f.llvm_f 233 | 234 | case _: 235 | raise NotImplementedError(f"NotImplementedError {func_name}") 236 | 237 | def lift_function(module: ir.Module, func_name: str, is_declare: bool, tif: ida_typeinf.tinfo_t = None): 238 | """ 239 | Declares function given its name. 240 | If `is_declare` is False, also define the function by recursively. 241 | If `tif` is given, enforce function type as given. 242 | lifting its instructions in IDA decompiler output. 243 | Heavylifting is done in `lift_from_address`. 244 | 245 | :param module: parent module of function 246 | :type module: ir.Module 247 | :param func_name: name of function to lift 248 | :type func_name: str 249 | :param is_declare: is the function declare only? 250 | :type is_declare: bool 251 | :param tif: function type, defaults to None 252 | :type tif: ida_typeinf.tinfo_t, optional 253 | :return: lifted function 254 | :rtype: ir.Function 255 | """ 256 | from contextlib import suppress 257 | with suppress(NotImplementedError): 258 | return lift_intrinsic_function(module, func_name) 259 | 260 | with suppress(KeyError): 261 | return module.get_global(func_name) 262 | 263 | func_ea = ida_name.get_name_ea(ida_idaapi.BADADDR, func_name) 264 | if ida_segment.segtype(func_ea) & ida_segment.SEG_XTRN: 265 | is_declare = True 266 | 267 | assert func_ea != ida_idaapi.BADADDR 268 | if tif is None: 269 | tif = ida2llvm.address.lift_type_from_address(func_ea) 270 | typ = ida2llvm.type.lift_tif(tif) 271 | res = ir.Function(module, typ, func_name) 272 | logger.debug(f"lifting function {func_name} at {hex(func_ea)}, type: {typ} from {tif}") 273 | 274 | # rename all function args to arg0, arg1, arg2, if does not exist 275 | ida_func_details = ida_typeinf.func_type_data_t() 276 | tif.get_func_details(ida_func_details) 277 | 278 | ida_args = (ida_func_details.at(i) for i in range(ida_func_details.size())) 279 | 
def lift_function(module: ir.Module, func_name: str, is_declare: bool, tif: ida_typeinf.tinfo_t = None):
    """
    Declares function given its name.
    If `is_declare` is False, also define the function by recursively
    lifting its instructions in IDA decompiler output.
    If `tif` is given, enforce function type as given.
    Heavylifting is done in `lift_from_address`.

    Known decompiler helpers are delegated to `lift_intrinsic_function`, and
    a global already registered under `func_name` is returned unchanged.
    Functions located in an extern segment are always declare-only.

    As a side effect, the function's arguments are renamed to
    arg0, arg1, ... in the IDA database.

    :param module: parent module of function
    :type module: ir.Module
    :param func_name: name of function to lift
    :type func_name: str
    :param is_declare: is the function declare only?
    :type is_declare: bool
    :param tif: function type, defaults to None
    :type tif: ida_typeinf.tinfo_t, optional
    :raises AssertionError: `func_name` does not resolve to an address
    :return: lifted function
    :rtype: ir.Function
    """
    with suppress(NotImplementedError):
        return lift_intrinsic_function(module, func_name)

    with suppress(KeyError):
        return module.get_global(func_name)

    func_ea = ida_name.get_name_ea(ida_idaapi.BADADDR, func_name)
    # Validate before the address is used, with an explicit raise so the
    # check also survives `python -O`.
    if func_ea == ida_idaapi.BADADDR:
        raise AssertionError(f"cannot resolve address of function {func_name!r}")

    # Segment types are enumerated values, not bit flags: compare, don't mask.
    if ida_segment.segtype(func_ea) == ida_segment.SEG_XTRN:
        is_declare = True

    if tif is None:
        tif = ida2llvm.address.lift_type_from_address(func_ea)
    typ = ida2llvm.type.lift_tif(tif)
    res = ir.Function(module, typ, func_name)
    logger.debug(f"lifting function {func_name} at {hex(func_ea)}, type: {typ} from {tif}")

    # rename all function args to arg0, arg1, arg2, ... and write the
    # resulting prototype back to the database
    ida_func_details = ida_typeinf.func_type_data_t()
    tif.get_func_details(ida_func_details)
    for i in range(ida_func_details.size()):
        ida_func_details.at(i).name = f"arg{i}"
    function_tinfo = ida_typeinf.tinfo_t()
    function_tinfo.create_func(ida_func_details)
    ida_typeinf.apply_tinfo(func_ea, function_tinfo, ida_typeinf.TINFO_DEFINITE)

    if is_declare:
        return res
    return ida2llvm.address.lift_from_address(module, func_ea)
20 | 21 | :param mop: mop_t object to lift 22 | :type mop: ida_hexrays.mop_t 23 | :param blk: LLVM Block to add instructions to 24 | :type blk: ir.Block 25 | :raises NotImplementedError: specific mop_t types are not implemented 26 | :param builder: builder to emit instructions into 27 | :type builder: ir.IRBuilder 28 | :return: lifted LLVM Value 29 | :rtype: ir.Value 30 | """ 31 | builder.position_at_end(blk) 32 | match mop.t: 33 | case ida_hexrays.mop_r: # register value 34 | # logger.warning("register lifting not implemented, None is returned") 35 | return None 36 | case ida_hexrays.mop_n: # immediate value 37 | res = ir.Constant(ir.IntType(mop.size * 8), mop.nnn.value) 38 | res.parent = blk 39 | return res 40 | case ida_hexrays.mop_d: # another instruction 41 | d = lift_insn(mop.d, blk, builder) 42 | td = llvm.create_target_data("e") 43 | match mop: 44 | case voidType if isinstance(d.type, ir.VoidType): 45 | pass 46 | case mcall if isinstance(mcall, ida_hexrays.mcallarg_t): 47 | lltype = ida2llvm.type.lift_tif(mop.type) 48 | d = ida2llvm.type.typecast(d, lltype, builder, signed=mop.type.is_signed()) 49 | case sizeEnforced if d.type.get_abi_size(td) != mop.size and mop.size != -1: 50 | d = ida2llvm.type.typecast(d, ir.IntType(mop.size * 8), builder) 51 | return d 52 | case ida_hexrays.mop_l: # local variables 53 | lvar_ref = mop.l 54 | lvar = lvar_ref.var() 55 | name = lvar.name 56 | off = lvar_ref.off 57 | 58 | if not lvar.has_user_type: 59 | ulvars = lvar_ref.mba.vars 60 | ulvar = ulvars.at(lvar_ref.idx) 61 | lvar.set_final_lvar_type(ulvar.tif) 62 | lvar.set_user_type() 63 | 64 | func = blk.parent 65 | if name not in func.lvars: 66 | with builder.goto_entry_block(): 67 | func.lvars[name] = builder.alloca(ida2llvm.type.lift_tif(lvar.tif), name = name) 68 | 69 | llvm_arg = func.lvars[name] 70 | 71 | if lvar.width != mop.size and mop.size != -1: 72 | mop_type = ir.IntType(mop.size * 8).as_pointer() 73 | llvm_arg = ida2llvm.type.typecast(llvm_arg, mop_type, 
builder) 74 | 75 | llvm_arg = ida2llvm._utils.get_offset_to(builder, llvm_arg, off) 76 | return builder.load(llvm_arg) 77 | case ida_hexrays.mop_S: # stack variables 78 | pass 79 | case ida_hexrays.mop_b: # block number (used in jmp\call instruction) 80 | return blk.parent.blocks[mop.b] 81 | case ida_hexrays.mop_v: # global variable 82 | ea = mop.g 83 | name = ida_name.get_name(ea) 84 | if name == "": 85 | name = f"g_{hex(ea)}" 86 | tif = ida_typeinf.tinfo_t() 87 | ida_nalt.get_tinfo(tif, ea) 88 | if tif.empty(): 89 | match mop.size: 90 | case 1: 91 | tif.create_simple_type(ida_typeinf.BT_UNK_BYTE) 92 | case 2: 93 | tif.create_simple_type(ida_typeinf.BT_UNK_WORD) 94 | case 4: 95 | tif.create_simple_type(ida_typeinf.BT_UNK_DWORD) 96 | case 8: 97 | tif.create_simple_type(ida_typeinf.BT_UNK_QWORD) 98 | case 16: 99 | tif.create_simple_type(ida_typeinf.BT_UNK_OWORD) 100 | case _: 101 | size = mop.size if mop.size > 0 else 1000 102 | onebyte_tif = ida_typeinf.tinfo_t() 103 | onebyte_tif.create_simple_type(ida_typeinf.BT_UNK_BYTE) 104 | tif.create_array(onebyte_tif, size, 0, ida_typeinf.BT_ARRAY) 105 | match tif: 106 | case func if func.is_func() or func.is_funcptr(): 107 | with suppress(KeyError): 108 | g = blk.parent.parent.get_global(name) 109 | typ = ida2llvm.type.lift_tif(tif).as_pointer() 110 | g = ida2llvm.type.typecast(g, typ, builder) 111 | return g 112 | if func.is_funcptr(): 113 | tif = tif.get_ptrarr_object() 114 | # if function is a thunk function, define the actual function instead 115 | if ((ida_funcs.get_func(ea) is not None) 116 | and (ida_funcs.get_func(ea).flags & ida_funcs.FUNC_THUNK)): 117 | tfunc_ea, ptr = ida_funcs.calc_thunk_func_target(ida_funcs.get_func(ea)) 118 | if tfunc_ea != ida_idaapi.BADADDR: 119 | ea = tfunc_ea 120 | name = ida_name.get_name(ea) 121 | 122 | # if no function definition, 123 | if ((ida_funcs.get_func(ea) is None) 124 | # or if the function is a library function, 125 | or (ida_funcs.get_func(ea).flags & ida_funcs.FUNC_LIB) 
126 | # or if the function is declared in a XTRN segment, 127 | or ida_segment.segtype(ea) & ida_segment.SEG_XTRN): 128 | # return function declaration 129 | g = ida2llvm.function.lift_function(blk.parent.parent, name, True, tif) 130 | else: 131 | g = ida2llvm.function.lift_function(blk.parent.parent, name, False, tif) 132 | return g 133 | case _: 134 | with suppress(KeyError): 135 | g = blk.parent.parent.get_global(name) 136 | return builder.load(g) 137 | typ = ida2llvm.type.lift_tif(tif) 138 | g_cmt = ida2llvm.address.lift_from_address(blk.parent.parent, ea, typ) 139 | g = ir.GlobalVariable(blk.parent.parent, g_cmt.type, name = name) 140 | g.initializer = g_cmt 141 | td = llvm.create_target_data("e") 142 | if g.type.get_abi_size(td) != mop.size and mop.size != -1: 143 | g = ida2llvm.type.typecast(g, ir.IntType(mop.size * 8).as_pointer(), builder) 144 | return builder.load(g) 145 | case ida_hexrays.mop_f: # function call information 146 | mcallinfo = mop.f 147 | f_args = [] 148 | f_ret = [] 149 | for i in range(mcallinfo.retregs.size()): 150 | mopt = mcallinfo.retregs.at(i) 151 | f_ret.append(lift_mop(mopt, blk, builder)) 152 | for arg in mcallinfo.args: 153 | typ = ida2llvm.type.lift_tif(arg.type) 154 | f_arg = lift_mop(arg, blk, builder) 155 | f_arg = ida2llvm.type.typecast(f_arg, typ, builder) 156 | logger.debug(f"{f_arg} ({f_arg.type}) lifted from {arg} ({typ})") 157 | f_args.append(f_arg) 158 | return f_ret,f_args 159 | case ida_hexrays.mop_a: # operating number address (mop_l\mop_v\mop_S\mop_r) 160 | mop_addr = mop.a 161 | val = ida2llvm._utils.dedereference(lift_mop(mop_addr, blk, builder)) 162 | match mop: 163 | case mcall if isinstance(mcall, ida_hexrays.mcallarg_t): 164 | lltype = ida2llvm.type.lift_tif(mop.type) 165 | val = ida2llvm.type.typecast(val, lltype, builder) 166 | case mop_addr if isinstance(mop_addr, ida_hexrays.mop_addr_t): 167 | lltype = ida2llvm.type.lift_tif(mop.type) 168 | val = ida2llvm.type.typecast(val, lltype, builder) 169 | case _: 
170 | lltype = ir.IntType(8).as_pointer() 171 | val = ida2llvm.type.typecast(val, lltype, builder) 172 | return val 173 | case ida_hexrays.mop_h: # auxiliary function number 174 | return ida2llvm.function.lift_intrinsic_function(blk.parent.parent, mop.helper) 175 | case ida_hexrays.mop_str: # string constant 176 | str_csnt = mop.cstr 177 | 178 | strType = ir.ArrayType(ir.IntType(8), len(str_csnt)) 179 | g = ir.GlobalVariable(blk.parent.parent, strType, name=str_csnt) 180 | g.initializer = ir.Constant(strType, bytearray(str_csnt.encode("utf-8"))) 181 | g.linkage = "private" 182 | g.global_constant = True 183 | return ida2llvm.type.typecast(g, ir.IntType(8).as_pointer(), builder) 184 | case ida_hexrays.mop_c: # switch case and target 185 | pass 186 | case ida_hexrays.mop_fn: # floating points constant 187 | pass 188 | case ida_hexrays.mop_p: # the number of operations is correct 189 | pass 190 | case ida_hexrays.mop_sc: # decentralized operation information 191 | pass 192 | case ida_hexrays.mop_z: # does not exist 193 | return None 194 | mop_descs = {ida_hexrays.mop_r: "register value", 195 | ida_hexrays.mop_n: "immediate value", 196 | ida_hexrays.mop_d: "another instruction", 197 | ida_hexrays.mop_l: "local variables", 198 | ida_hexrays.mop_S: "stack variables", 199 | ida_hexrays.mop_b: "block number (used in jmp\call instruction)", 200 | ida_hexrays.mop_v: "global variable", 201 | ida_hexrays.mop_f: "function call information", 202 | ida_hexrays.mop_a: "operating number address (mop_l\mop_v\mop_S\mop_r)", 203 | ida_hexrays.mop_h: "auxiliary function number", 204 | ida_hexrays.mop_str: "string constant", 205 | ida_hexrays.mop_c: "switch case and target", 206 | ida_hexrays.mop_fn: "floating points constant", 207 | ida_hexrays.mop_p: "the number of operations is correct", 208 | ida_hexrays.mop_sc: "decentralized operation information" 209 | } 210 | raise NotImplementedError(f"not implemented: {mop.dstr()} of type {mop_descs[mop.t]}") 211 | 212 | def _store_as(l: 
def _store_as(l: ir.Value, d: ir.Value, blk: ir.Block, builder: ir.IRBuilder, d_typ: ir.Type = None, signed: bool = True):
    """
    Private helper function to store value to destination.

    When there is no destination, `l` is handed back untouched. Otherwise
    the destination is turned into a pointer (optionally cast to `d_typ`),
    `l` is cast to the pointee type and a store is emitted.

    :param l: value to store
    :param d: destination, or None if the instruction has none
    :param blk: current block (not used by this helper)
    :param builder: builder to emit instructions into
    :param d_typ: type to cast the destination pointer to, defaults to None
    :param signed: signedness passed on to every typecast
    :return: `l` if there is no destination, None if `l` is void,
             otherwise the emitted store instruction
    """
    if d is None: # destination does not exist
        return l

    dest = ida2llvm._utils.dedereference(d)
    if d_typ:
        dest = ida2llvm.type.typecast(dest, d_typ, builder, signed)
    assert isinstance(dest.type, ir.PointerType)

    pointee = dest.type.pointee
    if isinstance(pointee, ir.ArrayType):
        # pointer-to-array becomes pointer-to-(pointer-to-element)
        elem_ptr_ptr = pointee.element.as_pointer().as_pointer()
        dest = ida2llvm.type.typecast(dest, elem_ptr_ptr, builder, signed)

    # checked only now: the casts above are emitted even for a void source
    if isinstance(l.type, ir.VoidType):
        return None

    value = ida2llvm.type.typecast(l, dest.type.pointee, builder, signed)
    return builder.store(value, dest)
240 | 241 | :param ida_insn: IDA Microcode instruction 242 | :type ida_insn: ida_hexrays.minsn_t 243 | :param blk: LLVM Block to emit instructions into 244 | :type blk: ir.Block 245 | :raises NotImplementedError: m_add only supports addition between integers/pointers 246 | :raises NotImplementedError: m_sub only supports subtraction between integers/pointers 247 | :raises NotImplementedError: certain minsn_t have not been lifted yet 248 | :param builder: builder to emit instructions into 249 | :type builder: ir.IRBuilder 250 | :return: final instruction (intermediate instructions emitted are not returned) 251 | :rtype:ir.Instruction 252 | """ 253 | builder.position_at_end(blk) 254 | logger.debug(str(ida_insn.dstr())) 255 | l = lift_mop(ida_insn.l, blk, builder) 256 | r = lift_mop(ida_insn.r, blk, builder) 257 | d = lift_mop(ida_insn.d, blk, builder) 258 | logger.debug(f"{chr(10).join('-'+str(i) for i in (l,r,d) if i)}") 259 | blk_itr = iter(blk.parent.blocks) 260 | list(itertools.takewhile(lambda x: x.name != blk.name, blk_itr)) # consume iter 261 | next_blk = next(blk_itr, None) 262 | 263 | match ida_insn.opcode: 264 | case ida_hexrays.m_nop: # 0x00, nop no operation 265 | return 266 | case ida_hexrays.m_stx: # 0x01, stx l, {r=sel, d=off} store register to memory*F 267 | if d is None: # destination does not exist 268 | return l 269 | if isinstance(l.type, ir.VoidType): 270 | return 271 | 272 | if isinstance(d.type, ir.ArrayType): 273 | arrtoptr = d.type.element.as_pointer() 274 | d = ida2llvm.type.typecast(d, arrtoptr, builder, True) 275 | elif isinstance(d.type, ir.IntType): 276 | d = builder.inttoptr(d, l.type.as_pointer()) 277 | 278 | assert isinstance(d.type, ir.PointerType) 279 | l = ida2llvm.type.typecast(l, d.type.pointee, builder, True) 280 | return builder.store(l, d) 281 | case ida_hexrays.m_ldx: # 0x02, ldx {l=sel,r=off}, d load register from memory *F 282 | if not isinstance(r.type, ir.PointerType): 283 | register_size = 8*ida_insn.r.size 284 | r = 
ida2llvm.type.typecast(r, ir.IntType(register_size).as_pointer(), builder) 285 | r = builder.load(r) 286 | 287 | return _store_as(r, d, blk, builder) 288 | case ida_hexrays.m_ldc: # 0x03, ldc l=const,d load constant 289 | pass 290 | case ida_hexrays.m_mov: # 0x04, mov l, d move*F 291 | return _store_as(l, d, blk, builder) 292 | case ida_hexrays.m_neg: # 0x05, neg l, d negate 293 | l = builder.neg(l) 294 | return _store_as(l, d, blk, builder) 295 | case ida_hexrays.m_lnot: # 0x06, lnot l, d logical not 296 | assert isinstance(l.type, ir.IntType) 297 | cmp = builder.icmp_unsigned("==", l, ir.IntType(l.type.width)(0)) 298 | return _store_as(cmp, d, blk, builder) 299 | case ida_hexrays.m_bnot: # 0x07, bnot l, d bitwise not 300 | l = builder.xor(l.type(pow(l, l.type.width) - 1), l) 301 | return _store_as(l, d, blk, builder) 302 | case ida_hexrays.m_xds: # 0x08, xds l, d extend (signed) 303 | return _store_as(l, d, blk, builder) 304 | case ida_hexrays.m_xdu: # 0x09, xdu l, d extend (unsigned) 305 | return _store_as(l, d, blk, builder, signed=False) 306 | case ida_hexrays.m_low: # 0x0A, low l, d take low part 307 | return _store_as(l, d, blk, builder) 308 | case ida_hexrays.m_high: # 0x0B, high l, d take high part 309 | return _store_as(l, d, blk, builder) 310 | case ida_hexrays.m_add: # 0x0C, add l, r, d l + r -> dst 311 | match (l, r): 312 | case (ptr, const) if isinstance(ptr.type, ir.PointerType) and isinstance(const.type, ir.IntType): 313 | castPtr = builder.bitcast(ptr, ir.IntType(8).as_pointer()) 314 | math = builder.gep(castPtr, (const, )) 315 | math = builder.bitcast(math, ptr.type) 316 | case (const, ptr) if isinstance(ptr.type, ir.PointerType) and isinstance(const.type, ir.IntType): 317 | castPtr = builder.bitcast(ptr, ir.IntType(8).as_pointer()) 318 | math = builder.gep(castPtr, (const, )) 319 | math = builder.bitcast(math, ptr.type) 320 | case (const1, const2) if isinstance(const1.type, ir.IntType) and isinstance(const2.type, ir.IntType): 321 | math = 
builder.add(const1, const2) 322 | case (ptr1, ptr2) if isinstance(ptr1.type, ir.PointerType) and isinstance(ptr2.type, ir.PointerType): 323 | ptrType = ir.IntType(64) # get pointer type 324 | const1 = builder.ptrtoint(ptr1, ptrType) 325 | const2 = builder.ptrtoint(ptr2, ptrType) 326 | math = builder.add(const1, const2) 327 | case _: 328 | raise NotImplementedError("expected addition between pointer/integers") 329 | return _store_as(math, d, blk, builder) 330 | case ida_hexrays.m_sub: # 0x0D, sub l, r, d l - r -> dst 331 | match (l, r): 332 | case (ptr, const) if isinstance(ptr.type, ir.PointerType) and isinstance(const.type, ir.IntType): 333 | const.constant *= -1 334 | castPtr = builder.bitcast(ptr, ir.IntType(8).as_pointer()) 335 | math = builder.gep(castPtr, (const, )) 336 | math = builder.bitcast(math, ptr.type) 337 | case (const, ptr) if isinstance(ptr.type, ir.PointerType) and isinstance(const.type, ir.IntType): 338 | const.constant *= -1 339 | castPtr = builder.bitcast(ptr, ir.IntType(8).as_pointer()) 340 | math = builder.gep(castPtr, (const, )) 341 | math = builder.bitcast(math, ptr.type) 342 | case (const1, const2) if isinstance(const1.type, ir.IntType) and isinstance(const2.type, ir.IntType): 343 | math = builder.sub(const1, const2) 344 | case (ptr1, ptr2) if isinstance(ptr1.type, ir.PointerType) and isinstance(ptr2.type, ir.PointerType): 345 | ptrType = ir.IntType(64) # get pointer type 346 | const1 = builder.ptrtoint(ptr1, ptrType) 347 | const2 = builder.ptrtoint(ptr2, ptrType) 348 | math = builder.sub(const1, const2) 349 | case _: 350 | raise NotImplementedError("expected subtraction between pointer/integers") 351 | return _store_as(math, d, blk, builder) 352 | case ida_hexrays.m_mul: # 0x0E, mul l, r, d l * r -> dst 353 | math = builder.mul(l, r) 354 | return _store_as(math, d, blk, builder) 355 | case ida_hexrays.m_udiv: # 0x0F, udiv l, r, d l / r -> dst 356 | r = ida2llvm.type.typecast(r, l.type, builder) 357 | math = builder.udiv(l, r) 358 | return 
_store_as(math, d, blk, builder) 359 | case ida_hexrays.m_sdiv: # 0x10, sdiv l, r, d l / r -> dst 360 | r = ida2llvm.type.typecast(r, l.type, builder) 361 | math = builder.sdiv(l, r) 362 | return _store_as(math, d, blk, builder) 363 | case ida_hexrays.m_umod: # 0x11, umod l, r, d l % r -> dst 364 | r = ida2llvm.type.typecast(r, l.type, builder) 365 | math = builder.urem(l, r) 366 | return _store_as(math, d, blk, builder) 367 | case ida_hexrays.m_smod: # 0x12, smod l, r, d l % r -> dst 368 | r = ida2llvm.type.typecast(r, l.type, builder) 369 | math = builder.srem(l, r) 370 | return _store_as(math, d, blk, builder) 371 | case ida_hexrays.m_or: # 0x13, or l, r, d bitwise or 372 | r = ida2llvm.type.typecast(r, l.type, builder) 373 | math = builder.or_(l, r) 374 | return _store_as(math, d, blk, builder) 375 | case ida_hexrays.m_and: # 0x14, and l, r, d bitwise and 376 | r = ida2llvm.type.typecast(r, l.type, builder) 377 | math = builder.and_(l, r) 378 | return _store_as(math, d, blk, builder) 379 | case ida_hexrays.m_xor: # 0x15, xor l, r, d bitwise xor 380 | r = ida2llvm.type.typecast(r, l.type, builder) 381 | math = builder.xor(l, r) 382 | return _store_as(math, d, blk, builder) 383 | case ida_hexrays.m_shl: # 0x16, shl l, r, d shift logical left 384 | r = ida2llvm.type.typecast(r, l.type, builder) 385 | math = builder.shl(l, r) 386 | return _store_as(math, d, blk, builder) 387 | case ida_hexrays.m_shr: # 0x17, shr l, r, d shift logical right 388 | r = ida2llvm.type.typecast(r, l.type, builder) 389 | math = builder.ashr(l, r) 390 | return _store_as(math, d, blk, builder) 391 | case ida_hexrays.m_sar: # 0x18, sar l, r, d shift arithmetic right 392 | r = ida2llvm.type.typecast(r, l.type, builder) 393 | math = builder.ashr(l, r) 394 | return _store_as(math, d, blk, builder) 395 | case ida_hexrays.m_cfadd: # 0x19, cfadd l, r, d=carry calculate carry bit of (l+r) 396 | math = builder.sadd_with_overflow(l, r) # a { result, overflow bit } structure is returned 397 | math = 
math.gep((ir.IntType(64)(0), ir.IntType(64)(0))) 398 | return _store_as(math, d, blk, builder) 399 | case ida_hexrays.m_ofadd: # 0x1A, ofadd l, r, d=overf calculate overflow bit of (l+r) 400 | math = builder.sadd_with_overflow(l, r) # a { result, overflow bit } structure is returned 401 | math = math.gep((ir.IntType(64)(0), ir.IntType(64)(1))) 402 | return _store_as(math, d, blk, builder) 403 | case ida_hexrays.m_cfshl: # 0x1B, cfshl l, r, d=carry calculate carry bit of (l<>r) 406 | pass 407 | case ida_hexrays.m_sets: # 0x1D, sets l,d=byte SF=1Sign 408 | pass 409 | case ida_hexrays.m_seto: # 0x1E, seto l, r, d=byte OF=1Overflow of (l-r) 410 | pass 411 | case ida_hexrays.m_setp: # 0x1F, setp l, r, d=byte PF=1Unordered/Parity *F 412 | pass 413 | case ida_hexrays.m_setnz: # 0x20, setnz l, r, d=byte ZF=0Not Equal *F 414 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 415 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 416 | cond = builder.icmp_unsigned("!=", l, r) 417 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 418 | return _store_as(result, d, blk, builder) 419 | case ida_hexrays.m_setz: # 0x21, setz l, r, d=byte ZF=1Equal *F 420 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 421 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 422 | cond = builder.icmp_unsigned("==", l, r) 423 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 424 | return _store_as(result, d, blk, builder) 425 | case ida_hexrays.m_setae: # 0x22, setae l, r, d=byte CF=0Above or Equal *F 426 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 427 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 428 | cond = builder.icmp_unsigned(">=", l, r) 429 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 430 | return _store_as(result, d, blk, builder) 431 | case ida_hexrays.m_setb: # 0x23, setb l, r, d=byte CF=1Below *F 432 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 433 | r = 
ida2llvm.type.typecast(r, ir.IntType(64), builder) 434 | cond = builder.icmp_unsigned("<", l, r) 435 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 436 | return _store_as(result, d, blk, builder) 437 | case ida_hexrays.m_seta: # 0x24, seta l, r, d=byte CF=0 & ZF=0 Above *F 438 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 439 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 440 | cond = builder.icmp_unsigned(">", l, r) 441 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 442 | return _store_as(result, d, blk, builder) 443 | case ida_hexrays.m_setbe: # 0x25, setbe l, r, d=byte CF=1 | ZF=1 Below or Equal *F 444 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 445 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 446 | cond = builder.icmp_unsigned("<=", l, r) 447 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 448 | return _store_as(result, d, blk, builder) 449 | case ida_hexrays.m_setg: # 0x26, setg l, r, d=byte SF=OF & ZF=0 Greater 450 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 451 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 452 | cond = builder.icmp_signed(">", l, r) 453 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 454 | return _store_as(result, d, blk, builder) 455 | case ida_hexrays.m_setge: # 0x27, setge l, r, d=byte SF=OF Greater or Equal 456 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 457 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 458 | cond = builder.icmp_signed(">=", l, r) 459 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 460 | return _store_as(result, d, blk, builder) 461 | case ida_hexrays.m_setl: # 0x28, setl l, r, d=byte SF!=OF Less 462 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 463 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 464 | cond = builder.icmp_signed("<", l, r) 465 | result = builder.select(cond, ir.IntType(1)(1), 
ir.IntType(1)(0)) 466 | return _store_as(result, d, blk, builder) 467 | case ida_hexrays.m_setle: # 0x29, setle l, r, d=byte SF!=OF | ZF=1 Less or Equal 468 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 469 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 470 | cond = builder.icmp_signed("<=", l, r) 471 | result = builder.select(cond, ir.IntType(1)(1), ir.IntType(1)(0)) 472 | return _store_as(result, d, blk, builder) 473 | case ida_hexrays.m_jcnd: # 0x2A, jcnd l, d d is mop_v or mop_b 474 | return builder.cbranch(l, d, next_blk) 475 | case ida_hexrays.m_jnz: # 0x2B, jnz l, r, d ZF=0Not Equal *F 476 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 477 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 478 | cond = builder.icmp_unsigned("!=", l, r) 479 | return builder.cbranch(cond, d, next_blk) 480 | case ida_hexrays.m_jz: # 0x2C, jzl, r, d ZF=1Equal*F 481 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 482 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 483 | cond = builder.icmp_unsigned("==", l, r) 484 | return builder.cbranch(cond, d, next_blk) 485 | case ida_hexrays.m_jae: # 0x2D, jae l, r, d CF=0Above or Equal *F 486 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 487 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 488 | cond = builder.icmp_unsigned(">=", l, r) 489 | return builder.cbranch(cond, d, next_blk) 490 | case ida_hexrays.m_jb: # 0x2E, jbl, r, d CF=1Below*F 491 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 492 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 493 | cond = builder.icmp_unsigned("<", l, r) 494 | return builder.cbranch(cond, d, next_blk) 495 | case ida_hexrays.m_ja: # 0x2F, jal, r, d CF=0 & ZF=0 Above*F 496 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 497 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 498 | cond = builder.icmp_unsigned(">", l, r) 499 | return builder.cbranch(cond, d, next_blk) 500 | case ida_hexrays.m_jbe: # 0x30, 
jbe l, r, d CF=1 | ZF=1 Below or Equal *F 501 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 502 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 503 | cond = builder.icmp_unsigned("<=", l, r) 504 | return builder.cbranch(cond, d, next_blk) 505 | case ida_hexrays.m_jg: # 0x31, jgl, r, d SF=OF & ZF=0 Greater 506 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 507 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 508 | cond = builder.icmp_signed(">", l, r) 509 | return builder.cbranch(cond, d, next_blk) 510 | case ida_hexrays.m_jge: # 0x32, jge l, r, d SF=OF Greater or Equal 511 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 512 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 513 | cond = builder.icmp_signed(">=", l, r) 514 | return builder.cbranch(cond, d, next_blk) 515 | case ida_hexrays.m_jl: # 0x33, jll, r, d SF!=OF Less 516 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 517 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 518 | cond = builder.icmp_signed("<", l, r) 519 | return builder.cbranch(cond, d, next_blk) 520 | case ida_hexrays.m_jle: # 0x34, jle l, r, d SF!=OF | ZF=1 Less or Equal 521 | l = ida2llvm.type.typecast(l, ir.IntType(64), builder) 522 | r = ida2llvm.type.typecast(r, ir.IntType(64), builder) 523 | cond = builder.icmp_signed("<=", l, r) 524 | return builder.cbranch(cond, d, next_blk) 525 | case ida_hexrays.m_jtbl: # 0x35, jtbl l, r=mcases Table jump 526 | pass 527 | case ida_hexrays.m_ijmp: # 0x36, ijmp {r=sel, d=off} indirect unconditional jump 528 | pass 529 | case ida_hexrays.m_goto: # 0x37, goto l l is mop_v or mop_b 530 | return builder.branch(l) 531 | case ida_hexrays.m_call: # 0x38, call ld l is mop_v or mop_b or mop_h 532 | rets, args = d 533 | for (i, llvmtype) in enumerate(l.type.pointee.args): 534 | args[i] = ida2llvm.type.typecast(args[i], llvmtype, builder) 535 | 536 | if l.type.pointee.var_arg: # function is variadic 537 | function = blk.parent 538 | if "ArgList" 
in function.lvars: 539 | logger.warning("nested variadic function detected, variadic arguments will not be passed properly") 540 | ltype = l.type.pointee 541 | newargs = list(ltype.args) 542 | for i in range(len(newargs), len(args)): 543 | newargs.append(args[i].type) 544 | new_func_type = ir.FunctionType(ltype.return_type, newargs, var_arg=True).as_pointer() 545 | # l = ida2llvm.type.typecast(l, new_func_type, builder) 546 | logger.debug(f"lifting call: {l.type} {d}") 547 | ret = builder.call(l, args) 548 | for dst in rets: 549 | _store_as(ret, dst, blk, builder) 550 | return ret 551 | case ida_hexrays.m_icall: # 0x39, icall {l=sel, r=off} d indirect call 552 | ftype = ir.FunctionType(ir.IntType(8).as_pointer(), (arg.type for arg in d)) 553 | f = ida2llvm.type.typecast(r, ftype.as_pointer(), builder) 554 | return builder.call(f, d) 555 | case ida_hexrays.m_ret: # 0x3A, ret 556 | pass 557 | case ida_hexrays.m_push: # 0x3B, push l 558 | pass 559 | case ida_hexrays.m_pop: # 0x3C, popd 560 | pass 561 | case ida_hexrays.m_und: # 0x3D, undd undefine 562 | pass 563 | case ida_hexrays.m_ext: # 0x3E, ext in1, in2, out1 external insn, not microcode *F 564 | pass 565 | case ida_hexrays.m_f2i: # 0x3F, f2il, d int(l) => d; convert fp -> integer +F 566 | pass 567 | case ida_hexrays.m_f2u: # 0x40, f2ul, d uint(l)=> d; convert fp -> uinteger +F 568 | pass 569 | case ida_hexrays.m_i2f: # 0x41, i2fl, d fp(l) => d; convert integer -> fp e +F 570 | pass 571 | case ida_hexrays.m_u2f: # 0x42, i2fl, d fp(l) => d; convert uinteger -> fp +F 572 | pass 573 | case ida_hexrays.m_f2f: # 0x43, f2fl, d l => d; change fp precision+F 574 | pass 575 | case ida_hexrays.m_fneg: # 0x44, fneg l, d -l=> d; change sign +F 576 | assert l.isfloat() and r.isfloat() 577 | math = builder.fadd(l, r) 578 | return _store_as(math, d, blk, builder) 579 | case ida_hexrays.m_fadd: # 0x45, fadd l, r, d l + r => d; add +F 580 | assert l.isfloat() and r.isfloat() 581 | math = builder.fadd(l, r) 582 | return 
_store_as(math, d, blk, builder) 583 | case ida_hexrays.m_fsub: # 0x46, fsub l, r, d l - r => d; subtract +F 584 | assert l.isfloat() and r.isfloat() 585 | math = builder.fsub(l, r) 586 | return _store_as(math, d, blk, builder) 587 | case ida_hexrays.m_fmul: # 0x47, fmul l, r, d l * r => d; multiply +F 588 | assert l.isfloat() and r.isfloat() 589 | math = builder.fmul(l, r) 590 | return _store_as(math, d, blk, builder) 591 | case ida_hexrays.m_fdiv: # 0x48, fdiv l, r, d l / r => d; divide +F 592 | assert l.isfloat() and r.isfloat() 593 | math = builder.fdiv(l, r) 594 | return _store_as(math, d, blk, builder) 595 | raise NotImplementedError(f"not implemented {ida_insn.dstr()}") --------------------------------------------------------------------------------