├── .gitignore ├── .gitmodules ├── README.md ├── examples ├── .gitignore ├── Makefile ├── all_equal │ ├── _pcode.h │ ├── all_equal │ ├── all_equal.asm │ ├── all_equal.c │ ├── all_equal_debug.c │ ├── all_equal_pcode.c │ ├── harness.c │ ├── notes │ ├── run.sh │ ├── trace │ └── x86.h ├── cmp │ ├── _pcode.h │ ├── cmpxchg.s │ ├── notes │ └── test.sh ├── dyn │ ├── _pcode.h │ ├── hello.c │ ├── notes │ ├── objdump │ ├── readelf │ ├── reljmp.s │ └── test.sh ├── enqueue │ └── enqueue.c ├── fact_iter │ ├── fact.c │ ├── fact.s │ └── test.sh ├── fact_rec │ └── fact.c ├── min │ ├── .gitignore │ ├── _pcode.h │ ├── harness.c │ ├── mymin.c │ └── run.sh ├── patch_challenge │ ├── _pcode.h │ ├── a.out.elf │ ├── a.out.s │ ├── a_patched.out │ ├── a_patched.out.s │ ├── hello.c │ ├── patch.py │ ├── pcode_delete_if.c │ └── test.sh ├── pile.c ├── stress │ ├── add_up.c │ ├── graham.c │ ├── graham0.c │ ├── memory.c │ ├── notes │ ├── store.c │ └── switch.c ├── tiny_float │ └── tfloat.c └── tv │ ├── factorial.c │ ├── harness.c │ ├── prime.c │ ├── ret3.c │ ├── ret3n.c │ ├── sum.c │ └── test.sh ├── ghidra_scripts ├── .gitignore ├── C_Logo.png ├── Ghidra_Logo.png ├── README.md ├── cbmc.png ├── cbmc.py ├── compile.py ├── patcherex.py ├── patcherex2.jpeg ├── pcode2c.py ├── pcode2c_util.py ├── pcode2cmega.py └── wip │ ├── e9patch.py │ ├── elf_edit.py │ ├── live_vars.py │ ├── liveness.py │ ├── patch_diff.py │ ├── pcode2c_old.py │ └── validate.py ├── notes ├── asm_test.ipynb ├── junk.md ├── pcode2c.sh ├── popcount_harness.c ├── testout ├── try_cle.ipynb └── trycle.ipynb ├── pcode2c ├── __init__.py ├── __main__.py ├── cbmc.py ├── dwarf_annot.py ├── naive.py ├── printer.py └── util.py ├── pyproject.toml └── templates ├── Makefile ├── README.md ├── _pcode.h ├── harness.c └── pcodei ├── notes └── pcodei.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | fact_proj 3 | *.vscode 4 | a.out 5 | .gdb_history 6 | */__pycache__/ 7 | *.egg-info 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "templates/pcodei/pypcode"] 2 | path = templates/pcodei/pypcode 3 | url = https://github.com/angr/pypcode.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pcode2c 2 | 3 | WIP: Do not expect this to work for you yet. 4 | 5 | pcode2c is a Ghidra translator script from low pcode to a [specialized C interpreter](https://www.gtoal.com/sbt/) for the purposes of running the resulting code through off-the-shelf verifiers (for example [CBMC](https://github.com/diffblue/cbmc)). The resulting C has a direct mapping to the underlying assembly. 6 | 7 | C is a useful intermediate because it enables using powerful off-the-shelf verifiers and can be directly compared (with a little elbow grease) against decompilation or source. 8 | 9 | This enables soundly answering questions about the relationship between high-level source and binary in a relatively easy manner that no other method I know of can do. 10 | 11 | Blog posts: 12 | - [PCode2C: Steps Towards Translation Validation with Ghidra and CBMC](https://www.philipzucker.com/pcode2c/) 13 | - [DWARF Verification via Ghidra and CBMC](https://www.philipzucker.com/pcode2c-dwarf/) 14 | ## Installation 15 | 16 | ```bash 17 | python3 -m pip install -e . 
18 | ``` 19 | 20 | ## Usage 21 | 22 | ```bash 23 | python3 -m pcode2c ./examples/min/mymin.o > tmp.c 24 | gcc -I templates -c tmp.c 25 | ``` 26 | 27 | ## Ghidra Plugin Installation 28 | 29 | - Open up Ghidra on a binary 30 | - Click on `Window > Script Manager` in the toolbar 31 | - Click the Manage Script Directories button in the top right of the Script Manager window (looks like a checklist) 32 | - Find the `./ghidra_scripts` directory and add it 33 | - Click Refresh Script List in the Script Manager 34 | - There is now a PCode2C folder in the Script Manager. Open it and run the script you need (for example `pcode2c.py`) 35 | 36 | ## Example 37 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | */decomp.c 2 | */arch.h 3 | */harness_template.c 4 | */pcode.h -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | mymin.o: min/mymin.c 2 | gcc -gdwarf-4 -Wall -O1 -c -o min/mymin.o min/mymin.c 3 | 4 | fact.o: fact.c 5 | gcc -g -Wall -c -o fact.o fact.c 6 | 7 | 8 | -------------------------------------------------------------------------------- /examples/all_equal/_pcode.h: -------------------------------------------------------------------------------- 1 | ../../templates/_pcode.h -------------------------------------------------------------------------------- /examples/all_equal/all_equal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/examples/all_equal/all_equal -------------------------------------------------------------------------------- /examples/all_equal/all_equal.asm: -------------------------------------------------------------------------------- 1 | 0000000000001169 (File Offset: 0x1169): 2 | equal(): 3 | /tmp/all_equal.c:3 4 | 
#include 5 | #include 6 | bool equal(int n, int a[], int b[]){ 7 | 1169: f3 0f 1e fa endbr64 8 | /tmp/all_equal.c:4 9 | for(int i = 0; i < n; i++){ 10 | 116d: b8 00 00 00 00 mov $0x0,%eax 11 | 1172: eb 03 jmp 1177 (File Offset: 0x1177) 12 | /tmp/all_equal.c:4 (discriminator 2) 13 | 1174: 83 c0 01 add $0x1,%eax 14 | /tmp/all_equal.c:4 (discriminator 1) 15 | 1177: 39 f8 cmp %edi,%eax 16 | 1179: 7d 13 jge 118e (File Offset: 0x118e) 17 | /tmp/all_equal.c:5 18 | if(a[i] != b[i]){ 19 | 117b: 48 63 c8 movslq %eax,%rcx 20 | 117e: 44 8b 04 8a mov (%rdx,%rcx,4),%r8d 21 | 1182: 44 39 04 8e cmp %r8d,(%rsi,%rcx,4) 22 | 1186: 74 ec je 1174 (File Offset: 0x1174) 23 | /tmp/all_equal.c:6 24 | return false; 25 | 1188: b8 00 00 00 00 mov $0x0,%eax 26 | /tmp/all_equal.c:10 27 | } 28 | } 29 | return true; 30 | } 31 | 118d: c3 ret 32 | -- 33 | 11b7: e8 ad ff ff ff call 1169 (File Offset: 0x1169) 34 | 11bc: 84 c0 test %al,%al 35 | 11be: 74 1a je 11da (File Offset: 0x11da) 36 | /tmp/all_equal.c:15 37 | } 38 | 11c0: 48 8b 44 24 28 mov 0x28(%rsp),%rax 39 | 11c5: 64 48 2b 04 25 28 00 sub %fs:0x28,%rax 40 | 11cc: 00 00 41 | 11ce: 75 29 jne 11f9 (File Offset: 0x11f9) 42 | 11d0: b8 00 00 00 00 mov $0x0,%eax 43 | 11d5: 48 83 c4 38 add $0x38,%rsp 44 | 11d9: c3 ret 45 | /tmp/all_equal.c:14 (discriminator 1) 46 | assert(equal(10, a, a)); 47 | 11da: 48 8d 0d 44 0e 00 00 lea 0xe44(%rip),%rcx # 2025 <__PRETTY_FUNCTION__.0> (File Offset: 0x2025) 48 | 11e1: ba 0e 00 00 00 mov $0xe,%edx 49 | 11e6: 48 8d 35 17 0e 00 00 lea 0xe17(%rip),%rsi # 2004 <_IO_stdin_used+0x4> (File Offset: 0x2004) 50 | 11ed: 48 8d 3d 21 0e 00 00 lea 0xe21(%rip),%rdi # 2015 <_IO_stdin_used+0x15> (File Offset: 0x2015) 51 | 11f4: e8 77 fe ff ff call 1070 <__assert_fail@plt> (File Offset: 0x1070) 52 | /tmp/all_equal.c:15 53 | } 54 | 11f9: e8 62 fe ff ff call 1060 <__stack_chk_fail@plt> (File Offset: 0x1060) 55 | 56 | Disassembly of section .fini: 57 | 58 | 0000000000001200 <_fini> (File Offset: 0x1200): 59 | _fini(): 60 | 1200: f3 0f 
1e fa endbr64 61 | 1204: 48 83 ec 08 sub $0x8,%rsp 62 | 1208: 48 83 c4 08 add $0x8,%rsp 63 | 120c: c3 ret 64 | -------------------------------------------------------------------------------- /examples/all_equal/all_equal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | bool equal(int n, int a[], int b[]) { 4 | for (int i = 0; i < n; i++) { 5 | if (a[i] != b[i]) { 6 | return false; 7 | } 8 | } 9 | return true; 10 | } 11 | 12 | int main() { 13 | int a[10]; // = {42}; //calloc(10*sizeof(int)); 14 | assert(equal(10, a, a)); 15 | } -------------------------------------------------------------------------------- /examples/all_equal/all_equal_debug.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | bool equal(int n, int a[], int b[]) { 4 | for (int i = 0; i < n; i++) { 5 | pcode2c(&state, -1); 6 | if (a[i] != b[i]) { 7 | return false; 8 | } 9 | } 10 | return true; 11 | } 12 | 13 | int main() { 14 | int a[10]; // = {42}; //calloc(10*sizeof(int)); 15 | assert(equal(10, a, a)); 16 | } -------------------------------------------------------------------------------- /examples/all_equal/all_equal_pcode.c: -------------------------------------------------------------------------------- 1 | /* AUTOGENERATED. DO NOT EDIT. 
*/ 2 | #include "_pcode.h" 3 | void pcode2c(CPUState *state, size_t breakpoint) { 4 | uint8_t *register_space = state->reg; 5 | uint8_t *unique_space = state->unique; 6 | uint8_t *ram_space = state->ram; 7 | uint8_t *ram = ram_space; // TODO: remove this 8 | for (;;) { 9 | switch (state->pc) { 10 | case 0x1169: 11 | L_0x1169ul: 12 | INSN(0x1169ul, "0x1169: ENDBR64 ") 13 | case 0x116d: 14 | L_0x116dul: 15 | INSN(0x116dul, "0x116d: MOV EAX,0x0") 16 | L_P_0x1: 17 | COPY(register_space + 0x0ul /* RAX */, 8ul, 18 | (uint8_t *)&(long unsigned int){0x0ul}, 0x8ul); 19 | case 0x1172: 20 | L_0x1172ul: 21 | INSN(0x1172ul, "0x1172: JMP 0x1177") 22 | L_P_0x2: 23 | BRANCH_GOTO_TEMPFIX(ram_space, 0x1177ul, 8ul); 24 | case 0x1174: 25 | L_0x1174ul: 26 | INSN(0x1174ul, "0x1174: ADD EAX,0x1") 27 | L_P_0x3: 28 | INT_CARRY(register_space + 0x200ul /* CF */, 1ul, 29 | register_space + 0x0ul /* EAX */, 4ul, 30 | (uint8_t *)&(long unsigned int){0x1ul}, 0x4ul); 31 | L_P_0x4: 32 | INT_SCARRY(register_space + 0x20bul /* OF */, 1ul, 33 | register_space + 0x0ul /* EAX */, 4ul, 34 | (uint8_t *)&(long unsigned int){0x1ul}, 0x4ul); 35 | L_P_0x5: 36 | INT_ADD(register_space + 0x0ul /* EAX */, 4ul, 37 | register_space + 0x0ul /* EAX */, 4ul, 38 | (uint8_t *)&(long unsigned int){0x1ul}, 0x4ul); 39 | L_P_0x6: 40 | INT_ZEXT(register_space + 0x0ul /* RAX */, 8ul, 41 | register_space + 0x0ul /* EAX */, 4ul); 42 | L_P_0x7: 43 | INT_SLESS(register_space + 0x207ul /* SF */, 1ul, 44 | register_space + 0x0ul /* EAX */, 4ul, 45 | (uint8_t *)&(long unsigned int){0x0ul}, 0x4ul); 46 | L_P_0x8: 47 | INT_EQUAL(register_space + 0x206ul /* ZF */, 1ul, 48 | register_space + 0x0ul /* EAX */, 4ul, 49 | (uint8_t *)&(long unsigned int){0x0ul}, 0x4ul); 50 | L_P_0x9: 51 | INT_AND(unique_space + 0x13180ul, 4ul, register_space + 0x0ul /* EAX */, 52 | 4ul, (uint8_t *)&(long unsigned int){0xfful}, 0x4ul); 53 | L_P_0xa: 54 | POPCOUNT(unique_space + 0x13200ul, 1ul, unique_space + 0x13180ul, 4ul); 55 | L_P_0xb: 56 | 
INT_AND(unique_space + 0x13280ul, 1ul, unique_space + 0x13200ul, 1ul, 57 | (uint8_t *)&(long unsigned int){0x1ul}, 0x1ul); 58 | L_P_0xc: 59 | INT_EQUAL(register_space + 0x202ul /* PF */, 1ul, 60 | unique_space + 0x13280ul, 1ul, 61 | (uint8_t *)&(long unsigned int){0x0ul}, 0x1ul); 62 | case 0x1177: 63 | L_0x1177ul: 64 | INSN(0x1177ul, "0x1177: CMP EAX,EDI") 65 | L_P_0xd: 66 | INT_LESS(register_space + 0x200ul /* CF */, 1ul, 67 | register_space + 0x0ul /* EAX */, 4ul, 68 | register_space + 0x38ul /* EDI */, 4ul); 69 | L_P_0xe: 70 | INT_SBORROW(register_space + 0x20bul /* OF */, 1ul, 71 | register_space + 0x0ul /* EAX */, 4ul, 72 | register_space + 0x38ul /* EDI */, 4ul); 73 | L_P_0xf: 74 | INT_SUB(unique_space + 0x29700ul, 4ul, register_space + 0x0ul /* EAX */, 75 | 4ul, register_space + 0x38ul /* EDI */, 4ul); 76 | L_P_0x10: 77 | INT_SLESS(register_space + 0x207ul /* SF */, 1ul, 78 | unique_space + 0x29700ul, 4ul, 79 | (uint8_t *)&(long unsigned int){0x0ul}, 0x4ul); 80 | L_P_0x11: 81 | INT_EQUAL(register_space + 0x206ul /* ZF */, 1ul, 82 | unique_space + 0x29700ul, 4ul, 83 | (uint8_t *)&(long unsigned int){0x0ul}, 0x4ul); 84 | L_P_0x12: 85 | INT_AND(unique_space + 0x13180ul, 4ul, unique_space + 0x29700ul, 4ul, 86 | (uint8_t *)&(long unsigned int){0xfful}, 0x4ul); 87 | L_P_0x13: 88 | POPCOUNT(unique_space + 0x13200ul, 1ul, unique_space + 0x13180ul, 4ul); 89 | L_P_0x14: 90 | INT_AND(unique_space + 0x13280ul, 1ul, unique_space + 0x13200ul, 1ul, 91 | (uint8_t *)&(long unsigned int){0x1ul}, 0x1ul); 92 | L_P_0x15: 93 | INT_EQUAL(register_space + 0x202ul /* PF */, 1ul, 94 | unique_space + 0x13280ul, 1ul, 95 | (uint8_t *)&(long unsigned int){0x0ul}, 0x1ul); 96 | case 0x1179: 97 | L_0x1179ul: 98 | INSN(0x1179ul, "0x1179: JGE 0x118e") 99 | L_P_0x16: 100 | INT_EQUAL(unique_space + 0xcf80ul, 1ul, register_space + 0x20bul /* OF */, 101 | 1ul, register_space + 0x207ul /* SF */, 1ul); 102 | printf("PCODE2C branch cond %d ", (int)*(unique_space + 0xcf80ul)); 103 | printf("PCODE2C 
SF %d ", (int)*(register_space + 0x207ul)); 104 | printf("PCODE2C OF %d ", (int)*(register_space + 0x20bul)); 105 | L_P_0x17: 106 | CBRANCH_GOTO_TEMPFIX(ram_space, 0x118eul, 8ul, unique_space, 0xcf80ul, 107 | 1ul); 108 | case 0x117b: 109 | L_0x117bul: 110 | INSN(0x117bul, "0x117b: MOVSXD RCX,EAX") 111 | L_P_0x18: 112 | INT_SEXT(register_space + 0x8ul /* RCX */, 8ul, 113 | register_space + 0x0ul /* EAX */, 4ul); 114 | case 0x117e: 115 | L_0x117eul: 116 | INSN(0x117eul, "0x117e: MOV R8D,dword ptr [RDX + RCX*0x4]") 117 | L_P_0x19: 118 | INT_MULT(unique_space + 0x3300ul, 8ul, register_space + 0x8ul /* RCX */, 119 | 8ul, (uint8_t *)&(long unsigned int){0x4ul}, 0x8ul); 120 | L_P_0x1a: 121 | INT_ADD(unique_space + 0x3400ul, 8ul, register_space + 0x10ul /* RDX */, 122 | 8ul, unique_space + 0x3300ul, 8ul); 123 | L_P_0x1b: 124 | LOAD(unique_space + 0xbf00ul, 4ul, 125 | (uint8_t *)&(long unsigned int){0x5942e6dc2ed0ul}, 0x8ul, 126 | unique_space + 0x3400ul, 8ul); 127 | L_P_0x1c: 128 | COPY(register_space + 0x80ul /* R8D */, 4ul, unique_space + 0xbf00ul, 129 | 4ul); 130 | L_P_0x1d: 131 | INT_ZEXT(register_space + 0x80ul /* R8 */, 8ul, 132 | register_space + 0x80ul /* R8D */, 4ul); 133 | case 0x1182: 134 | L_0x1182ul: 135 | INSN(0x1182ul, "0x1182: CMP dword ptr [RSI + RCX*0x4],R8D") 136 | L_P_0x1e: 137 | INT_MULT(unique_space + 0x3300ul, 8ul, register_space + 0x8ul /* RCX */, 138 | 8ul, (uint8_t *)&(long unsigned int){0x4ul}, 0x8ul); 139 | L_P_0x1f: 140 | INT_ADD(unique_space + 0x3400ul, 8ul, register_space + 0x30ul /* RSI */, 141 | 8ul, unique_space + 0x3300ul, 8ul); 142 | L_P_0x20: 143 | LOAD(unique_space + 0xbf00ul, 4ul, 144 | (uint8_t *)&(long unsigned int){0x5942e6dc2ed0ul}, 0x8ul, 145 | unique_space + 0x3400ul, 8ul); 146 | L_P_0x21: 147 | INT_LESS(register_space + 0x200ul /* CF */, 1ul, unique_space + 0xbf00ul, 148 | 4ul, register_space + 0x80ul /* R8D */, 4ul); 149 | L_P_0x22: 150 | LOAD(unique_space + 0xbf00ul, 4ul, 151 | (uint8_t *)&(long unsigned 
int){0x5942e6dc2ed0ul}, 0x8ul, 152 | unique_space + 0x3400ul, 8ul); 153 | L_P_0x23: 154 | INT_SBORROW(register_space + 0x20bul /* OF */, 1ul, 155 | unique_space + 0xbf00ul, 4ul, 156 | register_space + 0x80ul /* R8D */, 4ul); 157 | L_P_0x24: 158 | LOAD(unique_space + 0xbf00ul, 4ul, 159 | (uint8_t *)&(long unsigned int){0x5942e6dc2ed0ul}, 0x8ul, 160 | unique_space + 0x3400ul, 8ul); 161 | L_P_0x25: 162 | INT_SUB(unique_space + 0x29700ul, 4ul, unique_space + 0xbf00ul, 4ul, 163 | register_space + 0x80ul /* R8D */, 4ul); 164 | L_P_0x26: 165 | INT_SLESS(register_space + 0x207ul /* SF */, 1ul, 166 | unique_space + 0x29700ul, 4ul, 167 | (uint8_t *)&(long unsigned int){0x0ul}, 0x4ul); 168 | L_P_0x27: 169 | INT_EQUAL(register_space + 0x206ul /* ZF */, 1ul, 170 | unique_space + 0x29700ul, 4ul, 171 | (uint8_t *)&(long unsigned int){0x0ul}, 0x4ul); 172 | L_P_0x28: 173 | INT_AND(unique_space + 0x13180ul, 4ul, unique_space + 0x29700ul, 4ul, 174 | (uint8_t *)&(long unsigned int){0xfful}, 0x4ul); 175 | L_P_0x29: 176 | POPCOUNT(unique_space + 0x13200ul, 1ul, unique_space + 0x13180ul, 4ul); 177 | L_P_0x2a: 178 | INT_AND(unique_space + 0x13280ul, 1ul, unique_space + 0x13200ul, 1ul, 179 | (uint8_t *)&(long unsigned int){0x1ul}, 0x1ul); 180 | L_P_0x2b: 181 | INT_EQUAL(register_space + 0x202ul /* PF */, 1ul, 182 | unique_space + 0x13280ul, 1ul, 183 | (uint8_t *)&(long unsigned int){0x0ul}, 0x1ul); 184 | case 0x1186: 185 | L_0x1186ul: 186 | INSN(0x1186ul, "0x1186: JZ 0x1174") 187 | L_P_0x2c: 188 | CBRANCH_GOTO_TEMPFIX(ram_space, 0x1174ul, 8ul, register_space, 189 | 0x206ul /* ZF */, 1ul); 190 | case 0x1188: 191 | L_0x1188ul: 192 | INSN(0x1188ul, "0x1188: MOV EAX,0x0") 193 | L_P_0x2d: 194 | COPY(register_space + 0x0ul /* RAX */, 8ul, 195 | (uint8_t *)&(long unsigned int){0x0ul}, 0x8ul); 196 | case 0x118d: 197 | L_0x118dul: 198 | INSN(0x118dul, "0x118d: RET ") 199 | L_P_0x2e: 200 | LOAD(register_space + 0x288ul /* RIP */, 8ul, 201 | (uint8_t *)&(long unsigned int){0x5942e6dc2ed0ul}, 0x8ul, 
202 | register_space + 0x20ul /* RSP */, 8ul); 203 | L_P_0x2f: 204 | INT_ADD(register_space + 0x20ul /* RSP */, 8ul, 205 | register_space + 0x20ul /* RSP */, 8ul, 206 | (uint8_t *)&(long unsigned int){0x8ul}, 0x8ul); 207 | L_P_0x30: 208 | RETURN(register_space + 0x288ul /* RIP */, 8ul); 209 | case 0x118e: 210 | L_0x118eul: 211 | INSN(0x118eul, "0x118e: MOV EAX,0x1") 212 | L_P_0x31: 213 | COPY(register_space + 0x0ul /* RAX */, 8ul, 214 | (uint8_t *)&(long unsigned int){0x1ul}, 0x8ul); 215 | case 0x1193: 216 | L_0x1193ul: 217 | INSN(0x1193ul, "0x1193: RET ") 218 | L_P_0x32: 219 | LOAD(register_space + 0x288ul /* RIP */, 8ul, 220 | (uint8_t *)&(long unsigned int){0x5942e6dc2ed0ul}, 0x8ul, 221 | register_space + 0x20ul /* RSP */, 8ul); 222 | L_P_0x33: 223 | INT_ADD(register_space + 0x20ul /* RSP */, 8ul, 224 | register_space + 0x20ul /* RSP */, 8ul, 225 | (uint8_t *)&(long unsigned int){0x8ul}, 0x8ul); 226 | L_P_0x34: 227 | RETURN(register_space + 0x288ul /* RIP */, 8ul); 228 | default: 229 | assert(state->pc != -1); // Unexpected PC value 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /examples/all_equal/harness.c: -------------------------------------------------------------------------------- 1 | #include "all_equal_pcode.c" 2 | #include "x86.h" 3 | #include 4 | 5 | int main() { 6 | CPUState state; 7 | init_CPUState(&state); 8 | 9 | // Initialize state here, including pc address. 
10 | state.pc = 0x1169; 11 | 12 | memset(state.reg + RDI, 1, 8); 13 | memset(state.reg + RSI, 64, 8); 14 | memset(state.reg + RDX, 64, 8); 15 | memset(state.reg + RCX, 0, 8); 16 | memset(state.reg + RAX, 0, 8); 17 | memset(state.reg + RSP, 0, 8); 18 | 19 | pcode2c(&state, -1); // Run decompiled function 20 | printf("RAX: %ld\n", *(uint64_t *)(state.reg + RAX)); 21 | __CPROVER_printf("RAX: %ld\n", *(uint64_t *)(state.reg + RAX)); 22 | assert(*(uint8_t *)(state.reg + RAX) != 0); 23 | 24 | return 0; 25 | } -------------------------------------------------------------------------------- /examples/all_equal/notes: -------------------------------------------------------------------------------- 1 | ~/Documents/solvers/cbmc/build/bin/cbmc harness.c --unwind 3 --unwinding-assertions --no-standard-checks --object-bits 10 --trace | grep --no-group-separator '^PCODE2C' > trace -------------------------------------------------------------------------------- /examples/all_equal/run.sh: -------------------------------------------------------------------------------- 1 | gcc -Og -g -Wall -Wextra -std=c11 /tmp/all_equal.c -o all_equal 2 | objdump -l -F -d -S all_equal | grep -A 30 '' > all_equal.asm 3 | python3 -m pcode2c all_equal --start-address 0x1169 --file-offset 0x1169 --size 0x2b > all_equal_pcode.c 4 | gcc harness.c 5 | ./a.out -------------------------------------------------------------------------------- /examples/all_equal/trace: -------------------------------------------------------------------------------- 1 | PCODE2C R0: 0 2 | PCODE2C R1: 0 3 | PCODE2C R2: 0 4 | PCODE2C R3: 0 5 | PCODE2C R4: 0 6 | PCODE2C R5: 0 7 | PCODE2C R6: 0 8 | PCODE2C R7: 0 9 | PCODE2C R8: 0 10 | PCODE2C R9: 0 11 | PCODE2C R10: 0 12 | PCODE2C 13 | PCODE2C0x4457: 0x1169: ENDBR64 14 | PCODE2C R0: 0 15 | PCODE2C R1: 0 16 | PCODE2C R2: 0 17 | PCODE2C R3: 0 18 | PCODE2C R4: 0 19 | PCODE2C R5: 0 20 | PCODE2C R6: 0 21 | PCODE2C R7: 0 22 | PCODE2C R8: 0 23 | PCODE2C R9: 0 24 | PCODE2C R10: 0 25 | 
PCODE2C 26 | PCODE2C0x4461: 0x116d: MOV EAX,0x0 27 | PCODE2C COPY: (format-constant failed: typecast) 8 (format-constant failed: typecast) 8 28 | PCODE2C R0: 0 29 | PCODE2C R1: 0 30 | PCODE2C R2: 0 31 | PCODE2C R3: 0 32 | PCODE2C R4: 0 33 | PCODE2C R5: 0 34 | PCODE2C R6: 0 35 | PCODE2C R7: 0 36 | PCODE2C R8: 0 37 | PCODE2C R9: 0 38 | PCODE2C R10: 0 39 | PCODE2C 40 | PCODE2C0x4466: 0x1172: JMP 0x1177 41 | PCODE2C BRANCH: 0x1177ul 8 42 | PCODE2C R0: 0 43 | PCODE2C R1: 0 44 | PCODE2C R2: 0 45 | PCODE2C R3: 0 46 | PCODE2C R4: 0 47 | PCODE2C R5: 0 48 | PCODE2C R6: 0 49 | PCODE2C R7: 0 50 | PCODE2C R8: 0 51 | PCODE2C R9: 0 52 | PCODE2C R10: 0 53 | PCODE2C 54 | PCODE2C0x4471: 0x1177: CMP EAX,EDI 55 | PCODE2C INT_LESS: (format-constant failed: typecast) 1 (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 56 | PCODE2C INT_SBORROW: (format-constant failed: typecast) 1 (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 57 | PCODE2C INT_SUB: (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 58 | PCODE2C INT_SLESS: (format-constant failed: typecast) 1 (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 59 | PCODE2C INT_EQUAL: (format-constant failed: typecast) 1 (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 60 | PCODE2C INT_AND: (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 (format-constant failed: typecast) 4 61 | PCODE2C POPCOUNT: (format-constant failed: typecast) 1 (format-constant failed: typecast) 4 62 | PCODE2C INT_AND: (format-constant failed: typecast) 1 (format-constant failed: typecast) 1 (format-constant failed: typecast) 1 63 | PCODE2C INT_EQUAL: (format-constant failed: typecast) 1 (format-constant failed: typecast) 1 (format-constant failed: typecast) 1 64 | PCODE2C R0: 0 65 | PCODE2C R1: 0 66 | PCODE2C R2: 0 67 | PCODE2C R3: 0 68 | PCODE2C R4: 0 69 | PCODE2C R5: 0 70 | PCODE2C 
R6: 0 71 | PCODE2C R7: 0 72 | PCODE2C R8: 0 73 | PCODE2C R9: 0 74 | PCODE2C R10: 0 75 | PCODE2C 76 | PCODE2C0x4473: 0x1179: JGE 0x118e 77 | PCODE2C INT_EQUAL: (format-constant failed: typecast) 1 (format-constant failed: typecast) 1 (format-constant failed: typecast) 1 78 | PCODE2C branch cond 1 79 | PCODE2C SF 0 80 | PCODE2C OF 0 81 | PCODE2C CBRANCH_GOTO_TEMPFIX: 53120 1 0x118eul 8 82 | PCODE2CBRANCH TAKEN 83 | PCODE2C R0: 0 84 | PCODE2C R1: 0 85 | PCODE2C R2: 0 86 | PCODE2C R3: 0 87 | PCODE2C R4: 0 88 | PCODE2C R5: 0 89 | PCODE2C R6: 0 90 | PCODE2C R7: 0 91 | PCODE2C R8: 0 92 | PCODE2C R9: 0 93 | PCODE2C R10: 0 94 | PCODE2C 95 | PCODE2C0x4494: 0x118e: MOV EAX,0x1 96 | PCODE2C COPY: (format-constant failed: typecast) 8 (format-constant failed: typecast) 8 97 | PCODE2C R0: 1 98 | PCODE2C R1: 0 99 | PCODE2C R2: 0 100 | PCODE2C R3: 0 101 | PCODE2C R4: 0 102 | PCODE2C R5: 0 103 | PCODE2C R6: 0 104 | PCODE2C R7: 0 105 | PCODE2C R8: 0 106 | PCODE2C R9: 0 107 | PCODE2C R10: 0 108 | PCODE2C 109 | PCODE2C0x4499: 0x1193: RET 110 | PCODE2C LOAD: (format-constant failed: typecast) 8 (format-constant failed: typecast) 8 (format-constant failed: typecast) 8 111 | PCODE2C load address: 0 112 | PCODE2C INT_ADD: (format-constant failed: typecast) 8 (format-constant failed: typecast) 8 (format-constant failed: typecast) 8 113 | PCODE2C RETURN: (format-constant failed: typecast) 8 114 | -------------------------------------------------------------------------------- /examples/cmp/cmpxchg.s: -------------------------------------------------------------------------------- 1 | .extern _start 2 | _start: 3 | cmp %eax,%ebx 4 | cmove %ecx, %edx 5 | cmpxchg %edx,(%rdi) 6 | nop # there is a slight bug when a jump can occur to the end 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/cmp/notes: -------------------------------------------------------------------------------- 1 | pcode with internal pcode relative jumps are sometimes 
emitted. -------------------------------------------------------------------------------- /examples/cmp/test.sh: -------------------------------------------------------------------------------- 1 | gcc -c cmpxchg.s 2 | python3 -m pcode2c cmpxchg.o > decomp.c 3 | gcc -c decomp.c -------------------------------------------------------------------------------- /examples/dyn/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | int main() 3 | { 4 | printf("Hello, World!\n"); 5 | return 0; 6 | } -------------------------------------------------------------------------------- /examples/dyn/notes: -------------------------------------------------------------------------------- 1 | Yes so -no-pie gives me an ET_EXEC rather than ET_DYN 2 | 3 | -------------------------------------------------------------------------------- /examples/dyn/reljmp.s: -------------------------------------------------------------------------------- 1 | 2 | .extern _start 3 | _start: 4 | foo: 5 | add $1, %eax 6 | nop 7 | nop 8 | nop 9 | .rept 0x1000 10 | nop 11 | .endr 12 | jmp foo 13 | -------------------------------------------------------------------------------- /examples/dyn/test.sh: -------------------------------------------------------------------------------- 1 | gcc -no-pie -static hello.c 2 | objdump a.out -d > objdump 3 | readelf -a a.out > readelf 4 | python3 -m pcode2c a.out > decomp.c 5 | gcc -c decomp.c -------------------------------------------------------------------------------- /examples/enqueue/enqueue.c: -------------------------------------------------------------------------------- 1 | #include 2 | typedef struct 3 | { 4 | uint8_t dat[8]; 5 | } my_data; 6 | 7 | my_data buffer[32]; 8 | extern int count; 9 | void enqueue(my_data *data) 10 | { 11 | count++; 12 | buffer[count] = *data; 13 | } 14 | 15 | my_data *dequeue() 16 | { 17 | count--; 18 | return &buffer[count + 1]; 19 | } 
-------------------------------------------------------------------------------- /examples/fact_iter/fact.c: -------------------------------------------------------------------------------- 1 | /* 2 | Some fun stuff to play with. Could take in unsigned int. 3 | != form is classic problem wth given negative argument. 4 | 5 | */ 6 | 7 | int fact(int x) 8 | { 9 | // fun_entry: 10 | int result = 1; 11 | loop_head: 12 | while (x != 0) 13 | { 14 | loop_body: 15 | result = result * x; 16 | x = x - 1; 17 | } 18 | loop_exit: 19 | return result; 20 | } -------------------------------------------------------------------------------- /examples/fact_iter/fact.s: -------------------------------------------------------------------------------- 1 | 2 | fact.o: file format elf64-x86-64 3 | 4 | Contents of the .debug_info section: 5 | 6 | Compilation Unit @ offset 0x0: 7 | Length: 0xa8 (32-bit) 8 | Version: 5 9 | Unit Type: DW_UT_compile (1) 10 | Abbrev Offset: 0x0 11 | Pointer Size: 8 12 | <0>: Abbrev Number: 2 (DW_TAG_compile_unit) 13 | DW_AT_producer : (indirect string, offset: 0x19): GNU C17 11.4.0 -mtune=generic -march=x86-64 -g -O1 -fasynchronous-unwind-tables -fstack-protector-strong -fstack-clash-protection -fcf-protection 14 | <11> DW_AT_language : 29 (C11) 15 | <12> DW_AT_name : (indirect line string, offset: 0x39): fact.c 16 | <16> DW_AT_comp_dir : (indirect line string, offset: 0x0): /home/philip/Documents/python/pcode2c/examples/fact_iter 17 | <1a> DW_AT_low_pc : 0x0 18 | <22> DW_AT_high_pc : 0x1c 19 | <2a> DW_AT_stmt_list : 0x0 20 | <1><2e>: Abbrev Number: 3 (DW_TAG_subprogram) 21 | <2f> DW_AT_external : 1 22 | <2f> DW_AT_name : (indirect string, offset: 0x0): fact 23 | <33> DW_AT_decl_file : 1 24 | <34> DW_AT_decl_line : 7 25 | <35> DW_AT_decl_column : 5 26 | <36> DW_AT_prototyped : 1 27 | <36> DW_AT_type : <0xa4> 28 | <3a> DW_AT_low_pc : 0x0 29 | <42> DW_AT_high_pc : 0x1c 30 | <4a> DW_AT_frame_base : 1 byte block: 9c (DW_OP_call_frame_cfa) 31 | <4c> 
DW_AT_call_all_calls: 1 32 | <4c> DW_AT_sibling : <0xa4> 33 | <2><50>: Abbrev Number: 4 (DW_TAG_formal_parameter) 34 | <51> DW_AT_name : x 35 | <53> DW_AT_decl_file : 1 36 | <54> DW_AT_decl_line : 7 37 | <55> DW_AT_decl_column : 14 38 | <56> DW_AT_type : <0xa4> 39 | <5a> DW_AT_location : 0x12 (location list) 40 | <5e> DW_AT_GNU_locviews: 0xc 41 | <2><62>: Abbrev Number: 5 (DW_TAG_variable) 42 | <63> DW_AT_name : (indirect string, offset: 0xab): result 43 | <67> DW_AT_decl_file : 1 44 | <68> DW_AT_decl_line : 10 45 | <69> DW_AT_decl_column : 9 46 | <6a> DW_AT_type : <0xa4> 47 | <6e> DW_AT_location : 0x2a (location list) 48 | <72> DW_AT_GNU_locviews: 0x24 49 | <2><76>: Abbrev Number: 1 (DW_TAG_label) 50 | <77> DW_AT_name : (indirect string, offset: 0xf): loop_head 51 | <7b> DW_AT_decl_file : 1 52 | <7b> DW_AT_decl_line : 11 53 | <7c> DW_AT_decl_column : 1 54 | <7d> DW_AT_low_pc : 0x4 55 | <2><85>: Abbrev Number: 1 (DW_TAG_label) 56 | <86> DW_AT_name : (indirect string, offset: 0xb2): loop_body 57 | <8a> DW_AT_decl_file : 1 58 | <8a> DW_AT_decl_line : 14 59 | <8b> DW_AT_decl_column : 5 60 | <8c> DW_AT_low_pc : 0xd 61 | <2><94>: Abbrev Number: 1 (DW_TAG_label) 62 | <95> DW_AT_name : (indirect string, offset: 0x5): loop_exit 63 | <99> DW_AT_decl_file : 1 64 | <99> DW_AT_decl_line : 18 65 | <9a> DW_AT_decl_column : 1 66 | <9b> DW_AT_low_pc : 0x1b 67 | <2>: Abbrev Number: 0 68 | <1>: Abbrev Number: 6 (DW_TAG_base_type) 69 | DW_AT_byte_size : 4 70 | DW_AT_encoding : 5 (signed) 71 | DW_AT_name : int 72 | <1>: Abbrev Number: 0 73 | 74 | 75 | Disassembly of section .text: 76 | 77 | 0000000000000000 : 78 | != form is classic problem wth given negative argument. 
79 | 80 | */ 81 | 82 | int fact(int x) 83 | { 84 | 0: f3 0f 1e fa endbr64 85 | // fun_entry: 86 | int result = 1; 87 | loop_head: 88 | while (x != 0) 89 | 4: 85 ff test %edi,%edi 90 | 6: 74 0e je 16 91 | int result = 1; 92 | 8: b8 01 00 00 00 mov $0x1,%eax 93 | { 94 | loop_body: 95 | result = result * x; 96 | d: 0f af c7 imul %edi,%eax 97 | while (x != 0) 98 | 10: 83 ef 01 sub $0x1,%edi 99 | 13: 75 f8 jne d 100 | 15: c3 ret 101 | int result = 1; 102 | 16: b8 01 00 00 00 mov $0x1,%eax 103 | x = x - 1; 104 | } 105 | loop_exit: 106 | return result; 107 | 1b: c3 ret 108 | -------------------------------------------------------------------------------- /examples/fact_iter/test.sh: -------------------------------------------------------------------------------- 1 | gcc -g -O1 fact.c -c # try clang 2 | objdump -S -d --dwarf=info fact.o > fact.s 3 | python3 -m pcode2c.dwarf_annot fact.o -------------------------------------------------------------------------------- /examples/fact_rec/fact.c: -------------------------------------------------------------------------------- 1 | int fact(int x) 2 | { 3 | if (x == 0) 4 | { 5 | return 1; 6 | } 7 | else 8 | { 9 | return x * fact(x - 1); 10 | } 11 | } -------------------------------------------------------------------------------- /examples/min/.gitignore: -------------------------------------------------------------------------------- 1 | Makefile -------------------------------------------------------------------------------- /examples/min/_pcode.h: -------------------------------------------------------------------------------- 1 | ../../templates/_pcode.h -------------------------------------------------------------------------------- /examples/min/harness.c: -------------------------------------------------------------------------------- 1 | #include "decomp.c" 2 | #include "arch.h" 3 | #include 4 | int main() 5 | { 6 | 7 | CPUState state; 8 | uint8_t reg[0x300] = {0}; 9 | uint8_t unique[0x261000]; 10 | uint8_t ram[1024] = 
{0}; 11 | state.reg = reg; 12 | state.unique = unique; 13 | state.ram = ram; 14 | state.pc = 0; // 0x00100000; 15 | uint8_t rsi, rdi; 16 | // rsi = 3; 17 | // rdi = 10; 18 | state.reg[RSI] = rsi; 19 | state.reg[RDI] = rdi; 20 | pcode2c(&state, -1); 21 | uint8_t ret = state.reg[RAX]; 22 | assert(ret == rsi || ret == rdi); 23 | assert(ret <= rsi && ret <= rdi); 24 | printf("pc: 0x%lx\n", state.pc); 25 | printf("RAX: %d\n", state.reg[RAX]); 26 | return 0; 27 | } -------------------------------------------------------------------------------- /examples/min/mymin.c: -------------------------------------------------------------------------------- 1 | #include 2 | int64_t mymin(int64_t x, int64_t y) 3 | { 4 | return x < y ? x : y; 5 | } -------------------------------------------------------------------------------- /examples/min/run.sh: -------------------------------------------------------------------------------- 1 | 2 | python3 -m pcode2c mymin.o > decomp.c 3 | cbmc harness.c 4 | -------------------------------------------------------------------------------- /examples/patch_challenge/_pcode.h: -------------------------------------------------------------------------------- 1 | ../../templates/_pcode.h -------------------------------------------------------------------------------- /examples/patch_challenge/a.out.elf: -------------------------------------------------------------------------------- 1 | ELF Header: 2 | Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 3 | Class: ELF64 4 | Data: 2's complement, little endian 5 | Version: 1 (current) 6 | OS/ABI: UNIX - System V 7 | ABI Version: 0 8 | Type: DYN (Position-Independent Executable file) 9 | Machine: Advanced Micro Devices X86-64 10 | Version: 0x1 11 | Entry point address: 0x10a0 12 | Start of program headers: 64 (bytes into file) 13 | Start of section headers: 14456 (bytes into file) 14 | Flags: 0x0 15 | Size of this header: 64 (bytes) 16 | Size of program headers: 56 (bytes) 17 | Number of program 
headers: 13 18 | Size of section headers: 64 (bytes) 19 | Number of section headers: 31 20 | Section header string table index: 30 21 | 22 | Section Headers: 23 | [Nr] Name Type Address Offset 24 | Size EntSize Flags Link Info Align 25 | [ 0] NULL 0000000000000000 00000000 26 | 0000000000000000 0000000000000000 0 0 0 27 | [ 1] .interp PROGBITS 0000000000000318 00000318 28 | 000000000000001c 0000000000000000 A 0 0 1 29 | [ 2] .note.gnu.pr[...] NOTE 0000000000000338 00000338 30 | 0000000000000030 0000000000000000 A 0 0 8 31 | [ 3] .note.gnu.bu[...] NOTE 0000000000000368 00000368 32 | 0000000000000024 0000000000000000 A 0 0 4 33 | [ 4] .note.ABI-tag NOTE 000000000000038c 0000038c 34 | 0000000000000020 0000000000000000 A 0 0 4 35 | [ 5] .gnu.hash GNU_HASH 00000000000003b0 000003b0 36 | 0000000000000024 0000000000000000 A 6 0 8 37 | [ 6] .dynsym DYNSYM 00000000000003d8 000003d8 38 | 00000000000000d8 0000000000000018 A 7 1 8 39 | [ 7] .dynstr STRTAB 00000000000004b0 000004b0 40 | 00000000000000b8 0000000000000000 A 0 0 1 41 | [ 8] .gnu.version VERSYM 0000000000000568 00000568 42 | 0000000000000012 0000000000000002 A 6 0 2 43 | [ 9] .gnu.version_r VERNEED 0000000000000580 00000580 44 | 0000000000000040 0000000000000000 A 7 1 8 45 | [10] .rela.dyn RELA 00000000000005c0 000005c0 46 | 00000000000000c0 0000000000000018 A 6 0 8 47 | [11] .rela.plt RELA 0000000000000680 00000680 48 | 0000000000000048 0000000000000018 AI 6 24 8 49 | [12] .init PROGBITS 0000000000001000 00001000 50 | 000000000000001b 0000000000000000 AX 0 0 4 51 | [13] .plt PROGBITS 0000000000001020 00001020 52 | 0000000000000040 0000000000000010 AX 0 0 16 53 | [14] .plt.got PROGBITS 0000000000001060 00001060 54 | 0000000000000010 0000000000000010 AX 0 0 16 55 | [15] .plt.sec PROGBITS 0000000000001070 00001070 56 | 0000000000000030 0000000000000010 AX 0 0 16 57 | [16] .text PROGBITS 00000000000010a0 000010a0 58 | 0000000000000277 0000000000000000 AX 0 0 16 59 | [17] .fini PROGBITS 0000000000001318 00001318 60 | 
000000000000000d 0000000000000000 AX 0 0 4 61 | [18] .rodata PROGBITS 0000000000002000 00002000 62 | 0000000000000036 0000000000000000 A 0 0 4 63 | [19] .eh_frame_hdr PROGBITS 0000000000002038 00002038 64 | 000000000000007c 0000000000000000 A 0 0 4 65 | [20] .eh_frame PROGBITS 00000000000020b8 000020b8 66 | 00000000000001d0 0000000000000000 A 0 0 8 67 | [21] .init_array INIT_ARRAY 0000000000003da8 00002da8 68 | 0000000000000008 0000000000000008 WA 0 0 8 69 | [22] .fini_array FINI_ARRAY 0000000000003db0 00002db0 70 | 0000000000000008 0000000000000008 WA 0 0 8 71 | [23] .dynamic DYNAMIC 0000000000003db8 00002db8 72 | 00000000000001f0 0000000000000010 WA 7 0 8 73 | [24] .got PROGBITS 0000000000003fa8 00002fa8 74 | 0000000000000058 0000000000000008 WA 0 0 8 75 | [25] .data PROGBITS 0000000000004000 00003000 76 | 0000000000000010 0000000000000000 WA 0 0 8 77 | [26] .bss NOBITS 0000000000004010 00003010 78 | 0000000000000008 0000000000000000 WA 0 0 1 79 | [27] .comment PROGBITS 0000000000000000 00003010 80 | 000000000000002b 0000000000000001 MS 0 0 1 81 | [28] .symtab SYMTAB 0000000000000000 00003040 82 | 0000000000000480 0000000000000018 29 19 8 83 | [29] .strtab STRTAB 0000000000000000 000034c0 84 | 0000000000000298 0000000000000000 0 0 1 85 | [30] .shstrtab STRTAB 0000000000000000 00003758 86 | 000000000000011a 0000000000000000 0 0 1 87 | Key to Flags: 88 | W (write), A (alloc), X (execute), M (merge), S (strings), I (info), 89 | L (link order), O (extra OS processing required), G (group), T (TLS), 90 | C (compressed), x (unknown), o (OS specific), E (exclude), 91 | D (mbind), l (large), p (processor specific) 92 | 93 | There are no section groups in this file. 
94 | 95 | Program Headers: 96 | Type Offset VirtAddr PhysAddr 97 | FileSiz MemSiz Flags Align 98 | PHDR 0x0000000000000040 0x0000000000000040 0x0000000000000040 99 | 0x00000000000002d8 0x00000000000002d8 R 0x8 100 | INTERP 0x0000000000000318 0x0000000000000318 0x0000000000000318 101 | 0x000000000000001c 0x000000000000001c R 0x1 102 | [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2] 103 | LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000 104 | 0x00000000000006c8 0x00000000000006c8 R 0x1000 105 | LOAD 0x0000000000001000 0x0000000000001000 0x0000000000001000 106 | 0x0000000000000325 0x0000000000000325 R E 0x1000 107 | LOAD 0x0000000000002000 0x0000000000002000 0x0000000000002000 108 | 0x0000000000000288 0x0000000000000288 R 0x1000 109 | LOAD 0x0000000000002da8 0x0000000000003da8 0x0000000000003da8 110 | 0x0000000000000268 0x0000000000000270 RW 0x1000 111 | DYNAMIC 0x0000000000002db8 0x0000000000003db8 0x0000000000003db8 112 | 0x00000000000001f0 0x00000000000001f0 RW 0x8 113 | NOTE 0x0000000000000338 0x0000000000000338 0x0000000000000338 114 | 0x0000000000000030 0x0000000000000030 R 0x8 115 | NOTE 0x0000000000000368 0x0000000000000368 0x0000000000000368 116 | 0x0000000000000044 0x0000000000000044 R 0x4 117 | GNU_PROPERTY 0x0000000000000338 0x0000000000000338 0x0000000000000338 118 | 0x0000000000000030 0x0000000000000030 R 0x8 119 | GNU_EH_FRAME 0x0000000000002038 0x0000000000002038 0x0000000000002038 120 | 0x000000000000007c 0x000000000000007c R 0x4 121 | GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 122 | 0x0000000000000000 0x0000000000000000 RW 0x10 123 | GNU_RELRO 0x0000000000002da8 0x0000000000003da8 0x0000000000003da8 124 | 0x0000000000000258 0x0000000000000258 R 0x1 125 | 126 | Section to Segment mapping: 127 | Segment Sections... 
128 | 00 129 | 01 .interp 130 | 02 .interp .note.gnu.property .note.gnu.build-id .note.ABI-tag .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn .rela.plt 131 | 03 .init .plt .plt.got .plt.sec .text .fini 132 | 04 .rodata .eh_frame_hdr .eh_frame 133 | 05 .init_array .fini_array .dynamic .got .data .bss 134 | 06 .dynamic 135 | 07 .note.gnu.property 136 | 08 .note.gnu.build-id .note.ABI-tag 137 | 09 .note.gnu.property 138 | 10 .eh_frame_hdr 139 | 11 140 | 12 .init_array .fini_array .dynamic .got 141 | 142 | Dynamic section at offset 0x2db8 contains 27 entries: 143 | Tag Type Name/Value 144 | 0x0000000000000001 (NEEDED) Shared library: [libc.so.6] 145 | 0x000000000000000c (INIT) 0x1000 146 | 0x000000000000000d (FINI) 0x1318 147 | 0x0000000000000019 (INIT_ARRAY) 0x3da8 148 | 0x000000000000001b (INIT_ARRAYSZ) 8 (bytes) 149 | 0x000000000000001a (FINI_ARRAY) 0x3db0 150 | 0x000000000000001c (FINI_ARRAYSZ) 8 (bytes) 151 | 0x000000006ffffef5 (GNU_HASH) 0x3b0 152 | 0x0000000000000005 (STRTAB) 0x4b0 153 | 0x0000000000000006 (SYMTAB) 0x3d8 154 | 0x000000000000000a (STRSZ) 184 (bytes) 155 | 0x000000000000000b (SYMENT) 24 (bytes) 156 | 0x0000000000000015 (DEBUG) 0x0 157 | 0x0000000000000003 (PLTGOT) 0x3fa8 158 | 0x0000000000000002 (PLTRELSZ) 72 (bytes) 159 | 0x0000000000000014 (PLTREL) RELA 160 | 0x0000000000000017 (JMPREL) 0x680 161 | 0x0000000000000007 (RELA) 0x5c0 162 | 0x0000000000000008 (RELASZ) 192 (bytes) 163 | 0x0000000000000009 (RELAENT) 24 (bytes) 164 | 0x000000000000001e (FLAGS) BIND_NOW 165 | 0x000000006ffffffb (FLAGS_1) Flags: NOW PIE 166 | 0x000000006ffffffe (VERNEED) 0x580 167 | 0x000000006fffffff (VERNEEDNUM) 1 168 | 0x000000006ffffff0 (VERSYM) 0x568 169 | 0x000000006ffffff9 (RELACOUNT) 3 170 | 0x0000000000000000 (NULL) 0x0 171 | 172 | Relocation section '.rela.dyn' at offset 0x5c0 contains 8 entries: 173 | Offset Info Type Sym. Value Sym. 
Name + Addend 174 | 000000003da8 000000000008 R_X86_64_RELATIVE 1180 175 | 000000003db0 000000000008 R_X86_64_RELATIVE 1140 176 | 000000004008 000000000008 R_X86_64_RELATIVE 4008 177 | 000000003fd8 000100000006 R_X86_64_GLOB_DAT 0000000000000000 __libc_start_main@GLIBC_2.34 + 0 178 | 000000003fe0 000200000006 R_X86_64_GLOB_DAT 0000000000000000 _ITM_deregisterTM[...] + 0 179 | 000000003fe8 000600000006 R_X86_64_GLOB_DAT 0000000000000000 __gmon_start__ + 0 180 | 000000003ff0 000700000006 R_X86_64_GLOB_DAT 0000000000000000 _ITM_registerTMCl[...] + 0 181 | 000000003ff8 000800000006 R_X86_64_GLOB_DAT 0000000000000000 __cxa_finalize@GLIBC_2.2.5 + 0 182 | 183 | Relocation section '.rela.plt' at offset 0x680 contains 3 entries: 184 | Offset Info Type Sym. Value Sym. Name + Addend 185 | 000000003fc0 000300000007 R_X86_64_JUMP_SLO 0000000000000000 __stack_chk_fail@GLIBC_2.4 + 0 186 | 000000003fc8 000400000007 R_X86_64_JUMP_SLO 0000000000000000 printf@GLIBC_2.2.5 + 0 187 | 000000003fd0 000500000007 R_X86_64_JUMP_SLO 0000000000000000 __assert_fail@GLIBC_2.2.5 + 0 188 | No processor specific unwind information to decode 189 | 190 | Symbol table '.dynsym' contains 9 entries: 191 | Num: Value Size Type Bind Vis Ndx Name 192 | 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND 193 | 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _[...]@GLIBC_2.34 (2) 194 | 2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_deregisterT[...] 195 | 3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __[...]@GLIBC_2.4 (3) 196 | 4: 0000000000000000 0 FUNC GLOBAL DEFAULT UND [...]@GLIBC_2.2.5 (4) 197 | 5: 0000000000000000 0 FUNC GLOBAL DEFAULT UND [...]@GLIBC_2.2.5 (4) 198 | 6: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__ 199 | 7: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_registerTMC[...] 
200 | 8: 0000000000000000 0 FUNC WEAK DEFAULT UND [...]@GLIBC_2.2.5 (4) 201 | 202 | Symbol table '.symtab' contains 48 entries: 203 | Num: Value Size Type Bind Vis Ndx Name 204 | 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND 205 | 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS Scrt1.o 206 | 2: 000000000000038c 32 OBJECT LOCAL DEFAULT 4 __abi_tag 207 | 3: 0000000000000000 0 FILE LOCAL DEFAULT ABS crtstuff.c 208 | 4: 00000000000010d0 0 FUNC LOCAL DEFAULT 16 deregister_tm_clones 209 | 5: 0000000000001100 0 FUNC LOCAL DEFAULT 16 register_tm_clones 210 | 6: 0000000000001140 0 FUNC LOCAL DEFAULT 16 __do_global_dtors_aux 211 | 7: 0000000000004010 1 OBJECT LOCAL DEFAULT 26 completed.0 212 | 8: 0000000000003db0 0 OBJECT LOCAL DEFAULT 22 __do_global_dtor[...] 213 | 9: 0000000000001180 0 FUNC LOCAL DEFAULT 16 frame_dummy 214 | 10: 0000000000003da8 0 OBJECT LOCAL DEFAULT 21 __frame_dummy_in[...] 215 | 11: 0000000000000000 0 FILE LOCAL DEFAULT ABS hello.c 216 | 12: 0000000000002031 5 OBJECT LOCAL DEFAULT 18 __PRETTY_FUNCTION__.0 217 | 13: 0000000000000000 0 FILE LOCAL DEFAULT ABS crtstuff.c 218 | 14: 0000000000002284 0 OBJECT LOCAL DEFAULT 20 __FRAME_END__ 219 | 15: 0000000000000000 0 FILE LOCAL DEFAULT ABS 220 | 16: 0000000000003db8 0 OBJECT LOCAL DEFAULT 23 _DYNAMIC 221 | 17: 0000000000002038 0 NOTYPE LOCAL DEFAULT 19 __GNU_EH_FRAME_HDR 222 | 18: 0000000000003fa8 0 OBJECT LOCAL DEFAULT 24 _GLOBAL_OFFSET_TABLE_ 223 | 19: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __libc_start_mai[...] 224 | 20: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_deregisterT[...] 225 | 21: 0000000000004000 0 NOTYPE WEAK DEFAULT 25 data_start 226 | 22: 000000000000119c 19 FUNC GLOBAL DEFAULT 16 add3_patch 227 | 23: 0000000000004010 0 NOTYPE GLOBAL DEFAULT 25 _edata 228 | 24: 0000000000001318 0 FUNC GLOBAL HIDDEN 17 _fini 229 | 25: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __stack_chk_fail[...] 
230 | 26: 00000000000011af 31 FUNC GLOBAL DEFAULT 16 delete_if 231 | 27: 0000000000000000 0 FUNC GLOBAL DEFAULT UND printf@GLIBC_2.2.5 232 | 28: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __assert_fail@GL[...] 233 | 29: 0000000000001225 51 FUNC GLOBAL DEFAULT 16 swap_statement 234 | 30: 0000000000001189 19 FUNC GLOBAL DEFAULT 16 add3 235 | 31: 0000000000004000 0 NOTYPE GLOBAL DEFAULT 25 __data_start 236 | 32: 00000000000011e0 18 FUNC GLOBAL DEFAULT 16 insert_if 237 | 33: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__ 238 | 34: 0000000000004008 0 OBJECT GLOBAL HIDDEN 25 __dso_handle 239 | 35: 00000000000011ce 18 FUNC GLOBAL DEFAULT 16 delete_if_patch 240 | 36: 0000000000002000 4 OBJECT GLOBAL DEFAULT 18 _IO_stdin_used 241 | 37: 0000000000004018 0 NOTYPE GLOBAL DEFAULT 26 _end 242 | 38: 00000000000010a0 38 FUNC GLOBAL DEFAULT 16 _start 243 | 39: 0000000000004010 0 NOTYPE GLOBAL DEFAULT 26 __bss_start 244 | 40: 00000000000012bd 90 FUNC GLOBAL DEFAULT 16 main 245 | 41: 0000000000001258 101 FUNC GLOBAL DEFAULT 16 replace_const 246 | 42: 0000000000001211 20 FUNC GLOBAL DEFAULT 16 delete_assign 247 | 43: 0000000000004010 0 OBJECT GLOBAL HIDDEN 25 __TMC_END__ 248 | 44: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_registerTMC[...] 249 | 45: 00000000000011f2 31 FUNC GLOBAL DEFAULT 16 change_condition 250 | 46: 0000000000000000 0 FUNC WEAK DEFAULT UND __cxa_finalize@G[...] 
251 | 47: 0000000000001000 0 FUNC GLOBAL HIDDEN 12 _init 252 | 253 | Histogram for `.gnu.hash' bucket list length (total of 2 buckets): 254 | Length Number % of total Coverage 255 | 0 1 ( 50.0%) 256 | 1 1 ( 50.0%) 100.0% 257 | 258 | Version symbols section '.gnu.version' contains 9 entries: 259 | Addr: 0x0000000000000568 Offset: 0x000568 Link: 6 (.dynsym) 260 | 000: 0 (*local*) 2 (GLIBC_2.34) 1 (*global*) 3 (GLIBC_2.4) 261 | 004: 4 (GLIBC_2.2.5) 4 (GLIBC_2.2.5) 1 (*global*) 1 (*global*) 262 | 008: 4 (GLIBC_2.2.5) 263 | 264 | Version needs section '.gnu.version_r' contains 1 entry: 265 | Addr: 0x0000000000000580 Offset: 0x000580 Link: 7 (.dynstr) 266 | 000000: Version: 1 File: libc.so.6 Cnt: 3 267 | 0x0010: Name: GLIBC_2.2.5 Flags: none Version: 4 268 | 0x0020: Name: GLIBC_2.4 Flags: none Version: 3 269 | 0x0030: Name: GLIBC_2.34 Flags: none Version: 2 270 | 271 | Displaying notes found in: .note.gnu.property 272 | Owner Data size Description 273 | GNU 0x00000020 NT_GNU_PROPERTY_TYPE_0 274 | Properties: x86 feature: IBT, SHSTK 275 | x86 ISA needed: x86-64-baseline 276 | 277 | Displaying notes found in: .note.gnu.build-id 278 | Owner Data size Description 279 | GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) 280 | Build ID: 09254fd66609f59b6aabd9b3236c84d89de86f2a 281 | 282 | Displaying notes found in: .note.ABI-tag 283 | Owner Data size Description 284 | GNU 0x00000010 NT_GNU_ABI_TAG (ABI version tag) 285 | OS: Linux, ABI: 3.2.0 286 | -------------------------------------------------------------------------------- /examples/patch_challenge/a.out.s: -------------------------------------------------------------------------------- 1 | 2 | a.out: file format elf64-x86-64 3 | 4 | 5 | Disassembly of section .init: 6 | 7 | 0000000000001000 <_init>: 8 | 1000: f3 0f 1e fa endbr64 9 | 1004: 48 83 ec 08 sub $0x8,%rsp 10 | 1008: 48 8b 05 d9 2f 00 00 mov 0x2fd9(%rip),%rax # 3fe8 <__gmon_start__@Base> 11 | 100f: 48 85 c0 test %rax,%rax 12 | 1012: 74 02 je 1016 
<_init+0x16> 13 | 1014: ff d0 call *%rax 14 | 1016: 48 83 c4 08 add $0x8,%rsp 15 | 101a: c3 ret 16 | 17 | Disassembly of section .plt: 18 | 19 | 0000000000001020 <.plt>: 20 | 1020: ff 35 8a 2f 00 00 push 0x2f8a(%rip) # 3fb0 <_GLOBAL_OFFSET_TABLE_+0x8> 21 | 1026: f2 ff 25 8b 2f 00 00 bnd jmp *0x2f8b(%rip) # 3fb8 <_GLOBAL_OFFSET_TABLE_+0x10> 22 | 102d: 0f 1f 00 nopl (%rax) 23 | 1030: f3 0f 1e fa endbr64 24 | 1034: 68 00 00 00 00 push $0x0 25 | 1039: f2 e9 e1 ff ff ff bnd jmp 1020 <_init+0x20> 26 | 103f: 90 nop 27 | 1040: f3 0f 1e fa endbr64 28 | 1044: 68 01 00 00 00 push $0x1 29 | 1049: f2 e9 d1 ff ff ff bnd jmp 1020 <_init+0x20> 30 | 104f: 90 nop 31 | 1050: f3 0f 1e fa endbr64 32 | 1054: 68 02 00 00 00 push $0x2 33 | 1059: f2 e9 c1 ff ff ff bnd jmp 1020 <_init+0x20> 34 | 105f: 90 nop 35 | 36 | Disassembly of section .plt.got: 37 | 38 | 0000000000001060 <__cxa_finalize@plt>: 39 | 1060: f3 0f 1e fa endbr64 40 | 1064: f2 ff 25 8d 2f 00 00 bnd jmp *0x2f8d(%rip) # 3ff8 <__cxa_finalize@GLIBC_2.2.5> 41 | 106b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 42 | 43 | Disassembly of section .plt.sec: 44 | 45 | 0000000000001070 <__stack_chk_fail@plt>: 46 | 1070: f3 0f 1e fa endbr64 47 | 1074: f2 ff 25 45 2f 00 00 bnd jmp *0x2f45(%rip) # 3fc0 <__stack_chk_fail@GLIBC_2.4> 48 | 107b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 49 | 50 | 0000000000001080 : 51 | 1080: f3 0f 1e fa endbr64 52 | 1084: f2 ff 25 3d 2f 00 00 bnd jmp *0x2f3d(%rip) # 3fc8 53 | 108b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 54 | 55 | 0000000000001090 <__assert_fail@plt>: 56 | 1090: f3 0f 1e fa endbr64 57 | 1094: f2 ff 25 35 2f 00 00 bnd jmp *0x2f35(%rip) # 3fd0 <__assert_fail@GLIBC_2.2.5> 58 | 109b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 59 | 60 | Disassembly of section .text: 61 | 62 | 00000000000010a0 <_start>: 63 | 10a0: f3 0f 1e fa endbr64 64 | 10a4: 31 ed xor %ebp,%ebp 65 | 10a6: 49 89 d1 mov %rdx,%r9 66 | 10a9: 5e pop %rsi 67 | 10aa: 48 89 e2 mov %rsp,%rdx 68 | 10ad: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp 69 | 10b1: 50 
push %rax 70 | 10b2: 54 push %rsp 71 | 10b3: 45 31 c0 xor %r8d,%r8d 72 | 10b6: 31 c9 xor %ecx,%ecx 73 | 10b8: 48 8d 3d fe 01 00 00 lea 0x1fe(%rip),%rdi # 12bd
74 | 10bf: ff 15 13 2f 00 00 call *0x2f13(%rip) # 3fd8 <__libc_start_main@GLIBC_2.34> 75 | 10c5: f4 hlt 76 | 10c6: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) 77 | 10cd: 00 00 00 78 | 79 | 00000000000010d0 : 80 | 10d0: 48 8d 3d 39 2f 00 00 lea 0x2f39(%rip),%rdi # 4010 <__TMC_END__> 81 | 10d7: 48 8d 05 32 2f 00 00 lea 0x2f32(%rip),%rax # 4010 <__TMC_END__> 82 | 10de: 48 39 f8 cmp %rdi,%rax 83 | 10e1: 74 15 je 10f8 84 | 10e3: 48 8b 05 f6 2e 00 00 mov 0x2ef6(%rip),%rax # 3fe0 <_ITM_deregisterTMCloneTable@Base> 85 | 10ea: 48 85 c0 test %rax,%rax 86 | 10ed: 74 09 je 10f8 87 | 10ef: ff e0 jmp *%rax 88 | 10f1: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 89 | 10f8: c3 ret 90 | 10f9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 91 | 92 | 0000000000001100 : 93 | 1100: 48 8d 3d 09 2f 00 00 lea 0x2f09(%rip),%rdi # 4010 <__TMC_END__> 94 | 1107: 48 8d 35 02 2f 00 00 lea 0x2f02(%rip),%rsi # 4010 <__TMC_END__> 95 | 110e: 48 29 fe sub %rdi,%rsi 96 | 1111: 48 89 f0 mov %rsi,%rax 97 | 1114: 48 c1 ee 3f shr $0x3f,%rsi 98 | 1118: 48 c1 f8 03 sar $0x3,%rax 99 | 111c: 48 01 c6 add %rax,%rsi 100 | 111f: 48 d1 fe sar %rsi 101 | 1122: 74 14 je 1138 102 | 1124: 48 8b 05 c5 2e 00 00 mov 0x2ec5(%rip),%rax # 3ff0 <_ITM_registerTMCloneTable@Base> 103 | 112b: 48 85 c0 test %rax,%rax 104 | 112e: 74 08 je 1138 105 | 1130: ff e0 jmp *%rax 106 | 1132: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 107 | 1138: c3 ret 108 | 1139: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 109 | 110 | 0000000000001140 <__do_global_dtors_aux>: 111 | 1140: f3 0f 1e fa endbr64 112 | 1144: 80 3d c5 2e 00 00 00 cmpb $0x0,0x2ec5(%rip) # 4010 <__TMC_END__> 113 | 114b: 75 2b jne 1178 <__do_global_dtors_aux+0x38> 114 | 114d: 55 push %rbp 115 | 114e: 48 83 3d a2 2e 00 00 cmpq $0x0,0x2ea2(%rip) # 3ff8 <__cxa_finalize@GLIBC_2.2.5> 116 | 1155: 00 117 | 1156: 48 89 e5 mov %rsp,%rbp 118 | 1159: 74 0c je 1167 <__do_global_dtors_aux+0x27> 119 | 115b: 48 8b 3d a6 2e 00 00 mov 0x2ea6(%rip),%rdi # 4008 <__dso_handle> 120 | 1162: e8 f9 fe ff ff call 1060 
<__cxa_finalize@plt> 121 | 1167: e8 64 ff ff ff call 10d0 122 | 116c: c6 05 9d 2e 00 00 01 movb $0x1,0x2e9d(%rip) # 4010 <__TMC_END__> 123 | 1173: 5d pop %rbp 124 | 1174: c3 ret 125 | 1175: 0f 1f 00 nopl (%rax) 126 | 1178: c3 ret 127 | 1179: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 128 | 129 | 0000000000001180 : 130 | 1180: f3 0f 1e fa endbr64 131 | 1184: e9 77 ff ff ff jmp 1100 132 | 133 | 0000000000001189 : 134 | 1189: f3 0f 1e fa endbr64 135 | 118d: 55 push %rbp 136 | 118e: 48 89 e5 mov %rsp,%rbp 137 | 1191: 89 7d fc mov %edi,-0x4(%rbp) 138 | 1194: 8b 45 fc mov -0x4(%rbp),%eax 139 | 1197: 83 c0 03 add $0x3,%eax 140 | 119a: 5d pop %rbp 141 | 119b: c3 ret 142 | 143 | 000000000000119c : 144 | 119c: f3 0f 1e fa endbr64 145 | 11a0: 55 push %rbp 146 | 11a1: 48 89 e5 mov %rsp,%rbp 147 | 11a4: 89 7d fc mov %edi,-0x4(%rbp) 148 | 11a7: 8b 45 fc mov -0x4(%rbp),%eax 149 | 11aa: 83 c0 05 add $0x5,%eax 150 | 11ad: 5d pop %rbp 151 | 11ae: c3 ret 152 | 153 | 00000000000011af : 154 | 11af: f3 0f 1e fa endbr64 155 | 11b3: 55 push %rbp 156 | 11b4: 48 89 e5 mov %rsp,%rbp 157 | 11b7: 89 7d fc mov %edi,-0x4(%rbp) 158 | 11ba: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 159 | 11be: 75 07 jne 11c7 160 | 11c0: b8 00 00 00 00 mov $0x0,%eax 161 | 11c5: eb 05 jmp 11cc 162 | 11c7: b8 01 00 00 00 mov $0x1,%eax 163 | 11cc: 5d pop %rbp 164 | 11cd: c3 ret 165 | 166 | 00000000000011ce : 167 | 11ce: f3 0f 1e fa endbr64 168 | 11d2: 55 push %rbp 169 | 11d3: 48 89 e5 mov %rsp,%rbp 170 | 11d6: 89 7d fc mov %edi,-0x4(%rbp) 171 | 11d9: b8 01 00 00 00 mov $0x1,%eax 172 | 11de: 5d pop %rbp 173 | 11df: c3 ret 174 | 175 | 00000000000011e0 : 176 | 11e0: f3 0f 1e fa endbr64 177 | 11e4: 55 push %rbp 178 | 11e5: 48 89 e5 mov %rsp,%rbp 179 | 11e8: 89 7d fc mov %edi,-0x4(%rbp) 180 | 11eb: b8 01 00 00 00 mov $0x1,%eax 181 | 11f0: 5d pop %rbp 182 | 11f1: c3 ret 183 | 184 | 00000000000011f2 : 185 | 11f2: f3 0f 1e fa endbr64 186 | 11f6: 55 push %rbp 187 | 11f7: 48 89 e5 mov %rsp,%rbp 188 | 11fa: 89 7d fc mov %edi,-0x4(%rbp) 189 | 
11fd: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 190 | 1201: 78 07 js 120a 191 | 1203: b8 00 00 00 00 mov $0x0,%eax 192 | 1208: eb 05 jmp 120f 193 | 120a: b8 01 00 00 00 mov $0x1,%eax 194 | 120f: 5d pop %rbp 195 | 1210: c3 ret 196 | 197 | 0000000000001211 : 198 | 1211: f3 0f 1e fa endbr64 199 | 1215: 55 push %rbp 200 | 1216: 48 89 e5 mov %rsp,%rbp 201 | 1219: 89 7d fc mov %edi,-0x4(%rbp) 202 | 121c: 83 45 fc 07 addl $0x7,-0x4(%rbp) 203 | 1220: 8b 45 fc mov -0x4(%rbp),%eax 204 | 1223: 5d pop %rbp 205 | 1224: c3 ret 206 | 207 | 0000000000001225 : 208 | 1225: f3 0f 1e fa endbr64 209 | 1229: 55 push %rbp 210 | 122a: 48 89 e5 mov %rsp,%rbp 211 | 122d: 48 8d 05 d0 0d 00 00 lea 0xdd0(%rip),%rax # 2004 <_IO_stdin_used+0x4> 212 | 1234: 48 89 c7 mov %rax,%rdi 213 | 1237: b8 00 00 00 00 mov $0x0,%eax 214 | 123c: e8 3f fe ff ff call 1080 215 | 1241: 48 8d 05 c2 0d 00 00 lea 0xdc2(%rip),%rax # 200a <_IO_stdin_used+0xa> 216 | 1248: 48 89 c7 mov %rax,%rdi 217 | 124b: b8 00 00 00 00 mov $0x0,%eax 218 | 1250: e8 2b fe ff ff call 1080 219 | 1255: 90 nop 220 | 1256: 5d pop %rbp 221 | 1257: c3 ret 222 | 223 | 0000000000001258 : 224 | 1258: f3 0f 1e fa endbr64 225 | 125c: 55 push %rbp 226 | 125d: 48 89 e5 mov %rsp,%rbp 227 | 1260: 48 83 ec 50 sub $0x50,%rsp 228 | 1264: 89 7d bc mov %edi,-0x44(%rbp) 229 | 1267: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 230 | 126e: 00 00 231 | 1270: 48 89 45 f8 mov %rax,-0x8(%rbp) 232 | 1274: 31 c0 xor %eax,%eax 233 | 1276: 8b 45 bc mov -0x44(%rbp),%eax 234 | 1279: 89 45 d0 mov %eax,-0x30(%rbp) 235 | 127c: c7 45 cc 00 00 00 00 movl $0x0,-0x34(%rbp) 236 | 1283: eb 19 jmp 129e 237 | 1285: 8b 45 cc mov -0x34(%rbp),%eax 238 | 1288: 83 e8 01 sub $0x1,%eax 239 | 128b: 48 98 cltq 240 | 128d: 8b 54 85 d0 mov -0x30(%rbp,%rax,4),%edx 241 | 1291: 8b 45 cc mov -0x34(%rbp),%eax 242 | 1294: 48 98 cltq 243 | 1296: 89 54 85 d0 mov %edx,-0x30(%rbp,%rax,4) 244 | 129a: 83 45 cc 01 addl $0x1,-0x34(%rbp) 245 | 129e: 83 7d cc 09 cmpl $0x9,-0x34(%rbp) 246 | 12a2: 7e e1 jle 1285 247 | 12a4: 
8b 45 f4 mov -0xc(%rbp),%eax 248 | 12a7: 48 8b 55 f8 mov -0x8(%rbp),%rdx 249 | 12ab: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx 250 | 12b2: 00 00 251 | 12b4: 74 05 je 12bb 252 | 12b6: e8 b5 fd ff ff call 1070 <__stack_chk_fail@plt> 253 | 12bb: c9 leave 254 | 12bc: c3 ret 255 | 256 | 00000000000012bd
: 257 | 12bd: f3 0f 1e fa endbr64 258 | 12c1: 55 push %rbp 259 | 12c2: 48 89 e5 mov %rsp,%rbp 260 | 12c5: 53 push %rbx 261 | 12c6: 48 83 ec 08 sub $0x8,%rsp 262 | 12ca: bf 03 00 00 00 mov $0x3,%edi 263 | 12cf: e8 b5 fe ff ff call 1189 264 | 12d4: 89 c3 mov %eax,%ebx 265 | 12d6: bf 03 00 00 00 mov $0x3,%edi 266 | 12db: e8 bc fe ff ff call 119c 267 | 12e0: 39 c3 cmp %eax,%ebx 268 | 12e2: 74 28 je 130c 269 | 12e4: 48 8d 05 46 0d 00 00 lea 0xd46(%rip),%rax # 2031 <__PRETTY_FUNCTION__.0> 270 | 12eb: 48 89 c1 mov %rax,%rcx 271 | 12ee: ba 55 00 00 00 mov $0x55,%edx 272 | 12f3: 48 8d 05 16 0d 00 00 lea 0xd16(%rip),%rax # 2010 <_IO_stdin_used+0x10> 273 | 12fa: 48 89 c6 mov %rax,%rsi 274 | 12fd: 48 8d 05 14 0d 00 00 lea 0xd14(%rip),%rax # 2018 <_IO_stdin_used+0x18> 275 | 1304: 48 89 c7 mov %rax,%rdi 276 | 1307: e8 84 fd ff ff call 1090 <__assert_fail@plt> 277 | 130c: b8 00 00 00 00 mov $0x0,%eax 278 | 1311: 48 8b 5d f8 mov -0x8(%rbp),%rbx 279 | 1315: c9 leave 280 | 1316: c3 ret 281 | 282 | Disassembly of section .fini: 283 | 284 | 0000000000001318 <_fini>: 285 | 1318: f3 0f 1e fa endbr64 286 | 131c: 48 83 ec 08 sub $0x8,%rsp 287 | 1320: 48 83 c4 08 add $0x8,%rsp 288 | 1324: c3 ret 289 | -------------------------------------------------------------------------------- /examples/patch_challenge/a_patched.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/examples/patch_challenge/a_patched.out -------------------------------------------------------------------------------- /examples/patch_challenge/a_patched.out.s: -------------------------------------------------------------------------------- 1 | 2 | a_patched.out: file format elf64-x86-64 3 | 4 | 5 | Disassembly of section .init: 6 | 7 | 0000000000001000 <_init>: 8 | 1000: f3 0f 1e fa endbr64 9 | 1004: 48 83 ec 08 sub $0x8,%rsp 10 | 1008: 48 8b 05 d9 2f 00 00 mov 0x2fd9(%rip),%rax # 3fe8 
<__gmon_start__@Base> 11 | 100f: 48 85 c0 test %rax,%rax 12 | 1012: 74 02 je 1016 <_init+0x16> 13 | 1014: ff d0 call *%rax 14 | 1016: 48 83 c4 08 add $0x8,%rsp 15 | 101a: c3 ret 16 | 17 | Disassembly of section .plt: 18 | 19 | 0000000000001020 <.plt>: 20 | 1020: ff 35 8a 2f 00 00 push 0x2f8a(%rip) # 3fb0 <_GLOBAL_OFFSET_TABLE_+0x8> 21 | 1026: f2 ff 25 8b 2f 00 00 bnd jmp *0x2f8b(%rip) # 3fb8 <_GLOBAL_OFFSET_TABLE_+0x10> 22 | 102d: 0f 1f 00 nopl (%rax) 23 | 1030: f3 0f 1e fa endbr64 24 | 1034: 68 00 00 00 00 push $0x0 25 | 1039: f2 e9 e1 ff ff ff bnd jmp 1020 <_init+0x20> 26 | 103f: 90 nop 27 | 1040: f3 0f 1e fa endbr64 28 | 1044: 68 01 00 00 00 push $0x1 29 | 1049: f2 e9 d1 ff ff ff bnd jmp 1020 <_init+0x20> 30 | 104f: 90 nop 31 | 1050: f3 0f 1e fa endbr64 32 | 1054: 68 02 00 00 00 push $0x2 33 | 1059: f2 e9 c1 ff ff ff bnd jmp 1020 <_init+0x20> 34 | 105f: 90 nop 35 | 36 | Disassembly of section .plt.got: 37 | 38 | 0000000000001060 <__cxa_finalize@plt>: 39 | 1060: f3 0f 1e fa endbr64 40 | 1064: f2 ff 25 8d 2f 00 00 bnd jmp *0x2f8d(%rip) # 3ff8 <__cxa_finalize@GLIBC_2.2.5> 41 | 106b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 42 | 43 | Disassembly of section .plt.sec: 44 | 45 | 0000000000001070 <__stack_chk_fail@plt>: 46 | 1070: f3 0f 1e fa endbr64 47 | 1074: f2 ff 25 45 2f 00 00 bnd jmp *0x2f45(%rip) # 3fc0 <__stack_chk_fail@GLIBC_2.4> 48 | 107b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 49 | 50 | 0000000000001080 : 51 | 1080: f3 0f 1e fa endbr64 52 | 1084: f2 ff 25 3d 2f 00 00 bnd jmp *0x2f3d(%rip) # 3fc8 53 | 108b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 54 | 55 | 0000000000001090 <__assert_fail@plt>: 56 | 1090: f3 0f 1e fa endbr64 57 | 1094: f2 ff 25 35 2f 00 00 bnd jmp *0x2f35(%rip) # 3fd0 <__assert_fail@GLIBC_2.2.5> 58 | 109b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 59 | 60 | Disassembly of section .text: 61 | 62 | 00000000000010a0 <_start>: 63 | 10a0: f3 0f 1e fa endbr64 64 | 10a4: 31 ed xor %ebp,%ebp 65 | 10a6: 49 89 d1 mov %rdx,%r9 66 | 10a9: 5e pop %rsi 67 | 10aa: 48 89 e2 
mov %rsp,%rdx 68 | 10ad: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp 69 | 10b1: 50 push %rax 70 | 10b2: 54 push %rsp 71 | 10b3: 45 31 c0 xor %r8d,%r8d 72 | 10b6: 31 c9 xor %ecx,%ecx 73 | 10b8: 48 8d 3d fe 01 00 00 lea 0x1fe(%rip),%rdi # 12bd
74 | 10bf: ff 15 13 2f 00 00 call *0x2f13(%rip) # 3fd8 <__libc_start_main@GLIBC_2.34> 75 | 10c5: f4 hlt 76 | 10c6: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) 77 | 10cd: 00 00 00 78 | 79 | 00000000000010d0 : 80 | 10d0: 48 8d 3d 39 2f 00 00 lea 0x2f39(%rip),%rdi # 4010 <__TMC_END__> 81 | 10d7: 48 8d 05 32 2f 00 00 lea 0x2f32(%rip),%rax # 4010 <__TMC_END__> 82 | 10de: 48 39 f8 cmp %rdi,%rax 83 | 10e1: 74 15 je 10f8 84 | 10e3: 48 8b 05 f6 2e 00 00 mov 0x2ef6(%rip),%rax # 3fe0 <_ITM_deregisterTMCloneTable@Base> 85 | 10ea: 48 85 c0 test %rax,%rax 86 | 10ed: 74 09 je 10f8 87 | 10ef: ff e0 jmp *%rax 88 | 10f1: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 89 | 10f8: c3 ret 90 | 10f9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 91 | 92 | 0000000000001100 : 93 | 1100: 48 8d 3d 09 2f 00 00 lea 0x2f09(%rip),%rdi # 4010 <__TMC_END__> 94 | 1107: 48 8d 35 02 2f 00 00 lea 0x2f02(%rip),%rsi # 4010 <__TMC_END__> 95 | 110e: 48 29 fe sub %rdi,%rsi 96 | 1111: 48 89 f0 mov %rsi,%rax 97 | 1114: 48 c1 ee 3f shr $0x3f,%rsi 98 | 1118: 48 c1 f8 03 sar $0x3,%rax 99 | 111c: 48 01 c6 add %rax,%rsi 100 | 111f: 48 d1 fe sar %rsi 101 | 1122: 74 14 je 1138 102 | 1124: 48 8b 05 c5 2e 00 00 mov 0x2ec5(%rip),%rax # 3ff0 <_ITM_registerTMCloneTable@Base> 103 | 112b: 48 85 c0 test %rax,%rax 104 | 112e: 74 08 je 1138 105 | 1130: ff e0 jmp *%rax 106 | 1132: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 107 | 1138: c3 ret 108 | 1139: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 109 | 110 | 0000000000001140 <__do_global_dtors_aux>: 111 | 1140: f3 0f 1e fa endbr64 112 | 1144: 80 3d c5 2e 00 00 00 cmpb $0x0,0x2ec5(%rip) # 4010 <__TMC_END__> 113 | 114b: 75 2b jne 1178 <__do_global_dtors_aux+0x38> 114 | 114d: 55 push %rbp 115 | 114e: 48 83 3d a2 2e 00 00 cmpq $0x0,0x2ea2(%rip) # 3ff8 <__cxa_finalize@GLIBC_2.2.5> 116 | 1155: 00 117 | 1156: 48 89 e5 mov %rsp,%rbp 118 | 1159: 74 0c je 1167 <__do_global_dtors_aux+0x27> 119 | 115b: 48 8b 3d a6 2e 00 00 mov 0x2ea6(%rip),%rdi # 4008 <__dso_handle> 120 | 1162: e8 f9 fe ff ff call 1060 
<__cxa_finalize@plt> 121 | 1167: e8 64 ff ff ff call 10d0 122 | 116c: c6 05 9d 2e 00 00 01 movb $0x1,0x2e9d(%rip) # 4010 <__TMC_END__> 123 | 1173: 5d pop %rbp 124 | 1174: c3 ret 125 | 1175: 0f 1f 00 nopl (%rax) 126 | 1178: c3 ret 127 | 1179: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 128 | 129 | 0000000000001180 : 130 | 1180: f3 0f 1e fa endbr64 131 | 1184: e9 77 ff ff ff jmp 1100 132 | 133 | 0000000000001189 : 134 | 1189: f3 0f 1e fa endbr64 135 | 118d: 55 push %rbp 136 | 118e: 48 89 e5 mov %rsp,%rbp 137 | 1191: 89 7d fc mov %edi,-0x4(%rbp) 138 | 1194: 8b 45 fc mov -0x4(%rbp),%eax 139 | 1197: 83 c0 05 add $0x5,%eax 140 | 119a: 5d pop %rbp 141 | 119b: c3 ret 142 | 143 | 000000000000119c : 144 | 119c: f3 0f 1e fa endbr64 145 | 11a0: 55 push %rbp 146 | 11a1: 48 89 e5 mov %rsp,%rbp 147 | 11a4: 89 7d fc mov %edi,-0x4(%rbp) 148 | 11a7: 8b 45 fc mov -0x4(%rbp),%eax 149 | 11aa: 83 c0 05 add $0x5,%eax 150 | 11ad: 5d pop %rbp 151 | 11ae: c3 ret 152 | 153 | 00000000000011af : 154 | 11af: f3 0f 1e fa endbr64 155 | 11b3: 55 push %rbp 156 | 11b4: 48 89 e5 mov %rsp,%rbp 157 | 11b7: 89 7d fc mov %edi,-0x4(%rbp) 158 | 11ba: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 159 | 11be: 75 07 jne 11c7 160 | 11c0: b8 00 00 00 00 mov $0x0,%eax 161 | 11c5: eb 05 jmp 11cc 162 | 11c7: b8 01 00 00 00 mov $0x1,%eax 163 | 11cc: 5d pop %rbp 164 | 11cd: c3 ret 165 | 166 | 00000000000011ce : 167 | 11ce: f3 0f 1e fa endbr64 168 | 11d2: 55 push %rbp 169 | 11d3: 48 89 e5 mov %rsp,%rbp 170 | 11d6: 89 7d fc mov %edi,-0x4(%rbp) 171 | 11d9: b8 01 00 00 00 mov $0x1,%eax 172 | 11de: 5d pop %rbp 173 | 11df: c3 ret 174 | 175 | 00000000000011e0 : 176 | 11e0: f3 0f 1e fa endbr64 177 | 11e4: 55 push %rbp 178 | 11e5: 48 89 e5 mov %rsp,%rbp 179 | 11e8: 89 7d fc mov %edi,-0x4(%rbp) 180 | 11eb: b8 01 00 00 00 mov $0x1,%eax 181 | 11f0: 5d pop %rbp 182 | 11f1: c3 ret 183 | 184 | 00000000000011f2 : 185 | 11f2: f3 0f 1e fa endbr64 186 | 11f6: 55 push %rbp 187 | 11f7: 48 89 e5 mov %rsp,%rbp 188 | 11fa: 89 7d fc mov %edi,-0x4(%rbp) 189 | 
11fd: 83 7d fc 00 cmpl $0x0,-0x4(%rbp) 190 | 1201: 78 07 js 120a 191 | 1203: b8 00 00 00 00 mov $0x0,%eax 192 | 1208: eb 05 jmp 120f 193 | 120a: b8 01 00 00 00 mov $0x1,%eax 194 | 120f: 5d pop %rbp 195 | 1210: c3 ret 196 | 197 | 0000000000001211 : 198 | 1211: f3 0f 1e fa endbr64 199 | 1215: 55 push %rbp 200 | 1216: 48 89 e5 mov %rsp,%rbp 201 | 1219: 89 7d fc mov %edi,-0x4(%rbp) 202 | 121c: 83 45 fc 07 addl $0x7,-0x4(%rbp) 203 | 1220: 8b 45 fc mov -0x4(%rbp),%eax 204 | 1223: 5d pop %rbp 205 | 1224: c3 ret 206 | 207 | 0000000000001225 : 208 | 1225: f3 0f 1e fa endbr64 209 | 1229: 55 push %rbp 210 | 122a: 48 89 e5 mov %rsp,%rbp 211 | 122d: 48 8d 05 d0 0d 00 00 lea 0xdd0(%rip),%rax # 2004 <_IO_stdin_used+0x4> 212 | 1234: 48 89 c7 mov %rax,%rdi 213 | 1237: b8 00 00 00 00 mov $0x0,%eax 214 | 123c: e8 3f fe ff ff call 1080 215 | 1241: 48 8d 05 c2 0d 00 00 lea 0xdc2(%rip),%rax # 200a <_IO_stdin_used+0xa> 216 | 1248: 48 89 c7 mov %rax,%rdi 217 | 124b: b8 00 00 00 00 mov $0x0,%eax 218 | 1250: e8 2b fe ff ff call 1080 219 | 1255: 90 nop 220 | 1256: 5d pop %rbp 221 | 1257: c3 ret 222 | 223 | 0000000000001258 : 224 | 1258: f3 0f 1e fa endbr64 225 | 125c: 55 push %rbp 226 | 125d: 48 89 e5 mov %rsp,%rbp 227 | 1260: 48 83 ec 50 sub $0x50,%rsp 228 | 1264: 89 7d bc mov %edi,-0x44(%rbp) 229 | 1267: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 230 | 126e: 00 00 231 | 1270: 48 89 45 f8 mov %rax,-0x8(%rbp) 232 | 1274: 31 c0 xor %eax,%eax 233 | 1276: 8b 45 bc mov -0x44(%rbp),%eax 234 | 1279: 89 45 d0 mov %eax,-0x30(%rbp) 235 | 127c: c7 45 cc 00 00 00 00 movl $0x0,-0x34(%rbp) 236 | 1283: eb 19 jmp 129e 237 | 1285: 8b 45 cc mov -0x34(%rbp),%eax 238 | 1288: 83 e8 01 sub $0x1,%eax 239 | 128b: 48 98 cltq 240 | 128d: 8b 54 85 d0 mov -0x30(%rbp,%rax,4),%edx 241 | 1291: 8b 45 cc mov -0x34(%rbp),%eax 242 | 1294: 48 98 cltq 243 | 1296: 89 54 85 d0 mov %edx,-0x30(%rbp,%rax,4) 244 | 129a: 83 45 cc 01 addl $0x1,-0x34(%rbp) 245 | 129e: 83 7d cc 09 cmpl $0x9,-0x34(%rbp) 246 | 12a2: 7e e1 jle 1285 247 | 12a4: 
8b 45 f4 mov -0xc(%rbp),%eax 248 | 12a7: 48 8b 55 f8 mov -0x8(%rbp),%rdx 249 | 12ab: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx 250 | 12b2: 00 00 251 | 12b4: 74 05 je 12bb 252 | 12b6: e8 b5 fd ff ff call 1070 <__stack_chk_fail@plt> 253 | 12bb: c9 leave 254 | 12bc: c3 ret 255 | 256 | 00000000000012bd
: 257 | 12bd: f3 0f 1e fa endbr64 258 | 12c1: 55 push %rbp 259 | 12c2: 48 89 e5 mov %rsp,%rbp 260 | 12c5: 53 push %rbx 261 | 12c6: 48 83 ec 08 sub $0x8,%rsp 262 | 12ca: bf 03 00 00 00 mov $0x3,%edi 263 | 12cf: e8 b5 fe ff ff call 1189 264 | 12d4: 89 c3 mov %eax,%ebx 265 | 12d6: bf 03 00 00 00 mov $0x3,%edi 266 | 12db: e8 bc fe ff ff call 119c 267 | 12e0: 39 c3 cmp %eax,%ebx 268 | 12e2: 74 28 je 130c 269 | 12e4: 48 8d 05 46 0d 00 00 lea 0xd46(%rip),%rax # 2031 <__PRETTY_FUNCTION__.0> 270 | 12eb: 48 89 c1 mov %rax,%rcx 271 | 12ee: ba 55 00 00 00 mov $0x55,%edx 272 | 12f3: 48 8d 05 16 0d 00 00 lea 0xd16(%rip),%rax # 2010 <_IO_stdin_used+0x10> 273 | 12fa: 48 89 c6 mov %rax,%rsi 274 | 12fd: 48 8d 05 14 0d 00 00 lea 0xd14(%rip),%rax # 2018 <_IO_stdin_used+0x18> 275 | 1304: 48 89 c7 mov %rax,%rdi 276 | 1307: e8 84 fd ff ff call 1090 <__assert_fail@plt> 277 | 130c: b8 00 00 00 00 mov $0x0,%eax 278 | 1311: 48 8b 5d f8 mov -0x8(%rbp),%rbx 279 | 1315: c9 leave 280 | 1316: c3 ret 281 | 282 | Disassembly of section .fini: 283 | 284 | 0000000000001318 <_fini>: 285 | 1318: f3 0f 1e fa endbr64 286 | 131c: 48 83 ec 08 sub $0x8,%rsp 287 | 1320: 48 83 c4 08 add $0x8,%rsp 288 | 1324: c3 ret 289 | -------------------------------------------------------------------------------- /examples/patch_challenge/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | int add3(int x) 6 | { 7 | return x + 3; 8 | } 9 | 10 | int add3_patch(int x) 11 | { 12 | return x + 5; 13 | } 14 | 15 | // https://codeflaws.github.io/ 16 | 17 | int delete_if(int x) 18 | { 19 | if (x == 0) 20 | { 21 | return 0; 22 | } 23 | return 1; 24 | } 25 | int delete_if_patch(int x) 26 | { 27 | if (false) 28 | { 29 | return 0; 30 | } 31 | return 1; 32 | } 33 | int insert_if(int x) 34 | { 35 | return 1; 36 | } 37 | int change_condition(int x) 38 | { 39 | if (x >= 0) 40 | { 41 | return 0; 42 | } 43 | return 1; 44 | } 45 | 46 | int 
delete_assign(int x) 47 | { 48 | x = x + 7; 49 | return x; 50 | } 51 | 52 | int swap_statement() 53 | { 54 | printf("world"); 55 | printf("hello"); 56 | } 57 | 58 | int replace_const(int x) 59 | { 60 | int buff[10]; 61 | buff[0] = x; 62 | for (int i = 0; i < 10; i++) 63 | { 64 | buff[i] = buff[i - 1]; 65 | } 66 | return buff[9]; 67 | } 68 | /* 69 | struct queue 70 | { 71 | int buff[10]; 72 | int head; 73 | int tail; 74 | }; 75 | 76 | int enqueue(queue *buff) 77 | { 78 | int buff[10]; 79 | 80 | return x + 5; 81 | } 82 | */ 83 | int main() 84 | { 85 | assert(add3(3) == add3_patch(3)); 86 | // assert() 87 | } -------------------------------------------------------------------------------- /examples/patch_challenge/patch.py: -------------------------------------------------------------------------------- 1 | import patcherex2 2 | 3 | 4 | proj = patcherex2.Patcherex("a.out") 5 | 6 | # 1197: 83 c0 03 add $0x3,%eax 7 | addr = 0x1197 8 | code = "add eax, 0x5" 9 | my_patch = patcherex2.ModifyInstructionPatch(addr, code) 10 | proj.patches.append(my_patch) 11 | proj.apply_patches() 12 | proj.binfmt_tool.save_binary("a_patched.out") 13 | 14 | 15 | code = "x = x + 3;" 16 | -------------------------------------------------------------------------------- /examples/patch_challenge/pcode_delete_if.c: -------------------------------------------------------------------------------- 1 | /* AUTOGENERATED. DO NOT EDIT. 
*/ 2 | #include "_pcode.h" 3 | void pcode2c(CPUState *state, size_t breakpoint) 4 | { 5 | uint8_t *register_space = state->reg; 6 | uint8_t *unique_space = state->unique; 7 | uint8_t *ram_space = state->ram; 8 | uint8_t *ram = ram_space; // TODO: remove this 9 | for (;;) 10 | { 11 | switch (state->pc) 12 | { 13 | case 0x1011ce: 14 | L_0x1011ceul: 15 | INSN(0x1011ceul, "0x1011ce: ENDBR64 ") 16 | case 0x1011d2: 17 | L_0x1011d2ul: 18 | INSN(0x1011d2ul, "0x1011d2: PUSH RBP") 19 | L_P_0x1: 20 | COPY(unique_space + 0xed00ul, 8ul, register_space + 0x28ul /* RBP */, 8ul); 21 | L_P_0x2: 22 | INT_SUB(register_space + 0x20ul /* RSP */, 8ul, register_space + 0x20ul /* RSP */, 8ul, (uint8_t *)&(long unsigned int){0x8ul}, 0x8ul); 23 | L_P_0x3: 24 | STORE((uint8_t *)&(long unsigned int){0x56490307c750ul}, 0x8ul, register_space + 0x20ul /* RSP */, 8ul, unique_space + 0xed00ul, 8ul); 25 | case 0x1011d3: 26 | L_0x1011d3ul: 27 | INSN(0x1011d3ul, "0x1011d3: MOV RBP,RSP") 28 | L_P_0x4: 29 | COPY(register_space + 0x28ul /* RBP */, 8ul, register_space + 0x20ul /* RSP */, 8ul); 30 | case 0x1011d6: 31 | L_0x1011d6ul: 32 | INSN(0x1011d6ul, "0x1011d6: MOV dword ptr [RBP + -0x4],EDI") 33 | L_P_0x5: 34 | INT_ADD(unique_space + 0x3100ul, 8ul, register_space + 0x28ul /* RBP */, 8ul, (uint8_t *)&(long unsigned int){0xfffffffffffffffcul}, 0x8ul); 35 | L_P_0x6: 36 | COPY(unique_space + 0xbf00ul, 4ul, register_space + 0x38ul /* EDI */, 4ul); 37 | L_P_0x7: 38 | STORE((uint8_t *)&(long unsigned int){0x56490307c750ul}, 0x8ul, unique_space + 0x3100ul, 8ul, unique_space + 0xbf00ul, 4ul); 39 | case 0x1011d9: 40 | L_0x1011d9ul: 41 | INSN(0x1011d9ul, "0x1011d9: MOV EAX,0x1") 42 | L_P_0x8: 43 | COPY(register_space + 0x0ul /* RAX */, 8ul, (uint8_t *)&(long unsigned int){0x1ul}, 0x8ul); 44 | case 0x1011de: 45 | L_0x1011deul: 46 | INSN(0x1011deul, "0x1011de: POP RBP") 47 | L_P_0x9: 48 | LOAD(register_space + 0x28ul /* RBP */, 8ul, (uint8_t *)&(long unsigned int){0x56490307c750ul}, 0x8ul, register_space + 0x20ul 
/* RSP */, 8ul); 49 | L_P_0xa: 50 | INT_ADD(register_space + 0x20ul /* RSP */, 8ul, register_space + 0x20ul /* RSP */, 8ul, (uint8_t *)&(long unsigned int){0x8ul}, 0x8ul); 51 | case 0x1011df: 52 | L_0x1011dful: 53 | INSN(0x1011dful, "0x1011df: RET ") 54 | L_P_0xb: 55 | LOAD(register_space + 0x288ul /* RIP */, 8ul, (uint8_t *)&(long unsigned int){0x56490307c750ul}, 0x8ul, register_space + 0x20ul /* RSP */, 8ul); 56 | L_P_0xc: 57 | INT_ADD(register_space + 0x20ul /* RSP */, 8ul, register_space + 0x20ul /* RSP */, 8ul, (uint8_t *)&(long unsigned int){0x8ul}, 0x8ul); 58 | L_P_0xd: 59 | RETURN(register_space + 0x288ul /* RIP */, 8ul); 60 | default: 61 | assert(state->pc != -1); // Unexpected PC value 62 | } 63 | } 64 | } -------------------------------------------------------------------------------- /examples/patch_challenge/test.sh: -------------------------------------------------------------------------------- 1 | gcc hello.c 2 | objdump -d a.out > a.out.s 3 | readelf -a a.out > a.out.elf 4 | python3 patch.py 5 | objdump -d a_patched.out > a_patched.out.s 6 | diff a.out.s a_patched.out.s -------------------------------------------------------------------------------- /examples/pile.c: -------------------------------------------------------------------------------- 1 | // https://arxiv.org/pdf/2302.02384.pdf 2 | int abs(int x) 3 | { 4 | int y = x; 5 | if (x < 0) 6 | { 7 | y = -x; 8 | } 9 | return y; 10 | } 11 | 12 | int binsearch(int x) 13 | { 14 | int a[16]; 15 | signed low = 0, high = 16; 16 | 17 | while (low < high) 18 | { 19 | signed middle = low + ((high - low) >> 1); 20 | if (a[middle] < x) 21 | high = middle; 22 | else if (a[middle] > x) 23 | low = middle + 1; 24 | else // a[middle]==x 25 | return middle; 26 | } 27 | 28 | return -1; 29 | } 30 | 31 | #include 32 | 33 | int main(int argc, char **argv) 34 | { 35 | char password[8] = {'s', 'e', 'c', 'r', 'e', 't', '!', '\0'}; 36 | char buffer[16] = { 37 | '\0', 38 | }; 39 | int tmp; 40 | int index = 0; 41 | 
printf("Enter your name: "); 42 | while ((tmp = getchar()) != '\n') 43 | { 44 | buffer[index] = tmp; 45 | ++index; 46 | } 47 | 48 | printf("%s\n", buffer); 49 | 50 | return 0; 51 | } -------------------------------------------------------------------------------- /examples/stress/add_up.c: -------------------------------------------------------------------------------- 1 | #include "../../templates/_pcode.h" 2 | // #include 3 | #include 4 | #include 5 | int main() 6 | { 7 | uint64_t x; 8 | x = x % 50; 9 | uint64_t y = 0; 10 | uint64_t one = 1; 11 | uint64_t old_x = x; 12 | while (x > 0) 13 | { 14 | INT_ADD(&y, 8, &y, 8, &one, 8); 15 | INT_SUB(&x, 8, &x, 8, &one, 8); 16 | } 17 | assert(y == old_x); 18 | assert(y == old_x + 1); 19 | printf("Values x %d y %d ", old_x, y); 20 | } -------------------------------------------------------------------------------- /examples/stress/graham.c: -------------------------------------------------------------------------------- 1 | /* AUTOGENERATED. DO NOT EDIT. 
*/ 2 | #include "../../templates/_pcode.h" 3 | void pcode2c(CPUState *state, size_t breakpoint) 4 | { 5 | uint8_t *register_space = state->reg; 6 | uint8_t *unique_space = state->unique; 7 | uint8_t *ram_space = state->ram; 8 | uint8_t *ram = ram_space; // TODO: remove this 9 | for (;;) 10 | { 11 | switch (state->pc) 12 | { 13 | case 0x0: 14 | INSN(0x0, "0x0: CMP EDI,0x4") 15 | INT_LESS(register_space + 0x200ull /* CF */, 1ull, register_space + 0x38ull /* EDI */, 4ull, 0x4ull, 0x4ull); 16 | INT_SBORROW(register_space + 0x20bull /* OF */, 1ull, register_space + 0x38ull /* EDI */, 4ull, 0x4ull, 0x4ull); 17 | INT_SUB(unique_space + 0x29300ull, 4ull, register_space + 0x38ull /* EDI */, 4ull, 0x4ull, 0x4ull); 18 | INT_SLESS(register_space + 0x207ull /* SF */, 1ull, unique_space + 0x29300ull, 4ull, 0x0ull, 0x4ull); 19 | INT_EQUAL(register_space + 0x206ull /* ZF */, 1ull, unique_space + 0x29300ull, 4ull, 0x0ull, 0x4ull); 20 | INT_AND(unique_space + 0x13180ull, 4ull, unique_space + 0x29300ull, 4ull, 0xffull, 0x4ull); 21 | POPCOUNT(unique_space + 0x13200ull, 1ull, unique_space + 0x13180ull, 4ull); 22 | INT_AND(unique_space + 0x13280ull, 1ull, unique_space + 0x13200ull, 1ull, 0x1ull, 0x1ull); 23 | INT_EQUAL(register_space + 0x202ull /* PF */, 1ull, unique_space + 0x13280ull, 1ull, 0x0ull, 0x1ull); 24 | case 0x3: 25 | INSN(0x3, "0x3: JZ 0xb") 26 | int tmp0; 27 | if (tmp0 == 0) 28 | { 29 | state->pc = 0xb; 30 | break; 31 | goto L_b; 32 | } 33 | else 34 | { 35 | state->pc = 0x5; 36 | break; 37 | goto L_5; 38 | } 39 | goto L_b; 40 | CBRANCH(ram_space + 0xbull, 8ull, register_space + 0x206ull /* ZF */, 1ull); 41 | break; 42 | case 0x5: 43 | L_5: 44 | INSN(0x5, "0x5: MOV EAX,0x1") 45 | COPY(register_space + 0x0ull /* RAX */, 8ull, 0x1ull, 0x8ull); 46 | case 0xa: 47 | L_a: 48 | INSN(0xa, "0xa: RET ") 49 | LOAD(register_space + 0x288ull /* RIP */, 8ull, 0x74f310ull, 0x8ull, register_space + 0x20ull /* RSP */, 8ull); 50 | INT_ADD(register_space + 0x20ull /* RSP */, 8ull, 
register_space + 0x20ull /* RSP */, 8ull, 0x8ull, 0x8ull); 51 | RETURN(register_space + 0x288ull /* RIP */, 8ull); 52 | // break; 53 | // return; 54 | case 0xb: 55 | L_b: 56 | INSN(0xb, "0xb: PUSH R14") 57 | COPY(unique_space + 0xed00ull, 8ull, register_space + 0xb0ull /* R14 */, 8ull); 58 | INT_SUB(register_space + 0x20ull /* RSP */, 8ull, register_space + 0x20ull /* RSP */, 8ull, 0x8ull, 0x8ull); 59 | STORE(0x74f310ull, 0x8ull, register_space + 0x20ull /* RSP */, 8ull, unique_space + 0xed00ull, 8ull); 60 | case 0xd: 61 | INSN(0xd, "0xd: PUSH R13") 62 | COPY(unique_space + 0xed00ull, 8ull, register_space + 0xa8ull /* R13 */, 8ull); 63 | INT_SUB(register_space + 0x20ull /* RSP */, 8ull, register_space + 0x20ull /* RSP */, 8ull, 0x8ull, 0x8ull); 64 | STORE(0x74f310ull, 0x8ull, register_space + 0x20ull /* RSP */, 8ull, unique_space + 0xed00ull, 8ull); 65 | case 0xf: 66 | INSN(0xf, "0xf: PUSH R12") 67 | COPY(unique_space + 0xed00ull, 8ull, register_space + 0xa0ull /* R12 */, 8ull); 68 | INT_SUB(register_space + 0x20ull /* RSP */, 8ull, register_space + 0x20ull /* RSP */, 8ull, 0x8ull, 0x8ull); 69 | STORE(0x74f310ull, 0x8ull, register_space + 0x20ull /* RSP */, 8ull, unique_space + 0xed00ull, 8ull); 70 | 71 | default: 72 | assert(state->pc != -1); // Unexpected PC value 73 | } 74 | } 75 | } 76 | 77 | // If cprover sees stdlib, it gets radically slower. 
78 | #ifndef __CPROVER__ 79 | #include 80 | #endif 81 | 82 | int main() 83 | { 84 | 85 | CPUState state; 86 | state.reg = malloc(0x2000); 87 | state.unique = malloc(0x200000); 88 | state.ram = malloc(0x200000); 89 | 90 | // Initialize state here 91 | // Uninitialized variables are nondeterministically chosen 92 | state.pc = 0; /* Initialize function address to */ 93 | ; 94 | 95 | // Run decompiled function 96 | pcode2c(&state, -1); 97 | 98 | // Check assertions here 99 | // assert(*(uint64_t*)(state.reg + RAX) == 0xdeadbeef); 100 | // assert(*(uint64_t*)(state.ram + FOO_ADDR) <= 0xBADDCAFE); 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /examples/stress/memory.c: -------------------------------------------------------------------------------- 1 | #include "../../templates/_pcode.h" 2 | // #include 3 | #include 4 | #include 5 | int main() 6 | { 7 | uint8_t *mem = malloc(0x100000); 8 | uint8_t temp; 9 | temp = temp + 42; 10 | mem[0] = temp; 11 | for (int i = 0; i < 99; i++) 12 | { 13 | COPY(mem + i + 1, 1, mem + i, 1); 14 | COPY(mem + i, 1, mem + i + 1, 1); 15 | } 16 | assert(mem[99] == temp); 17 | printf("Values mem[0] %d mem[99] %d ", mem[0], mem[99]); 18 | free(mem); 19 | } -------------------------------------------------------------------------------- /examples/stress/notes: -------------------------------------------------------------------------------- 1 | gcc -Wall -Wextra -Wpedantic -fsanitize=undefined,address memory.c 2 | 3 | 0x10000 malloc size 2s 4 | 0x100000 32s 5 | 6 | -------------------------------------------------------------------------------- /examples/stress/store.c: -------------------------------------------------------------------------------- 1 | #include "../../templates/_pcode.h" 2 | // #include 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | 9 | uint8_t ram[100]; 10 | uint64_t one = 1; 11 | uint64_t two = 2; 12 | ram[0] = 42; 13 | for (uint64_t i = 1; i < 100; i++) 14 
| { 15 | STORE(0, 0, &i, 8, ram + i - 1, 1); 16 | } 17 | // STORE(0, 0, &two, 8, ram + 1, 1); 18 | assert(ram[99] == 42); 19 | } -------------------------------------------------------------------------------- /examples/stress/switch.c: -------------------------------------------------------------------------------- 1 | #include "../../templates/_pcode.h" 2 | // #include 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | uint64_t x; 9 | x = x % 26; 10 | while (1) 11 | { 12 | switch (x) 13 | { 14 | case 42: 15 | assert(x != 42); 16 | case 13: 17 | x += 6; 18 | case 16: 19 | x += 3; 20 | break; 21 | case 2: 22 | x += 1; 23 | break; 24 | case 2: 25 | x += 1; 26 | break; 27 | case 2: 28 | x += 1; 29 | break; 30 | case 2: 31 | x += 1; 32 | break; 33 | case 2: 34 | x += 1; 35 | break; 36 | case 2: 37 | x += 1; 38 | break; 39 | case 2: 40 | x += 1; 41 | break; 42 | case 2: 43 | x += 1; 44 | break; 45 | case 2: 46 | x += 1; 47 | break; 48 | case 2: 49 | x += 1; 50 | break; 51 | case 26: 52 | return 3; 53 | default: 54 | x += 1; 55 | break; 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /examples/tiny_float/tfloat.c: -------------------------------------------------------------------------------- 1 | int tfloat(float x, float y) 2 | { 3 | return 2 * y + x / 7; 4 | } -------------------------------------------------------------------------------- /examples/tv/factorial.c: -------------------------------------------------------------------------------- 1 | 2 | int test_fun(int n) 3 | { 4 | if (n == 0) 5 | return 1; 6 | else 7 | return (n * test_fun(n - 1)); 8 | } 9 | -------------------------------------------------------------------------------- /examples/tv/harness.c: -------------------------------------------------------------------------------- 1 | // #include "_pcode.h" 2 | #include "decomp.c" 3 | #include 4 | #include // TODO 5 | int test_fun(int n); 6 | int main() 7 | { 8 | int examples[] = {0}; //, -1, -2, 1, 
2, 3, 4, 5, 64, 128, MININT, MININT + 1, MAXINT, MAXINT - 1}; 9 | for (int i = 0; i < sizeof(examples) / sizeof(examples[0]); i++) 10 | { 11 | int x = examples[i]; 12 | CPUState state; 13 | state.reg = malloc(0x2000); 14 | state.unique = malloc(0xfffful); 15 | memcpy(&(state.reg[RDI]), &x, sizeof(x)); 16 | pcode2c(&state, -1); 17 | int res; 18 | memcpy(&res, &(state.reg[RAX]), sizeof(res)); 19 | assert(res == test_fun(x)); 20 | } 21 | } -------------------------------------------------------------------------------- /examples/tv/prime.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int test_fun(int n) 4 | { 5 | if (n <= 1) 6 | return 0; // 0 and 1 are not prime numbers 7 | for (int i = 2; i < n; i++) 8 | if (n % i == 0) 9 | return 0; 10 | return 1; 11 | } 12 | 13 | int main() 14 | { 15 | int number; 16 | printf("Enter a number: "); 17 | scanf("%d", &number); 18 | if (isPrime(number)) 19 | printf("%d is a prime number.\n", number); 20 | else 21 | printf("%d is not a prime number.\n", number); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /examples/tv/ret3.c: -------------------------------------------------------------------------------- 1 | int test_fun(int n) 2 | { 3 | return 3; 4 | } -------------------------------------------------------------------------------- /examples/tv/ret3n.c: -------------------------------------------------------------------------------- 1 | int test_fun(int n) 2 | { 3 | return 3 * n + 5; 4 | } -------------------------------------------------------------------------------- /examples/tv/sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int test_fun(int n) 4 | { 5 | int n, sum = 0; 6 | for (int i = 1; i <= n; ++i) 7 | { 8 | sum += i; 9 | } 10 | return sum; 11 | } 12 | -------------------------------------------------------------------------------- 
/examples/tv/test.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #1. just run 5 | #2 cbmc / fuzz 6 | 7 | # loop over all C files in directory except harness.c 8 | 9 | #for x in *.c; do 10 | # if [ $x == "harness.c" ]; then 11 | # continue 12 | # fi 13 | x=$1 14 | printf "Running $x\n" 15 | gcc $x -c -o /tmp/a.o 16 | python3 -m pcode2c --headers /tmp/a.o > decomp.c 17 | gcc -Wpedantic -Wall -Wextra -Wno-format -I ../../templates $x harness.c 18 | ./a.out 19 | #rm decomp.c 20 | #done 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /ghidra_scripts/.gitignore: -------------------------------------------------------------------------------- 1 | *.class -------------------------------------------------------------------------------- /ghidra_scripts/C_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/ghidra_scripts/C_Logo.png -------------------------------------------------------------------------------- /ghidra_scripts/Ghidra_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/ghidra_scripts/Ghidra_Logo.png -------------------------------------------------------------------------------- /ghidra_scripts/README.md: -------------------------------------------------------------------------------- 1 | # PCode2C 2 | 3 | ![](Ghidra_Logo.png) 4 | 5 | The Pcode2C plugin takes a piece of the binary, disassembles it . 6 | The plugin builds a command line invocation of the `python3 -m pcode2c` command line tool. 7 | In order for this plugin to work you must have the `pcode2c` tool installed by running `python3 -m pip install -e .` from the root directory of this project. 
8 | 9 | # CBMC 10 | 11 | ![cbmc logo](cbmc.png) will bring up a menu. 12 | 13 | CBMC is the C bounded model checker. You can use it to check for certain errors in C code like buffer overflows. It will also attempt to check if it is possible to trigger assertions. 14 | 15 | ## Installation 16 | 17 | To install, get a copy of [CBMC](https://github.com/diffblue/cbmc/releases/) 18 | 19 | ```bash 20 | wget https://github.com/diffblue/cbmc/releases/download/cbmc-5.95.1/ubuntu-22.04-cbmc-5.95.1-Linux.deb 21 | dpkg -i ubuntu-22.04-cbmc-5.95.1-Linux.deb 22 | ``` 23 | 24 | ## Usage 25 | 26 | The default option for the `C File` field will be the decompiler output. This is unlikely to even parse. 27 | 28 | To get it to work, click the button above the decompiler view that says "Export the Current Function to C". This will allow you to save a file. You may edit this file to convert it into compilable C. This fixing will require editing the function to remove any non-C Ghidraisms and also adding declarations and signatures to the top of the file. You do not necessarily need to include the bodies of the called functions if their behavior is not relevant for the property of interest. For example: 29 | 30 | ```C 31 | 32 | // Extra includes 33 | #include 34 | #include 35 | 36 | // Extra Declarations 37 | extern int my_global; 38 | int my_external_function(int foo); 39 | 40 | // Ghidra exported function 41 | int my_ghidra_exported_function(int param_1) 42 | { 43 | return my_external_function(param_1) + 1 + my_global; 44 | } 45 | ``` 46 | 47 | The plugin combines these options into a command line invocation which can be seen in the Ghidra console. 48 | 49 | # Compile 50 | 51 | ![](C_Logo.png) 52 | 53 | Compile shows a template that can help you get access to registers. 54 | 55 | C does not support program fragments not in a function body. The compiler will also erase any computations in the assembly that do not have externally observable effects. 
56 | 57 | For this reason, any computation you really want to happen must be written somewhere. 58 | 59 | One useful trick is ABI abuse. In most calling conventions, the first N arguments to a C function are carried in predefined registers. By writing a function that takes N arguments, you can get access to these registers. Any edits to these registers are preserved by putting a tail call CALLBACK function in the final return. 60 | 61 | For other registers, inline assembly must be used. 62 | This declaration insists that the C variable `rax` can be read from the register `rax`. 63 | 64 | ```C 65 | register uint64_t rax asm ("rax"); 66 | ``` 67 | 68 | ```C 69 | WRITEREG(rax) 70 | ``` 71 | 72 | These methods are not entirely reliable, so manual inspection of the resulting assembly is still required. 73 | 74 | This prints assembly which can be put into the binary by using the built-in Ghidra assembler. Right click on an instruction in the Ghidra listing window and select "Patch Instruction Bytes". 75 | 76 | # Patcherex2 77 | 78 | ![](patcherex2.jpeg) 79 | The Patcherex2 plugin exposes a simplified convenience interface to the [Patcherex2](https://github.com/purseclab/Patcherex2) tool. It needs to be installed like so: 80 | 81 | ```bash 82 | python3 -m pip install patcherex2 83 | ``` 84 | 85 | This is useful for patches that would not fit in place. For patches that merely replace functionality, using the built-in Ghidra assembler is preferred. 86 | 87 | For multiple patches on the same binary, you will want to build a Patcherex2 script to do them in bulk. 88 | 89 | # Elf Editing 90 | 91 | Manually adding a segment to the binary using Ghidra is not quite trivial but also not hard. 92 | 93 | You can find the elf program segment header. 94 | 95 | There are a couple of techniques: 96 | 97 | Because of alignment constraints, the .text section often has unused padding at the end. 
This will not be loaded because the segment header has a field 98 | 99 | [elf.h definition of struct elf64_phdr](https://github.com/torvalds/linux/blob/5847c9777c303a792202c609bd761dceb60f4eed/include/uapi/linux/elf.h#L260) 100 | 101 | ```C 102 | typedef struct elf64_phdr { 103 | Elf64_Word p_type; 104 | Elf64_Word p_flags; 105 | Elf64_Off p_offset; /* Segment file offset */ 106 | Elf64_Addr p_vaddr; /* Segment virtual address */ 107 | Elf64_Addr p_paddr; /* Segment physical address */ 108 | Elf64_Xword p_filesz; /* Segment size in file */ 109 | Elf64_Xword p_memsz; /* Segment size in memory */ 110 | Elf64_Xword p_align; /* Segment alignment, file & memory */ 111 | } Elf64_Phdr; 112 | ``` 113 | 114 | When you run `readelf --segments /bin/true` or `objdump -x /bin/true` you can also see this information 115 | 116 | ``` 117 | Program Header: 118 | PHDR off 0x0000000000000040 vaddr 0x0000000000000040 paddr 0x0000000000000040 align 2**3 119 | filesz 0x00000000000002d8 memsz 0x00000000000002d8 flags r-- 120 | INTERP off 0x0000000000000318 vaddr 0x0000000000000318 paddr 0x0000000000000318 align 2**0 121 | filesz 0x000000000000001c memsz 0x000000000000001c flags r-- 122 | LOAD off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**12 123 | filesz 0x0000000000000fc8 memsz 0x0000000000000fc8 flags r-- 124 | LOAD off 0x0000000000001000 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**12 125 | filesz 0x0000000000002991 memsz 0x0000000000002991 flags r-x 126 | LOAD off 0x0000000000004000 vaddr 0x0000000000004000 paddr 0x0000000000004000 align 2**12 127 | filesz 0x0000000000000d58 memsz 0x0000000000000d58 flags r-- 128 | LOAD off 0x0000000000005c88 vaddr 0x0000000000006c88 paddr 0x0000000000006c88 align 2**12 129 | filesz 0x000000000000038c memsz 0x00000000000003a0 flags rw- 130 | DYNAMIC off 0x0000000000005c98 vaddr 0x0000000000006c98 paddr 0x0000000000006c98 align 2**3 131 | filesz 0x00000000000001f0 memsz 0x00000000000001f0 flags rw- 132 | 
NOTE off 0x0000000000000338 vaddr 0x0000000000000338 paddr 0x0000000000000338 align 2**3 133 | filesz 0x0000000000000030 memsz 0x0000000000000030 flags r-- 134 | NOTE off 0x0000000000000368 vaddr 0x0000000000000368 paddr 0x0000000000000368 align 2**2 135 | filesz 0x0000000000000044 memsz 0x0000000000000044 flags r-- 136 | 0x6474e553 off 0x0000000000000338 vaddr 0x0000000000000338 paddr 0x0000000000000338 align 2**3 137 | filesz 0x0000000000000030 memsz 0x0000000000000030 flags r-- 138 | EH_FRAME off 0x00000000000049c4 vaddr 0x00000000000049c4 paddr 0x00000000000049c4 align 2**2 139 | filesz 0x0000000000000084 memsz 0x0000000000000084 flags r-- 140 | STACK off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**4 141 | filesz 0x0000000000000000 memsz 0x0000000000000000 flags rw- 142 | RELRO off 0x0000000000005c88 vaddr 0x0000000000006c88 paddr 0x0000000000006c88 align 2**0 143 | filesz 0x0000000000000378 memsz 0x0000000000000378 flags r-- 144 | 145 | ``` 146 | 147 | You can see this table in Ghidra at the top of the binary. 
148 | 149 | In a simple binary only one segment will be marked executable `flags r-x` 150 | 151 | ``` 152 | LOAD off 0x0000000000001000 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**12 153 | filesz 0x0000000000002991 memsz 0x0000000000002991 flags r-x 154 | ``` 155 | 156 | If you click on 157 | -------------------------------------------------------------------------------- /ghidra_scripts/cbmc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/ghidra_scripts/cbmc.png -------------------------------------------------------------------------------- /ghidra_scripts/cbmc.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar cbmc.png 3 | # @menupath Pcode2C.cbmc 4 | 5 | import subprocess 6 | 7 | from ghidra.app.decompiler import DecompInterface 8 | 9 | 10 | def getCCode(): 11 | decomp = DecompInterface() 12 | decomp.openProgram(currentProgram) 13 | func = getFunctionContaining(currentAddress) 14 | res = decomp.decompileFunction(func, 0, None) 15 | ccode = res.getDecompiledFunction().getC() 16 | return ccode 17 | 18 | 19 | def run_cbmc(): 20 | values = ghidra.features.base.values.GhidraValuesMap() 21 | 22 | values.defineFile("C File (default is decompiler)", java.io.File("")) 23 | values.defineInt("Unwind", 1) 24 | values.defineInt("Object Bits", 8) 25 | values.defineChoice("Solver", "default", "default", "smt2", "boolector") 26 | values.defineString( 27 | "Options", 28 | "--trace --bounds-check --conversion-check --div-by-zero-check --float-overflow-check --malloc-fail-null \ 29 | --malloc-may-fail --nan-check --pointer-check --pointer-overflow-check --pointer-primitive-check \ 30 | --signed-overflow-check --undefined-shift-check --unsigned-overflow-check --memory-leak-check", 31 | ) 32 | values.defineString("Function", 
getFunctionContaining(currentAddress).getName()) 33 | 34 | values = askValues("Model Check C File with CBMC", None, values) 35 | 36 | cfile = values.getFile("C File (default is decompiler)") 37 | if cfile == None: 38 | ccode = getCCode() 39 | print("Writing decompiled code to /tmp/decomp.c:") 40 | print(ccode) 41 | filename = "/tmp/decomp.c" 42 | with open(filename, "w") as f: 43 | f.write(ccode) 44 | cfile = filename 45 | else: 46 | cfile = str(cfile) 47 | 48 | # TODO get arch, get bits, get endianess 49 | command = [ 50 | "cbmc", 51 | cfile, 52 | "--unwind", 53 | str(values.getInt("Unwind")), 54 | "--object-bits", 55 | str(values.getInt("Object Bits")), 56 | "--function", 57 | values.getString("Function"), 58 | "--unwinding-assertions", 59 | ] + values.getString("Options").split() 60 | 61 | solver = values.getChoice("Solver") 62 | if solver != "default": 63 | command.append("--" + solver) 64 | 65 | print("Running command: `" + " ".join(command) + "`") 66 | 67 | import subprocess 68 | import sys 69 | 70 | # Start the subprocess with stdout and stderr piped 71 | proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 72 | 73 | # Poll process for new output until finished 74 | while True: 75 | # Try to read from stdout 76 | output = proc.stdout.readline() 77 | if output: 78 | print(output.rstrip()) 79 | 80 | # Try to read from stderr 81 | err = proc.stderr.readline() 82 | if err: 83 | print(err.rstrip()) 84 | 85 | # Check if process has terminated 86 | if proc.poll() is not None: 87 | break 88 | 89 | # Make sure we read any remaining output after the process has finished 90 | for output in proc.stdout.readlines(): 91 | print(output.rstrip()) 92 | for err in proc.stderr.readlines(): 93 | print(err.rstrip()) 94 | 95 | # Close subprocess' file descriptors 96 | proc.stdout.close() 97 | proc.stderr.close() 98 | 99 | 100 | if __name__ == "__main__": 101 | run_cbmc() 102 | 
-------------------------------------------------------------------------------- /ghidra_scripts/compile.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar C_Logo.png 3 | # @menupath Pcode2C.compile 4 | 5 | from javax.swing import ( 6 | JButton, 7 | JPanel, 8 | JScrollPane, 9 | JFrame, 10 | JTextPane, 11 | ) 12 | from java.awt import BorderLayout, Dimension 13 | import subprocess 14 | import pcode2c_util 15 | 16 | old_template = """\ 17 | #include 18 | #define WRITEREG(name) asm( "" ::"r"(name)); 19 | uint64_t CALLBACK({args}); 20 | uint64_t __attribute__((naked)) PATCHCODE({args}){{ 21 | // **** READ REGISTERS **** 22 | // Calling convention registers already available. 23 | // ex: register uint64_t rax asm ("rax"); 24 | // **** END READ REGISTERS **** 25 | 26 | // **** PATCH CODE HERE **** 27 | 28 | // **** END PATCH CODE **** 29 | 30 | // **** WRITE REGISTERS **** 31 | // ex: WRITEREG(rax); 32 | // By default only calling convention registers are preserved. 33 | return CALLBACK({params}); 34 | // **** END WRITE REGISTERS **** 35 | }} 36 | 37 | // Put Help here. 38 | """ 39 | 40 | template = """\ 41 | #include 42 | uint64_t __attribute__((preserve_none)) CALLBACK({args}); 43 | uint64_t __attribute__((preserve_none)) PATCHCODE({args}){{ 44 | 45 | // **** PATCH CODE HERE **** 46 | 47 | // replace clobberable registers with `junk` 48 | uint64_t junk; 49 | return CALLBACK({params}); 50 | }} 51 | """ 52 | 53 | # could generate this from ghidra? 54 | # can I use GHC abi? 55 | # should I unmacroize and just give comment examples? 56 | 57 | # lookup patcherex compiler data. Or maybe just reuse it. 
58 | compiler_data = { 59 | "x86/little/64/default": { 60 | "cc": "clang-19", # x86_64-linux-gnu-gcc", 61 | "args": "uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, uint64_t r11, uint64_t r12, uint64_t r13, uint64_t r14, uint64_t r15, uint64_t rax", 62 | "options": "-Os -masm=intel ", # -fverbose-asm 63 | "params": "rdi, rsi, rdx, rcx, r8, r9, r11, r12, r13, r14, r15, rax", 64 | }, 65 | } 66 | """ 67 | "ARM/little/32/v8": { 68 | "cc": "arm-linux-gnueabi-gcc", 69 | "args": "uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3", 70 | "options": "-Os -fverbose-asm -mlittle-endian -march=armv8-a", # is this even right? 71 | "params": "r0, r1, r2, r3", 72 | }, 73 | "powerpc": { 74 | "cc": "powerpc-linux-gnu-gcc", 75 | "args": "uint64_t r3, uint64_t r4, uint64_t r5, uint64_t r6, uint64_t r7, uint64_t r8", 76 | "params": "r3, r4, r5, r6, r7, r8", 77 | }, 78 | "arm64": { 79 | "cc": "aarch64-linux-gnu-gcc", 80 | "args": "uint64_t r0, uint64_t r1, uint64_t r2, uint64_t r3", 81 | }, 82 | """ 83 | 84 | 85 | def run_compiler(code): 86 | cdata = compiler_data[currentProgram.getLanguage().toString()] 87 | values = ghidra.features.base.values.GhidraValuesMap() 88 | values.defineString("Compiler", cdata["cc"]) 89 | values.defineString("Options", cdata["options"]) 90 | values = askValues("Patch", None, values) 91 | 92 | with open("/tmp/patch_code.c", "w") as f: 93 | f.write(code) 94 | f.flush() 95 | try: 96 | subprocess.check_output( 97 | [ 98 | "{compiler} -S {options} -c /tmp/patch_code.c -o /tmp/patch_code.s".format( 99 | compiler=values.getString("Compiler"), 100 | options=values.getString("Options"), 101 | ) 102 | ], 103 | stderr=subprocess.STDOUT, 104 | shell=True, 105 | ) 106 | except subprocess.CalledProcessError as e: 107 | print(e.output) 108 | raise e 109 | 110 | with open("/tmp/patch_code.s", "r") as f: 111 | asm = f.read() 112 | 113 | asm = asm.split(".cfi_endproc")[ 114 | 0 115 | ] # Is this a good idea? 
There could be important stuff here. 116 | asm = asm.split(".cfi_startproc")[-1] 117 | # Could compile and dump objdump 118 | print("## PATCH CODE START ##") 119 | print(asm) 120 | print("## PATCH CODE END ##") 121 | 122 | 123 | def main(): 124 | cdata = compiler_data[currentProgram.getLanguage().toString()] 125 | 126 | frame = JFrame("Compile C Code") 127 | frame.setSize(700, 400) 128 | frame.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE) 129 | 130 | code_panel = JPanel() 131 | 132 | patches_textarea = JTextPane() # JTextArea(20, 80) 133 | patches_textarea.setPreferredSize(Dimension(700, 400)) 134 | patches_textarea.setText( 135 | template.format(args=cdata["args"], params=cdata["params"]) 136 | ) 137 | patches_textarea.setEditable(True) 138 | scrollPane = JScrollPane(patches_textarea) 139 | code_panel.add(scrollPane, BorderLayout.CENTER) 140 | doc = patches_textarea.getStyledDocument() 141 | pcode2c_util.C_syntax_highlight(doc) 142 | 143 | frame.add(code_panel) 144 | 145 | button = JButton( 146 | "Compile", 147 | actionPerformed=lambda event: run_compiler(patches_textarea.getText()), 148 | ) 149 | frame.add(button, BorderLayout.SOUTH) 150 | 151 | frame.pack() 152 | frame.setVisible(True) 153 | 154 | 155 | main() 156 | -------------------------------------------------------------------------------- /ghidra_scripts/patcherex.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar patcherex2.jpeg 3 | # @menupath Pcode2C.patcherex_insert 4 | 5 | import subprocess 6 | 7 | values = ghidra.features.base.values.GhidraValuesMap() 8 | 9 | values.defineAddress("Address", currentAddress, currentProgram) 10 | values.defineString("Assembly", "") 11 | values.defineFile( 12 | "Outfile", java.io.File(currentProgram.getExecutablePath() + ".patched") 13 | ) 14 | # values.defineChoice("Patch Type", "Insert", "Insert", "Modify", "Remove") 15 | 16 | values = askValues("Insert Assembly with Patcherex2", None, 
values) 17 | 18 | # Obviously this is unsafe to untrusted users. Escaping attacks are very possible. 19 | prog = """\ 20 | import patcherex2 21 | proj = patcherex2.Patcherex("{binfile}") 22 | print("Patching binary at address {addr} with assembly `{assembly}`") 23 | proj.patches.append(patcherex2.InsertInstructionPatch(0x{addr}, "{assembly}", force_insert=True)) 24 | proj.apply_patches() 25 | proj.binfmt_tool.save_binary("{outfile}") 26 | print("Patched binary saved to {outfile}") 27 | """.format( 28 | assembly=values.getString("Assembly"), 29 | addr=values.getAddress("Address"), 30 | binfile=currentProgram.getExecutablePath(), 31 | outfile=values.getFile("Outfile"), 32 | ) 33 | 34 | print("Running Script:") 35 | print(prog) 36 | command = ["python3", "-c", prog] 37 | 38 | 39 | proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 40 | 41 | # Poll process for new output until finished 42 | while True: 43 | # Try to read from stdout 44 | output = proc.stdout.readline() 45 | if output: 46 | print(output.rstrip()) 47 | 48 | # Try to read from stderr 49 | err = proc.stderr.readline() 50 | if err: 51 | print(err.rstrip()) 52 | 53 | # Check if process has terminated 54 | if proc.poll() is not None: 55 | break 56 | 57 | # Make sure we read any remaining output after the process has finished 58 | for output in proc.stdout.readlines(): 59 | print(output.rstrip()) 60 | for err in proc.stderr.readlines(): 61 | print(err.rstrip()) 62 | 63 | # Close subprocess' file descriptors 64 | proc.stdout.close() 65 | proc.stderr.close() 66 | -------------------------------------------------------------------------------- /ghidra_scripts/patcherex2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/ghidra_scripts/patcherex2.jpeg -------------------------------------------------------------------------------- 
/ghidra_scripts/pcode2c.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar Ghidra_Logo.png 3 | # @menupath Pcode2C.pcode2c 4 | 5 | import subprocess 6 | from ghidra.features.base.values import GhidraValuesMap 7 | 8 | 9 | def run_pcode2c(): 10 | func = getFunctionContaining(currentAddress) 11 | 12 | startAddress = func.getEntryPoint() 13 | 14 | endAddress = func.getBody().getMaxAddress() 15 | listing = currentProgram.getListing() 16 | insn = listing.getInstructionContaining(endAddress) 17 | endAddress = endAddress.add(insn.getLength()) 18 | size = endAddress.subtract(startAddress) 19 | 20 | memory = currentProgram.getMemory() 21 | block = memory.getBlock(startAddress) 22 | mb_source_info = block.getSourceInfos()[0] 23 | if block and block.getStart().getOffset() != 0: 24 | file_offset = ( 25 | startAddress.getOffset() 26 | - block.getStart().getOffset() 27 | + mb_source_info.getFileBytesOffset() 28 | ) 29 | else: 30 | file_offset = None 31 | 32 | language = currentProgram.getLanguageCompilerSpecPair() 33 | filename = currentProgram.getExecutablePath() 34 | print(hex(file_offset)) 35 | values = ghidra.features.base.values.GhidraValuesMap() 36 | values.defineFile("Bin File", java.io.File(filename)) # get current file 37 | values.defineAddress("Start Address", startAddress, currentProgram) 38 | values.defineInt("File Offset", file_offset) 39 | # values.defineAddress("End Address", endAddress, currentProgram) 40 | values.defineInt("Size", size) 41 | # values.defineFile( 42 | # "Out File", java.io.File(filename + ".pcode2c." 
+ funcname + ".c") 43 | # ) # get current file 44 | values.defineLanguage("Language", language) 45 | import random 46 | 47 | # Super hacky way to get it to clear itself 9 times uot of 10 48 | # Ghidra is caching the askvules based on the title 49 | _ = askValues("Pcode2C" + " " * random.randint(100, 200), None, values) 50 | command = [ 51 | "python3", 52 | "-m", 53 | "pcode2c", 54 | str(values.getFile("Bin File")), 55 | "-b", 56 | str(values.getLanguage("Language").getLanguageID()), 57 | "--start-address", 58 | "0x" + str(values.getAddress("Start Address")), 59 | "--file-offset", 60 | str(hex(values.getInt("File Offset"))), 61 | "--size", 62 | str(hex(values.getInt("Size"))), 63 | # "--end-address", 64 | # str(values.getAddress("End Address")), 65 | ] 66 | del values 67 | print(command) 68 | 69 | proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 70 | 71 | # Poll process for new output until finished 72 | code = [] 73 | while True: 74 | # Try to read from stdout 75 | output = proc.stdout.readline() 76 | if output: 77 | print(output.rstrip()) 78 | code.append(output) 79 | 80 | # Try to read from stderr 81 | err = proc.stderr.readline() 82 | if err: 83 | print(err.rstrip()) 84 | 85 | # Check if process has terminated 86 | if proc.poll() is not None: 87 | break 88 | 89 | # Make sure we read any remaining output after the process has finished 90 | for output in proc.stdout.readlines(): 91 | print(output.rstrip()) 92 | code.append(output) 93 | for err in proc.stderr.readlines(): 94 | print(err.rstrip()) 95 | 96 | # Close subprocess' file descriptors 97 | proc.stdout.close() 98 | proc.stderr.close() 99 | return code 100 | 101 | 102 | if __name__ == "__main__": 103 | run_pcode2c() 104 | -------------------------------------------------------------------------------- /ghidra_scripts/pcode2c_util.py: -------------------------------------------------------------------------------- 1 | from javax.swing.text import StyleConstants, 
SimpleAttributeSet 2 | import java.awt 3 | 4 | import re 5 | 6 | types = r"u?int\d+_t|char|int|double|float|void|size_t|ssize_t|off_t|pid_t|uid_t|gid_t|boolean" 7 | keywords = r"if|else|switch|case|default|while|do|for|break|continue|return|goto|typedef|struct|union|enum|register|static|auto|extern|const|volatile|restrict" 8 | 9 | 10 | def C_syntax_highlight(doc): 11 | comment = SimpleAttributeSet() 12 | StyleConstants.setForeground(comment, java.awt.Color(0, 128, 0)) 13 | keyword = SimpleAttributeSet() 14 | StyleConstants.setForeground(keyword, java.awt.Color(128, 0, 0)) 15 | type_ = SimpleAttributeSet() 16 | StyleConstants.setForeground(type_, java.awt.Color(0, 0, 255)) 17 | l = doc.getText(0, doc.getLength()) 18 | for m in re.finditer(types, l): 19 | doc.setCharacterAttributes(m.start(), m.end() - m.start(), type_, False) 20 | for m in re.finditer(keywords, l): 21 | doc.setCharacterAttributes(m.start(), m.end() - m.start(), keyword, False) 22 | startIndex = 0 23 | for line in l.split("\n"): 24 | line_index = line.find("//") 25 | if line_index != -1: 26 | doc.setCharacterAttributes( 27 | startIndex + line_index, len(line) - line_index, comment, False 28 | ) 29 | line_index = line.find("#include") 30 | if line_index != -1: 31 | doc.setCharacterAttributes( 32 | startIndex + line_index, len(line) - line_index, keyword, False 33 | ) 34 | startIndex += len(line) + 1 35 | -------------------------------------------------------------------------------- /ghidra_scripts/pcode2cmega.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar boom.png 3 | # @menupath Pcode2C.mega 4 | 5 | 6 | from javax.swing import ( 7 | JButton, 8 | JPanel, 9 | JScrollPane, 10 | JFrame, 11 | JTextPane, 12 | ) 13 | from javax.swing.text import StyleConstants, SimpleAttributeSet 14 | from java.awt import BorderLayout, Dimension 15 | import subprocess 16 | from java.io import File 17 | from ghidra.features.base.values import 
GhidraValuesMap 18 | import random 19 | 20 | # from pcode2c import PCode2C 21 | # from cbmc import CBMC 22 | 23 | import os 24 | import pcode2c_util 25 | 26 | script_path = __file__ # str(getScriptSourceFile().getAbsolutePath()) 27 | script_dir = os.path.dirname(script_path) # File(script_path).get_Parent() 28 | harness_template = open(script_dir + "/../templates/harness.c", "r").read() 29 | pcode_header = open(script_dir + "/../templates/_pcode.h", "r").read() 30 | 31 | 32 | def run_verify(harness_code): 33 | code = run_pcode2c() 34 | cbmc_file = "/tmp/cbmc_test.c" 35 | with open(cbmc_file, "w") as f: 36 | 37 | def write(s): 38 | print(s) 39 | f.write(s) 40 | 41 | # f.write("//****** HEADER *********\n") 42 | # f.write(pcode_header) 43 | write("//****** Interpreter *********\n") 44 | write("\n".join(code)) 45 | write("//****** HARNESS *********\n") 46 | write(harness_code) 47 | f.flush() 48 | run_cbmc(cbmc_file) 49 | 50 | 51 | # Todo. Refactor calling subprocess code to be shared. 52 | # Put all buttons in main panel. 53 | # Save harness code between invocations? 
def main():
    """Open the "Harness Code" window for editing and verifying a harness.

    The window contains an editable text pane pre-filled with
    ``harness_template`` (C syntax colouring is applied once, to that initial
    text) and a "Verify" button. Pressing the button passes the pane's current
    text to ``run_verify``.
    """
    frame = JFrame("Harness Code")
    frame.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE)

    patches_textarea = JTextPane()
    patches_textarea.setText(harness_template)
    patches_textarea.setEditable(True)

    # Size the scroll pane rather than the text pane: a fixed preferred size
    # on the view would stop it from growing with its content, so long
    # harnesses could not be scrolled.
    scroll_pane = JScrollPane(patches_textarea)
    scroll_pane.setPreferredSize(Dimension(700, 400))

    # JPanel defaults to FlowLayout, which ignores the CENTER constraint;
    # an explicit BorderLayout lets the editor fill the panel and follow
    # window resizes.
    code_panel = JPanel(BorderLayout())
    code_panel.add(scroll_pane, BorderLayout.CENTER)
    frame.add(code_panel)

    # Colour types, keywords and `//` comments in the initial template text.
    pcode2c_util.C_syntax_highlight(patches_textarea.getStyledDocument())

    verify_button = JButton(
        "Verify",
        actionPerformed=lambda event: run_verify(patches_textarea.getText()),
    )
    frame.add(verify_button, BorderLayout.SOUTH)

    # pack() sizes the frame from its children's preferred sizes, so no
    # explicit frame.setSize() call is needed beforehand.
    frame.pack()
    frame.setVisible(True)


# This is not really acceptable.
99 | # bg copy and paste 100 | 101 | 102 | def run_pcode2c(): 103 | func = getFunctionContaining(currentAddress) 104 | funcname = func.getName() 105 | 106 | startAddress = func.getEntryPoint() 107 | 108 | endAddress = func.getBody().getMaxAddress() 109 | listing = currentProgram.getListing() 110 | insn = listing.getInstructionContaining(endAddress) 111 | endAddress = endAddress.add(insn.getLength()) 112 | size = endAddress.subtract(startAddress) 113 | 114 | memory = currentProgram.getMemory() 115 | block = memory.getBlock(startAddress) 116 | mb_source_info = block.getSourceInfos()[0] 117 | if block and block.getStart().getOffset() != 0: 118 | file_offset = ( 119 | startAddress.getOffset() 120 | - block.getStart().getOffset() 121 | + mb_source_info.getFileBytesOffset() 122 | ) 123 | else: 124 | file_offset = None 125 | 126 | language = currentProgram.getLanguageCompilerSpecPair() 127 | filename = currentProgram.getExecutablePath() 128 | print(hex(file_offset)) 129 | values = ghidra.features.base.values.GhidraValuesMap() 130 | values.defineFile("Bin File", java.io.File(filename)) # get current file 131 | values.defineAddress("Start Address", startAddress, currentProgram) 132 | values.defineInt("File Offset", file_offset) 133 | values.defineInt("Size", size) 134 | # values.defineFile( 135 | # "Out File", java.io.File(filename + ".pcode2c." 
+ funcname + ".c") 136 | # ) # get current file 137 | values.defineLanguage("Language", language) 138 | 139 | # Super hacky way to get it to clear itself 9 times uot of 10 140 | # Ghidra is caching the askvules based on the title 141 | _ = askValues("Pcode2C" + " " * random.randint(100, 200), None, values) 142 | command = [ 143 | "python3", 144 | "-m", 145 | "pcode2c", 146 | str(values.getFile("Bin File")), 147 | "--headers", 148 | "-b", 149 | str(values.getLanguage("Language").getLanguageID()), 150 | "--start-address", 151 | "0x" + str(values.getAddress("Start Address")), 152 | "--file-offset", 153 | str(hex(values.getInt("File Offset"))), 154 | "--size", 155 | str(hex(values.getInt("Size"))), 156 | # "--end-address", 157 | # str(values.getAddress("End Address")), 158 | ] 159 | del values 160 | print(command) 161 | 162 | proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 163 | 164 | # Poll process for new output until finished 165 | code = [] 166 | while True: 167 | # Try to read from stdout 168 | output = proc.stdout.readline() 169 | if output: 170 | print(output.rstrip()) 171 | code.append(output) 172 | 173 | # Try to read from stderr 174 | err = proc.stderr.readline() 175 | if err: 176 | print(err.rstrip()) 177 | 178 | # Check if process has terminated 179 | if proc.poll() is not None: 180 | break 181 | 182 | # Make sure we read any remaining output after the process has finished 183 | for output in proc.stdout.readlines(): 184 | print(output.rstrip()) 185 | code.append(output) 186 | for err in proc.stderr.readlines(): 187 | print(err.rstrip()) 188 | 189 | # Close subprocess' file descriptors 190 | proc.stdout.close() 191 | proc.stderr.close() 192 | proc.kill() 193 | return code 194 | 195 | 196 | def run_cbmc(cfile): 197 | values = GhidraValuesMap() 198 | values.defineInt("Unwind", 1) 199 | values.defineInt("Object Bits", 8) 200 | values.defineChoice("Solver", "default", "default", "smt2", "boolector") 201 | values.defineString( 
202 | "Options", 203 | "--bounds-check --conversion-check --div-by-zero-check --float-overflow-check --malloc-fail-null \ 204 | --malloc-may-fail --nan-check --pointer-check --pointer-overflow-check --pointer-primitive-check \ 205 | --signed-overflow-check --undefined-shift-check --unsigned-overflow-check --memory-leak-check", 206 | ) 207 | values = askValues("CBMC", None, values) 208 | # TODO get arch, get bits, get endianess 209 | command = [ 210 | "cbmc", 211 | cfile, 212 | "-I", 213 | script_dir + "/../templates/", 214 | "--unwind", 215 | str(values.getInt("Unwind")), 216 | "--object-bits", 217 | str(values.getInt("Object Bits")), 218 | "--unwinding-assertions", 219 | ] + values.getString("Options").split() 220 | 221 | solver = values.getChoice("Solver") 222 | if solver != "default": 223 | command.append("--" + solver) 224 | 225 | print("Running command: `" + " ".join(command) + "`") 226 | 227 | # Start the subprocess with stdout and stderr piped 228 | proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 229 | """proc = subprocess.Popen( 230 | [ 231 | "cbmc", 232 | "-I", 233 | script_dir + "/../templates/", 234 | "--unwind", 235 | "1", 236 | "/tmp/cbmc_test.c", 237 | "--flush", 238 | # "--stop-on-fail", 239 | ], 240 | stdout=subprocess.PIPE, 241 | stderr=subprocess.PIPE, 242 | )""" 243 | 244 | # Poll process for new output until finished 245 | while True: 246 | # Try to read from stdout 247 | output = proc.stdout.readline() 248 | if output: 249 | print(output.rstrip()) 250 | 251 | # Try to read from stderr 252 | err = proc.stderr.readline() 253 | if err: 254 | print(err.rstrip()) 255 | 256 | # Check if process has terminated 257 | if proc.poll() is not None: 258 | break 259 | 260 | # Make sure we read any remaining output after the process has finished 261 | for output in proc.stdout.readlines(): 262 | print(output.rstrip()) 263 | for err in proc.stderr.readlines(): 264 | print(err.rstrip()) 265 | 266 | # Close subprocess' file 
descriptors 267 | proc.stdout.close() 268 | proc.stderr.close() 269 | proc.kill() 270 | 271 | 272 | if __name__ == "__main__": 273 | main() 274 | -------------------------------------------------------------------------------- /ghidra_scripts/wip/e9patch.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar icon.gif 3 | # @menupath Pcode2C.e9patch 4 | 5 | # call e9patch 6 | # ghidra script 7 | 8 | import subprocess 9 | 10 | values = ghidra.features.base.values.GhidraValuesMap() 11 | 12 | """ 13 | values.defineString("Name") 14 | values.defineInt("Count") 15 | values.defineInt("Max Results", 100) 16 | values.defineChoice("Priority", "Low", "Low", "Medium", "High") 17 | values.defineProgram("Other Program") 18 | values.defineProjectFile("Project File") 19 | values.defineProjectFolder("Project Folder") 20 | """ 21 | values.defineAddress("Address", currentAddress, currentProgram) 22 | values.defineString("In Vars", "int foo, int bar") 23 | values.defineString("PatchCode", "int patchCode() {}n\n\n\n\n\n\n\n\n") 24 | values.defineString("Patch Command", "after validateCommand(rax)@goodpatch") 25 | values.defineFile("PatchFile", None) # java.io.File("patch_code.c")) 26 | values.defineFile("Outfile", java.io.File("patched.bin")) 27 | values = askValues("Patch", None, values) 28 | 29 | # check if both PatchFile and patchcode are set 30 | 31 | """ 32 | print(values.getString("Name")) 33 | print(values.getInt("Count")) 34 | print(values.getInt("Max Results")) 35 | print(values.getChoice("Priority")) 36 | """ 37 | 38 | addr = values.getInt("Address") 39 | outfile = values.getFile("Outfile") 40 | 41 | 42 | patchcode = values.getString("PatchCode") 43 | with open("/tmp/patch_code.c", "w") as f: 44 | f.write(patchcode) 45 | 46 | res = subprocess.check_output(["e9compile", "/tmp/patch_code.c"]) 47 | 48 | 49 | def command(): 50 | return ["e9patch", "-o", outfile, str(addr)] 51 | 52 | 53 | command = [ 54 | 
"e9tool", 55 | "-M", 56 | "-Os", 57 | "--static-loader", 58 | "'addr == {}'".format(hex(currentAddress)), 59 | "-P", 60 | values.getString("PatchCommand"), 61 | "-o", 62 | str(values.getFile("Outfile")), 63 | ] 64 | 65 | res = subprocess.check_output(["e9tool", "--help"]) 66 | print(res) 67 | 68 | # options = currentProgram.getOptions(currentProgram.PROGRAM_INFO) 69 | # options.getString("foo", None) 70 | 71 | 72 | """ 73 | separate commands: 74 | Inspect Patches 75 | Perfrom Patch 76 | Add patch 77 | Delete patch 78 | 79 | 80 | Patcherex2 backend vs e9patch 81 | 82 | """ 83 | -------------------------------------------------------------------------------- /ghidra_scripts/wip/elf_edit.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar scripts/elf.png 3 | # @menupath Pcode2C.elf 4 | 5 | # Get elf data 6 | # get current address 7 | # fill out values box with data. Why is it not being updated? 8 | 9 | # Use hex writing api 10 | 11 | listing = currentProgram.getListing() 12 | 13 | # Get the Data object at the current address 14 | phdrarr = listing.getDataBefore(currentAddress) 15 | phdr = phdrarr.getComponentContaining(currentAddress.subtract(phdrarr.getAddress())) 16 | phdr = phdr.getDataType() 17 | for comp in phdr.getComponents(): 18 | print(comp) 19 | 20 | """ 21 | if data_at_address is not None: 22 | # Get the data type of the data at the address 23 | data_type = data_at_address.getDataType() 24 | 25 | # Print the data type information 26 | print("Data type at address {}: {}".format(currentAddress, data_type.getName())) 27 | else: 28 | print("No data found at address {}".format(currentAddress)) 29 | 30 | 31 | ef = currentProgram.getExecutableFormat() 32 | print(ef) 33 | # https://gitlab.com/saruman9/ghidra_scripts/-/blob/master/GetEntryPoints.java 34 | if ef != "Executable and Linking Format (ELF)": 35 | print("Not an ELF file") 36 | exit() 37 | currentProgram.getMemory() 38 | from 
ghidra.app.util.bin import MemoryByteProvider 39 | from ghidra.app.util.bin.format.elf import ElfHeader 40 | 41 | # from ghidra.app.util.bin.format.elf import ElfHeaderFactory 42 | # from ghidra.app.util.bin.format.elf import ElfSectionHeader 43 | 44 | byteprovider = MemoryByteProvider( 45 | currentProgram.getMemory(), currentProgram.getImageBase() 46 | ) 47 | 48 | path = currentProgram.getExecutablePath() 49 | # FileByteProvider(java.io.File(path)) 50 | from ghidra.app.util.bin import RandomAccessByteProvider 51 | 52 | byteprovider = RandomAccessByteProvider(java.io.File(path)) 53 | # public static ElfHeader createElfHeader 54 | print(dir(ElfHeader)) 55 | elf = ElfHeader(byteprovider, None) # RethrowContinuesFactory.INSTANCE, byteProvider) 56 | try: 57 | elf.parse() 58 | except Exception as e: 59 | print(e) 60 | phdrs = elf.getProgramHeaders() 61 | for phdr in phdrs: 62 | print(phdr.getType()) 63 | print(dir(elf)) 64 | print(elf.e_phoff()) 65 | print(elf.e_flags()) 66 | print(elf.e_entry()) 67 | # print(elf.e_phnum()) 68 | print(elf.getSection(".text")) 69 | print(elf.getProgramHeaderCount()) 70 | print(elf.getProgramHeaderProgramHeader()) 71 | print(elf.getProgramHeaderAt(currentAddress.getOffset())) 72 | """ 73 | -------------------------------------------------------------------------------- /ghidra_scripts/wip/live_vars.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar scripts/live.png 3 | # @menupath Pcode2C.liveness2 4 | 5 | #TODO Add User Code Here 6 | from pprint import pprint 7 | from ghidra.app.decompiler import DecompInterface 8 | decomp = DecompInterface() 9 | decomp.openProgram(currentProgram) 10 | func = getFunctionContaining(currentAddress) 11 | print(currentAddress) 12 | print(func) 13 | res = decomp.decompileFunction(func, 0, None) 14 | hf = res.getHighFunction() 15 | blocks = hf.getBasicBlocks() 16 | 17 | 18 | # def live_at_addr(blocks, addr): 19 | livein = {b.getStart() : 
set() for b in blocks} 20 | 21 | live_at_patch = None 22 | for i in range(len(blocks)): # is this enough iterations? worst case is straight line graph? 23 | for blk in blocks: 24 | blk_id = blk.getStart() 25 | live = set() #copy(liveout[blk_id]) 26 | for outind in range(blk.getOutSize()): 27 | succblk = blk.getOut(outind) 28 | live.update(livein[succblk.getStart()]) 29 | for pcode in reversed(list(blk.getIterator())): 30 | if pcode.isAssignment: 31 | live.discard(pcode.getOutput()) 32 | opcode = pcode.getOpcode() 33 | if opcode == pcode.BRANCH: 34 | pass 35 | elif opcode == pcode.CBRANCH: 36 | live.add(pcode.getInput(1)) 37 | else: 38 | print(pcode.getOpcode(), pcode.BRANCH) 39 | live.update(pcode.getInputs()) 40 | if pcode.getSeqnum().getTarget() == currentAddress: 41 | live_at_patch = {v for v in live if not v.isConstant()} 42 | livein[blk_id] = live 43 | livein = {k : {v for v in vars if not v.isConstant()} for k,vars in livein.items()} 44 | pprint([(blk, [(pcode.getSeqnum(), pcode) for pcode in blk.getIterator()]) for blk in blocks]) 45 | pprint(livein) 46 | 47 | pprint(currentAddress) 48 | print("live at patch", [(v, v.getHigh().getName()) for v in live_at_patch]) 49 | 50 | 51 | # def live_at_addr(blocks, addr): 52 | avail_out = {b.getStart() : set() for b in blocks} 53 | 54 | avail_at_patch = None 55 | for i in range(len(blocks)): # is this enough iterations? worst case is straight line graph? 
56 | for blk in blocks: 57 | blk_id = blk.getStart() 58 | avail = {} 59 | #if blk.getSize() == 0: 60 | # available = set() #copy(liveout[blk_id]) 61 | '''for ind in range(blk.getInSize()): 62 | prevblk = blk.getIn(ind) 63 | if available = None: 64 | available = {v for v in available_out[prevblk.getStart()])} 65 | else: 66 | available.intersect_update(available_out[prevblk.getStart()]) 67 | ''' 68 | for pcode in blk.getIterator(): 69 | out = pcode.getOutput() 70 | if out != None: 71 | highvar = out.getHigh() 72 | if highvar != None: 73 | avail[highvar.getName()] = out 74 | if pcode.getSeqnum().getTarget() == currentAddress: 75 | avail_at_patch = {k : v for k,v in avail.items()} 76 | avail_out[blk_id] = avail 77 | 78 | 79 | print("avail at patch", avail_at_patch) -------------------------------------------------------------------------------- /ghidra_scripts/wip/liveness.py: -------------------------------------------------------------------------------- 1 | # @category: PCode2C 2 | # @toolbar scripts/live.png 3 | # @menupath Pcode2C.liveness 4 | from ghidra.program.model.block import BasicBlockModel 5 | 6 | func = getFunctionContaining(currentAddress) 7 | func.getBody() 8 | startAddress = func.getEntryPoint() 9 | 10 | listing = currentProgram.getListing() 11 | start_insn = listing.getInstructionAt(startAddress) 12 | # https://ghidra.re/ghidra_docs/api/ghidra/program/database/code/InstructionDB.html 13 | 14 | # get all instruction in a function 15 | insns = set([start_insn]) 16 | todo = [start_insn] 17 | next = {} 18 | while len(todo) > 0: 19 | insn = todo.pop() 20 | fallthrough = insn.getDefaultFallThrough() 21 | if fallthrough is None: 22 | next[insn] = set() 23 | else: 24 | new_insn = listing.getInstructionAt(fallthrough) 25 | next[insn] = set([new_insn]) 26 | if new_insn not in insns: 27 | insns.add(new_insn) 28 | todo.append(new_insn) 29 | for addr in list(insn.getFlows()): 30 | print(addr) 31 | new_insn = listing.getInstructionAt(addr) 32 | 
next[insn].add(new_insn) 33 | if new_insn in insns: 34 | continue 35 | insns.add(new_insn) 36 | todo.append(new_insn) 37 | # could go in there and find the ret instructions 38 | 39 | # prev = [next,insn for insn, nexts in next.items() for next in nexts] 40 | # collect up by 41 | 42 | 43 | def is_ret(insn): 44 | for op in insn.getPcode(): 45 | if op.getOpcode() == ghidra.program.model.pcode.PcodeOp.RETURN: 46 | return True 47 | return False 48 | 49 | 50 | print(insns) 51 | 52 | # callee saved and return registers are live at end of function 53 | 54 | cc = func.getCallingConvention() 55 | 56 | cc.getKilledByCallList() 57 | print("trasH", cc.getLikelyTrash()) 58 | print("return", cc.getReturnAddress()) 59 | print( 60 | "return loc", 61 | cc.getReturnLocation(func.getReturnType(), currentProgram), 62 | ) # ghidra.program.model.data.UnsignedLongDataType() 63 | # https://ghidra.re/ghidra_docs/api/ghidra/program/model/lang/PrototypeModel.html#getStorageLocations(ghidra.program.model.listing.Program,ghidra.program.model.data.DataType%5B%5D,boolean) 64 | live_exit = cc.getUnaffectedList() 65 | # this is probably a stack address + cc.getReturnAddress() 66 | 67 | print(dir(cc)) 68 | live_exit = { 69 | currentProgram.getRegister(varnode.getAddress(), varnode.getSize()) 70 | for varnode in live_exit 71 | } 72 | live_exit.add(cc.getReturnLocation(func.getReturnType(), currentProgram)) 73 | live_out = {insn: set(live_exit) if is_ret(insn) else set() for insn in insns} 74 | live_in = {insn: set() for insn in insns} 75 | 76 | 77 | print("live_exit", live_exit) 78 | # https://ghidra.re/ghidra_docs/api/ghidra/program/model/lang/OperandType.html#doesRead(int) 79 | uses = { 80 | insn: {insn.getRegister(i) for i in range(insn.getNumOperands()) if doesRead()} 81 | for insn in insns 82 | } 83 | defines = {insn: set() for insn in insns} # todo 84 | 85 | for j in range(10): 86 | for insn in insns: 87 | for next_insn in next[insn]: 88 | live_out[insn] |= live_in[next_insn] 89 | live_in[insn] 
|= live_out[insn].difference(defines[insn]) 90 | live_in[insn] |= uses[insn] 91 | 92 | print("live_in", live_in) 93 | print("live_out", live_out) 94 | 95 | for insn in insns: 96 | insn.getFallFrom() 97 | insn.getFlows() 98 | print(insn, "next", insn.getNext()) 99 | 100 | insn.getReferenceIteratorTo() 101 | 102 | for i in range(insn.getNumOperands()): 103 | print("obobjects", insn.getOpObjects(i)) 104 | print("register", insn.getRegister(i)) 105 | print("registervalue", insn.getRegisterValue(insn.getRegister(i))) 106 | 107 | # https://gist.github.com/bin2415/15028e78d5cf0c708fe1ab82fc252799 108 | # https://ghidra.re/ghidra_docs/api/ghidra/program/model/block/BasicBlockModel.html 109 | bbModel = BasicBlockModel(currentProgram) 110 | codeBlockIterator = bbModel.getCodeBlocksContaining(func.getBody(), None) 111 | while codeBlockIterator.hasNext(): 112 | bb = codeBlockIterator.next() 113 | print(bb) 114 | # bb_set.add(bb.getMinAddress().getOffset()) 115 | # bb_func_set.add(bb) 116 | # G = addBB(bb, G, bb_func_map) 117 | -------------------------------------------------------------------------------- /ghidra_scripts/wip/patch_diff.py: -------------------------------------------------------------------------------- 1 | # run readelf. Does it look right? 2 | 3 | # Does the segment data make sense? 4 | 5 | # run objdump. Diff. 
def interp_varnode(varnode):
    """Translate a Ghidra varnode into C arguments for a pcode helper call.

    Returns a ``(pointer_expr, size)`` pair of strings:

    * constants become the address of a C compound literal, e.g.
      ``&(int32_t){0x5}``;
    * register, unique and address-space varnodes become an offset into the
      ``reg``, ``unique`` or ``ram`` byte array; registers also get their name
      appended as a C comment when Ghidra knows one.

    ``size`` is the varnode size in bytes, rendered as a decimal string.

    Raises AssertionError for a constant whose size has no fixed-width C
    integer type, and for a varnode that is none of the four kinds above.
    """
    size = varnode.getSize()
    offset = varnode.getOffset()
    size_str = str(size)
    if varnode.isConstant():
        # Only constants need a C type (for the compound literal); the other
        # kinds are plain byte pointers, so any size is acceptable for them.
        c_type_by_size = {1: "int8_t", 2: "int16_t", 4: "int32_t", 8: "int64_t"}
        assert size in c_type_by_size, "unrecognized size %d" % size
        return "&({}){{{}}}".format(c_type_by_size[size], hex(offset)), size_str
    elif varnode.isRegister():
        expr = "reg + 0x{:x}".format(offset)
        reg = currentProgram.getRegister(varnode.getAddress(), size)
        # The lookup can come back empty; still emit a usable pointer instead
        # of falling off the end of the function and returning None.
        if reg is not None:
            expr += " /* {} */".format(reg.getName())
        return expr, size_str
    elif varnode.isUnique():
        return "unique + 0x{:x}".format(offset), size_str
    elif varnode.isAddress():
        return "ram + 0x{:x}".format(offset), size_str
    else:
        assert False, "unrecognized varnode type {}".format(varnode)
def dump_raw_pcode(func):
    """Render ``func`` as the C source text of a pcode interpreter function.

    The generated function is named ``pcode2c_<function name>`` and consists
    of an endless loop around a ``switch`` on ``state->pc``. Every instruction
    in the function body contributes one ``case`` labelled with its address,
    an ``INSN(...)`` marker, and one line per raw pcode op (as rendered by
    ``pcode2c``). An asserting ``default`` case closes the switch.

    Returns the whole translation unit as a single newline-joined string.
    """
    body = func.getBody()
    instructions = currentProgram.getListing().getInstructions(body, True)
    lines = [
        "/* AUTOGENERATED. DO NOT EDIT. */",
        '#include "_pcode.h"',
        "void pcode2c_{}(CPUState *state, size_t breakpoint){{".format(func.getName()),
        "\tuint8_t* reg = state->reg; uint8_t* unique = state->unique; uint8_t* ram = state->ram;",
        "\tfor(;;){",
        "\tswitch(state->pc) {",
    ]
    while instructions.hasNext():
        insn = instructions.next()
        lines.append("\t\tcase 0x{}:".format(insn.getAddress()))
        lines.append('\t\t\tINSN(0x{},"{}")'.format(insn.getAddress(), insn))
        lines.extend(pcode2c(pcode_op) for pcode_op in insn.getPcode())
    # "}}}" closes the switch, the for loop and the function, in that order.
    lines += ["\t\tdefault: assert(0);", "}}}"]
    return "\n".join(lines)
"verify_template.c", "Makefile"]: 150 | shutil.copy( 151 | os.path.join(this_dir, filename), 152 | dir.toString(), 153 | ) 154 | -------------------------------------------------------------------------------- /ghidra_scripts/wip/validate.py: -------------------------------------------------------------------------------- 1 | # Is it even possible to auto run some kind of validation 2 | 3 | # just so many things can go wrong and it's enough work that it isn't clear it's worth it. 4 | harness = """ 5 | #include 6 | #include 7 | // #include 8 | // #include 9 | 10 | 11 | 12 | typ1 global1; 13 | typ2 global2; 14 | typ3 global3; 15 | 16 | {decomp} 17 | 18 | {pcode2c} 19 | 20 | int main(){ 21 | CPUState state; 22 | init_state(&state); 23 | 24 | //better because state.mem will be chaosed 25 | global1 = state.mem[global1_addr] 26 | //state.mem[global1_addr] = global1; 27 | //state.mem[global2_addr] = global2; 28 | {global_init} 29 | 30 | retval2 = ghidra_decomp(int state.rdi, state.rsi) 31 | pcode2c(&state, -1); 32 | retval1 = state.rax; 33 | 34 | assert(retval == retval2); 35 | assert(state.mem[global1_addr] == global1); 36 | assert(state.mem[global2_addr] == global2); 37 | ... 38 | } 39 | 40 | """ 41 | 42 | """ 43 | } 44 | 45 | """ 46 | -------------------------------------------------------------------------------- /notes/junk.md: -------------------------------------------------------------------------------- 1 | Ok... 2 | Try many examples 3 | storage of stuff in editor window 4 | 5 | C diffing 6 | 7 | Harness in compile like window. Call cbmc. 8 | Could auto fill in decompiled code for a comparative translatin validation. 9 | Could help fill out abi. 10 | Put all buttons, options in single pane 11 | Could copy assembly out in ghidra assmbler. (involves buldings an assembly parsr whch culd be useful.) 
12 | 13 | libvmi - 14 | google rekall 15 | readelf -l /proc/kcore 16 | inotify 17 | gdb 18 | sudo rea 19 | readelf -l /proc/kcore , symbolization 20 | 21 | A pull request to patcherex2. 22 | micropatcherex 23 | 24 | Make documentation 25 | 26 | - Better loader implementation. Not a problem anymore. Could also go back to old style ghidra export script 27 | Reflecting inline data into the interpreter. 28 | 29 | Just take in an offset and an address, size as command line options. 30 | Dump byte array as C variable. 31 | 32 | Can I dump right after loading 33 | 34 | diff C, add cbmc annotations at those positions? 35 | 36 | > add detour script 37 | > elf space script? 38 | 39 | look at in objdump. Hard to read flags in ghidra? 40 | G > file(0x1000) 41 | 42 | askvalues after compiler ask 43 | editing elf data manually in ghidra? 44 | putting patch_label: as the end of code 45 | jmp patch0: 46 | 47 | validate command, takes in decompiled C, runs pcode2c, asserts typical stuff? 48 | run diff on two files. 49 | Ghidra already does this 50 | 51 | 52 | 53 | Assembler asm = Assemblers.getAssembler(currentProgram); 54 | asm.assemble(currentAddress, "ADD ..."); 55 | 56 | micropatching with lang chain. Actions: call compiler, patcherex, objdump. Has to pass cbmc 57 | 58 | Something comparative? Eh. 59 | Simple C printing 60 | grab eax using 61 | patcherex script 62 | e9patch 63 | script 64 | a file with a bunch of patchable functions 65 | 66 | get a stringbox 67 | 68 | what do we do with interior markers? Just dump them? 69 | could do C rewriting step first to uglify and expand. 70 | Can I get line tables from llvm input? 71 | I guess better yet I could parse the C using clang and make some overapproximation of what 72 | program positions I think are sensible. 73 | There isn't per se a C spec answer here? Since these are sub observable behavior positions? 74 | But come on. 75 | 76 | Iteratively delete statements as cbmc finds counterexamples. 77 | Jiggle statements. 
78 | 79 | so dwarf had block delimiters, epilog, prolog, 80 | discirminator. identify blocks that correspond to same source position 81 | isa - which instruction set. Useful for thumb probably. isa fff for data? I mean perhaps one could infer it anyhow. 82 | 83 | 84 | 85 | 86 | 87 | Try to use CLE 88 | dwarf annotation example. blog it 89 | 90 | command line in bytes 91 | command line in asm string / .s file 92 | 93 | dump smt formula. Is it useful / interpretable? 94 | 95 | INSN_START 96 | INSN_END (has goto in it) 97 | 98 | collect up seen address. Use as new starts 99 | collect up from symbols. 100 | This is quite silly. 101 | I could also just disassemble every byte.... 102 | Hmm. That's intriguing 103 | explicit goto at the ned might get rid of implicit-fallthrough complaint. or I could put in the break then. 104 | 105 | Take in new starts as command line params 106 | 107 | nes static translation 108 | 109 | revng 110 | lasagne binary-transation.github.io 111 | 112 | I've really uglified with these goto annotations. 113 | 114 | PREINSN 115 | POSTINSN 116 | I could put some into INSN 117 | 118 | I could put others in pcdoe instructions 119 | 120 | relative branch 121 | if(space == 0){ 122 | goto P_ ## current + offset 123 | pc = pc + offset; 124 | } 125 | else{ 126 | pc = pc + offset; 127 | } 128 | but I need to do the offset math at compile time in order to goto or add a while-switch. Ugh. 129 | ugh. I think the simplest thing to do is special case the generation of branch. 130 | 131 | Dynamic linked libraries: 132 | 133 | 1. refuse to engage. Hmm. Maybe this is right? 134 | 2. plt section 135 | 3. grab loaded core rather than file 136 | 137 | seaparate out python 2 compatible stuff 138 | 139 | ok I should actually be loading. 140 | I should look in the segments, not sections and find any with executable bits 141 | 142 | Fix all -Wformat errors 143 | return code from decomp. (good idea? 
I mean i guess I could check the state anyhow 144 | And know in an application specific way. Naw. This seems nice. 145 | ) 146 | enum {CALL, CALLIND, RETURN, BREAKPOINT} 147 | switch over to separated space and offset 148 | map in the code itself in case we look atca it? 149 | 150 | init_state(); helper routine. 151 | post bug about slowness of malloc vs static array 152 | 153 | add symbol dump and arch dump 154 | automatic tranlsation validation 155 | 156 | refactor to use pypcode 157 | floating point 158 | _Generic switch? 159 | script to dump decompiled from ghidra 160 | dwarf annot 161 | 162 | exe: 163 | $(CC) -g -Wformat=0 -Wimplicit-fallthrough=0 -o harness harness.c 164 | 165 | loops: 166 | cbmc harness.c --show-loops 167 | 168 | z3 and boolector hit pretty hard 169 | --sat-solver cadical halves minisat time 170 | 171 | --external-sat-solver kissat is about as good as cadical 172 | 173 | --show-loops 174 | --no-built-in-assertions 175 | Output little vs big endian into makefile. also arch size 176 | Go through all the warnings. I should be using constant lablled by size. 177 | try enqueue now that carry is fixed. 178 | 179 | How we're printing negative constants seems wrong 180 | I should put pc = address _before_ the case. 181 | when Iincrease the unwindig, now the unwinding assertion fails. That is not good. Perhaps this is because 182 | 183 | Ok so the new cbmc is way slower. 184 | Try uautomizer / cpachecker. This is it's own longshot 185 | 186 | demo breakpoint comparison 187 | 188 | speed up via... turn off symex? 189 | try kissat or cadical 190 | 191 | malloc is astonishingly faster for allocating blocks of memory in cbmc compared to statically sized arrays. I have no idea way 192 | It does seem like it is working correctly though. 193 | ass #include is way slower.... why? 194 | 195 | doesn't seem like inline annotations change anytihg much. Maybe 10%? 
but maybe not 196 | 197 | cbmc harness.c --object-bits 10 --unwind 2 --property main.assertion.1 --property main.assertion.2 --unwinding-assertions --boolector 198 | 199 | If I could remove all those memcpy checks. 200 | 201 | --show-properties 202 | --property main.assertion.1 203 | 204 | Ok I'm paying loop cost for printing. I could target the dispatch loop. 205 | Conditional compilation 206 | Remove the prints 207 | Inline the loop there ahead of time. 208 | 209 | CBMC_OPTIONS= --bounds-check --conversion-check --div-by-zero-check --float-overflow-check --malloc-fail-null \ 210 | --malloc-may-fail --nan-check --pointer-check --pointer-overflow-check --pointer-primitive-check \ 211 | --signed-overflow-check --undefined-shift-check --unsigned-overflow-check 212 | 213 | harness.c:(.text+0x377f): undefined reference to `INT_CARRY' 214 | /usr/bin/ld: harness.c:(.text+0x37bf): undefined reference to`INT_SCARRY' 215 | /usr/bin/ld: harness.c:(.text+0x4a22): undefined reference to `INT_CARRY' 216 | /usr/bin/ld: harness.c:(.text+0x4a59): undefined reference to`INT_SCARRY' 217 | /usr/bin/ld: harness.c:(.text+0x4ef8): undefined reference to `INT_CARRY' 218 | /usr/bin/ld: harness.c:(.text+0x4f3b): undefined reference to`INT_SCARRY' 219 | /usr/bin/ld: /tmp/ccnfoqer.o: in function `main': 220 | harness.c:(.text+0x638d): undefined reference to`FUNCTION' 221 | collect2: error: ld returned 1 exit status 222 | 223 | goto-diff 224 | go 225 | --taint in goto-analyzer 226 | Could also have stuff that process opens cbmc with the decompiler stuff? 227 | 228 | Hmm. Maybe I could also dump the high pcode output. 229 | 230 | values map usage. 
231 | 232 | 233 | echo ' 234 | import sys 235 | from ghidra.app.decompiler import DecompInterface 236 | from ghidra.util.task import ConsoleTaskMonitor 237 | 238 | decompinterface = DecompInterface() 239 | decompinterface.openProgram(currentProgram); 240 | functions = currentProgram.getFunctionManager().getFunctions(True) 241 | 242 | def block2c(block): 243 | for inst in block.getInstructions(True): 244 | print(inst) 245 | pcodeOps = highFunction.getPcodeOps(inst.getAddress()) 246 | for op in pcodeOps: 247 | pcode2c(op) 248 | 249 | def func2c(func): 250 | currentProgram = getCurrentProgram() 251 | blockModel = BasicBlockModel(currentProgram) 252 | monitor = ConsoleTaskMonitor() 253 | blocks = blockModel.getCodeBlocksContaining(func.getBody(), monitor) 254 | for block in blocks: 255 | block2c(block) 256 | 257 | with open("decompiled_output.c", "w") as output_file: 258 | for function in list(functions): 259 | # Add a comment with the name of the function 260 | # print "// Function: " + str(function) 261 | output_file.write("// Function: " + str(function)) 262 | 263 | # Decompile each function 264 | decompiled_function = decompinterface.decompileFunction(function, 0, ConsoleTaskMonitor()) 265 | # Print Decompiled Code 266 | print decompiled_function.getDecompiledFunction().getC() 267 | #output_file.write(decompiled_function.getDecompiledFunction().getC()) 268 | ' > /tmp/decompile.py 269 | 270 | echo " 271 | int foo(int x){ 272 | return x*x + 1; 273 | } 274 | " > /tmp/foo.c 275 | 276 | # huh. ghidra doesn't support dwarf 5? 
That seems nuts 277 | 278 | gcc -gdwarf-4 -c /tmp/foo.c -o /tmp/foo.o 279 | 280 | # Setup decompiler 281 | # decompInterface = DecompInterface() 282 | # decompInterface.openProgram(currentProgram) 283 | # Get the body of the function 284 | # functionBody = function.getBody() 285 | 286 | # Iterate over all instructions in the function 287 | # instructions = getCurrentProgram().getListing().getInstructions(functionBody, True) 288 | 289 | # func2c(func) 290 | 291 | # Find the function with the given name 292 | # functionSymbol = SymbolUtilities.getLabelOrFunctionSymbol( 293 | # currentProgram, functionName, None 294 | # ) 295 | # function = getFunctionContaining(currentProgram.getListing().getCurrentAddress()) 296 | # function = symbolTable.getFunction(functionSymbol.getAddress()) 297 | 298 | { 299 | // PUSH RBP 300 | // 0xdeadbeef 301 | { 302 | // (unique, 0xef80, 8) COPY (regsiter, 0x28, 8) 303 | int op1, op2, out; 304 | memcpy(&op1, state->register + 0x28, 8); 305 | out = op1; 306 | memcpy(state->unique + 0xef80, &out, 8); 307 | } 308 | 309 | } 310 | 311 | void COPY(addr1,addr2, size){ 312 | memcpy(addr1, addr2, size); 313 | } 314 | 315 | # define BINOP64(name, operator) void name(out,in1,in2){ } 316 | 317 | # define BINOP(name,op,type) void name(out,in1,in2){ type in1, in2, out; memcpy(&in1, in1, sizeof(type));\ 318 | 319 | memcpy(&in2, in2, sizeof(type)); \ 320 | out = in1 operator in2; \ 321 | memcpy(out, out, sizeof(type)); } 322 | We need to 323 | void INT_ADD64(a1,a2,a3){ 324 | int64_t op1, op2, out; 325 | memcpy(&in1, a1, 64); 326 | memcpy(&in2, a2, 64); 327 | memcpy(out, &out, 64); 328 | } 329 | 330 | INT_ADD64(register + 0x28, register + 0x30, unique + 0xef80); 331 | 332 | INT_ADD(resgiter + , + , ) 333 | BINOP(INT_ADD); 334 | 335 | } 336 | 337 | # Iterate through each basic block in the function 338 | for block in function.getBasicBlocks(): 339 | for inst in block.getInstructions(True): 340 | # Get the high-level representation (HighFunction) for 
decompilation 341 | #highFunction = decompInterface.decompileFunction( 342 | # function, 0, ConsoleTaskMonitor() 343 | #) 344 | #if highFunction is None or not highFunction.decompileCompleted(): 345 | # print("Decompilation error") 346 | # continue 347 | 348 | # Get the p-code for each instruction 349 | pcodeOps = highFunction.getPcodeOps(inst.getAddress()) 350 | for op in pcodeOps: 351 | # Process based on the type of p-code operation 352 | if op.getOpcode() == PcodeOp.COPY: 353 | print("COPY operation at", op.getAddress()) 354 | elif op.getOpcode() == PcodeOp.LOAD: 355 | print("LOAD operation at", op.getAddress()) 356 | elif op.getOpcode() == PcodeOp.STORE: 357 | print("STORE operation at", op.getAddress()) 358 | # Add more elif statements for different p-code operations as needed 359 | else: 360 | print("Other operation at", op.getAddress()) 361 | 362 | 363 | 364 | assert numInputs <= 3 365 | if numInputs == 1: 366 | x = interp_varnode(pcode.getInput(0)) 367 | print(x) 368 | 369 | 370 | if pcode.getNumInputs() > 1: 371 | y = interp_varnode(pcode.getInput(1)) 372 | if pcode.getNumInputs() > 2: 373 | z = interp_varnode(pcode.getInput(1)) 374 | # Process based on the type of p-code operation 375 | if op == PcodeOp.COPY: 376 | return "state->mem[%d] = state->;" % (pcode.getAddress()) 377 | print("COPY operation at", op.getAddress()) 378 | elif op == PcodeOp.LOAD: 379 | print("LOAD operation at", op.getAddress()) 380 | elif op == PcodeOp.STORE: 381 | print("STORE operation at", op.getAddress()) 382 | # Add more elif statements for different p-code operations as needed 383 | else: 384 | print("Other operation at", op.getAddress()) 385 | 386 | 387 | if pcode.isAssignment(): 388 | if op == pcode.BOOL_AND: 389 | e = "&" 390 | elif op == pcode.BOOL_NEGATE: 391 | e = "!" # Hmm. Is this what we want? 
392 | elif op == pcode.BOOL_OR: 393 | e = "|" 394 | elif op == pcode.BOOL_XOR: 395 | e = "^" 396 | elif op == pcode.COPY: 397 | e = x 398 | elif op == pcode.INT_ADD: 399 | e = "+" 400 | signed = True 401 | elif op == pcode.INT_AND: 402 | e = "&" 403 | signed = True 404 | elif op == pcode.INT_SUB: 405 | e = "-" 406 | signed = True 407 | elif op == pcode.INT_DIV: 408 | e = "/" 409 | signed = True 410 | elif op == pcode.INT_EQUAL: 411 | e = "==" 412 | elif op == pcode.INT_LEFT: 413 | e = "<<" 414 | elif op == pcode.INT_LESS: 415 | e = "<" 416 | elif op == pcode.INT_MULT: 417 | e = "*" 418 | elif op == pcode.INT_NOTEQUAL: 419 | e = x != y 420 | elif op == pcode.INT_SBORROW: 421 | # revisit this one. I'm confused why this is different 422 | # from SLESS 423 | e = x > y 424 | elif op == pcode.INT_SEXT: 425 | outsize = pcode.getOutput().getSize() 426 | insize = pcode.getInput(0).getSize() 427 | e = Function("(_ sign_extend %d)" % (outsize - insize), [x]) 428 | elif op == pcode.INT_SLESS: 429 | e = bvslt(x, y) 430 | elif op == pcode.INT_SUB: 431 | e = x - y 432 | elif op == pcode.INT_ZEXT: 433 | outsize = pcode.getOutput().getSize() 434 | insize = pcode.getInput(0).getSize() 435 | e = Function("(_ zero_extend %d)" % (outsize - insize), [x]) 436 | elif op == pcode.INT_ZEXT: 437 | outsize = pcode.getOutput().getSize() 438 | insize = pcode.getInput(0).getSize() 439 | e = Function("(_ zero_extend %d)" % (outsize - insize), [x]) 440 | elif op == pcode.LOAD: 441 | e = get_mem(x, y) 442 | elif op == pcode.POPCOUNT: 443 | e = Function("popcount", [x]) 444 | elif op == pcode.SUBPIECE: 445 | # I always have a hard time getting extract right 446 | # in principal this offset might be symbolic? Hmm. 
#include "pcode.h"

/*
 * Harness comparing POPCOUNT on a 32-bit value against POPCOUNT on a 64-bit
 * value that holds the same bits, first in the low half and then in the
 * high half. Both results must be equal in each case.
 *
 * NOTE(review): POPCOUNT and assert are expected to come from "pcode.h";
 * the argument order looks like (out, out size in bytes, in, in size in
 * bytes) -- confirm against the header.
 */
int main()
{
    /* x is never assigned. NOTE(review): presumably intentional so that a
     * model checker such as CBMC treats it as an arbitrary input; compiled
     * and run natively this is a read of an indeterminate value -- confirm. */
    uint32_t x;
    uint64_t y = 0;
    y = x; /* same bits as x in the low 32 bits, upper 32 bits zero */
    uint8_t out1;
    uint8_t out2;
    POPCOUNT(&out1, 1, &x, 4);
    POPCOUNT(&out2, 1, &y, 8);
    assert(out1 == out2);

    /* Second case: place the bits of x in the upper 32 bits of y instead. */
    y = 0;
    y |= ((uint64_t)x << 32);
    POPCOUNT(&out1, 1, &x, 4);
    POPCOUNT(&out2, 1, &y, 8);
    assert(out1 == out2);
    return 0;
}
-------------------------------------------------------------------------------- 1 | ; SMT 2 2 | (set-info :source "Generated by CBMC 5.95.1 (cbmc-5.95.1)") 3 | (set-option :produce-models true) 4 | (set-logic QF_AUFBV) 5 | 6 | ; set_to true (equal) 7 | (define-fun |__CPROVER_dead_object#1| () (_ BitVec 64) (_ bv0 64)) 8 | 9 | ; set_to true (equal) 10 | (define-fun |__CPROVER_deallocated#1| () (_ BitVec 64) (_ bv0 64)) 11 | 12 | ; set_to true (equal) 13 | (define-fun |__CPROVER_max_malloc_size#1| () (_ BitVec 64) (_ bv36028797018963968 64)) 14 | 15 | ; set_to true (equal) 16 | (define-fun |__CPROVER_memory_leak#1| () (_ BitVec 64) (_ bv0 64)) 17 | 18 | ; set_to true (equal) 19 | (define-fun |__CPROVER_rounding_mode!0#1| () (_ BitVec 32) (_ bv0 32)) 20 | 21 | ; find_symbols 22 | (declare-fun |nondet_symex::nondet0| () (_ BitVec 32)) 23 | ; set_to true (equal) 24 | (define-fun |__CPROVER__start::x!0@1#2| () (_ BitVec 32) |nondet_symex::nondet0|) 25 | 26 | ; set_to true (equal) 27 | (define-fun |foo::x!0@1#1| () (_ BitVec 32) |__CPROVER__start::x!0@1#2|) 28 | 29 | ; set_to true (equal) 30 | (define-fun |foo::1::y!0@1#2| () (_ BitVec 32) (bvmul (_ bv2 32) |foo::x!0@1#1|)) 31 | 32 | ; find_symbols 33 | (declare-fun |nondet_symex::nondet1| () (_ BitVec 32)) 34 | ; set_to true (equal) 35 | (define-fun |foo::$tmp::return_value_my_foo!0@1#2| () (_ BitVec 32) |nondet_symex::nondet1|) 36 | 37 | ; find_symbols 38 | (declare-fun |__CPROVER__start::x!0@1#1| () (_ BitVec 32)) 39 | ; convert 40 | ; Converting var_no 0 with expr ID of = 41 | (define-fun B0 () Bool (= |__CPROVER__start::x!0@1#1| |__CPROVER__start::x!0@1#1|)) 42 | 43 | ; convert 44 | ; Converting var_no 1 with expr ID of = 45 | (define-fun B1 () Bool (= |__CPROVER__start::x!0@1#1| |__CPROVER__start::x!0@1#1|)) 46 | 47 | ; find_symbols 48 | (declare-fun |foo::1::y!0@1#1| () (_ BitVec 32)) 49 | ; convert 50 | ; Converting var_no 2 with expr ID of = 51 | (define-fun B2 () Bool (= |foo::1::y!0@1#1| |foo::1::y!0@1#1|)) 52 
| 53 | ; convert 54 | ; Converting var_no 3 with expr ID of = 55 | (define-fun B3 () Bool (= |foo::1::y!0@1#1| |foo::1::y!0@1#1|)) 56 | 57 | ; find_symbols 58 | (declare-fun |foo::$tmp::return_value_my_foo!0@1#1| () (_ BitVec 32)) 59 | ; convert 60 | ; Converting var_no 4 with expr ID of = 61 | (define-fun B4 () Bool (= |foo::$tmp::return_value_my_foo!0@1#1| |foo::$tmp::return_value_my_foo!0@1#1|)) 62 | 63 | ; convert 64 | ; Converting var_no 5 with expr ID of = 65 | (define-fun B5 () Bool (= |foo::$tmp::return_value_my_foo!0@1#1| |foo::$tmp::return_value_my_foo!0@1#1|)) 66 | 67 | ; find_symbols 68 | (declare-fun |symex::args::0| () (_ BitVec 32)) 69 | ; set_to true 70 | (assert (= |__CPROVER__start::x!0@1#2| |symex::args::0|)) 71 | 72 | ; find_symbols 73 | (declare-fun |symex::args::1| () (_ BitVec 32)) 74 | ; set_to true 75 | (assert (= |foo::x!0@1#1| |symex::args::1|)) 76 | 77 | ; find_symbols 78 | (declare-fun |symex::io::0| () (_ BitVec 32)) 79 | ; set_to true 80 | (assert (= |__CPROVER__start::x!0@1#2| |symex::io::0|)) 81 | 82 | ; set_to false 83 | (assert (not (= |foo::1::y!0@1#2| |foo::$tmp::return_value_my_foo!0@1#2|))) 84 | 85 | ; convert 86 | ; Converting var_no 6 with expr ID of not 87 | (define-fun B6 () Bool (not false)) 88 | 89 | ; set_to true 90 | (assert B6) 91 | 92 | (check-sat) 93 | 94 | (get-value (B0)) 95 | (get-value (B1)) 96 | (get-value (B2)) 97 | (get-value (B3)) 98 | (get-value (B4)) 99 | (get-value (B5)) 100 | (get-value (B6)) 101 | (get-value (|__CPROVER__start::x!0@1#1|)) 102 | (get-value (|__CPROVER__start::x!0@1#2|)) 103 | (get-value (|__CPROVER_dead_object#1|)) 104 | (get-value (|__CPROVER_deallocated#1|)) 105 | (get-value (|__CPROVER_max_malloc_size#1|)) 106 | (get-value (|__CPROVER_memory_leak#1|)) 107 | (get-value (|__CPROVER_rounding_mode!0#1|)) 108 | (get-value (|foo::$tmp::return_value_my_foo!0@1#1|)) 109 | (get-value (|foo::$tmp::return_value_my_foo!0@1#2|)) 110 | (get-value (|foo::1::y!0@1#1|)) 111 | (get-value 
(|foo::1::y!0@1#2|)) 112 | (get-value (|foo::x!0@1#1|)) 113 | (get-value (|nondet_symex::nondet0|)) 114 | (get-value (|nondet_symex::nondet1|)) 115 | (get-value (|symex::args::0|)) 116 | (get-value (|symex::args::1|)) 117 | (get-value (|symex::io::0|)) 118 | 119 | (exit) 120 | ; end of SMT2 file 121 | -------------------------------------------------------------------------------- /notes/try_cle.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Writing /tmp/mymin.c\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "%%file /tmp/mymin.c\n", 18 | "int min(int a, int b) {\n", 19 | " if (a < b) {\n", 20 | " return a;\n", 21 | " }\n", 22 | " return b;\n", 23 | "}\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 14, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "Overwriting /tmp/hello.c\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "%%file /tmp/hello.c\n", 41 | "#include \n", 42 | "int main(int argc) {\n", 43 | " printf(\"hello world\");\n", 44 | " return 0;\n", 45 | "}" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 15, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "%%bash\n", 55 | "gcc /tmp/mymin.c -c -o /tmp/mymin.o\n", 56 | "gcc /tmp/hello.c -o /tmp/hello" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "https://api.angr.io/projects/cle/en/latest/api/index.html\n", 64 | "https://api.angr.io/projects/cle/en/latest/api/relocations.html\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 9, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "import cle\n", 74 | "filename = \"/tmp/mymin.o\"\n", 75 | "ld = cle.Loader(filename)" 
76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 18, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "" 87 | ] 88 | }, 89 | "execution_count": 18, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "ld" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 11, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "['__class__',\n", 107 | " '__delattr__',\n", 108 | " '__dict__',\n", 109 | " '__dir__',\n", 110 | " '__doc__',\n", 111 | " '__eq__',\n", 112 | " '__format__',\n", 113 | " '__ge__',\n", 114 | " '__getattribute__',\n", 115 | " '__gt__',\n", 116 | " '__hash__',\n", 117 | " '__init__',\n", 118 | " '__init_subclass__',\n", 119 | " '__le__',\n", 120 | " '__lt__',\n", 121 | " '__module__',\n", 122 | " '__ne__',\n", 123 | " '__new__',\n", 124 | " '__reduce__',\n", 125 | " '__reduce_ex__',\n", 126 | " '__repr__',\n", 127 | " '__setattr__',\n", 128 | " '__sizeof__',\n", 129 | " '__str__',\n", 130 | " '__subclasshook__',\n", 131 | " '__weakref__',\n", 132 | " '_auto_load_libs',\n", 133 | " '_backend_resolver',\n", 134 | " '_case_insensitive',\n", 135 | " '_custom_ld_path',\n", 136 | " '_except_missing_libs',\n", 137 | " '_extern_object',\n", 138 | " '_find_safe_rebase_addr',\n", 139 | " '_ignore_import_version_numbers',\n", 140 | " '_internal_load',\n", 141 | " '_is_linux_loader_name',\n", 142 | " '_is_range_free',\n", 143 | " '_juggling',\n", 144 | " '_kernel_object',\n", 145 | " '_last_object',\n", 146 | " '_lib_opts',\n", 147 | " '_load_debug_info',\n", 148 | " '_load_object_isolated',\n", 149 | " '_main_binary_path',\n", 150 | " '_main_binary_stream',\n", 151 | " '_main_object',\n", 152 | " '_main_opts',\n", 153 | " '_map_object',\n", 154 | " '_memory',\n", 155 | " '_path_insensitive',\n", 156 | " '_perform_relocations',\n", 157 | " '_possible_idents',\n", 158 | " '_possible_paths',\n", 159 | " 
'_rebase_granularity',\n", 160 | " '_relocated_objects',\n", 161 | " '_satisfied_deps',\n", 162 | " '_search_load_path',\n", 163 | " '_static_backend',\n", 164 | " '_tls',\n", 165 | " '_use_system_libs',\n", 166 | " 'all_elf_objects',\n", 167 | " 'all_objects',\n", 168 | " 'all_pe_objects',\n", 169 | " 'aslr',\n", 170 | " 'auto_load_libs',\n", 171 | " 'close',\n", 172 | " 'describe_addr',\n", 173 | " 'dynamic_load',\n", 174 | " 'elfcore_object',\n", 175 | " 'extern_object',\n", 176 | " 'fast_memory_load_pointer',\n", 177 | " 'finalizers',\n", 178 | " 'find_all_symbols',\n", 179 | " 'find_loadable_containing',\n", 180 | " 'find_object',\n", 181 | " 'find_object_containing',\n", 182 | " 'find_plt_stub_name',\n", 183 | " 'find_relevant_relocations',\n", 184 | " 'find_section_containing',\n", 185 | " 'find_section_next_to',\n", 186 | " 'find_segment_containing',\n", 187 | " 'find_symbol',\n", 188 | " 'get_loader_symbolic_constraints',\n", 189 | " 'initial_load_objects',\n", 190 | " 'initializers',\n", 191 | " 'kernel_object',\n", 192 | " 'linux_loader_object',\n", 193 | " 'main_object',\n", 194 | " 'max_addr',\n", 195 | " 'memory',\n", 196 | " 'min_addr',\n", 197 | " 'missing_dependencies',\n", 198 | " 'page_size',\n", 199 | " 'perform_irelative_relocs',\n", 200 | " 'preload_libs',\n", 201 | " 'requested_names',\n", 202 | " 'shared_objects',\n", 203 | " 'symbols',\n", 204 | " 'tls']" 205 | ] 206 | }, 207 | "execution_count": 11, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "dir(ld)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 17, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "[]" 225 | ] 226 | }, 227 | "execution_count": 17, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "ld.main_object\n", 234 | "ld.all_objects" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | 
"execution_count": 20, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "[,\n", 246 | " ,\n", 247 | " ,\n", 248 | " ]" 249 | ] 250 | }, 251 | "execution_count": 20, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | ], 256 | "source": [ 257 | "list(ld.symbols)" 258 | ] 259 | } 260 | ], 261 | "metadata": { 262 | "kernelspec": { 263 | "display_name": "Python 3", 264 | "language": "python", 265 | "name": "python3" 266 | }, 267 | "language_info": { 268 | "codemirror_mode": { 269 | "name": "ipython", 270 | "version": 3 271 | }, 272 | "file_extension": ".py", 273 | "mimetype": "text/x-python", 274 | "name": "python", 275 | "nbconvert_exporter": "python", 276 | "pygments_lexer": "ipython3", 277 | "version": "3.10.12" 278 | } 279 | }, 280 | "nbformat": 4, 281 | "nbformat_minor": 2 282 | } 283 | -------------------------------------------------------------------------------- /notes/trycle.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/notes/trycle.ipynb -------------------------------------------------------------------------------- /pcode2c/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/pcode2c/__init__.py -------------------------------------------------------------------------------- /pcode2c/__main__.py: -------------------------------------------------------------------------------- 1 | from pypcode import Context, PcodePrettyPrinter 2 | import argparse 3 | 4 | 5 | from .printer import * 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser( 9 | prog="pcode2c", description="Convert pcode to C code" 10 | ) 11 | parser.add_argument("filename") 12 | # parser.add_argument("-f", "--function") 13 | 
def single(precond, postcond, equality=False):
    """Render a CBMC harness `main` that checks one translated function.

    The generated C assumes `precond`, runs `pcode2c` on a fresh `CPUState`
    and then asserts `postcond`.

    Args:
        precond: C expression (as text) placed inside `__CPROVER_assume`.
        postcond: C expression (as text) placed inside `__CPROVER_assert`.
        equality: accepted for signature parity with `comparative`; unused.

    Returns:
        The C source of the harness `main` as a string.
    """
    # __CPROVER_assert takes (condition, description); the description was
    # missing here although `comparative` already supplies one.
    # NOTE(review): the harness template calls `init_CPUState`, this one calls
    # `init_state` -- confirm which name _pcode.h actually provides.
    return f"""\
int main(){{
    CPUState state;
    init_state(&state);
    __CPROVER_assume({precond});
    pcode2c(&state, -1);
    __CPROVER_assert({postcond}, "Postcondition");
}}"""
# https://github.com/eliben/pyelftools/blob/8b97f5da6838791fd5c6b47b1623fb414daed2f0/scripts/dwarfdump.py#L136
def get_full_file_path(die, dwarfinfo, cu):
    """Return the source path recorded for `die` via DW_AT_decl_file.

    Args:
        die: DIE whose `attributes` mapping may contain "DW_AT_decl_file".
        dwarfinfo: object providing `line_program_for_CU(cu)`.
        cu: compilation unit that owns `die`.

    Returns:
        "<include dir>/<file name>" when the file entry names a directory,
        the bare file name when it does not, and "Unknown" when the DIE has
        no usable declaration file.
    """
    import os  # local import keeps the block self-contained

    decl_file = die.attributes.get("DW_AT_decl_file")
    if not decl_file:
        return "Unknown"

    line_program = dwarfinfo.line_program_for_CU(cu)
    files = line_program["file_entry"]
    file_index = decl_file.value
    # File numbers are treated as 1-based (the pre-DWARF-5 convention the
    # original code assumed); 0 or out-of-range means "no file".
    # NOTE(review): DWARF 5 tables are 0-based -- confirm against the
    # pyelftools version in use.
    if file_index < 1 or file_index > len(files):
        return "Unknown"

    entry = files[file_index - 1]
    file_name = entry.name.decode("utf-8")

    # The directory is selected by the file entry's own dir_index, not by the
    # file index (the previous code indexed include_directory by file number).
    dirs = line_program["include_directory"]
    dir_index = entry.dir_index
    if dir_index < 1 or dir_index > len(dirs):
        # dir_index 0: no include directory recorded for this file.
        return file_name
    return os.path.join(dirs[dir_index - 1].decode("utf-8"), file_name)
def process_elf_file(filename):
    """Collect label/variable DIEs from an ELF file, grouped by source file.

    Args:
        filename: path of the ELF file to read.

    Returns:
        dict mapping a source path (from `get_full_file_path`) to a list of
        dicts with keys "line", "name", "type", "column" and "low_pc".
        Empty when the file carries no DWARF information.
    """
    # Defined up front so the "no DWARF" early return has something to return
    # (it previously referenced the name before assignment).
    die_info = {}
    with open(filename, "rb") as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print("No DWARF info found in the file.")
            return die_info

        set_global_machine_arch(elffile.get_machine_arch())
        dwarfinfo = elffile.get_dwarf_info()

        # line number -> address of a statement on that line.
        # Not consumed yet; kept for the planned "annotate by line" feature.
        # Line-program entries without a state are skipped; the row data
        # (is_stmt, line, address) lives on entry.state.
        line_addr = {}
        for CU in dwarfinfo.iter_CUs():
            lineprogram = dwarfinfo.line_program_for_CU(CU)
            if lineprogram is None:
                continue
            for entry in lineprogram.get_entries():
                state = entry.state
                if state is not None and state.is_stmt:
                    # check if C code is all whitespace before this.
                    line_addr[state.line] = state.address

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                if DIE.tag not in ("DW_TAG_label", "DW_TAG_variable"):
                    continue
                name_attr = DIE.attributes.get("DW_AT_name")
                line_attr = DIE.attributes.get("DW_AT_decl_line")
                if name_attr is None or line_attr is None:
                    # Without a name and a declaration line the DIE cannot be
                    # placed in the source, so skip it instead of crashing.
                    continue
                column_attr = DIE.attributes.get("DW_AT_decl_column")
                low_pc = DIE.attributes.get("DW_AT_low_pc")
                file_name = get_full_file_path(DIE, dwarfinfo, CU)
                # Collecting DIE information
                die_info.setdefault(file_name, []).append(
                    {
                        "line": line_attr.value,
                        "name": name_attr.value.decode("utf-8"),
                        "type": DIE.tag,
                        "column": column_attr.value if column_attr else None,
                        "low_pc": f"0x{low_pc.value:X}" if low_pc else "Unknown",
                        # Other attributes as needed
                    }
                )

    return die_info
open(args.output, "w") as o: 133 | lines = f.readlines() 134 | for entry in dies: 135 | if entry["type"] == "DW_TAG_variable": 136 | comment = die_var2C(entry) 137 | elif entry["type"] == "DW_TAG_label": 138 | comment = die_label2C(entry) 139 | else: 140 | comment = f"// {entry['type']} {entry['name']}\n" 141 | line_num = entry["line"] 142 | if line_num <= len(lines): 143 | lines.insert(line_num - 1, comment) 144 | o.writelines(lines) 145 | -------------------------------------------------------------------------------- /pcode2c/naive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Largely the same as https://github.com/angr/pypcode/blob/master/pypcode/__main__.py 4 | 5 | Runs when invoking pypcode module from command line. Lists supported 6 | architectures, and handles basic disassembly and translation to P-code of 7 | supported binaries. Does not parse object files, the binary files must be plain 8 | machine code bytes in a file. 9 | """ 10 | 11 | import pypcode 12 | import argparse 13 | import logging 14 | import sys 15 | from difflib import SequenceMatcher 16 | from .printer import is_branch 17 | from collections import defaultdict 18 | from pypcode import ( 19 | PcodeOp, 20 | Arch, 21 | BadDataError, 22 | Context, 23 | OpCode, 24 | TranslateFlags, 25 | UnimplError, 26 | Varnode, 27 | ) 28 | 29 | 30 | class OpFormat: 31 | """ 32 | General op pretty-printer. 
33 | """ 34 | 35 | @staticmethod 36 | def fmt_vn(vn: Varnode) -> str: 37 | if vn.space.name == "const": 38 | return "%#x" % vn.offset 39 | elif vn.space.name == "register": 40 | name = vn.getRegisterName() 41 | if name: 42 | return name 43 | elif vn.space.name == "unique": 44 | return f"unique_{vn.offset:x}_{vn.size:d}" 45 | return f"{vn.space.name}[{vn.offset:x}:{vn.size:d}]" 46 | 47 | def fmt(self, op: PcodeOp) -> str: 48 | return f'{op.opcode.__name__} {", ".join(self.fmt_vn(i) for i in op.inputs)}' 49 | 50 | 51 | class OpFormatUnary(OpFormat): 52 | """ 53 | General unary op pretty-printer. 54 | """ 55 | 56 | __slots__ = ("operator",) 57 | 58 | def __init__(self, operator: str): 59 | super().__init__() 60 | self.operator = operator 61 | 62 | def fmt(self, op: PcodeOp) -> str: 63 | return f"{self.operator}{self.fmt_vn(op.inputs[0])}" 64 | 65 | 66 | class OpFormatBinary(OpFormat): 67 | """ 68 | General binary op pretty-printer. 69 | """ 70 | 71 | __slots__ = ("operator",) 72 | 73 | def __init__(self, operator: str): 74 | super().__init__() 75 | self.operator = operator 76 | 77 | def fmt(self, op: PcodeOp) -> str: 78 | return ( 79 | f"{self.fmt_vn(op.inputs[0])} {self.operator} {self.fmt_vn(op.inputs[1])}" 80 | ) 81 | 82 | 83 | class OpFormatFunc(OpFormat): 84 | """ 85 | Function-call style op pretty-printer. 86 | """ 87 | 88 | __slots__ = ("operator",) 89 | 90 | def __init__(self, operator: str): 91 | super().__init__() 92 | self.operator = operator 93 | 94 | def fmt(self, op: PcodeOp) -> str: 95 | return f'{self.operator}({", ".join(self.fmt_vn(i) for i in op.inputs)})' 96 | 97 | 98 | def label(vn: Varnode): 99 | assert vn.space.name == "ram" 100 | return f"L{vn.offset:#x}" 101 | 102 | 103 | class OpFormatSpecial(OpFormat): 104 | """ 105 | Specialized op pretty-printers. 
106 | """ 107 | 108 | def fmt_BRANCH(self, op: PcodeOp) -> str: 109 | return f"goto {label(op.inputs[0])}" 110 | 111 | def fmt_BRANCHIND(self, op: PcodeOp) -> str: 112 | return f"goto [{self.fmt_vn(op.inputs[0])}]" 113 | 114 | def fmt_CALL(self, op: PcodeOp) -> str: 115 | return f"call {self.fmt_vn(op.inputs[0])}" 116 | 117 | def fmt_CALLIND(self, op: PcodeOp) -> str: 118 | return f"call [{self.fmt_vn(op.inputs[0])}]" 119 | 120 | def fmt_CBRANCH(self, op: PcodeOp) -> str: 121 | return f"if ({self.fmt_vn(op.inputs[1])}) goto {label(op.inputs[0])}" 122 | 123 | def fmt_LOAD(self, op: PcodeOp) -> str: 124 | assert op.inputs[0].getSpaceFromConst().name == "ram" 125 | return f"memmove(&({self.fmt_vn(op.output)}), ram + {self.fmt_vn(op.inputs[1])}, {op.output.size})" 126 | # return f"*[{op.inputs[0].getSpaceFromConst().name}]{self.fmt_vn(op.inputs[1])}" 127 | 128 | def fmt_RETURN(self, op: PcodeOp) -> str: 129 | return "return" 130 | # return f"return; //{self.fmt_vn(op.inputs[0])}" 131 | 132 | def fmt_STORE(self, op: PcodeOp) -> str: 133 | assert op.inputs[0].getSpaceFromConst().name == "ram" 134 | return f"memmove(ram + {self.fmt_vn(op.inputs[1])}, &({self.fmt_vn(op.inputs[2])}), {op.inputs[2].size})" 135 | # return f"*[{op.inputs[0].getSpaceFromConst().name}]{self.fmt_vn(op.inputs[1])} = {self.fmt_vn(op.inputs[2])}" 136 | 137 | def fmt(self, op: PcodeOp) -> str: 138 | return { 139 | OpCode.BRANCH: self.fmt_BRANCH, 140 | OpCode.BRANCHIND: self.fmt_BRANCHIND, 141 | OpCode.CALL: self.fmt_CALL, 142 | OpCode.CALLIND: self.fmt_CALLIND, 143 | OpCode.CBRANCH: self.fmt_CBRANCH, 144 | OpCode.LOAD: self.fmt_LOAD, 145 | OpCode.RETURN: self.fmt_RETURN, 146 | OpCode.STORE: self.fmt_STORE, 147 | }.get(op.opcode, super().fmt)(op) 148 | 149 | 150 | class PcodePrettyPrinter: 151 | """ 152 | P-code pretty-printer. 
153 | """ 154 | 155 | DEFAULT_OP_FORMAT = OpFormat() 156 | 157 | OP_FORMATS = { 158 | OpCode.BOOL_AND: OpFormatBinary("&&"), 159 | OpCode.BOOL_NEGATE: OpFormatUnary("!"), 160 | OpCode.BOOL_OR: OpFormatBinary("||"), 161 | OpCode.BOOL_XOR: OpFormatBinary("^^"), 162 | OpCode.BRANCH: OpFormatSpecial(), 163 | OpCode.BRANCHIND: OpFormatSpecial(), 164 | OpCode.CALL: OpFormatSpecial(), 165 | OpCode.CALLIND: OpFormatSpecial(), 166 | OpCode.CBRANCH: OpFormatSpecial(), 167 | OpCode.COPY: OpFormatUnary(""), 168 | OpCode.CPOOLREF: OpFormatFunc("cpool"), 169 | OpCode.FLOAT_ABS: OpFormatFunc("abs"), 170 | OpCode.FLOAT_ADD: OpFormatBinary("f+"), 171 | OpCode.FLOAT_CEIL: OpFormatFunc("ceil"), 172 | OpCode.FLOAT_DIV: OpFormatBinary("f/"), 173 | OpCode.FLOAT_EQUAL: OpFormatBinary("f=="), 174 | OpCode.FLOAT_FLOAT2FLOAT: OpFormatFunc("float2float"), 175 | OpCode.FLOAT_FLOOR: OpFormatFunc("floor"), 176 | OpCode.FLOAT_INT2FLOAT: OpFormatFunc("int2float"), 177 | OpCode.FLOAT_LESS: OpFormatBinary("f<"), 178 | OpCode.FLOAT_LESSEQUAL: OpFormatBinary("f<="), 179 | OpCode.FLOAT_MULT: OpFormatBinary("f*"), 180 | OpCode.FLOAT_NAN: OpFormatFunc("nan"), 181 | OpCode.FLOAT_NEG: OpFormatUnary("f- "), 182 | OpCode.FLOAT_NOTEQUAL: OpFormatBinary("f!="), 183 | OpCode.FLOAT_ROUND: OpFormatFunc("round"), 184 | OpCode.FLOAT_SQRT: OpFormatFunc("sqrt"), 185 | OpCode.FLOAT_SUB: OpFormatBinary("f-"), 186 | OpCode.FLOAT_TRUNC: OpFormatFunc("trunc"), 187 | OpCode.INT_2COMP: OpFormatUnary("-"), 188 | OpCode.INT_ADD: OpFormatBinary("+"), 189 | OpCode.INT_AND: OpFormatBinary("&"), 190 | OpCode.INT_CARRY: OpFormatFunc("carry"), 191 | OpCode.INT_DIV: OpFormatBinary("/"), 192 | OpCode.INT_EQUAL: OpFormatBinary("=="), 193 | OpCode.INT_LEFT: OpFormatBinary("<<"), 194 | OpCode.INT_LESS: OpFormatBinary("<"), 195 | OpCode.INT_LESSEQUAL: OpFormatBinary("<="), 196 | OpCode.INT_MULT: OpFormatBinary("*"), 197 | OpCode.INT_NEGATE: OpFormatUnary("~"), 198 | OpCode.INT_NOTEQUAL: OpFormatBinary("!="), 199 | OpCode.INT_OR: 
def definite_clobbered(ops):
    """Find p-code ops whose output is overwritten before anything reads it.

    Instructions tend to set many flags that are obviously unused. This is a
    simple conservative backward scan: an op is reported when a later op in
    the same straight-line run fully overwrites its output and no op in
    between reads any byte of it.

    Args:
        ops: sequence of p-code ops, each with `opcode`, `output` (or None)
            and `inputs`; varnodes expose `space.name`, `offset` and `size`.

    Returns:
        List of indices into `ops` (descending order) that are clobbered.
    """

    def span(vn):
        # A varnode is a byte range [offset, offset + size) in a named space.
        return (vn.space.name, vn.offset, vn.offset + vn.size)

    def overlaps(a, b):
        return a[0] == b[0] and a[1] < b[2] and b[1] < a[2]

    def covers(outer, inner):
        return outer[0] == inner[0] and outer[1] <= inner[1] and inner[2] <= outer[2]

    # Byte ranges that are known to be rewritten later without being read.
    pending = []
    clobber_insn = []
    for idx in range(len(ops) - 1, -1, -1):
        op = ops[idx]
        if is_branch(op.opcode):
            # Control may leave here; nothing after this point is guaranteed.
            pending = []
            continue

        out = span(op.output) if op.output is not None else None
        if out is not None and any(covers(p, out) for p in pending):
            # The op is dropped, so its reads must not keep anything alive.
            clobber_insn.append(idx)
            continue

        if out is not None:
            pending.append(out)  # any earlier write to these bytes is dead...
        reads = [span(vn) for vn in op.inputs]
        # ...unless an intermediate op reads them. Matching on byte overlap
        # (not on the printed name) keeps a read of EAX from being ignored
        # when the pending write is to RAX.
        pending = [p for p in pending if not any(overlaps(p, r) for r in reads)]
    return clobber_insn
Further manual fixups are needed.", 287 | ) 288 | ap.add_argument( 289 | "-l", 290 | "--list", 291 | action="store_true", 292 | help="list supported architecture languages", 293 | ) 294 | ap.add_argument("langid", help="architecture language id") 295 | ap.add_argument("binary", help="path to flat binary code") 296 | ap.add_argument("base", default="0", nargs="?", help="base address to load at") 297 | ap.add_argument( 298 | "-o", "--offset", default="0", help="offset in binary file to load from" 299 | ) 300 | ap.add_argument( 301 | "-s", "--length", default=None, help="length of code in bytes to load" 302 | ) 303 | ap.add_argument( 304 | "--clobber", 305 | action="store_true", 306 | help="Perform conservative clobber analysis to delete clobbered instructions", 307 | ) 308 | ap.add_argument( 309 | "-i", 310 | "--max-instructions", 311 | default=0, 312 | type=int, 313 | help="maximum number of instructions to translate", 314 | ) 315 | ap.add_argument( 316 | "-b", 317 | "--basic-block", 318 | action="store_true", 319 | default=False, 320 | help="stop translation at end of basic block", 321 | ) 322 | 323 | # List supported languages 324 | langs = {lang.id: lang for arch in Arch.enumerate() for lang in arch.languages} 325 | if ("-l" in sys.argv) or ("--list" in sys.argv): 326 | for langid in sorted(langs): 327 | print("%-35s - %s" % (langid, langs[langid].description)) 328 | return 329 | 330 | args = ap.parse_args() 331 | 332 | # Get requested language 333 | if args.langid not in langs: 334 | print(f'Language "{args.langid}" not found.') 335 | t = args.langid.upper() 336 | suggestions = [ 337 | langid 338 | for langid in langs 339 | if SequenceMatcher(None, t, langid.split()[0].upper()).ratio() > 0.25 340 | ] 341 | if len(suggestions): 342 | print("\nSuggestions:") 343 | for langid in sorted(suggestions): 344 | print(" %-35s - %s" % (langid, langs[langid].description)) 345 | print("") 346 | print("Try `--list` for full list of architectures.") 347 | sys.exit(1) 348 | 349 
| if args.langid != "x86:LE:64:default": 350 | print("***** WARNING: Only x86:LE:64:default has been examined") 351 | # Load target binary code 352 | base = int(args.base, 0) 353 | with open(args.binary, "rb") as f: 354 | f.seek(int(args.offset, 0)) 355 | code = f.read(int(args.length, 0)) if args.length else f.read() 356 | 357 | # Translate 358 | ctx = Context(langs[args.langid]) 359 | 360 | try: 361 | flags = TranslateFlags.BB_TERMINATING if args.basic_block else 0 362 | res = ctx.translate( 363 | code, base, max_instructions=args.max_instructions, flags=flags 364 | ) 365 | if args.clobber: 366 | clobber_insn = definite_clobbered(res.ops) 367 | else: 368 | clobber_insn = [] 369 | print(f"#include ") 370 | print(f"#include ") 371 | print(f"#include ") 372 | if args.langid == "x86:LE:64:default": 373 | print( 374 | "extern uint64_t RDI, RSI, RDX, RCX, R8, R9, RAX, RBX, RBP, RSP, RIP;" 375 | ) 376 | print("extern bool ZF, SF, OF, CF, PF;") 377 | print("extern uint8_t ram[0x1000];") 378 | 379 | print(f"void asm_{args.offset}(){{") 380 | for size, vns in uniques(res.ops).items(): 381 | vns = set(OpFormat.fmt_vn(vn) for vn in vns) 382 | print(f"uint{size*8}_t {', '.join(vns)};") 383 | last_imark_idx = 0 384 | for i, op in enumerate(res.ops): 385 | if i in clobber_insn: 386 | # print( 387 | # f" // CLOBBERED P{i - last_imark_idx - 1:d}: {PcodePrettyPrinter.fmt_op(op)};" 388 | # ) 389 | continue 390 | if op.opcode == OpCode.IMARK: 391 | last_imark_idx = i 392 | disas_addr = op.inputs[0].offset 393 | disas_offset = disas_addr - base 394 | disas_len = sum(vn.size for vn in op.inputs) 395 | disas_slice = code[disas_offset : disas_offset + disas_len] 396 | for insn in ctx.disassemble(disas_slice, disas_addr).instructions: 397 | print( 398 | f"L{insn.addr.offset:#x}: // {insn.mnem} {insn.body}" # / len {insn.length} 399 | ) 400 | else: 401 | print( 402 | f" {PcodePrettyPrinter.fmt_op(op)};" # // P{i - last_imark_idx - 1:02d}: 403 | ) 404 | print("}") 405 | except 
def fmt_symbol_header(elf):
    """Render `#define NAME VALUE` lines for the symbols of an ELF file.

    Args:
        elf: object providing `get_section_by_name(name)`; the returned
            section must provide `iter_symbols()`, and each symbol a `name`
            attribute and an "st_value" item.

    Returns:
        List of C preprocessor lines, one per usable symbol. Empty when the
        file has no `.symtab` section.
    """
    output = []
    # Use the argument; the body previously referenced an undefined `elffile`.
    symtab = elf.get_section_by_name(".symtab")
    if symtab is None:  # no symbol table present
        return output
    for symbol in symtab.iter_symbols():
        name = symbol.name
        # Names that are empty or not valid C identifiers (e.g. "crtstuff.c")
        # cannot be macro names.
        if not (name and name.isascii() and name.isidentifier()):
            continue
        output.append(f'#define {name} {symbol["st_value"]}')
    return output
# Running index of the p-code op being emitted; also used to name its label.
unique_pcode_insn_id = 0


def fmt_branch_dest_varnode(varnode):
    """Format the destination operand of a BRANCH/CBRANCH op.

    A `const` destination is a p-code relative jump: its offset is a signed
    displacement, counted in p-code ops, from the op currently being emitted.
    A `ram` destination is an absolute machine address.

    Args:
        varnode: destination varnode (`space.name`, `offset`, `size`).

    Returns:
        Comma-separated argument text for the branch macro.

    Raises:
        ValueError: if the destination is in neither the const nor ram space.
    """
    space = varnode.space.name
    if space == "const":
        # The offset arrives as an unsigned integer; reinterpret it as a
        # two's-complement value of `size` bytes so backward jumps resolve to
        # an earlier label instead of an enormous index.
        # NOTE(review): assumes the const varnode's size is the width of the
        # displacement -- confirm against the SLEIGH output.
        bits = 8 * varnode.size
        delta = varnode.offset
        if bits:
            delta &= (1 << bits) - 1
            if delta >= 1 << (bits - 1):
                delta -= 1 << bits
        target = unique_pcode_insn_id + delta
        return "0, P_" + hex(target) + ", " + hex(varnode.size) + "ul"
    if space == "ram":
        return f"{space}_space, {hex(varnode.offset)}ul, {varnode.size}ul"
    raise ValueError(f"Bad branch dest space {space}")
def pcode2c(filename, langid, start_address=None, file_offset=None, size=None):
    """Translate a slice of a binary file into the lines of a C interpreter.

    Args:
        filename: path of the binary to read machine code from.
        langid: language passed to `Context`.
        start_address: address the code is loaded at; None means 0.
        file_offset: byte offset of the code inside the file; None means 0.
        size: number of bytes to read; None reads to the end of the file.

    Returns:
        List of C source lines: `header`, one `case`/`INSN` group per machine
        instruction followed by its p-code ops, then `footer`.
    """
    # The None defaults used to be passed straight to seek()/disassemble(),
    # which rejects them; give them their natural meaning instead.
    base = 0 if start_address is None else start_address
    with open(filename, "rb") as file:
        file.seek(0 if file_offset is None else file_offset)
        code = file.read() if size is None else file.read(size)

    # NOTE(review): naive.py builds Context from an ArchLanguage object, not
    # from the id string -- confirm this call accepts `langid` as given.
    ctx = Context(langid)

    dx = ctx.disassemble(code, base_address=base)
    insns = {insn.addr.offset: insn for insn in dx.instructions}
    res = ctx.translate(code, base_address=base)

    output = [header]
    for op in res.ops:
        if op.opcode == pypcode.OpCode.IMARK:
            # IMARK marks the start of a machine instruction: emit its case
            # label and a human-readable disassembly string.
            assert len(op.inputs) == 1
            addr = op.inputs[0].offset
            ins = insns[addr]
            text = f"{ins.addr.offset:#x}: {ins.mnem} {ins.body}"
            # The text ends up inside a C string literal.
            text = text.replace("\\", "\\\\").replace('"', '\\"')
            output.append(
                f"""\
        case 0x{addr:x}:
        L_0x{addr:x}ul:
            INSN(0x{addr:x}ul,"{text}")"""
            )
        else:
            output.append("            " + fmt_insn(op))
    output.append(footer)
    return output
# TODO
def infer_address(filename, start_address=None, end_address=None):
    """
    infer file offset, address, size.

    Locates the first segment of an ELF file that has the executable flag
    set and derives where to read code from.

    Args:
        filename: path of the ELF file.
        start_address: optional virtual address to start at.
        end_address: optional virtual address to stop at; only honoured
            together with `start_address`.

    Returns:
        Tuple `(file_offset, base_address, size)`.

    Raises:
        ValueError: for an ELF type other than ET_EXEC/ET_DYN, when no
            executable code is found, or when `start_address` falls outside
            the executable segment.
    """
    offset = base = None
    size = 0
    with open(filename, "rb") as file:
        elffile = ELFFile(file)
        e_type = elffile.header["e_type"]
        if e_type not in ("ET_EXEC", "ET_DYN"):
            raise ValueError("Unknown ELF type")
        for segment in elffile.iter_segments():
            if segment["p_flags"] & 0x1:  # PF_X flag is 1
                offset = segment["p_offset"]
                base = segment["p_vaddr"]
                size = segment["p_memsz"]
                break
    if size == 0:
        raise ValueError("No code found")

    # Without an explicit range, use the whole executable segment.
    if start_address is None or end_address is None:
        return offset, base, size

    read_offset = start_address - base + offset
    if read_offset < 0 or read_offset > offset + size:
        if 0x00100000 <= start_address:
            raise ValueError(
                "Bad start address (try removing Ghidra 0x00100000 offset)",
                hex(start_address),
            )
        raise ValueError("Bad start address", hex(start_address))
    read_size = min(end_address - start_address, size)
    return read_offset, start_address, read_size
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pcode2c" 3 | version = "0.1" 4 | authors = [{ name = "Philip Zucker" }] 5 | description = "A compiler from pcode to C" 6 | dependencies = ["pypcode", "pyelftools"] 7 | 8 | [tool.setuptools] 9 | packages = ["pcode2c"] 10 | -------------------------------------------------------------------------------- /templates/Makefile: -------------------------------------------------------------------------------- 1 | HARNESS=verify.c 2 | 3 | # Large amounts of unwinding are slower 4 | # Too few is unsound (--unwinding-assertions will fail) 5 | # set unwind to approximately number of basic blocks in longest execution path 6 | UNWIND=3 7 | 8 | # Obviously some assumptions need to be revisited for other architectures 9 | ENDIAN=--little-endian 10 | ARCH=x86_64 11 | 12 | # You may need to increase the address bit size. CBMC will throw an error saying so if needed. 13 | OBJECT_BITS=13 14 | 15 | # Picking a particular property to check 16 | # --property main.assertion.1 17 | CC=gcc 18 | 19 | # Other useful options. See CBMC manual for more information 20 | CBMC_OPTIONS= 21 | #CBMC_OPTIONS+= --trace # for counterexample traces 22 | #CBMC_OPTIONS+=--show-properties # to lookup and turn on only specific properties. 
Can increase speed 23 | #CBMC_OPTIONS+=--show-loops # To lookup loop names for unwinding specific loops 24 | CBMC_OPTIONS+=--no-built-in-assertions 25 | #CBMC_OPTIONS+=--property main.assertion.1 # Picking a particular property to check 26 | #CBMC_OPTIONS+= --bounds-check --conversion-check --div-by-zero-check --float-overflow-check --malloc-fail-null \ 27 | # --malloc-may-fail --nan-check --pointer-check --pointer-overflow-check --pointer-primitive-check \ 28 | # --signed-overflow-check --undefined-shift-check --unsigned-overflow-check 29 | #CBMC_OPTIONS+= --graphml-witness witness.graphml 30 | 31 | # Some alternative solver options to consider. YMMV. 32 | #SOLVER=--z3 33 | #SOLVER=--sat-solver cadical 34 | #SOLVER=--boolector 35 | #SOLVER=--ext-sat-solver kissat 36 | 37 | cbmc: 38 | cbmc $(HARNESS) --unwinding-assertions --object-bits $(OBJECT_BITS) \ 39 | --unwind $(UNWIND) $(ENDIAN) --arch $(ARCH) $(SOLVER) $(CBMC_OPTIONS) 40 | 41 | exe: 42 | $(CC) -o verify $(HARNESS) -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philzook58/pcode2c/a88f732f2522ea198a74ff3287fa61c7380ed9e4/templates/README.md -------------------------------------------------------------------------------- /templates/harness.c: -------------------------------------------------------------------------------- 1 | void main() 2 | { 3 | CPUState state; 4 | init_CPUState(&state); 5 | 6 | // Initialize state here, including pc address. 7 | state.pc = MY_FUNCTION_ADDR; 8 | 9 | pcode2c(&state, -1); // Run decompiled function 10 | 11 | // Check assertions here: ex, assert(*(uint64_t*)(state.reg + RAX) == 0xdeadbeef); 12 | } -------------------------------------------------------------------------------- /templates/pcodei/notes: -------------------------------------------------------------------------------- 1 | A pcode interpreter. 
2 | 3 | Packaging up the lifter is its own pain. Take from pypcode? 4 | 5 | Mmapping in may be nice 6 | 7 | lift insn every time? sure. 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /templates/pcodei/pcodei.cpp: -------------------------------------------------------------------------------- 1 | #include "../_pcode.h" 2 | #include "pypcode/pypcode/pypcode.h/sleigh/opcodes.hh" 3 | 4 | int interp() { 5 | CPUState cpu; 6 | init_CPUState(&cpu); 7 | for (;;) { 8 | opcode op = getopcode(state.pc); 9 | switch (op) { 10 | case COPY: 11 | COPY(out_addr, out_size, in_addr, in_size); 12 | break; 13 | case INT_ADD: 14 | INT_ADD(out_addr, out_size, in_addr1, in_size1, in_addr2, in_size2); 15 | break; 16 | case BOOL_OR: 17 | } 18 | } 19 | } 20 | 21 | int main(int argc, char *argv[]) { 22 | // parse arguments --base_address --offset --size 23 | 24 | // read the file 25 | 26 | // 27 | // call interp 28 | 29 | return 0; 30 | } --------------------------------------------------------------------------------