├── .gitignore
├── Android
    ├── README.md
    ├── WeddingCake_decrypt.py
    ├── WeddingCake_sysprops.txt
    └── jni_translate.py
├── LICENSE
├── README.md
├── data_offset_calc.py
├── define_code_functions.py
├── define_data_as_types.py
├── find_mem_accesses.py
├── identify_operand_locations.py
├── identify_port_use_locations.py
├── images
    └── getProcessorScreenShot.png
├── label_funcs_with_no_xrefs.py
├── make_strings.py
└── presentations
    ├── DerbyCon2017.IDAPythonPresentation.pdf
    └── RECON2017.LifeChanginMagicIDAPython.pdf


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 


--------------------------------------------------------------------------------
/Android/README.md:
--------------------------------------------------------------------------------
 1 | # IDAPython Embedded Toolkit: Android Segment
 2 | 
 3 | The tools, scripts, and information in this section help reverse engineer
 4 | and analyze Android-related binaries. 
 5 | 
 6 | ## jni_translate
 7 | Android apps can include compiled native code (usually written in C/C++) within a native library. This code uses the Java Native Interface to interact with the Java (or Kotlin) code within the Android app. Learn more at https://maddiestone.github.io/AndroidAppRE/reversing_native_libs.html. Each JNI function within the Android native lib will take the JNIEnv pointer as its first argument. The JNIEnv struct is a table of function pointers that provide useful Java functions to the native code. 
 8 | 
 9 | The *jni_translate.py* IDA Python script will comment within the IDA disassembly what function from the JNIEnv struct is being called to help ease analysis. This is necessary because most calls to functions within the JNIEnv are called as indirect offset accesses to JNIEnv*. 
10 | 
11 | For example, the following disassembly is a call to GetStaticMethodID, which is at offset 0x1C4 in the JNIEnv struct.
12 | ```
13 | LDR R1, [R0]		#R0 is JNIEnv*
14 | MOVS R0, #0x1C4
15 | LDR R4, [R0, R1]	# R4 = [JNIEnv + 0x1C4] = Ptr to GetStaticMethodID()
16 | BLX R4				# Call GetStaticMethodID
17 | ```
18 | 
19 | This script will add the function name as a comment in the IDA Disassembly. 
20 | 
21 | ## WeddingCake
22 | The WeddingCake anti-analysis library is an Android native code wrapper 
23 | that includes many techniques to frustrate analysis. More information 
24 | about this packer and how to run the tools included here is available
25 | from Maddie Stone's BlackHat USA 2018 talk, "Unpacking the Packed Unpacker".
26 | 
27 | *WeddingCake_decrypt.py*: This IDAPython script will run on ARM32 ELFs
28 | that have been packed with the WeddingCake Android anti-analysis library.
29 | The script will decrypt all of the encrypted strings in the IDA database
30 | and overwrite the database with the decrypted contents. To run the script,
31 | you must ensure that the JNI_OnLoad function is defined and exported in 
32 | the IDA database. 
33 | 
34 | *WeddingCake_sysprops.txt*: All of the system properties that WeddingCake
35 | checks for and the values that will cause the application to exit.
36 | 
37 | Copyright 2018 Google LLC.
38 | Author: maddiestone@google.com (Maddie Stone)
39 | 
40 | 


--------------------------------------------------------------------------------
/Android/WeddingCake_decrypt.py:
--------------------------------------------------------------------------------
  1 | #######################################
  2 | # IDAPython Script to Decrypt ARM 32-bit "WEDDINGCAKE" Packed Android Native Libraries
  3 | # Author: Maddie Stone (maddiestone@google.com)
  4 | # Copyright 2018 Google LLC.
  5 | #
  6 | # To run the script on the 32-bit ARM Android ELF, JNI_OnLoad must be defined
  7 | # and exported from the database. If any of the formatting or display
  8 | # of instructions is changed in your version of IDA, change the REGEX expressions
  9 | # at the beginning.
 10 | #######################################
 11 | 
import re
import struct
 13 | 
 14 | decrypt_sub_addr = -1
 15 | ARRAY_INSTR = "ADD     R0, PC"
 16 | DATABASE_BACKUP = "decrypted_database_bkup.idb"
 17 | regex_len = re.compile(r"MOVS? +R[0-9], +#0x[0-9A-Fa-f]{1,8}")
 18 | regex_load_len = re.compile("LDR +R[0-9], +\[SP,#0x[0-9]{2}\+var_.*")
 19 | arrays_to_dec = {}
 20 | saved_lens = {}
 21 | just_array_addrs = []
 22 | 
 23 | """ Find the address of the decryption subroutine.
 24 | 
 25 | """
 26 | def find_decrypt_sub():
 27 |     # Get OnLoad addr
 28 |     exported_subs = Entries()
 29 |     jni_onload_addr = -1
 30 |     for e in exported_subs:
 31 |         if (e[3] == "JNI_OnLoad"):
 32 |             print "**** JNI_OnLoad Addr: 0x%x ****" % e[2]
 33 |             jni_onload_addr = e[2]
 34 |             break
 35 |     if (jni_onload_addr == -1):
 36 |         print "CAN'T CONTINUE! CAN'T FIND ONLOAD!"
 37 |         return -1
 38 |     else:
 39 |         curr = NextHead(jni_onload_addr + 0xD0)
 40 |         end = GetFunctionAttr(curr, FUNCATTR_END)
 41 |         while (curr < end):
 42 |             disasm = GetDisasm(curr)
 43 |             if (disasm.startswith("BL ")):
 44 |                 dec_addr = int(disasm.split("_")[-1].strip(), 16)
 45 |                 if (XrefsTo(dec_addr) > 5):
 46 |                     print "DECRYPTION_ADDR: 0x%x" % dec_addr
 47 |                     return dec_addr
 48 |             curr = NextHead(curr)
 49 |         return - 1
 50 | 
 51 | """ Arithmetic shift right maintaining sign.
 52 | """
 53 | def arith_shift_rt(num, shifts):
 54 |     val = num >> shifts
 55 |     #print "val: 0x%x" % val
 56 |     if (val < 0):
 57 |         mask = 0x10000000
 58 |         for i in range(1, shifts):
 59 |             val |= mask
 60 |             mask >> 1
 61 |     #print "num >> shifts = val --> 0x%x >> 0x%x = 0x%x" % (num, shifts, val)
 62 |     return val
 63 | 
 64 | """ Signed Branch if Less than or Equal
 65 | """
 66 | def signed_ble(left_reg, right_reg):
 67 |     if (left_reg & 0x80000000): #left_reg is negative
 68 |         if (not(right_reg & 0x80000000)): #right_reg is positve
 69 |             return True
 70 |         else: #left_reg and right_reg are negative
 71 |             return left_reg >= right_reg
 72 |     else: #left_reg is positve
 73 |         if (right_reg & 0x80000000): #right_reg is negative
 74 |             return False
 75 |     return left_reg <= right_reg #left_reg and right_reg are positive
 76 | 
""" Decrypts array of bytes and writes decrypted bytes back to array argument.
"""
def decrypt(array, array_len, array_0To255, array_0x400):
    """Decrypt ``array`` in place.

    The variable names mirror the registers of the routine this was
    transliterated from.  The function first parses a trailer at the end of
    ``array`` (read backwards, starting at the last byte), derives from it
    how many leading bytes to process, and then rewrites those leading bytes.

    Args:
        array: mutable list of byte values; modified in place.
        array_len: number of bytes of ``array`` to consider.
        array_0To255: lookup table indexed with a value masked to 0..255.
        array_0x400: lookup table indexed with raw byte values; an entry with
            bit 31 set stops the decryption loop.

    Returns:
        None in every case.  On any failed sanity check the function returns
        early and leaves ``array`` as it was at that point.
    """
    if (array is None):
        print ("Array is null. -- Exiting")
        return
    if (array_len < 1):
        print ("array len < 1 -- Exiting")
        return
    after_end_of_array_index =  array_len
    # ~4 == -5, so "x + reg_4" below means "x - 5".
    reg_4 = ~(0x00000004)
    # With reg_0 == 4 the first byte read is array[array_len - 1].
    reg_0 = 4
    # Accumulator for the 7-bit groups of bytes that have bit 7 set.
    reg_2 = 0
    # Count of bytes with bit 7 set consumed so far.
    reg_5 = 0
    do_loop = True
    # Walk backwards from the last byte until a byte without bit 7 is found.
    while (do_loop):
        #print "IN LOOP"
        reg_6 = after_end_of_array_index + reg_0
        #print "REG6: 0x%x" % reg_6
        reg_6 = array[reg_6 + reg_4]
        #print "array[reg_6 + reg_4] = array[%d] = %d" % ((after_end_of_array_index + reg_0+ reg_4), reg_6)
        if (reg_6 & 0x80):
            #print "WENT LEFT"
            # Give up once four flagged bytes have already been consumed.
            if (reg_5 > 3):
                #print "reg_5 (%d) > 3 --> EXITING" % reg_5
                return
            reg_6 = reg_6 & 0x7F
            reg_2 = reg_2 & 0xFF
            #print "Before shift -- reg_6: 0x%x reg_2: 0x%x" % (reg_6, reg_2)
            reg_2 = reg_2 << 7
            reg_2 = reg_2 | reg_6
            # reg_4 + 4 == -1: step one byte further towards the start.
            reg_0 = reg_0 + reg_4 + 4
            #print "reg_2: 0x%x reg_0: 0x%x" % (reg_2, reg_0)
            # reg_3 is the index of the next byte to read, plus 2.
            reg_3 = array_len + reg_0 + reg_4 + 2
            reg_5 += 1
            #print "reg_3: 0x%x reg_5: 0x%x" % (reg_3, reg_5)
            # Stop if the next read would fall before the start of the array.
            if (reg_3 & 0x80000000 or reg_3 <= 1):
                #print "reg_3 shouldn't be less than 2 --> EXITING"
                return
        else:
            # Byte without bit 7: this terminates the backwards scan.
            do_loop = False
            reg_5 = 0xF0 & reg_6
            # reg_3 is the index of the byte just read; reg_1 is one past it.
            reg_3 = array_len + reg_0 + reg_4
            reg_1 = reg_3 + 1
            #print ("Is reg1 (%d) equal to array_len(%d)") % (reg_1, array_len)
            if (reg_0 == 0 and reg_5 != 0):
                #print "reg_0 == 0 && reg_5 != 0 --> EXITING"
                return
    reg_5 = reg_1
    #print "reg_2: 0x%x reg_6: 0x%x r2<<7: 0x%x" % (reg_2, reg_6, (reg_2 << 7))
    # Combine the accumulated groups with the terminating byte, keep 8 bits.
    reg_1 = (reg_2 << 7) + reg_6
    byte_FF = 0xFF
    reg_1 = reg_1 & byte_FF
    # last_byte is later used as the modulus when selecting the second table
    # index, i.e. presumably the key length -- TODO confirm.
    last_byte = reg_1
    #print ("reg_5 = 0x%x reg_1 = 0x%x reg_3 = 0x%x last_byte = 0x%x") % (reg_5, reg_1, reg_3, last_byte)
    if (reg_5 == 0 or reg_5 & 0x80000000 or last_byte == 0 or signed_ble(reg_3, last_byte)):
        #print "reg_5 < 1 || last_byte == 0 || reg_3 < last_byte -- Exiting"
        #print "reg_5 = 0x%x last_byte = 0x%x reg_3 = 0x%x" % (reg_5, last_byte, reg_3)
        return
    # (reg_4 + 4) == -1, so reg_1 becomes array_len - last_byte.
    reg_1 = (reg_4 + 4)
    reg_1 = (reg_1 * last_byte)
    reg_1 += array_len
    # crazy_num == reg_3 - last_byte: the number of leading bytes to rewrite.
    crazy_num = reg_1 + reg_0 + reg_4
    #print ("(reg_4 + 4) * last_byte = 0x%x, reg_1 = 0x%x, crazy_num = 0x%x" % ((reg_4 + 4) * last_byte, reg_1, crazy_num))
    if (crazy_num < 1):
        #print "crazy_num < 1 --> EXITING"
        return
    # new_index + reg_4 == crazy_num: index of the first byte after the
    # region that gets rewritten.
    new_index = reg_1 + reg_0
    #print "new_index: 0x%x" % new_index
    # reg_5 is now the index of the byte being rewritten.
    reg_5 = 0
    while (1):
        byte = array[reg_5]
        # NOTE(review): this value is overwritten two lines below and never read.
        reg_0 = byte << 2
        reg_6 = array_0x400[byte]
        reg_0 = 0xFF - reg_6
        #print "byte: 0x%x reg_6: 0x%x reg_0: 0x%x" % (byte, reg_6, reg_0)
        # Only replace reg_6 with 0xFF - reg_6 when bit 31 is clear.
        if (not reg_6 & 0x80000000):
            #print "reg_6 > 0 --> Set reg_6 = reg_0"
            reg_6 = reg_0
        reg_0 = reg_5
        # Cycle through the last_byte bytes that follow the rewritten region.
        reg_1 = reg_0 % last_byte
        reg_0 = new_index + reg_1
        #print ("reg_1: 0x%x reg_0: %x" % (reg_1, reg_0))
        #print ("reg_0 = array[new_index + reg_1 + reg_4], array[0x%x] = 0x%x" % (reg_0 + reg_4, reg_0))
        # NOTE(review): the index is masked to 8 bits, so it wraps for
        # positions >= 256 -- confirm this matches the original routine.
        reg_0 = array[(reg_0 + reg_4) & 0xFF]
        reg_1 = array_0x400[reg_0]
        #print ("reg_1 = array_0x400[reg_0] --> 0x%x = array_0x400[0x%x]" % (reg_1, reg_0))
        reg_2 = reg_1 | reg_6
        #print ("reg_2 = reg_1 | reg_6 --> 0x%x = 0x%x | 0x%x" % (reg_2, reg_1, reg_6))
        index_reg_0 = reg_5
        # Either table lookup had bit 31 set: stop rewriting.
        if (reg_2 & 0x80000000):
            #print ("reg_2 (0x%x) < 0) --> exiting " % (reg_2))
            break
        reg_1 = reg_6 + reg_1 + reg_5
        reg_2 = arith_shift_rt(reg_1, 0x1F)
        reg_2 = reg_2 >> 0x18
        #print "reg_2 = 0x%x" % reg_2
        reg_2 = reg_2 & ~0x000000FF
        #print "After BICS -- reg_2 = 0x%x" % reg_2
        reg_1 -= reg_2
        #print "reg_1 -= reg_2 --> reg_1 = 0x%x" % reg_1
        reg_1 = 0x000000FF - reg_1
        #print ("reg_1 (0x%x)= 0x000000FF - reg_1" % (reg_1))
        reg_1 = array_0To255[reg_1 & 0xFF]
        #print ("0x%x = array_0To255[reg_1 & 0xFF]" % reg_1)
        # Store the rewritten byte back into the caller's array.
        array[index_reg_0] = reg_1 & 0xFF
        #print "array[0x%x] = 0x%x" % (index_reg_0, reg_1)
        reg_5 += 1
        if (reg_5 >= crazy_num):
            #print ("reg_5 >= crazy_num --> Exit")
            break
    #print "*********** FINISHED DECRYPT ***************"
189 | 
def get_array_and_len(addr, prev_len):
    """Record the encrypted array referenced at ``addr`` and its length.

    The array address is parsed from the hex suffix after the last "_" in the
    trailing comment of the disassembly at ``addr``.  Up to three following
    instructions (unconditional "B" branches are followed and do not count)
    are then inspected for the length, which is either an immediate
    (``regex_len``) or a reload of a stack slot recorded earlier in
    ``saved_lens`` (``regex_load_len``).  If no length is found, ``prev_len``
    is reused.  The result is stored in ``arrays_to_dec`` and the address is
    appended to ``just_array_addrs``.

    Args:
        addr: address of the instruction that loads the array address.
        prev_len: length used for the previously processed array.

    Returns:
        The length associated with the array.
    """
    disasm = GetDisasm(addr)
    array_name = disasm.split(';')[-1].strip().split('_')[-1]
    array = int(array_name, 16)
    first_addr = addr
    addr = NextHead(addr)
    steps = 0
    array_length = 0
    while steps < 3:
        disasm = GetDisasm(addr)
        if regex_len.match(disasm):
            len_disasm = disasm.split("#")[1]
            print("len_disasm1: %s" % len_disasm)
            len_disasm = len_disasm.split(";")[0]
            print("len_disasm: %s" % len_disasm)
            array_length = int(len_disasm, 16)
            addr = NextHead(addr)
            try_if_len_saved(addr, array_length)
            break
        elif regex_load_len.match(disasm):
            var_num = disasm.split("var_")[-1].strip().replace("]", "")
            # A stack slot that was never recorded is a miss, not an error:
            # fall through and keep scanning instead of raising KeyError.
            array_length = saved_lens.get(var_num, 0)
            if array_length:
                break
            steps += 1
            addr = NextHead(addr)
        elif disasm.startswith("B "):
            addr = int(disasm.split("loc_")[-1].strip(), 16)
            print("BRANCH: %s -- new_addr: 0x%x" % (disasm, addr))
        else:
            steps += 1
            addr = NextHead(addr)

    reuse_previous = (array_length == 0)
    if reuse_previous:
        print("Keeping last length - Adding to list: [0x%x, 0x%x]" % (array, prev_len))
        array_length = prev_len
    else:
        print("Adding to list: [0x%x, 0x%x]" % (array, array_length))
    just_array_addrs.append(array)
    arrays_to_dec[array] = array_length
    if reuse_previous:
        # The reused length may be stored to the stack right after the
        # array-address instruction; record that slot for later reloads.
        try_if_len_saved(NextHead(first_addr), array_length)
    return array_length
250 | 
def try_if_len_saved(addr, length):
    """Record ``length`` if it is stored to a stack variable near ``addr``.

    Inspects the instruction at ``addr`` and up to four instructions after it.
    At the first one whose disassembly starts with "STR", the text after the
    last "var_" (with "]" removed) is used as the key under which ``length``
    is saved in ``saved_lens``.

    Args:
        addr: address at which to start looking.
        length: array length to remember.
    """
    disasm = GetDisasm(addr)
    steps = 0
    while steps < 4:
        if disasm.startswith("STR"):
            pieces = disasm.split("var_")
            var_num = pieces[-1].strip().replace("]", "")
            saved_lens[var_num] = length
            # Report before leaving the loop; placed after the break this
            # message could never be printed.
            print("Added -- saved_lens[%s] = 0x%x" % (pieces[-1].strip(), length))
            break
        addr = NextHead(addr)
        disasm = GetDisasm(addr)
        steps += 1
270 | 
def get_array_from_addr(addr, len):
    """Read ``len`` consecutive bytes from the database starting at ``addr``.

    Args:
        addr: address of the first byte to read.
        len: number of bytes to read.

    Returns:
        A list with one ``Byte()`` value per address, in address order.
    """
    return [Byte(addr + offset) for offset in range(0, len)]
280 | 
281 | 
282 | 
283 | #########################################################
284 | # MAIN
285 | #########################################################
# Report the address of every exported entry named JNI_OnLoad.
exported_subs = Entries()
onload_addrs = [entry[2] for entry in exported_subs if entry[3] == "JNI_OnLoad"]
for onload_addr in onload_addrs:
    print("**** JNI_OnLoad Addr: 0x%x ****" % onload_addr)
290 | 
291 | 
print("**** INITIALIZE THE TWO ARRAYS ****")
# Start from the identity table 0..255.  list() makes the table assignable
# on Python 3 as well, where range() is not a list.
num_array = list(range(256))

# Stage 1: shuffle the table.  Each round derives two indices from a linear
# congruential generator (multiplier 0x41C64E6D, increment 0x3039, seed
# 0x2C09) and swaps the entries at those indices.
v4 = 0x2C09
for i in range(256):
    v6 = 0x41C64E6D * v4 + 0x3039
    v7 = v6 & 0xFF
    v9 = 0x41C64E6D * (v6 & 0x7FFFFFFF) + 0x3039
    num_array[v7], num_array[v9 & 0xFF] = num_array[v9 & 0xFF], num_array[v7]
    # Only the low 31 bits of the state carry over to the next round.
    v4 = v9 & 0x7FFFFFFF

# Stage 2: build the inverse table, so that new_array[num_array[i]] == i.
# 0xFFFFFFFF is the filler for slots that are never written.
new_array = [0xFFFFFFFF] * 0x100
for i, value in enumerate(num_array):
    new_array[value] = i
324 | 
print("**** FINDING ARRAYS TO BE DECRYPTED ****")
decryption_sub_addr = find_decrypt_sub()
if decryption_sub_addr != -1:
    print("**** DECRYPTION SUB AT: 0x%x ****" % decryption_sub_addr)

    # GET ALL OF THE DECRYPTION CALLS
    decrypt_calls = XrefsTo(decryption_sub_addr)
    callee_addrs = [c.frm for c in decrypt_calls]
    if callee_addrs:
        min_call = min(callee_addrs)
        max_call = max(callee_addrs)
        print("MIN: 0x%x MAX: 0x%x" % (min_call, max_call))

        # Scan from slightly before the first call up to the last call for
        # the instructions that load an encrypted array's address.
        curr = min_call - 0x20
        prev_length = 0
        while curr < max_call:
            disasm = GetDisasm(curr)
            if disasm.startswith(ARRAY_INSTR):
                prev_length = get_array_and_len(curr, prev_length)
            curr = NextHead(curr)

    if just_array_addrs:
        # NOTE: the backup call itself is disabled; only the message is shown.
        print("**** SAVING OFF DATABASE TO: %s" % DATABASE_BACKUP)
        #save_database(DATABASE_BACKUP)
        print("**** STARTING DECRYPT ****")
        just_array_addrs.sort()
        print(just_array_addrs)
        size = len(just_array_addrs)
        last_addr = just_array_addrs[size-1]
        print("AREA: 0x%x - 0x%x = 0x%x" % (last_addr + arrays_to_dec[last_addr], just_array_addrs[0],(last_addr + arrays_to_dec[last_addr]) - just_array_addrs[0] ))

        decrypted_bytes = 0
        # An array referenced from several call sites is listed several
        # times; decrypt each address only once, otherwise the second pass
        # would run over bytes that have already been patched.
        for array_start in sorted(set(just_array_addrs)):
            array_len = arrays_to_dec[array_start]
            array = get_array_from_addr(array_start, array_len)
            decrypt(array, array_len, num_array, new_array)
            # Write the bytes back to the IDB, remembering the address of
            # the first NUL byte as the end of the string.
            array_end = array_start
            for offset, b in enumerate(array):
                ea = array_start + offset
                if (b & 0xFF) == 0x00 and array_end == array_start:
                    array_end = ea
                PatchByte(ea, b & 0xFF)
            MakeStr(array_start, array_end)
            decrypted_bytes += array_len
        print("FINISHED! DECRYPTED 0x%x BYTES. " % (decrypted_bytes))
    else:
        print("NO ENCRYPTED ARRAYS FOUND! NOTHING TO DECRYPT!")
else:
    print("CAN'T FIND DECRYPTION SUB ADDR! EXITING!")
384 | 
385 | 
386 | 


--------------------------------------------------------------------------------
/Android/WeddingCake_sysprops.txt:
--------------------------------------------------------------------------------
 1 | System Property Checked,Value that Causes Exit
 2 | init.svc.gce_fs_monitor,running
 3 | init.svc.dempeventlog,running
 4 | init.svc.dumpipclog,running
 5 | init.svc.dumplogcat,running
 6 | init.svc.dumplogcat-efs,running
 7 | init.svc.filemon,running
 8 | ro.hardware.virtual_device,gce_x86
 9 | ro.kernel.androidboot.hardware,gce_x86
10 | ro.hardware.virtual_device,gce_x86
11 | ro.boot.hardware,gce_x86
12 | ro.boot.selinux,disable
13 | ro.factorytest,true OR 1 OR y
14 | ro.kernel.android.checkjni,true OR 1 OR y
15 | ro.hardware.virtual_device,vbox86
16 | ro.kernel.androidboot.hardware,vbox86
17 | ro.hardware,vbox86
18 | ro.boot.hardware,vbox86
19 | ro.build.product,google_sdk
20 | ro.build.product,Droid4x
21 | ro.build.product,sdk_x86
22 | ro.build.product,sdk_google
23 | ro.build.product,vbox86p
24 | ro.product.manufacturer,Genymotion
25 | ro.product.brand,generic
26 | ro.product.brand,generic_x86
27 | ro.product.device,generic
28 | ro.product.device,generic_x86
29 | ro.product.device,generic_x86_x64
30 | ro.product.device,Droid4x
31 | ro.product.device,vbox86p
32 | ro.kernel.androidboot.hardware,goldfish
33 | ro.hardware,goldfish
34 | ro.boot.hardware,goldfish
35 | ro.hardware.audio.primary,goldfish
36 | ro.kernel.androidboot.hardware,ranchu
37 | ro.hardware,ranchu
38 | ro.boot.hardware,ranchu


--------------------------------------------------------------------------------
/Android/jni_translate.py:
--------------------------------------------------------------------------------
  1 | """JNIEnv-translate recovers the offset used to access a function in JNIEnv and translates that into the corresponding function."""
  2 | 
  3 | 
  4 | from __future__ import print_function
  5 | import re
  6 | from idaapi import ida_funcs
  7 | import idc
  8 | 
  9 | 
# Number of comments added so far (incremented in match_blx).
num = 0
# Register name -> offset currently tracked for that register; match_blx
# resets the called register to -1.
regs_offsets = {}
# Register name -> True while the register is believed to hold the JNIEnv
# function table pointer, False otherwise.
regs_loads = {}
# A function that accesses and calls a JNI function performs at least 4
# operations: 1. push registers to the stack, 2. get the JNIEnv pointer,
# 3. dereference the specific function, 4. call the function. Together these
# take at least 8 bytes.
min_func_len = 0x8
# Lookup table consulted by match_blx: str(offset) -> text used as comment.
jnienv = {}
# Set when an LDR matched REGEX_LDR_JNIE but was rejected by match_ldr_jnie.
last_inst_not_for_jnie = False
# Cleared in others_lds together with last_inst_not_for_jnie.
last_inst_not_mtd3 = False
# True after a BLX has been processed; cleared again by match_ldr_jnie when
# it accepts a new JNIEnv pointer load.
called_blx = False
 22 | 
 23 | 
# Patterns for the disassembly text of the instructions of interest.
# PUSH with a register list that may contain a range and LR.
REGEX_PUSH = re.compile(r"PUSH +\{(R[0-9]+,?)+ *(-R[0-9]+,)? *(LR)?\}")
# Load through a bare register, e.g.: LDR     R4, [R0]
REGEX_LDR_JNIE = re.compile(r"LDR +R[0-9]+, +\[R[0-9]+\]")
# Add an immediate (decimal or hex), e.g.: ADDS    R4, #8  or  ADDS    R4, #0x8
REGEX_OPT_ADD = re.compile(r"ADDS +R[0-9]+, +(#[0-9]{1,3}|#0x[0-9A-Fa-f]{1,3})")
# Move an immediate (decimal or hex), e.g.: MOVS    R3, #8  or  MOVS    R3, #0x8
REGEX_OPT_MOV = re.compile(r"MOVS +R[0-9]+, +(#[0-9]{1,3}|#0x[0-9A-Fa-f]{1,3})")
# Shift left by an immediate, e.g.: LSLS    R0, R0, #2
REGEX_OPT_LSL = re.compile(r"LSLS? +R[0-9]+, +R[0-9]+, +(#[0-9]+|#0x[0-9A-Fa-f]+)")
# Register-to-register move, e.g.: MOV     R12, R3
REGEX_MOV = re.compile(r"MOV +R[0-9]+, +R[0-9]+")
# Load with a decimal immediate offset, e.g.: LDR     R4, [R4,#67]
REGEX_LDR_MTD1 = re.compile(r"LDR +R[0-9]+, +\[R[0-9]+, *#[0-9]{1,3}\]")
# Load with a hex immediate offset, e.g.: LDR     R4, [R4,#0x7C]
REGEX_LDR_MTD4 = re.compile(r"LDR +R[0-9]+, +\[R[0-9]+, *#0x[0-9A-Fa-f]{1,3}\]")
# Load with a register offset, e.g.: LDR     R3, [R4,R3]
REGEX_LDR_MTD2 = re.compile(r"LDR +R[0-9]+, +\[R[0-9]+, *R[0-9]+\]")
# Load through a bare register, e.g.: LDR     R3, [R4]
# (same pattern text as REGEX_LDR_JNIE)
REGEX_LDR_MTD3 = re.compile(r"LDR +R[0-9]+, +\[R[0-9]+\]")
# Indirect call through a register, e.g.: BLX     R4
REGEX_BLX = re.compile(r"BLX +R[0-9]+")
# POP with a register list that may contain a range and PC, e.g.: POP     {R4,PC}
REGEX_POP = re.compile(r"POP +\{(R[0-9]+,?)+ *(-R[0-9]+,)? *(PC)?\}")
 47 | 
 48 | 
 49 | def remove_comment_from_disasm(disasm):
 50 |   """Remove comment from disassembly.
 51 | 
 52 |   Args:
 53 |    disasm: (str) disassembly of the current instruction.
 54 | 
 55 |   Returns:
 56 |    New disassembly after removing comment.
 57 |   """
 58 |   if ";" in disasm:
 59 |     return disasm.split(";")[0]
 60 |   return disasm
 61 | 
 62 | 
 63 | def match_ldr_jnie(disasm):
 64 |   """process instruction that loads function table pointer.
 65 | 
 66 |   E.g LDR     R4, [R0].
 67 | 
 68 |   Args:
 69 |    disasm: (str) disassembly of the current instruction.
 70 | 
 71 |   Returns:
 72 |    True True or False depending on whether the instruction loads JNIEnv ptr
 73 |   """
 74 |   global regs_offsets, regs_loads, called_blx
 75 |   # some instructions have comment, remove the comments
 76 |   disasm = remove_comment_from_disasm(disasm)
 77 |   # get the reg that now holds JNIEnv ptr
 78 |   disasm = disasm.split("LDR")[1].split(",")
 79 |   loc_jnie = disasm[0].strip()
 80 |   loc_source = disasm[1].strip()[1:-1]
 81 |   if loc_jnie in regs_loads and regs_loads[loc_jnie] and not called_blx:
 82 |     return False
 83 |   if loc_source in regs_offsets and regs_offsets[loc_source] != -1:
 84 |     return False
 85 |   # its currently at offset 0
 86 |   regs_offsets[loc_jnie] = 0
 87 |   # saying that the current reg holds JNIEnv ptr
 88 |   regs_loads[loc_jnie] = True
 89 |   called_blx = False
 90 |   return True
 91 | 
 92 | 
 93 | def match_opt_adds(disasm):
 94 |   """process instruction that adds a literal value to function table pointer.
 95 | 
 96 |   Also used for an instruction that moves a literal value to a register.
 97 |   E.g ADDS    R4, #8|#0x8 and MOVS    R3, #8|#0x8.
 98 | 
 99 |   Args:
100 |    disasm: (str) disassembly of the current instruction.
101 | 
102 |   Returns:
103 |    True or False based on whether the instruction adds an offset to
104 |    JNIEnv ptr or mov the offset to a register.
105 |   """
106 |   global regs_offsets, regs_loads
107 |   saved_disasm = disasm
108 |   passed_adds = False
109 |   disasm = remove_comment_from_disasm(disasm)
110 |   if REGEX_OPT_ADD.match(disasm):
111 |     passed_adds = True
112 |     disasm = disasm.split("ADDS")[1].split(",")
113 |   else:
114 |     disasm = disasm.split("MOVS")[1].split(",")
115 |   off = disasm[1].strip()[1:]
116 |   mov_add_reg = disasm[0].strip()
117 |   if "0x" in off:
118 |     off = int(off[2:], 16)
119 |   offset = int(off)
120 |   # probably a false positive
121 |   if offset == 0:
122 |     return False
123 |   if REGEX_OPT_MOV.match(saved_disasm):
124 |     regs_loads[mov_add_reg] = False
125 |   regs_offsets[mov_add_reg] = offset
126 |   return passed_adds
127 | 
128 | 
def match_mov(disasm):
  """Propagate tracked state across a register-to-register move.

  E.g MOV     R12, R3.

  The destination inherits the source's tracked offset (if it has one) and
  its JNIEnv-pointer flag (False when the source has none).

  Args:
   disasm: (str) disassembly of the current instruction.
  """
  global regs_offsets, regs_loads
  operands = remove_comment_from_disasm(disasm).split("MOV")[1].split(",")
  target = operands[0].strip()
  origin = operands[1].strip()
  if origin in regs_offsets:
    regs_offsets[target] = regs_offsets[origin]
  regs_loads[target] = regs_loads[origin] if origin in regs_loads else False
149 | 
150 | 
def match_lsl(disasm):
  """Scale a tracked offset for a left shift by a literal.

  E.g LSLS    R0, R0, #2.

  Only the three-operand form is handled, and only when the source register
  has a tracked offset; the destination then gets that offset multiplied by
  2 to the power of the shift amount.

  Args:
   disasm: (str) disassembly of the current instruction.
  """
  global regs_offsets, regs_loads
  stripped = remove_comment_from_disasm(disasm)
  mnemonic = "LSLS" if "LSLS" in stripped else "LSL"
  operands = stripped.split(mnemonic)[1].split(",")
  if len(operands) != 3:
    return
  shifted_reg = operands[1].strip()
  if shifted_reg not in regs_offsets:
    return
  result_reg = operands[0].strip()
  # shift amount without the leading "#"
  amount = operands[2].strip()[1:]
  if "0x" in amount:
    amount = int(amount[2:], 16)
  regs_offsets[result_reg] = regs_offsets[shifted_reg] * pow(2, int(amount))
175 | 
176 | 
def match_ldr_mtd1_and_4(disasm):
  """Handle a load with a literal offset from a base register.

  Covers instructions matched by REGEX_LDR_MTD1 or REGEX_LDR_MTD4.
  E.g LDR     R4, [R4,#67] and LDR     R4, [R4,#0x7C].

  The literal is added to the base register's tracked offset (or becomes it,
  if the base had none); the destination takes the resulting offset and is
  marked as not holding the JNIEnv pointer.

  Args:
   disasm: (str) disassembly of the current instruction.

  Returns:
   Always True.
  """
  global regs_offsets, regs_loads
  operands = remove_comment_from_disasm(disasm).split("LDR")[1].split(",")
  target = operands[0].strip()
  # base operand without its leading "["
  base = operands[1].strip()[1:]
  # literal between the leading "#" and the closing "]"
  literal = operands[2].strip()
  literal = literal[1:literal.find("]")]
  if "0x" in literal:
    literal = int(literal[2:], 16)
  regs_offsets[base] = regs_offsets.get(base, 0) + int(literal)
  regs_offsets[target] = regs_offsets[base]
  regs_loads[target] = False
  return True
207 | 
208 | 
def match_ldr_mtd2(disasm):
  """Handle a load with a register offset from a base register.

  Covers instructions matched by REGEX_LDR_MTD2. E.g LDR     R3, [R4,R3].

  The destination takes the tracked offset of the index register (0 when
  the index register has none) and is marked as not holding the JNIEnv
  pointer.

  Args:
   disasm: (str) disassembly of the current instruction.

  Returns:
   Always True.
  """
  global regs_offsets, regs_loads
  operands = remove_comment_from_disasm(disasm).split("LDR")[1].split(",")
  target = operands[0].strip()
  # index operand without its closing "]"
  index_reg = operands[2].strip()[:-1]
  regs_offsets[target] = regs_offsets.get(index_reg, 0)
  regs_loads[target] = False
  return True
233 | 
234 | 
def match_ldr_mtd3(disasm):
  """Handle a load through a bare base register.

  Covers instructions matched by REGEX_LDR_MTD3. E.g LDR     R3, [R4].

  The load counts only when the base register has a tracked offset and is
  marked as holding the JNIEnv pointer; the destination then takes that
  offset and is marked as not holding the pointer.  The
  last_inst_not_for_jnie flag is cleared in every case.

  Args:
   disasm: (str) disassembly of the current instruction.

  Returns:
   True if the load was accepted, otherwise False.
  """
  global regs_offsets, regs_loads, last_inst_not_for_jnie
  operands = remove_comment_from_disasm(disasm).split("LDR")[1].split(",")
  target = operands[0].strip()
  # base operand without the surrounding brackets
  base = operands[1].strip()[1:-1]
  last_inst_not_for_jnie = False
  if base not in regs_offsets or not regs_loads.get(base, False):
    return False
  regs_offsets[target] = regs_offsets[base]
  regs_loads[target] = False
  return True
263 | 
264 | 
def match_blx(ea, disasm):
  """Annotate an indirect call made through a tracked register.

  E.g BLX     R4.

  If the called register has a tracked offset whose string form is a key of
  jnienv, the matching entry is added as a comment at ea and the comment
  counter is incremented.  The register's state is then reset (offset -1,
  not holding the JNIEnv pointer) and called_blx is set.

  Args:
   ea: (int) current address
   disasm: (str) disassembly of the current instruction.
  """
  global regs_offsets, regs_loads, jnienv, num, called_blx
  target = remove_comment_from_disasm(disasm).split("BLX")[1].strip()
  if target in regs_offsets:
    key = str(regs_offsets[target])
    if key in jnienv:
      idc.MakeComm(ea, str(jnienv[key]))
      num += 1
  regs_offsets[target] = -1
  regs_loads[target] = False
  called_blx = True
286 | 
287 | 
def others_lds(disasm):
  """Handle an LDR that was first mistaken as ldr_jnie or ldr_mtd3.

  If the text contains "LDR" and its destination register is known, that
  register is marked as not holding the JNIEnv pointer.  Both "mistaken"
  flags are cleared in every case.

  Args:
   disasm: (str) disassembly of the current instruction.
  """
  global last_inst_not_for_jnie, last_inst_not_mtd3
  last_inst_not_for_jnie = False
  last_inst_not_mtd3 = False
  if "LDR" not in disasm:
    return
  target = disasm.split("LDR")[1].split(",")[0].strip()
  if target in regs_loads:
    regs_loads[target] = False
301 | 
302 | 
def extract_routines():
  """Scan every function and comment the JNIEnv calls that can be resolved.

  For each function returned by ida_funcs, the instructions are walked head
  by head and matched against the module's regexes. The match_* helpers
  keep per-register state in the globals ``regs_offsets`` and ``regs_loads``;
  this function keeps three local flags (``passed_ldjnie``, ``passed_adds``,
  ``passed_ldmtd``) recording which steps of the
  "load JNIEnv -> compute offset -> load function ptr -> BLX" sequence have
  been seen. When a BLX is reached after a function-pointer load,
  match_blx() is called to comment the call site.

  Functions shorter than ``min_func_len`` bytes are skipped. Register state
  is cleared after each scanned function, and a summary with the number of
  commented call sites (``num``) is printed at the end.
  """
  global regs_offsets, regs_loads, num, min_func_len, jnienv
  global last_inst_not_for_jnie, last_inst_not_mtd3
  # get JNI function signatures
  jnienv = create_jnienv_indices()

  # gets all functions defined in the binary
  for i in range(ida_funcs.get_func_qty()):
    func = ida_funcs.getn_func(i)
    start_ea = func.startEA
    end_ea = func.endEA

    # skip functions too small to be of interest
    if end_ea - start_ea < min_func_len:
      continue
    ea = start_ea
    passed_ldjnie = False
    passed_adds = False
    passed_ldmtd = False
    last_inst_not_for_jnie = False
    last_inst_not_mtd3 = False

    # endEA is the first address *after* the function (the range is
    # half-open), so the head at end_ea must not be scanned as part of
    # this function.
    while ea < end_ea:
      disasm = idc.GetDisasm(ea)
      # check for instruction that moves JNIEnv ptr to a reg
      if REGEX_LDR_JNIE.match(disasm) and not last_inst_not_for_jnie:
        old_passed_ldjnie = passed_ldjnie
        passed_ldjnie = match_ldr_jnie(disasm)
        if passed_ldjnie:
          ea = idc.NextHead(ea)
        else:
          # not a JNIEnv load after all: keep the previous state and retry
          # the same instruction against the remaining patterns
          passed_ldjnie = old_passed_ldjnie
          last_inst_not_for_jnie = True

      # check if a literal value gets added to the reg holding JNIEnv ptr.
      # keep track of that offset
      elif REGEX_OPT_ADD.match(disasm) or REGEX_OPT_MOV.match(disasm):
        passed_adds = match_opt_adds(disasm)
        ea = idc.NextHead(ea)

      # check if the content of a reg gets moved to another reg. Data
      # associated to the source reg is linked to the destination reg
      # in match_mov(disasm)
      elif REGEX_MOV.match(disasm):
        match_mov(disasm)
        ea = idc.NextHead(ea)

      # check if a shift operation is performed
      elif REGEX_OPT_LSL.match(disasm):
        match_lsl(disasm)
        ea = idc.NextHead(ea)

      # instns that match this regex may be loading the exact function ptr.
      # This instn adds an additional offset.
      # Get the offset and add it to any prev offset maybe from REGEX_OPT_ADD
      elif ((REGEX_LDR_MTD1.match(disasm) or REGEX_LDR_MTD4.match(disasm))
            and passed_ldjnie):
        passed_ldmtd = match_ldr_mtd1_and_4(disasm)
        ea = idc.NextHead(ea)

      # instns that match this regex may be loading the exact function ptr.
      # Uses another reg to specify offset, usually from REGEX_OPT_MOV
      elif REGEX_LDR_MTD2.match(disasm) and passed_ldjnie:
        passed_ldmtd = match_ldr_mtd2(disasm)
        ea = idc.NextHead(ea)

      # instns that match this regex may be loading the exact function ptr
      elif (REGEX_LDR_MTD3.match(disasm) and passed_ldjnie and passed_adds
            and not last_inst_not_mtd3):
        passed_ldmtd = match_ldr_mtd3(disasm)
        if passed_ldmtd:
          ea = idc.NextHead(ea)
          last_inst_not_for_jnie = False
        else:
          # rejected: stay on this instruction so a later branch handles it
          last_inst_not_mtd3 = True

      # this instn calls the JNIEnv func.
      # Get the exact function signature using calculated offset and
      # add signature as comment at this callsite
      elif REGEX_BLX.match(disasm) and passed_ldmtd:
        match_blx(ea, disasm)
        ea = idc.NextHead(ea)
        # the sequence is complete; start looking for a new one
        passed_ldjnie = False
        passed_adds = False
        passed_ldmtd = False

      else:
        others_lds(disasm)
        ea = idc.NextHead(ea)
    # register tracking never carries over from one function to the next
    regs_offsets.clear()
    regs_loads.clear()

  print("Done.", num, ("callsites have been updated with "
                       "JNIEnv function signatures"))
399 | 
400 | 
def create_jnienv_indices():
  """Build the JNIEnv function table lookup (offset -> signature).

  Keys are byte offsets into the JNIEnv function table written as decimal
  strings ("16", "20", ... "928", in steps of 4); values are the C
  signature text that gets written as the comment at a resolved call site.
  The table is also stored in the module global ``jnienv``.

  Every offset from 16 to 928 has an entry. Offset 360 is
  CallNonvirtualDoubleMethodA, the slot between CallNonvirtualDoubleMethodV
  (356) and CallNonvirtualVoidMethod (364).

  Returns:
   dict mapping offset strings to signature strings.
  """
  global jnienv
  jnienv = {
      "16": "jint        (*GetVersion)(JNIEnv *) ;",
      "20": ("jclass      (*DefineClass)(JNIEnv*, const char*,"
             " jobject, const jbyte*, jsize) ;"),
      "24": "jclass      (*FindClass)(JNIEnv*, const char*) ;",
      "28": "jmethodID   (*FromReflectedMethod)(JNIEnv*, jobject) ;",
      "32": "jfieldID    (*FromReflectedField)(JNIEnv*, jobject) ;",
      "36": ("jobject     (*ToReflectedMethod)"
             "(JNIEnv*, jclass, jmethodID, jboolean) ;"),
      "40": "jclass      (*GetSuperclass)(JNIEnv*, jclass) ;",
      "44": "jboolean    (*IsAssignableFrom)(JNIEnv*, jclass, jclass) ;",
      "48": ("jobject     (*ToReflectedField)"
             "(JNIEnv*, jclass, jfieldID, jboolean) ;"),
      "52": "jint        (*Throw)(JNIEnv*, jthrowable) ;",
      "56": "jint        (*ThrowNew)(JNIEnv *, jclass, const char *) ;",
      "60": "jthrowable  (*ExceptionOccurred)(JNIEnv*) ;",
      "64": "void        (*ExceptionDescribe)(JNIEnv*) ;",
      "68": "void        (*ExceptionClear)(JNIEnv*) ;",
      "72": "void        (*FatalError)(JNIEnv*, const char*) ;",
      "76": "jint        (*PushLocalFrame)(JNIEnv*, jint) ;",
      "80": "jobject     (*PopLocalFrame)(JNIEnv*, jobject) ;",
      "84": "jobject     (*NewGlobalRef)(JNIEnv*, jobject) ;",
      "88": "void        (*DeleteGlobalRef)(JNIEnv*, jobject) ;",
      "92": "void        (*DeleteLocalRef)(JNIEnv*, jobject) ;",
      "96": "jboolean    (*IsSameObject)(JNIEnv*, jobject, jobject) ;",
      "100": "jobject     (*NewLocalRef)(JNIEnv*, jobject) ;",
      "104": "jint        (*EnsureLocalCapacity)(JNIEnv*, jint) ;",
      "108": "jobject     (*AllocObject)(JNIEnv*, jclass) ;",
      "112": "jobject     (*NewObject)(JNIEnv*, jclass, jmethodID, ...) ;",
      "116": "jobject     (*NewObjectV)(JNIEnv*, jclass, jmethodID, va_list) ;",
      "120": "jobject     (*NewObjectA)(JNIEnv*, jclass, jmethodID, jvalue*) ;",
      "124": "jclass      (*GetObjectClass)(JNIEnv*, jobject) ;",
      "128": "jboolean    (*IsInstanceOf)(JNIEnv*, jobject, jclass) ;",
      "132": ("jmethodID   (*GetMethodID)"
              "(JNIEnv*, jclass, const char*, const char*) ;"),
      "136": ("jobject     (*CallObjectMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "140": ("jobject     (*CallObjectMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "144": ("jobject     (*CallObjectMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "148": ("jboolean    (*CallBooleanMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "152": ("jboolean    (*CallBooleanMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "156": ("jboolean    (*CallBooleanMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "160": ("jbyte       (*CallByteMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "164": ("jbyte       (*CallByteMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "168": ("jbyte       (*CallByteMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "172": ("jchar       (*CallCharMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "176": ("jchar       (*CallCharMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "180": ("jchar       (*CallCharMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "184": ("jshort      (*CallShortMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "188": ("jshort      (*CallShortMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "192": ("jshort      (*CallShortMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "196": ("jint        (*CallIntMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "200": ("jint        (*CallIntMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "204": ("jint        (*CallIntMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "208": ("jlong       (*CallLongMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "212": ("jlong       (*CallLongMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "216": ("jlong       (*CallLongMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "220": ("jfloat      (*CallFloatMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "224": ("jfloat      (*CallFloatMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "228": ("jfloat      (*CallFloatMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "232": ("jdouble     (*CallDoubleMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "236": ("jdouble     (*CallDoubleMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "240": ("jdouble     (*CallDoubleMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "244": ("void        (*CallVoidMethod)"
              "(JNIEnv*, jobject, jmethodID, ...) ;"),
      "248": ("void        (*CallVoidMethodV)"
              "(JNIEnv*, jobject, jmethodID, va_list) ;"),
      "252": ("void        (*CallVoidMethodA)"
              "(JNIEnv*, jobject, jmethodID, jvalue*) ;"),
      "256": ("jobject     (*CallNonvirtualObjectMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "260": ("jobject     (*CallNonvirtualObjectMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "264": ("jobject     (*CallNonvirtualObjectMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "268": ("jboolean    (*CallNonvirtualBooleanMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "272": ("jboolean    (*CallNonvirtualBooleanMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "276": ("jboolean    (*CallNonvirtualBooleanMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "280": ("jbyte       (*CallNonvirtualByteMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "284": ("jbyte       (*CallNonvirtualByteMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "288": ("jbyte       (*CallNonvirtualByteMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "292": ("jchar       (*CallNonvirtualCharMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "296": ("jchar       (*CallNonvirtualCharMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "300": ("jchar       (*CallNonvirtualCharMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "304": ("jshort      (*CallNonvirtualShortMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "308": ("jshort      (*CallNonvirtualShortMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "312": ("jshort      (*CallNonvirtualShortMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "316": ("jint        (*CallNonvirtualIntMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "320": ("jint        (*CallNonvirtualIntMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "324": ("jint        (*CallNonvirtualIntMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "328": ("jlong       (*CallNonvirtualLongMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "332": ("jlong       (*CallNonvirtualLongMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "336": ("jlong       (*CallNonvirtualLongMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "340": ("jfloat      (*CallNonvirtualFloatMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "344": ("jfloat      (*CallNonvirtualFloatMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "348": ("jfloat      (*CallNonvirtualFloatMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "352": ("jdouble     (*CallNonvirtualDoubleMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "356": ("jdouble     (*CallNonvirtualDoubleMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "360": ("jdouble     (*CallNonvirtualDoubleMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "364": ("void        (*CallNonvirtualVoidMethod)"
              "(JNIEnv*, jobject, jclass, jmethodID, ...) ;"),
      "368": ("void        (*CallNonvirtualVoidMethodV)"
              "(JNIEnv*, jobject, jclass, jmethodID, va_list) ;"),
      "372": ("void        (*CallNonvirtualVoidMethodA)"
              "(JNIEnv*, jobject, jclass, jmethodID, jvalue*) ;"),
      "376": ("jfieldID    (*GetFieldID)"
              "(JNIEnv*, jclass, const char*, const char*) ;"),
      "380": "jobject     (*GetObjectField)(JNIEnv*, jobject, jfieldID) ;",
      "384": "jboolean    (*GetBooleanField)(JNIEnv*, jobject, jfieldID) ;",
      "388": "jbyte       (*GetByteField)(JNIEnv*, jobject, jfieldID) ;",
      "392": "jchar       (*GetCharField)(JNIEnv*, jobject, jfieldID) ;",
      "396": "jshort      (*GetShortField)(JNIEnv*, jobject, jfieldID) ;",
      "400": "jint        (*GetIntField)(JNIEnv*, jobject, jfieldID) ;",
      "404": "jlong       (*GetLongField)(JNIEnv*, jobject, jfieldID) ;",
      "408": "jfloat      (*GetFloatField)(JNIEnv*, jobject, jfieldID) ;",
      "412": "jdouble     (*GetDoubleField)(JNIEnv*, jobject, jfieldID) ;",
      "416": ("void        (*SetObjectField)"
              "(JNIEnv*, jobject, jfieldID, jobject) ;"),
      "420": ("void        (*SetBooleanField)"
              "(JNIEnv*, jobject, jfieldID, jboolean) ;"),
      "424": "void        (*SetByteField)(JNIEnv*, jobject, jfieldID, jbyte) ;",
      "428": "void        (*SetCharField)(JNIEnv*, jobject, jfieldID, jchar) ;",
      "432": ("void        (*SetShortField)"
              "(JNIEnv*, jobject, jfieldID, jshort) ;"),
      "436": "void        (*SetIntField)(JNIEnv*, jobject, jfieldID, jint) ;",
      "440": "void        (*SetLongField)(JNIEnv*, jobject, jfieldID, jlong) ;",
      "444": ("void        (*SetFloatField)"
              "(JNIEnv*, jobject, jfieldID, jfloat) ;"),
      "448": ("void        (*SetDoubleField)"
              "(JNIEnv*, jobject, jfieldID, jdouble) ;"),
      "452": ("jmethodID   (*GetStaticMethodID)"
              "(JNIEnv*, jclass, const char*, const char*) ;"),
      "456": ("jobject     (*CallStaticObjectMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "460": ("jobject     (*CallStaticObjectMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "464": ("jobject     (*CallStaticObjectMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "468": ("jboolean    (*CallStaticBooleanMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "472": ("jboolean    (*CallStaticBooleanMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "476": ("jboolean    (*CallStaticBooleanMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "480": ("jbyte       (*CallStaticByteMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "484": ("jbyte       (*CallStaticByteMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "488": ("jbyte       (*CallStaticByteMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "492": ("jchar       (*CallStaticCharMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "496": ("jchar       (*CallStaticCharMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "500": ("jchar       (*CallStaticCharMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "504": ("jshort      (*CallStaticShortMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "508": ("jshort      (*CallStaticShortMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "512": ("jshort      (*CallStaticShortMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "516": ("jint        (*CallStaticIntMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "520": ("jint        (*CallStaticIntMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "524": ("jint        (*CallStaticIntMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "528": ("jlong       (*CallStaticLongMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "532": ("jlong       (*CallStaticLongMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "536": ("jlong       (*CallStaticLongMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "540": ("jfloat      (*CallStaticFloatMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "544": ("jfloat      (*CallStaticFloatMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "548": ("jfloat      (*CallStaticFloatMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "552": ("jdouble     (*CallStaticDoubleMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "556": ("jdouble     (*CallStaticDoubleMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "560": ("jdouble     (*CallStaticDoubleMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "564": ("void        (*CallStaticVoidMethod)"
              "(JNIEnv*, jclass, jmethodID, ...) ;"),
      "568": ("void        (*CallStaticVoidMethodV)"
              "(JNIEnv*, jclass, jmethodID, va_list) ;"),
      "572": ("void        (*CallStaticVoidMethodA)"
              "(JNIEnv*, jclass, jmethodID, jvalue*) ;"),
      "576": ("jfieldID    (*GetStaticFieldID)"
              "(JNIEnv*, jclass, const char*, const char*) ;"),
      "580": "jobject     (*GetStaticObjectField)(JNIEnv*, jclass, jfieldID) ;",
      "584": ("jboolean    (*GetStaticBooleanField)"
              "(JNIEnv*, jclass, jfieldID) ;"),
      "588": "jbyte       (*GetStaticByteField)(JNIEnv*, jclass, jfieldID) ;",
      "592": "jchar       (*GetStaticCharField)(JNIEnv*, jclass, jfieldID) ;",
      "596": "jshort      (*GetStaticShortField)(JNIEnv*, jclass, jfieldID) ;",
      "600": "jint        (*GetStaticIntField)(JNIEnv*, jclass, jfieldID) ;",
      "604": "jlong       (*GetStaticLongField)(JNIEnv*, jclass, jfieldID) ;",
      "608": "jfloat      (*GetStaticFloatField)(JNIEnv*, jclass, jfieldID) ;",
      "612": "jdouble     (*GetStaticDoubleField)(JNIEnv*, jclass, jfieldID) ;",
      "616": ("void        (*SetStaticObjectField)"
              "(JNIEnv*, jclass, jfieldID, jobject) ;"),
      "620": ("void        (*SetStaticBooleanField)"
              "(JNIEnv*, jclass, jfieldID, jboolean) ;"),
      "624": ("void        (*SetStaticByteField)"
              "(JNIEnv*, jclass, jfieldID, jbyte) ;"),
      "628": ("void        (*SetStaticCharField)"
              "(JNIEnv*, jclass, jfieldID, jchar) ;"),
      "632": ("void        (*SetStaticShortField)"
              "(JNIEnv*, jclass, jfieldID, jshort) ;"),
      "636": ("void        (*SetStaticIntField)"
              "(JNIEnv*, jclass, jfieldID, jint) ;"),
      "640": ("void        (*SetStaticLongField)"
              "(JNIEnv*, jclass, jfieldID, jlong) ;"),
      "644": ("void        (*SetStaticFloatField)"
              "(JNIEnv*, jclass, jfieldID, jfloat) ;"),
      "648": ("void        (*SetStaticDoubleField)"
              "(JNIEnv*, jclass, jfieldID, jdouble) ;"),
      "652": "jstring     (*NewString)(JNIEnv*, const jchar*, jsize) ;",
      "656": "jsize       (*GetStringLength)(JNIEnv*, jstring) ;",
      "660": "const jchar* (*GetStringChars)(JNIEnv*, jstring, jboolean*) ;",
      "664": ("void        (*ReleaseStringChars)"
              "(JNIEnv*, jstring, const jchar*) ;"),
      "668": "jstring     (*NewStringUTF)(JNIEnv*, const char*) ;",
      "672": "jsize       (*GetStringUTFLength)(JNIEnv*, jstring) ;",
      "676": "const char* (*GetStringUTFChars)(JNIEnv*, jstring, jboolean*) ;",
      "680": ("void        (*ReleaseStringUTFChars)"
              "(JNIEnv*, jstring, const char*) ;"),
      "684": "jsize       (*GetArrayLength)(JNIEnv*, jarray) ;",
      "688": ("jobjectArray (*NewObjectArray)"
              "(JNIEnv*, jsize, jclass, jobject) ;"),
      "692": ("jobject     (*GetObjectArrayElement)"
              "(JNIEnv*, jobjectArray, jsize) ;"),
      "696": ("void        (*SetObjectArrayElement)"
              "(JNIEnv*, jobjectArray, jsize, jobject) ;"),
      "700": "jbooleanArray (*NewBooleanArray)(JNIEnv*, jsize) ;",
      "704": "jbyteArray    (*NewByteArray)(JNIEnv*, jsize) ;",
      "708": "jcharArray    (*NewCharArray)(JNIEnv*, jsize) ;",
      "712": "jshortArray   (*NewShortArray)(JNIEnv*, jsize) ;",
      "716": "jintArray     (*NewIntArray)(JNIEnv*, jsize) ;",
      "720": "jlongArray    (*NewLongArray)(JNIEnv*, jsize) ;",
      "724": "jfloatArray   (*NewFloatArray)(JNIEnv*, jsize) ;",
      "728": "jdoubleArray  (*NewDoubleArray)(JNIEnv*, jsize) ;",
      "732": ("jboolean*   (*GetBooleanArrayElements)"
              "(JNIEnv*, jbooleanArray, jboolean*) ;"),
      "736": ("jbyte*      (*GetByteArrayElements)"
              "(JNIEnv*, jbyteArray, jboolean*) ;"),
      "740": ("jchar*      (*GetCharArrayElements)"
              "(JNIEnv*, jcharArray, jboolean*) ;"),
      "744": ("jshort*     (*GetShortArrayElements)"
              "(JNIEnv*, jshortArray, jboolean*) ;"),
      "748": ("jint*       (*GetIntArrayElements)"
              "(JNIEnv*, jintArray, jboolean*) ;"),
      "752": ("jlong*      (*GetLongArrayElements)"
              "(JNIEnv*, jlongArray, jboolean*) ;"),
      "756": ("jfloat*     (*GetFloatArrayElements)"
              "(JNIEnv*, jfloatArray, jboolean*) ;"),
      "760": ("jdouble*    (*GetDoubleArrayElements)"
              "(JNIEnv*, jdoubleArray, jboolean*) ;"),
      "764": ("void        (*ReleaseBooleanArrayElements)"
              "(JNIEnv*, jbooleanArray, jboolean*, jint) ;"),
      "768": ("void        (*ReleaseByteArrayElements)"
              "(JNIEnv*, jbyteArray, jbyte*, jint) ;"),
      "772": ("void        (*ReleaseCharArrayElements)"
              "(JNIEnv*, jcharArray, jchar*, jint) ;"),
      "776": ("void        (*ReleaseShortArrayElements)"
              "(JNIEnv*, jshortArray, jshort*, jint) ;"),
      "780": ("void        (*ReleaseIntArrayElements)"
              "(JNIEnv*, jintArray, jint*, jint) ;"),
      "784": ("void        (*ReleaseLongArrayElements)"
              "(JNIEnv*, jlongArray, jlong*, jint) ;"),
      "788": ("void        (*ReleaseFloatArrayElements)"
              "(JNIEnv*, jfloatArray, jfloat*, jint) ;"),
      "792": ("void        (*ReleaseDoubleArrayElements)"
              "(JNIEnv*, jdoubleArray, jdouble*, jint) ;"),
      "796": ("void        (*GetBooleanArrayRegion)"
              "(JNIEnv*, jbooleanArray, jsize, jsize, jboolean*) ;"),
      "800": ("void        (*GetByteArrayRegion)"
              "(JNIEnv*, jbyteArray, jsize, jsize, jbyte*) ;"),
      "804": ("void        (*GetCharArrayRegion)"
              "(JNIEnv*, jcharArray, jsize, jsize, jchar*) ;"),
      "808": ("void        (*GetShortArrayRegion)"
              "(JNIEnv*, jshortArray, jsize, jsize, jshort*) ;"),
      "812": ("void        (*GetIntArrayRegion)"
              "(JNIEnv*, jintArray, jsize, jsize, jint*) ;"),
      "816": ("void        (*GetLongArrayRegion)"
              "(JNIEnv*, jlongArray, jsize, jsize, jlong*) ;"),
      "820": ("void        (*GetFloatArrayRegion)"
              "(JNIEnv*, jfloatArray, jsize, jsize, jfloat*) ;"),
      "824": ("void        (*GetDoubleArrayRegion)"
              "(JNIEnv*, jdoubleArray, jsize, jsize, jdouble*) ;"),
      "828": ("void        (*SetBooleanArrayRegion)"
              "(JNIEnv*, jbooleanArray, jsize, jsize, const jboolean*) ;"),
      "832": ("void        (*SetByteArrayRegion)"
              "(JNIEnv*, jbyteArray, jsize, jsize, const jbyte*) ;"),
      "836": ("void        (*SetCharArrayRegion)"
              "(JNIEnv*, jcharArray, jsize, jsize, const jchar*) ;"),
      "840": ("void        (*SetShortArrayRegion)"
              "(JNIEnv*, jshortArray, jsize, jsize, const jshort*) ;"),
      "844": ("void        (*SetIntArrayRegion)"
              "(JNIEnv*, jintArray, jsize, jsize, const jint*) ;"),
      "848": ("void        (*SetLongArrayRegion)"
              "(JNIEnv*, jlongArray, jsize, jsize, const jlong*) ;"),
      "852": ("void        (*SetFloatArrayRegion)"
              "(JNIEnv*, jfloatArray, jsize, jsize, const jfloat*) ;"),
      "856": ("void        (*SetDoubleArrayRegion)"
              "(JNIEnv*, jdoubleArray, jsize, jsize, const jdouble*) ;"),
      "860": ("jint        (*RegisterNatives)"
              "(JNIEnv*, jclass, const JNINativeMethod*, jint) ;"),
      "864": ("jint        (*UnregisterNatives)"
              "(JNIEnv*, jclass) ;"),
      "868": "jint        (*MonitorEnter)(JNIEnv*, jobject) ;",
      "872": "jint        (*MonitorExit)(JNIEnv*, jobject) ;",
      "876": "jint        (*GetJavaVM)(JNIEnv*, JavaVM**) ;",
      "880": ("void        (*GetStringRegion)"
              "(JNIEnv*, jstring, jsize, jsize, jchar*) ;"),
      "884": ("void        (*GetStringUTFRegion)"
              "(JNIEnv*, jstring, jsize, jsize, char*) ;"),
      "888": ("void*       (*GetPrimitiveArrayCritical)"
              "(JNIEnv*, jarray, jboolean*) ;"),
      "892": ("void        (*ReleasePrimitiveArrayCritical)"
              "(JNIEnv*, jarray, void*, jint) ;"),
      "896": ("const jchar* (*GetStringCritical)"
              "(JNIEnv*, jstring, jboolean*) ;"),
      "900": ("void        (*ReleaseStringCritical)"
              "(JNIEnv*, jstring, const jchar*) ;"),
      "904": "jweak       (*NewWeakGlobalRef)(JNIEnv*, jobject) ;",
      "908": "void        (*DeleteWeakGlobalRef)(JNIEnv*, jweak) ;",
      "912": "jboolean    (*ExceptionCheck)(JNIEnv*) ;",
      "916": "jobject     (*NewDirectByteBuffer)(JNIEnv*, void*, jlong) ;",
      "920": "void*       (*GetDirectBufferAddress)(JNIEnv*, jobject) ;",
      "924": "jlong       (*GetDirectBufferCapacity)(JNIEnv*, jobject) ;",
      "928": "jobjectRefType (*GetObjectRefType)(JNIEnv*, jobject) ;"}
  return jnienv
791 | 
792 | 
def run_test():
  """Print a separator, then run every regex smoke check in order.

  Each check prints the match object (or None) next to a label; nothing is
  asserted, so the output has to be inspected by eye.
  """
  print("*"*20)
  checks = (
      test_regex_push,
      test_regex_ldr_jnie,
      test_regex_opt_add,
      test_regex_opt_mov,
      test_regex_opt_lsl,
      test_regex_mov,
      test_regex_ldr_mtd1,
      test_regex_ldr_mtd14,
      test_regex_ldr_mtd2,
      test_regex_ldr_mtd3,
      test_regex_blx,
      test_regex_pop,
  )
  for check in checks:
    check()
808 | 
809 | 
def test_regex_push():
  """Print REGEX_PUSH results for two PUSH samples and one POP sample."""
  samples = (
      ("PUSH    {R4, LR}", "Should pass push"),
      ("PUSH    {R3-R5,LR}", "Should pass push"),
      ("POP    {R14,LR}", "Should Fail"),
  )
  for text, label in samples:
    print(REGEX_PUSH.match(text), label)
815 | 
816 | 
def test_regex_ldr_jnie():
  """Print the REGEX_LDR_JNIE result for a register-indirect LDR sample."""
  outcome = REGEX_LDR_JNIE.match("LDR     R4, [R0]")
  print(outcome, "Should pass jni")
820 | 
821 | 
def test_regex_opt_add():
  """Print the REGEX_OPT_ADD result for an ADDS-with-immediate sample."""
  outcome = REGEX_OPT_ADD.match("ADDS    R4, #0xFC")
  print(outcome, "Should pass adds")
825 | 
826 | 
def test_regex_opt_mov():
  """Print the REGEX_OPT_MOV result for a MOVS-with-immediate sample."""
  outcome = REGEX_OPT_MOV.match("MOVS    R3, #0x17C")
  print(outcome, "Should pass movs")
830 | 
831 | 
def test_regex_opt_lsl():
  """Print REGEX_OPT_LSL results for an LSL and an LSLS sample."""
  samples = (
      ("LSL R7, R7, #2", "pass lsl"),
      ("LSLS R7, R7, #2", "pass lsls"),
  )
  for text, label in samples:
    print(REGEX_OPT_LSL.match(text), label)
836 | 
837 | 
def test_regex_mov():
  """Print the REGEX_MOV result for a register-to-register MOV sample."""
  outcome = REGEX_MOV.match("MOV     R12, R3")
  print(outcome, "Should pass mov")
841 | 
842 | 
def test_regex_ldr_mtd1():
  """Print the REGEX_LDR_MTD1 result for an LDR with a decimal offset."""
  outcome = REGEX_LDR_MTD1.match("LDR     R4, [R4,#45]")
  print(outcome, "Should pass mtd1")
846 | 
847 | 
def test_regex_ldr_mtd14():
  """Print the REGEX_LDR_MTD4 result for an LDR with a hex offset."""
  outcome = REGEX_LDR_MTD4.match("LDR     R4, [R4,#0x7C]")
  print(outcome, "Should pass mtd4")
851 | 
852 | 
def test_regex_ldr_mtd2():
  """Print the REGEX_LDR_MTD2 result for an LDR with a register offset."""
  outcome = REGEX_LDR_MTD2.match("LDR     R3, [R4,R3]")
  print(outcome, "Should pass mtd2")
856 | 
857 | 
def test_regex_ldr_mtd3():
  """Print the REGEX_LDR_MTD3 result for an LDR with no offset."""
  outcome = REGEX_LDR_MTD3.match("LDR     R3, [R4]")
  print(outcome, "Should pass mtd3")
861 | 
862 | 
def test_regex_blx():
  """Print the REGEX_BLX result for a BLX-through-register sample."""
  outcome = REGEX_BLX.match("BLX     R4")
  print(outcome, "Should pass blx")
866 | 
867 | 
def test_regex_pop():
  """Print the REGEX_POP result for a POP sample."""
  outcome = REGEX_POP.match("POP     {R4,PC}")
  print(outcome, "Should pass pop")
871 | 
872 | 
def main():
  """Script entry point: run extract_routines() and nothing else.

  The regex smoke checks in run_test() are not invoked from here.
  """
  extract_routines()
876 | 
# Start the annotation pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()
879 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Maddie Stone
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # IDAPython Embedded Toolkit
  2 | The IDAPython Embedded Toolkit is a set of scripts to automate many of the steps associated
  3 | with statically analyzing, or reverse engineering, the firmware of embedded devices in IDA Pro. 
  4 | 
  5 | ## Presentations
  6 | The IDAPython Embedded Toolkit has been presented at the following venues:
  7 | * DerbyCon "IDAPython: The Wonder Woman of Embedded Device Reversing" -- September 2017<br/>
  8 |     Recording of Talk: http://www.irongeek.com/i.php?page=videos/derbycon7/t215-idapython-the-wonder-woman-of-embedded-device-reversing-maddie-stone <br/>
  9 |     Slides and Demo Videos from Presentation are available in the [presentations](presentations/) folder
 10 | * RECON Montreal "The Life-Changing Magic of IDAPython: Embedded Device Edition" -- June 2017 <br/>
 11 |     Recording of Talk: https://recon.cx/media-archive/2017/mtl/recon2017-mtl-20-maddie-stone-The-Life-Changing-Magic-of-IDAPython-Embedded-Device-Edition.mp4
 12 |     Slides and Demo Videos from Presentation are available in the [presentations](presentations/) folder
 13 | 
 14 | # Getting Started
 15 | To understand how and why the IDAPython Embedded Toolkit was created, check out the slides and recording from 
 16 | the DerbyCon or RECON Presentations. 
 17 | 
 18 | The IDAPython Embedded Toolkit is a set of IDAPython scripts written to be processor/architecture-agnostic
 19 | and automate the triage, analysis, and annotation processes associated with reversing the firmware 
 20 | image of an embedded device. The currently available scripts: 
 21 | * TRIAGE<a name="triage"></a>
 22 |     * [Define Code & Functions](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/define_code_functions.py)
 23 |     * [Define Data](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/define_data_as_types.py)
 24 |     * [Define Strings](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/make_strings.py)
 25 | * ANALYSIS<a name="analysis"></a>
 26 |     * [Calculate Indirect Offset Memory Accesses](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/data_offset_calc.py)
 27 |     * [Find Memory Accesses](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/find_mem_accesses.py)
 28 | * ANNOTATE<a name="annotate"></a>
 29 |     * [Identify GPIO Usage](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/identify_port_use_locations.py)
 30 |     * [Identify "Dead" Code](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/label_funcs_with_no_xrefs.py)
 31 |     * [Trace Operand Use](https://github.com/maddiestone/IDAPythonEmbeddedToolkit/blob/master/identify_operand_locations.py)
 32 | 
 33 | Each script is written to be processor/architecture-agnostic, but in some scripts, this requires a regular expression
 34 | to address each architecture's specific-syntax. Before running the scripts, verify that the architecture of the firmware 
 35 | image to be analyzed is supported in the script. Please see [Architecture Agnostic Structure of Scripts](#archagnostic) for more details.
 36 | The IDAPython Embedded Toolkit becomes more powerful as more processors are supported, so please submit a pull request
 37 | as you add new processors.
 38 | 
 39 | To run a script, you must have IDA Pro 6.95 installed. Open the IDA database on which you'd like to run a script and then
 40 | select File > Script File... and select the script to run.
 41 | 
 42 | ## Versioning
 43 | Currently, the IDAPython Embedded Toolkit has only been tested on IDA Pro 6.95. Testing on IDA 
 44 | Pro 7.0 is currently in process.
 45 |     
 46 | ## Installation/ Usage
 47 | If you completed the default installation for IDA Pro, then IDAPython should be installed.
 48 | You can verify by checking your IDA directory for a Python/ folder. If that is there, IDAPython
 49 | is installed.
 50 | 
 51 | Otherwise, install IDAPython per: https://github.com/idapython/src
 52 | 
 53 | Once IDAPython is installed, the IDAPython Embedded Toolkit scripts may be run by opening an
 54 | IDA database and selecting File > Script file... from the upper menu. Then, select the script to run.
 55 | Each script is run individually by selecting it through this process.
 56 | 
 57 | ## Architecture Agnostic Structure of Scripts<a name="archagnostic"></a>
 58 | The scripts in the IDAPython Embedded Toolkit are written to be architecture and processor-agnostic. 
 59 | This is done by finding the common structure and processes that are not dependent on architecture-specific syntax.
 60 | For the scripts that require processor-specific syntax (for example: Special Function Register Names or Instruction Syntax),
 61 | regular expressions are used for each architecture. For more information on how to write regular expressions in Python:
 62 | https://docs.python.org/2/library/re.html 
 63 | 
 64 | Thanks to the contribution by @tmr232, each script auto-identifies the architecture in use and selects the correct set of 
 65 | regular expressions using the IDAPython function: 
 66 | `processor_name = idaapi.get_inf_structure().procName `
 67 | 
 68 | ### Add a Processor to a Script
 69 | If the processor-in-use does not have regular expressions defined within the script, then the script will exit with an 
 70 | "Unsupported Processor Type" error. To make the script work, you simply need to add the required regular expression. To do this:
 71 | 1. Determine IDA's string representation of the processor. In the bottom console bar, type the following 
 72 | command as shown in the image below: `idaapi.get_inf_structure().procName` The command will output a string. That string is the processor name. <br/><br/>
 73 | ![Image of Command to Get Processor](images/getProcessorScreenShot.png)
 74 | <br/><br/>
 75 | 2. Add an elif statement to the script with the processor name output in Step 1.
 76 | 3. Copy the regular expression assignments from one of the other processors and customize them for the new processor being added.
 77 | The Python documentation for regular expressions is [here.](https://docs.python.org/2/library/re.html) Each script that utilizes 
 78 | processor-specific regular expressions describes what the regular expression is describing in the header of the script. 
 79 | 
 80 | Example of the Regular Expressions for Processor-Specific Syntax in define_code_functions.py
 81 | ```
 82 | ################### USER DEFINED VALUES ###################
 83 | # Enter a regular expression for how this architecture usually 
 84 | # begins and ends functions. If the architecture does not 
 85 | # dictate how to start or end a function use r".*" to allow
 86 | # for any instruction.
 87 | #
 88 | processor_name = idaapi.get_inf_structure().procName
 89 | if processor_name == '8051':	# 8051 Architecture Prologue and Epilogue
 90 | 	smart_prolog = re.compile(r".*")
 91 | 	smart_epilog = re.compile(r"reti{0,1}")
 92 | elif processor_name == 'PIC18Cxx':	# PIC18 Architecture Prologue and Epilogue	
 93 | 	smart_prolog = re.compile(r".*")	
 94 | 	smart_epilog = re.compile(r"return  0")
 95 | elif processor_name == 'm32r':	# Mitsubishi M32R Architecture Prologue and Epilogue
 96 | 	smart_prolog = re.compile(r"push +lr")
 97 | 	smart_epilog = re.compile(r"jmp +lr.*")
 98 | elif processor_name == 'TMS32028':	# Texas Instruments TMS320C28x	
 99 | 	smart_prolog = re.compile(r".*")	
100 | 	smart_epilog = re.compile(r"lretr")
101 | elif processor_name == 'AVR':	# AVR	
102 | 	smart_prolog = re.compile(r"push +r")	
103 | 	smart_epilog = re.compile(r"reti{0,1}")
104 | else:	
105 | 	print "[define_code_functions.py] UNSUPPORTED PROCESSOR. Processor = %s is unsupported. Exiting." % processor_name	
106 | 	raise NotImplementedError('Unsupported Processor Type.')
107 | ```
108 | 
109 | ## Scripts in the IDAPython Embedded Toolkit
110 | * **data_offset_calc.py -- Resolve Indirect Offset Memory Accesses**
111 | Resolves the references to indirect offsets of a variable, register, or memory location
112 | whose value is known. Changes the display of the operand in the instruction (OpAlt function),
113 | creates a data cross reference (add_dref), and creates a comment of the resolved address
114 | (MakeComm). The user needs to define the following:
115 | 		offset_var_str: The string representation of the variable, register, or memory
116 | 							location to be replaced by the resolved value
117 | 		offset_var_value:	The value of the variable defined in offset_var_str
118 | 		reg_ex_indirect:	A regular expression of how indirect offset accesses to the variable are represented
119 | 		regex_immediate:	A regular expression of how the immediate offset value is represented
120 | 		new_opnd_display:	A string representation of how the calculated and resolved 
121 | 							value should be displayed as the operand in the instruction
122 | 
123 | For example, let's say we have firmware where fp = 0x808000 and the majority of memory accesses are as 
124 | offsets from fp. This script will calculate that the instruction is reading 0x80C114, create a cross-reference
125 | to that location, and replace the operand in the instruction with this calculated value as shown below.
126 | ```
127 | ld      R1, @(0x4114, fp)   -->     ld      R1, @[0x80C114]
128 | add3    R10, fp, 0x4147     -->     add3    R10, fp, 0x4147;    @[0x80C147]
129 | ```
130 | 
131 | * **define_code_functions.py -- Define Code and Functions**
132 | This script scans an area of the database from the user input "start address" to "end address"
133 | defining the bytes as code and attempting to define functions from that code. The script
134 | is architecture agnostic by having the user define a regular expression for the "function prologue"
135 | and the "function epilogue" for the architecture being analyzed.
136 | 
137 | * **define_data_as_types.py -- Define a Block as Data**
138 | Defines a segment of addresses as the user-specified data type (byte, word, or double word).
139 | The byte length for each of these types is architecture dependent, but generally:
140 | 	1 byte  = Byte
141 |       2 bytes = Word
142 | 	4 bytes = Double Word
143 | This script will undefine all bytes in the range first, which means if you previously had
144 | code or strings defined in the area, they will be overwritten as data.
145 | 
146 | * **make_strings.py -- Define a Block as Strings**
147 | This script is used to search for and declare blocks of "Unexplored" bytes as ASCII strings. 
148 | The user inserts the starting and ending address of the areas to be analyzed. The script then
149 | checks if each byte is an ASCII character value and ends with a defined "ending string character."
150 | In this example, the ending string characters are 0xD, 0xA, and 0x00. The script only checks 
151 | "undefined or unexplored" values in the database. For example, if a string is currently 
152 | defined as code, it will not identify this string. This is to protect previously defined values. 
153 | 
154 | * **label_funcs_with_no_xrefs.py -- Label All Functions without Cross-References/ Valid Code Paths**
155 | This script identifies what could be "dead code". It checks each function for cross-references to 
156 | the function in question. If there are none, it adds the prefix "noXrefs_" to the function name. This
157 | is very efficient for architectures that do not call functions indirectly.
158 | 
159 | * **identify_port_use_locations.py -- Find All CPU Port Usage**
160 | Identifies all code using the CPU's ports and records the address and instruction
161 | in the identified file. There is the option to annotate each function that accesses a CPU port/pin
162 | with a prefix stating that it's using the specific port/pin.
163 | 
164 | * **find_mem_accesses.py -- Identify Memory Accesses**
165 | Identifies the memory accesses used in the code. When a memory access is identified based
166 | on the user contributed regular expression, this script completes three different actions
167 | to help with the static analysis:
168 | 	1. 	A cross reference is created between the instruction and the memory address. This 
169 | 		will fail if the address doesn't currently exist because the segment was not created.
170 |  	2. 	The value at the memory address is retrieved and added as a comment to the 
171 | 		referencing instruction.
172 | 	3. 	A dictionary of all of the memory addresses accessed and the referencing instructions'
173 | 		addresses are printed and saved to a file.
174 | * **identify_operand_locations.py -- Identify Instructions that Reference an Operand**
175 | Identifies the instructions in the range start_addr to end_addr that reference the 
176 | input operand (regex_operand). The addresses of all instructions where the operand is 
177 | found are printed to the IDA output window and saved to a file.
178 | 
179 | ## Copyright
180 | Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
181 | All rights reserved.
182 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 
183 | software and associated documentation files (the "Software"), to deal in the Software 
184 | without restriction, including without limitation the rights to use, copy, modify, 
185 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
186 | permit persons to whom the Software is furnished to do so.
187 | 
188 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
189 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
190 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
191 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
192 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
193 | OR OTHER DEALINGS IN THE SOFTWARE.
194 | 


--------------------------------------------------------------------------------
/data_offset_calc.py:
--------------------------------------------------------------------------------
  1 | ##############################################################################################
  2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
  3 | # All rights reserved.
  4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
  5 | # software and associated documentation files (the "Software"), to deal in the Software 
  6 | # without restriction, including without limitation the rights to use, copy, modify, 
  7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
  8 | # permit persons to whom the Software is furnished to do so.
  9 | #
 10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
 12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
 13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 15 | # OR OTHER DEALINGS IN THE SOFTWARE.
 16 | 
 17 | ##############################################################################################
 18 | # data_offset_calc.py
 19 | # Resolves the references to indirect offsets of a variable, register, or memory location
 20 | # whose value is known. Changes the display of the operand in the instruction (OpAlt function),
 21 | # creates a data cross reference (add_dref), and creates a comment of the resolved address
 22 | # (MakeComm). The user needs to define the following:
 23 | # 			offset_var_str: The string representation of the variable, register, or memory
 24 | #								location to be replaced by the resolved value
 25 | #			offset_var_value:	The value of the variable defined in offset_var_str
 26 | #			reg_ex_indirect:	A regular expression of how indirect offset accesses to the variable are represented
 27 | #			regex_immediate:	A regular expression of how the immediate offset value is represented
 28 | #			new_opnd_display:	A string representation of how the calculated and resolved 
 29 | #								value should be displayed as the operand in the instruction
 30 | #
 31 | # Inputs: 	start_addr: 	Start address of the range to be analyzed
 32 | #			end_addr:		End address of the range to be analyzed
 33 | #
 34 | ##############################################################################################
 35 | import re
 36 | 
 37 | 
 38 | ################### USER DEFINED VALUES ###################
 39 | # String of the variable/register/location used as the indirect variable
 40 | offset_var_str = "fp"
 41 | 
 42 | # The defined offset_var_str's value			
 43 | offset_var_value = 0x808000		
 44 | 
 45 | # Regular expression for out offset_var_str is referenced indirectly in the IDA Disassembly
 46 | # @(-0x(1-8 hex chars), fp )
 47 | reg_ex_indirect = re.compile(r"@\(-?0x[0-9A-Fa-f]{1,8}, "+ offset_var_str +"\)")
 48 | 
 49 | # Regular expression for how immediate values are shown in the indirect reference
 50 | # For this example, it's 0x1044, but some architectures would show that as 1044h
 51 | regex_immediate = re.compile(r"0x[0-9A-Fa-f]{1,8}")
 52 | 
 53 | # String expression for how the newly calculated instruction should be displayed within the instruction
 54 | new_opnd_display =  '@[0x%x]' 
 55 | 
 56 | # OPTIONAL ---- EXAMPLE FOR ADDING OTHER INSTRUCTIONS TO THE PROCESSING
 57 | # If you'd like to add other instructions to be processed for resolving indirect offset accesses,
 58 | # update the regular expression here and use it as shown in the "else" block below
 59 | reg_ex_add3 = re.compile(r"add3    \w\w, fp, #-?0x[0-9A-Fa-f]{1,8}")
 60 | #############################################################
 61 | 
 62 | start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be analyzed.")
 63 | end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be analyzed.")
 64 | 
 65 | if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
 66 | 	print "[data_offset_calc.py] STARTING. Looking for indirect accesses across 0x%x to 0x%x" % (start_addr, end_addr)
 67 | 	curr_addr = start_addr;
 68 | 	while curr_addr < end_addr:
 69 | 		operand = GetOpnd(curr_addr, 1)							# Operand = 2nd Operand in the Instruction at curr_addr
 70 | 		if reg_ex_indirect.match(operand):
 71 | 			print ('[data_offset_calc.py] 0x%x Operand: ' % curr_addr) + operand
 72 | 			
 73 | 			# This checks if there are any immediate values also in the 2nd operand with the variable. For example, mov R3, @(0x10, fp)
 74 | 			offset = re.findall(regex_immediate, operand) 		
 75 | 			if (offset):
 76 | 				print "[data_offset_calc.py] 0x%x Offset: 0x%x" % (curr_addr, int(offset[0],16))
 77 | 		
 78 | 				# Check if Immediate Operand is Neg or Pos
 79 | 				if '-' in operand :
 80 | 					new_opnd = offset_var_value - int(offset[0], 16)
 81 | 				else:
 82 | 					new_opnd = offset_var_value + int(offset[0], 16)
 83 | 				
 84 | 				print ("[data_offset_calc.py] 0x%x: Offset + " + offset_var_str + " = 0x%0x") % (curr_addr, new_opnd)
 85 | 				OpAlt(curr_addr, 1, new_opnd_display % new_opnd)	# Changes Display of Instruction
 86 | 				result = add_dref(curr_addr, new_opnd, dr_T)			# Create Data Ref -- Using dref_T because not checking if read or write	
 87 | 				print ("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: " % (curr_addr, new_opnd)) + str(result)
 88 | 			# Using dr_O (O as in Offset, not 0) because we are not check if this a "write" or "read"
 89 | 			else:													
 90 | 				print "[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr													
 91 | 
 92 | 	#####################################################################################
 93 | 	# This block is optional but shows how to add additional regular expressions for other instructions
 94 | 	# you'd like to match besides the general indirect offset acceses. For M32R we are also matching
 95 | 	# the add3 instruction that take the form "add3  Reg, fp, 0xNUM"
 96 | 
 97 | 		else:
 98 | 			instruct = GetDisasm(curr_addr)
 99 | 			if reg_ex_add3.match(instruct):
100 | 				print ('[data_offset_calc.py] 0x%08x Instruct: ' % curr_addr) + instruct
101 | 				immed_opnd = GetOpnd(curr_addr, 2)				# Getting the 3rd Operand Based on the reg_ex_add3 defined above
102 | 				offset = re.findall(regex_immediate, immed_opnd);
103 | 				if offset:
104 | 					if '-' in immed_opnd:
105 | 						new_opnd = offset_var_value - int(offset[0], 16)
106 | 					else:
107 | 						new_opnd = offset_var_value + int(offset[0], 16)
108 | 					print '[data_offset_calc.py] 0x%x: Offset + fp = 0x%08x' % (curr_addr, new_opnd)
109 | 					MakeComm(curr_addr, '0x%08x' % new_opnd) 		# Add comment with new operand instead of overwriting instruction as done above
110 | 					result = add_dref(curr_addr, new_opnd, dr_T) 	# Creates Data XREF from Instruct to Calculated Val
111 | 					print ("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: " % (curr_addr, new_opnd)) + str(result)
112 | 				else:
113 | 					print "[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr
114 | 	########################################################################################
115 | 		prev = curr_addr
116 | 		curr_addr = NextHead(curr_addr, 0xFFFFF)
117 | 		if (curr_addr == BADADDR):
118 | 			print "[data_offset_calc.py] EXITING."
119 | 			break 
120 | else:
121 | 	print "[data_offset_calc.py] QUITTING. Invalid values entered for starting and ending addresses."
122 | 


--------------------------------------------------------------------------------
/define_code_functions.py:
--------------------------------------------------------------------------------
  1 | ##############################################################################################
  2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
  3 | # All rights reserved.
  4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
  5 | # software and associated documentation files (the "Software"), to deal in the Software 
  6 | # without restriction, including without limitation the rights to use, copy, modify, 
  7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
  8 | # permit persons to whom the Software is furnished to do so.
  9 | #
 10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
 12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
 13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 15 | # OR OTHER DEALINGS IN THE SOFTWARE.
 16 | 
 17 | ##############################################################################################
 18 | # define_code_functions.py
 19 | # Attempts to define the bytes in the user-entered address range as code and then as functions
 20 | # based on the user-defined smart_prolog and smart_epilog regular expressions for that architecture.
 21 | #
 22 | # Inputs: 	start_addr: 	Start address of the range to define as code and functions
 23 | #			end_addr:		End address of the range to define as code and functions
 24 | #			do_make_unk:	Whether to make the whole range UNKNOWN (undefined) first
 25 | #
 26 | ##############################################################################################
 27 | import re
 28 | import idaapi
 29 | 
 30 | ################### USER DEFINED VALUES ###################
 31 | # Enter a regular expression for how this architecture usually begins and ends functions.
 32 | # If the architecture does not dictate how to start or end a function use r".*" to allow
 33 | # for any instruction
 34 | 
 35 | processor_name = idaapi.get_inf_structure().procName
 36 | 
 37 | 
 38 | if processor_name == '8051':
 39 | 	# 8051 Architecture Prologue and Epilogue
 40 | 	smart_prolog = re.compile(r".*")
 41 | 	smart_epilog = re.compile(r"reti{0,1}")
 42 | 
 43 | elif processor_name == 'PIC18Cxx':
 44 | 	# PIC18 Architecture Prologue and Epilogue
 45 | 	smart_prolog = re.compile(r".*")
 46 | 	smart_epilog = re.compile(r"return  0")
 47 | 
 48 | elif processor_name == 'm32r':
 49 | 	# Mitsubishi M32R Architecutre Prologue and Epilogue
 50 | 	smart_prolog = re.compile(r"push +lr")
 51 | 	smart_epilog = re.compile(r"jmp +lr.*")
 52 | 
 53 | elif processor_name == 'TMS32028':
 54 | 	# Texas Instruments TMS320C28x
 55 | 	smart_prolog = re.compile(r".*")
 56 | 	smart_epilog = re.compile(r"lretr")
 57 | 
 58 | elif processor_name == 'AVR':
 59 | 	# AVR
 60 | 	smart_prolog = re.compile(r"push +r")
 61 | 	smart_epilog = re.compile(r"reti{0,1}")
 62 | 
 63 | else:
 64 | 	print "[define_code_functions.py] UNSUPPORTED PROCESSOR. Processor = %s is unsupported. Exiting." % processor_name
 65 | 	raise NotImplementedError('Unsupported Processor Type.')
 66 | 
 67 | print "[define_code_functions.py] Processor = %s -- Reg Expressions Selected. Proceeding." % processor_name
 68 | ############################################################
 69 | 
 70 | start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.")
 71 | end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.")
 72 | 
 73 | if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR)):
 74 | 	do_make_unk = AskYN(0, "Do you want to make all of the code block UNKNOWN first?")
 75 | 	if (do_make_unk == 1):
 76 | 		curr_addr = start_addr
 77 | 		while (curr_addr < end_addr):
 78 | 			MakeUnkn(curr_addr,idc.DOUNK_SIMPLE)
 79 | 			curr_addr += 1
 80 | 	if (do_make_unk != -1):
 81 | 		curr_addr = start_addr
 82 | 		print "[make_code_functions.py] Running script to define code and functions on 0x%x to 0x%x" % (start_addr, end_addr)
 83 | 		while (curr_addr < end_addr):
 84 | 			next_unexplored = FindUnexplored(curr_addr, idaapi.BIN_SEARCH_FORWARD)
 85 | 			MakeCode(next_unexplored)		# We don't care whether it succeeds or fails so not storing retval
 86 | 			curr_addr = next_unexplored
 87 | 
 88 | 		# Finished attempting to make all unexplored bytes into code
 89 | 		# Now, attempt to create functions of all code not currently in a function
 90 | 		print "[make_code_functions.py] Completed attempting to define bytes as code. Now trying to define functions."
 91 | 		curr_addr = start_addr
 92 | 		while (curr_addr != BADADDR and curr_addr < end_addr):
 93 | 			if (isCode(GetFlags(curr_addr)) and GetFunctionAttr(curr_addr, FUNCATTR_START) == BADADDR):
 94 | 					#print "Function Stuffs 0x%0x" % curr_addr
 95 | 					if(smart_prolog.match(GetDisasm(curr_addr)) or smart_epilog.match(GetDisasm(PrevHead(curr_addr)))):
 96 | 						#print "Smart Prolog match"
 97 | 						if (MakeFunction(curr_addr) != 0):	
 98 | 							# MakeFunction(curr_addr) was successful so set curr_addr to next addr after the new function
 99 | 							curr_addr = GetFunctionAttr(curr_addr, FUNCATTR_END)	# Returns first address AFTER the end of the function
100 | 							continue
101 | 			curr_addr = NextHead(curr_addr)
102 | else:
103 | 	print "[make_code_functions.py] Quitting. Entered address values are not valid."
104 | 	
105 | 		
106 | 
107 |         
108 | 


--------------------------------------------------------------------------------
/define_data_as_types.py:
--------------------------------------------------------------------------------
 1 | ##############################################################################################
 2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
 3 | # All rights reserved.
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 5 | # software and associated documentation files (the "Software"), to deal in the Software 
 6 | # without restriction, including without limitation the rights to use, copy, modify, 
 7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 8 | # permit persons to whom the Software is furnished to do so.
 9 | #
10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
15 | # OR OTHER DEALINGS IN THE SOFTWARE.
16 | 
17 | ##############################################################################################
18 | # define_data_as_types.py
19 | # Defines a segment of addresses as the user-specified data type (byte, word, or double word).
20 | # The byte length for each of these types is architecture dependent, but generally:
21 | #		1 byte  = Byte
22 | #       2 bytes = Word
23 | #		4 bytes = Double Word
24 | # This script will undefine all bytes in the range first, which means if you previously had
25 | # code or strings defined in the area, they will be overwritten as data.
26 | #
27 | # Inputs: 	start_addr: 	Start address for segment to define as data
28 | #			end_addr:		End address for segment to define as data
29 | #			data_type:		Type of data to set segment to (dependent on architecture)
30 | #
31 | ##############################################################################################
32 | 
def define_as_data_by_size_for_block(start_addr, end_addr, data_size):
	"""Define consecutive data items of data_size bytes across an address range.

	Starting at start_addr and stepping by data_size while the current
	address is below end_addr, each address is passed to MakeByte (size 1),
	MakeWord (size 2) or MakeDword (size 4).

	start_addr: first address to define.
	end_addr:   exclusive upper bound for the addresses that are visited.
	data_size:  item size in bytes; only 1, 2 and 4 are accepted.

	Any other data_size raises a Warning() dialog and nothing is defined.
	An empty range (start_addr >= end_addr) returns without doing anything.
	"""
	if not (start_addr < end_addr):
		# Nothing to visit, so the size is never inspected.
		return

	if data_size not in (1, 2, 4):
		Warning("Invalid data_size. Breaking.")
		return

	# data_size never changes, so pick the matching "make" routine once.
	make_item = {1: MakeByte, 2: MakeWord, 4: MakeDword}[data_size]

	item_addr = start_addr
	while item_addr < end_addr:
		make_item(item_addr)
		item_addr += data_size
46 | 
# Ask the user for the address range. The results are checked against None
# and BADADDR below before the database is touched.
start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
	data_size = AskLong(1, "Enter the size of each data item to be defined in the address block.\nExample: '1' for byte, '2' for word, '4' for dword\nNote the exact implementation will be dependent on architecture.")
	if data_size in (1, 2, 4):
		print("[define_data_as_types.py] STARTING. start_addr: 0x%X, end_addr: 0x%X, data_size: %d" % (start_addr, end_addr, data_size))
		# Undefine the whole range first; any code or strings previously
		# defined there are replaced by the data items created below.
		MakeUnknown(start_addr, (end_addr - start_addr), DOUNK_SIMPLE)
		print("[define_data_as_types.py] Undefined all data between 0x%X and 0x%0X" % (start_addr, end_addr))
		print("[define_data_as_types.py] Defining all data as size " + str(data_size))
		define_as_data_by_size_for_block(start_addr, end_addr, data_size)
		print("[define_data_as_types.py] FINISHED.")
	else:
		# Interpolate the rejected value into the message. %s is used rather
		# than %d so a non-integer result from the prompt cannot raise here.
		Warning("[define_data_as_types.py] You entered a size of %s bytes. Please enter 1 (byte), 2 (short/word), 4(long, dword)" % data_size)

else:
	print("[define_data_as_types.py] ERROR. Please enter valid address values.")


--------------------------------------------------------------------------------
/find_mem_accesses.py:
--------------------------------------------------------------------------------
 1 | ##############################################################################################
 2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
 3 | # All rights reserved.
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 5 | # software and associated documentation files (the "Software"), to deal in the Software 
 6 | # without restriction, including without limitation the rights to use, copy, modify, 
 7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 8 | # permit persons to whom the Software is furnished to do so.
 9 | #
10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
15 | # OR OTHER DEALINGS IN THE SOFTWARE.
16 | 
17 | ##############################################################################################
18 | # find_mem_accesses.py
19 | # 
20 | # Identifies the memory accesses used in the code. When a memory access is identified based
21 | # on the user contributed regular expression, this script completes three different actions
22 | # to help with the static analysis:
23 | # 	1. 	A cross reference is created between the instruction and the memory address. This 
24 | #		will fail if the address doesn't currently exist because the segment was not created.
25 | # 	2. 	The value at the memory address is retrieved and added as a comment to the 
26 | #		referencing instruction.
27 | #	3. 	A dictionary of all of the memory addresses accessed and the referencing instructions'
28 | #		addresses are printed and saved to a file.
29 | # ** NOTE:If you are using a Harvard architecture, ensure you can distinguish between memory
30 | # spaces or comment out the cross-reference and value parts of this script.
31 | #
32 | # Inputs: 	start_addr: 	Start address of the code range to analyze
33 | #			end_addr:		End address of the code range to analyze
34 | #			file_name:		File to write the accesses to
35 | #
36 | ##############################################################################################
37 | import re
38 | 
39 | ################### USER DEFINED VALUES ###################
40 | # Enter a regular expression for the memory access instructions you'd like to identify. 
41 | # Also enter the index of the operand in the instruction so that it can be retrieved via
42 | # the GetOperandValue() function. 
43 | #
44 | 
# Memory-access patterns keyed by IDA processor name. Each entry is a pair:
#   (regular expression matched against the start of the disassembly line,
#    index of the operand passed to GetOperandValue() to obtain the address)
_mem_access_patterns = {
	# 8051: matches "mov DPTR, #..." lines; operand 1 is the value after '#'.
	"8051": (re.compile(r"mov +DPTR, #"), 1),
}

processor_name = idaapi.get_inf_structure().procName

if processor_name not in _mem_access_patterns:
	print("[find_mem_accesses.py] UNSUPPORTED PROCESSOR. Processor = %s is not supported. Exiting." % processor_name)
	raise NotImplementedError("Unsupported Processor Type")

regex_mem_instruct, operand_index = _mem_access_patterns[processor_name]

print("[find_mem_accesses.py] Processor = %s -- Reg Expressions Selected. Proceeding." % processor_name)
56 | 
57 | ############################################################
58 | 
59 | 
start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.")

default_fn = "memory_use_locations.txt"
filename = AskFile(1, default_fn, "Please choose the location to save the memory accesses file.")

# Maps each referenced memory address to the list of instruction addresses
# that reference it.
accesses_dict = {}

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
	curr_addr = start_addr
	while curr_addr < end_addr:
		if regex_mem_instruct.match(GetDisasm(curr_addr)):
			mem_addr = GetOperandValue(curr_addr, operand_index)
			print("[find_mem_accesses.py] Instruction Address: 0x%x Operand Address: 0x%0x" % (curr_addr, mem_addr))
			# Create a data cross-reference from the instruction to the address.
			if not add_dref(curr_addr, mem_addr, dr_T):
				# NOTE(review): accesses whose cross-reference could not be
				# created are not recorded in accesses_dict -- confirm intended.
				print("[find_mem_accesses.py] Could NOT create data cross-references.")
			else:
				# Read the value at the memory address and record it as a comment
				# on the referencing instruction. Defaults to a WORD (2 bytes);
				# change this or add other intelligence here as needed.
				value = Word(mem_addr)
				MakeComm(curr_addr, "@[0x%x] = 0x%x" % (mem_addr, value))
				accesses_dict.setdefault(mem_addr, []).append(curr_addr)
		curr_addr = NextHead(curr_addr)

	if filename:
		print("[find_mem_accesses.py] Finished searching range. Writing to file.")
		with open(filename, "w") as out_file:
			for key in sorted(accesses_dict):
				out_file.write("0x%0x: \n" % key)
				for ref in accesses_dict[key]:
					out_file.write("\t0x%0x \n" % ref)
	else:
		# No file was chosen: keep the cross-references and comments already
		# added instead of failing in open().
		print("[find_mem_accesses.py] Finished searching range. No output file selected; skipping file write.")
else:
	print("[find_mem_accesses.py] ERROR. Please enter valid addresses.")
97 | 									
98 | 			


--------------------------------------------------------------------------------
/identify_operand_locations.py:
--------------------------------------------------------------------------------
 1 | ##############################################################################################
 2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
 3 | # All rights reserved.
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 5 | # software and associated documentation files (the "Software"), to deal in the Software 
 6 | # without restriction, including without limitation the rights to use, copy, modify, 
 7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 8 | # permit persons to whom the Software is furnished to do so.
 9 | #
10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
15 | # OR OTHER DEALINGS IN THE SOFTWARE.
16 | 
17 | ##############################################################################################
18 | # identify_operand_locations.py
19 | # Identifies the instructions in the range start_addr to end_addr that reference the 
20 | # input operand (regex_operand). The addresses of all instructions where the operand is 
21 | # found are printed to the IDA output window and saved to a file.
22 | #
23 | # User-Defined Input: 	
24 | # ** Before use, edit the regex_operand regular expression to match how the operand of interest
25 | # is displayed in instructions.
26 | # ** If you do not want to search the entire database, change start_addr and end_addr.
27 | #
28 | ##############################################################################################
29 | 
# Imported here so the script does not depend on "re" already being present
# in the interpreter namespace.
import re

# UPDATE THIS VALUE FOR THE REFERENCES TO THE OPERAND YOU'RE LOOKING FOR
# Regex metacharacters such as '[', ']' and '+' must be escaped to be matched
# literally; this pattern matches the text "[ebp+arg_4]".
regex_operand = re.compile(r"\[ebp\+arg_4\]")

start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.")

default_fn = "operand_locations.txt"
filename = AskFile(1, default_fn, "Please choose the location to save the operand use locations file.")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
	# One "address: disassembly" record per instruction that uses the operand.
	operand_uses = []
	curr_addr = start_addr
	while curr_addr < end_addr:
		if isCode(GetFlags(curr_addr)):
			instruct = GetDisasm(curr_addr)
			if regex_operand.search(instruct):
				record = ("0x%08x: " % curr_addr) + instruct
				print(record)
				operand_uses.append(record)
		curr_addr = NextHead(curr_addr)

	if filename:
		with open(filename, "w") as out_file:
			for record in operand_uses:
				# One record per line so the file is readable.
				out_file.write(record + "\n")
	else:
		print("[identify_operand_locations.py] No output file selected; results were not saved.")
else:
	print("[identify_operand_locations.py] ERROR. Please enter valid addresses.")
48 | 			
49 | 			


--------------------------------------------------------------------------------
/identify_port_use_locations.py:
--------------------------------------------------------------------------------
 1 | ##############################################################################################
 2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
 3 | # All rights reserved.
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 5 | # software and associated documentation files (the "Software"), to deal in the Software 
 6 | # without restriction, including without limitation the rights to use, copy, modify, 
 7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 8 | # permit persons to whom the Software is furnished to do so.
 9 | #
10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
15 | # OR OTHER DEALINGS IN THE SOFTWARE.
16 | 
17 | ##############################################################################################
18 | # identify_port_use_locations.py
19 | # Identifies all code using the CPU's ports and records the address and instruction
20 | # in the identified file.
21 | #
22 | # User-Defined Input: 	
23 | # ** Before use, edit the regex_pinref regular expression to match how the ports are displayed
24 | # in instructions for your architecture.
25 | #
26 | ##############################################################################################
27 | 
28 | 
# Imported here so the script does not depend on "re" already being present
# in the interpreter namespace.
import re

################### USER DEFINED VALUES ###################
# PIC18F8722
regex_pinref = re.compile(r" PORT[A-H]")

# 87C52 (8051) - Ports referenced as FSR_80 (P0)...FSR_B0 (P3)
#regex_pinref = re.compile(r" FSR_[8-9A-Ba-b]0.?[0-7]?")

# C515 (8051) - Ports referenced as P1 or P1_8
#regex_pinref = re.compile(r" P\d+\_?\d+")

# M32R - no pattern provided yet; add one here if needed.
############################################################



start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.")

default_fn = "port_use_locations.txt"
filename = AskFile(1, default_fn, "Please choose the location to save the port use locations file.")

change_func_nm = AskYN(0, "Would you like to append a prefix to the names of funcs using ports?")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
	# One "address: disassembly" record per instruction that references a port.
	port_uses = []
	curr_addr = start_addr
	while curr_addr < end_addr:
		if isCode(GetFlags(curr_addr)):
			instruct = GetDisasm(curr_addr)
			pin_match = regex_pinref.search(instruct)
			if pin_match:
				record = ("0x%08x: " % curr_addr) + instruct
				print(record)
				port_uses.append(record)
				if change_func_nm == 1:
					func_start_addr = GetFunctionAttr(curr_addr, FUNCATTR_START)
					if func_start_addr != BADADDR:
						curr_name = GetFunctionName(curr_addr)
						# Functions whose name already starts with "pin" are
						# left alone so the prefix is not applied twice.
						if curr_name != "" and not curr_name.startswith("pin"):
							# The patterns above begin with a space, so the
							# matched text does too. Strip it and replace any
							# remaining non-word characters so the result can
							# be used inside a name.
							port = re.sub(r"\W", "_", pin_match.group(0).strip())
							MakeName(func_start_addr, "pin" + port + "Used_" + curr_name)
		curr_addr = NextHead(curr_addr)

	if filename:
		with open(filename, "w") as out_file:
			for record in port_uses:
				# One record per line so the file is readable.
				out_file.write(record + "\n")
	else:
		print("[identify_port_use_locations.py] No output file selected; results were not saved.")
else:
	print("[identify_port_use_locations.py] ERROR. Please enter valid addresses.")
72 | 			
73 | 			


--------------------------------------------------------------------------------
/images/getProcessorScreenShot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maddiestone/IDAPythonEmbeddedToolkit/3cf0346f93eb3c009f4bf274c91bd00398a56d77/images/getProcessorScreenShot.png


--------------------------------------------------------------------------------
/label_funcs_with_no_xrefs.py:
--------------------------------------------------------------------------------
 1 | ##############################################################################################
 2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
 3 | # All rights reserved.
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 5 | # software and associated documentation files (the "Software"), to deal in the Software 
 6 | # without restriction, including without limitation the rights to use, copy, modify, 
 7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 8 | # permit persons to whom the Software is furnished to do so.
 9 | #
10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
15 | # OR OTHER DEALINGS IN THE SOFTWARE.
16 | 
17 | ##############################################################################################
18 | # label_funcs_with_no_xrefs.py
19 | # This script checks each defined function in the address range entered for cross-references. 
20 | # If there are no cross-references to the function, the prefix "noXrefs_" is added to the 
21 | # function's name. It then iterates through all functions in the code range again to identify
22 | # all functions whose only code references come from functions that have no cross-references. This
23 | # is to detect functions called only by other functions that have no code references.
24 | # This script helps to detect "dead code" that is never called.
25 | #
26 | # Inputs: 	start_addr: 	Start address of the range of functions to examine
27 | #			end_addr:		End address of the range of functions to examine
28 | #           ignore_addrs:   Addresses of functions that should not be considered as "noXref"
29 | #                           For Example, RESET should be listed here
30 | #
31 | ##############################################################################################
32 | 
33 | ################### USER DEFINED VALUES ###################
34 | # Function Addresses that should not be considered "No Crossreferences/Dead"
35 | # For example, the reset and interrupt vectors don't have cross-references but should
36 | # not be labeled as such.
37 | ignore_addrs = (0x0, 0x8, 0x18)
38 | ###########################################################
39 | 			
def addPrefixToFunctionName(prefix, functionAddr):
	"""Prepend prefix to the name of the function containing functionAddr.

	prefix:       text to put in front of the current function name.
	functionAddr: address used to look the function up with GetFunctionName().

	Nothing happens when no function name is found at functionAddr or when
	the name already starts with prefix. Otherwise the new name is printed
	and applied with MakeName() at the function's start address.
	"""
	name = GetFunctionName(functionAddr)
	if name and not name.startswith(prefix):
		name = prefix + name
		# Name the function entry point even if functionAddr lies inside the
		# function body.
		func_start = GetFunctionAttr(functionAddr, FUNCATTR_START)
		if func_start == BADADDR:
			func_start = functionAddr
		print(("[label_funcs_with_no_xrefs.py] Function 0x%x Name: " % func_start) + name)
		MakeName(func_start, name)
46 | 
start_addr = AskAddr(MinEA(), "Please enter the starting address for the functions to be examined.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the functions to be examined.")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
	print("[label_funcs_with_no_xrefs.py] Running on addresses 0x%x to 0x%x" % (start_addr, end_addr))

	# If start_addr is inside a function, begin at that function's head.
	# Otherwise begin at the next function after start_addr.
	first_func = GetFunctionAttr(start_addr, FUNCATTR_START)
	if first_func == BADADDR:
		first_func = NextFunction(start_addr)

	# Function heads already labelled by this run. Tracking them here makes
	# each function get labelled at most once, so the passes below terminate
	# even if a rename does not take effect.
	labeled_addrs = set()

	# Repeat full passes over the range until a pass labels nothing new.
	# Labelling a function can leave its callees with only "dead" callers.
	new_noXrefs_found = True
	while new_noXrefs_found:
		new_noXrefs_found = False
		curr_addr = first_func
		while curr_addr != BADADDR and curr_addr < end_addr:
			already_labeled = (curr_addr in labeled_addrs or GetFunctionName(curr_addr).startswith("noXrefs_"))
			if curr_addr not in ignore_addrs and not already_labeled:
				has_valid_xref = False
				for x in XrefsTo(curr_addr):
					from_dead_func = (GetFunctionName(x.frm).startswith("noXrefs_") or GetFunctionAttr(x.frm, FUNCATTR_START) in labeled_addrs)
					if not from_dead_func:
						# Function has a valid cross-reference and is not "dead code"
						has_valid_xref = True
						break
				if not has_valid_xref:
					# No valid xrefs were found to this function
					new_noXrefs_found = True
					labeled_addrs.add(curr_addr)
					addPrefixToFunctionName("noXrefs_", curr_addr)
			# Advance exactly once per function so none are skipped.
			curr_addr = NextFunction(curr_addr)
		if new_noXrefs_found:
			print("[label_funcs_with_no_xrefs.py] Iterating through range again because new functions with no Xrefs found.")
	print("[label_funcs_with_no_xrefs.py] FINISHED.")
else:
	print("[label_funcs_with_no_xrefs.py] QUITTING. Invalid address(es) entered.")


--------------------------------------------------------------------------------
/make_strings.py:
--------------------------------------------------------------------------------
 1 | ##############################################################################################
 2 | # Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
 3 | # All rights reserved.
 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 
 5 | # software and associated documentation files (the "Software"), to deal in the Software 
 6 | # without restriction, including without limitation the rights to use, copy, modify, 
 7 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
 8 | # permit persons to whom the Software is furnished to do so.
 9 | #
10 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
11 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
12 | # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
13 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
14 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
15 | # OR OTHER DEALINGS IN THE SOFTWARE.
16 | 
17 | ##############################################################################################
18 | # make_strings.py
19 | # Searches the user entered address range for a series of ASCII bytes to define as strings.
20 | # If the continuous series of ASCII bytes has a length greater than or equal to min_length and
21 | # ends with a character in string_end, the script undefines the bytes in the series
22 | # and attempts to define it as a string.
23 | #
24 | # Input: 	start_addr: 	Start address for range to search for strings
25 | #			end_addr:		End address for range to search for strings
26 | #
27 | ##############################################################################################
28 | 
29 | ################### USER DEFINED VALUES ###################
min_length = 5           			# Minimum number of characters needed to define a string
string_end = [0x00]		# Possible "ending characters" for strings. A string will not be
                                    # defined if it does not end with one of these characters
###########################################################

start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be analyzed.")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
	print("[make_strings.py] STARTING. Attempting to make strings with a minimum length of %d on data in range 0x%x to 0x%x" % (min_length, start_addr, end_addr))
	num_strings = 0
	string_start = start_addr
	while string_start < end_addr:
		# Scan forward over the run of "character" bytes starting at
		# string_start. A byte counts as a character if it is in the ASCII
		# range 0x20-0x7E or is a tab, carriage return or line feed.
		curr_addr = string_start
		while curr_addr < end_addr:
			byte = Byte(curr_addr)
			if not ((0x1F < byte < 0x7F) or byte in (0x9, 0xD, 0xA)):
				break
			curr_addr += 1

		num_chars = curr_addr - string_start
		# The run is terminated only if it stopped inside the range on one of
		# the configured ending characters.
		ends_with_terminator = (curr_addr < end_addr) and (byte in string_end)

		if ends_with_terminator and num_chars >= min_length:
			MakeUnknown(string_start, num_chars, DOUNK_SIMPLE)
			if MakeStr(string_start, curr_addr) == 1:
				print("[make_strings.py] String created at 0x%x to 0x%x" % (string_start, curr_addr))
				num_strings += 1
				# Continue after the ending character.
				string_start = curr_addr + 1
			else:
				print("[make_strings.py] String create FAILED at 0x%x to 0x%x" % (string_start, curr_addr))
				# Retry from the next byte of the same run.
				string_start += 1
		else:
			# The run is too short, does not end with one of the ending
			# characters, or reached end_addr. Every later start inside this
			# run stops at the same byte with fewer characters, so none of
			# them can qualify. Skip the whole run instead of rescanning it
			# from each byte.
			string_start = curr_addr + 1
	print("[make_strings.py] FINISHED. Created %d strings in range 0x%x to 0x%x" % (num_strings, start_addr, end_addr))
else:
	print("[make_strings.py] QUITTING. Entered address values not valid.")


--------------------------------------------------------------------------------
/presentations/DerbyCon2017.IDAPythonPresentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maddiestone/IDAPythonEmbeddedToolkit/3cf0346f93eb3c009f4bf274c91bd00398a56d77/presentations/DerbyCon2017.IDAPythonPresentation.pdf


--------------------------------------------------------------------------------
/presentations/RECON2017.LifeChanginMagicIDAPython.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maddiestone/IDAPythonEmbeddedToolkit/3cf0346f93eb3c009f4bf274c91bd00398a56d77/presentations/RECON2017.LifeChanginMagicIDAPython.pdf


--------------------------------------------------------------------------------