├── README.md
├── frm_parser.py
├── ibdata_parser.py
└── iblogfile_parser.py

/README.md:
--------------------------------------------------------------------------------
mysql_forensics
===============

As part of my Master's thesis I developed some scripts to analyse MySQL database systems.

These scripts will be published in the middle of July.

I hope you enjoy them, and maybe you will improve them :)

You can contact me via Jabber: 404 at jabber dot 3 times c dot de
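A minimal invocation sketch (the scripts target Python 2, since they rely on `print` statements and `str.encode("hex")`; the database name `employees` below is only a placeholder):

    # reconstruct table structures from the .frm files of one database
    python frm_parser.py /var/lib/mysql/employees/

    # reconstruct INSERT/UPDATE/DELETE statements from the redo log
    python iblogfile_parser.py -l /var/lib/mysql/ib_logfile0 -i /var/lib/mysql/ibdata1 employees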
--------------------------------------------------------------------------------
/frm_parser.py:
--------------------------------------------------------------------------------
import argparse
import os

dbType = {"00": "Unknown",
          "01": "DIAB_ISAM",
          "02": "Hash",
          "03": "MISAM",
          "04": "PISAM",
          "05": "RMS_ISAM",
          "06": "Heap",
          "07": "ISAM",
          "08": "MRG_ISAM",
          "09": "MyISAM",
          "0a": "MRG_MyISAM",
          "0b": "Berkeley",
          "0c": "InnoDB",
          "0d": "Gemini",
          "0e": "NDBCluster",
          "0f": "Example_DB",
          "10": "Archive_DB",
          "11": "CSV_DB",
          "12": "Federated_DB",
          "13": "Blackhole_DB",
          "14": "Partition_DB",
          "15": "Binlog",
          "16": "Solid",
          "17": "PBXT",
          "18": "Table_Function",
          "19": "MemCache",
          "1a": "Falcon",
          "1b": "Maria",
          "1c": "Performance_Schema",
          "2a": "First_Dynamic",
          "7f": "Default"}

dataType = {"00": "Decimal",
            "01": "Tiny",
            "02": "Short",
            "03": "Int",
            "04": "Float",
            "05": "Double",
            "06": "Null",
            "07": "Timestamp",
            "08": "Longlong",
            "09": "Int24",
            "0a": "Date",
            "0b": "Time",
            "0c": "Datetime",
            "0d": "Year",
            "0e": "Newdate",
            "0f": "Varchar",
            "10": "Bit",
            "11": "Timestamp2",
            "12": "Datetime2",
            "13": "Time2",
            "f6": "Newdecimal",
            "f7": "Enum",
            "f8": "Set",
            "f9": "Tiny_Blob",
            "fa": "Medium_Blob",
            "fb": "Long_Blob",
            "fc": "Blob",
            "fd": "Var_String",
            "fe": "String",
            "ff": "Geometry"}

keyType = {"1b00": "Primary Key",
           "1b40": "Primary Key Auto_Increment",
           "4b00": "Primary Key Auto_Increment Not NULL",
           "1b80": "Foreign Key"}


def parse_tableInformation(myFile):
    myFile.seek(int("03", 16), 0)  #jump to offset 0x03
    return myFile.read(1).encode("hex")  #storage engine of the table


def parse_keyInformation(myFile, numberOfCols):
    myFile.seek(int("1000", 16), 0)  #jump to the beginning of this block
    info = myFile.read(int("200", 16)).encode("hex")  #read the complete block as a string
    keys = int(info[:2], 16)  #number of keys
    keyFields = int(info[2:4], 16)  #number of fields used as keys (incl. fk)
    startTitles = info.find("ff") + 2  #jump to the start of the column descriptions
    endTitles = info.rfind("ff")  #jump to the end of the column descriptions
    titles = info[startTitles:endTitles].split("ff")
    fields = info[28:startTitles - 2]
    splitfields = []
    for i, c in enumerate(fields):  #parse the key information
        if int(c, 16) <= numberOfCols and fields[i + 1:i + 3] == "80":
            splitfields.append(fields[i:i + 17])
    details = []
    for i, element in enumerate(splitfields):
        tmp = []
        tmp.append(element[:1])  #column
        tmp.append(element[9:13])  #type of key (pk or fk)
        tmp.append(titles[i])
        details.append(tmp)
    return details

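#Illustrative sketch (not part of the original parser): the column titles in the
#.frm file are stored as hex pairs separated by "ff" markers, so a hypothetical
#title hexdump "6964" decodes to the column name "id":
#
#   >>> "".join(chr(int("6964"[i:i + 2], 16)) for i in range(0, 4, 2))
#   'id'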
def parse_fields(myFile):
    completeFields = []
    myFile.seek(int("2101", 16), 0)
    cols = myFile.read(2).encode("hex")  #number of columns in the table
    s = myFile.read().encode("hex")  #read the rest of the file
    i = s.find("00ff") + 2  #last column value byte plus one
    y = 34 * int(cols, 16)  #17 bytes per column, multiplied by the number of columns
    startValues = s[(i - y):]  #from the first column entry to the end
    startTitles = s[i + 2:]
    colValues = []
    x = 0
    for element in range(int(cols, 16)):  #collect the 17 bytes of each column
        colValues.append(startValues[x:(x + 34)])
        x += 34
    titles = startTitles.split("ff")
    titleList = []  #titles of each column
    for element in titles:  #decode the column titles as strings
        if element == "00": continue
        c = ""
        for stri in range(0, len(element), 2):
            c += chr(int(element[stri:(stri + 2)], 16))
        titleList.append(c)
    detColVal = []  #detailed information about each column
    for element in colValues:  #parse the details of each column, like length and datatype
        colValueTuple = []
        colValueTuple.append(element[6:8])  #length of the column
        colValueTuple.append(element[26:28])  #datatype
        detColVal.append(colValueTuple)
    keys = parse_keyInformation(myFile, int(cols, 16))
    for i, title in enumerate(titleList):
        completeFields.append([title, detColVal[i]])
    return completeFields, keys

def print_table(fields, keys, fileName, se):
    print "Reconstruction of table:", fileName[fileName.rfind("/") + 1:fileName.find(".")]
    for i, field in enumerate(fields):
        print "Column ", i + 1, ":", field[0], " ", dataType[field[1][1]], "(", int(field[1][0], 16), ")"
    for key in keys:
        if key[1] in keyType: print "Column ", key[0], " is ", keyType[key[1]]
        else: print "Column ", key[0], " is a key column, but its type is unknown"
    print "Storage Engine: ", dbType[se]
    print ""

def read_frmfile(path):
    try:
        for frm in os.listdir(path):
            if frm.endswith(".frm"):
                with open(path + "/" + frm, "rb") as f:
                    fields = parse_fields(f)
                    fi = fields[0]
                    ke = fields[1]
                    print_table(fi, ke, f.name, parse_tableInformation(f))
    except OSError:
        print "----- ERROR -----"
        print "Path not found!"
        print "----- ERROR -----"

def main():
    parser = argparse.ArgumentParser(description="This script reconstructs the structure of the database tables from .frm files",
                                     epilog="And that's how you should do it ;)")
    parser.add_argument("PATH", help="The path to the .frm files, e.g. /var/lib/mysql/")
    args = parser.parse_args()
    read_frmfile(args.PATH)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/ibdata_parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KasperFridolin/mysql_forensics/7dc9a1bca9791eb70b3dcc10975edb1439e235a4/ibdata_parser.py
--------------------------------------------------------------------------------
/iblogfile_parser.py:
--------------------------------------------------------------------------------
import argparse
import time

dataManipulationType = {"0b": "Insert",
                        "1c": "Update",
                        "0e": "Delete"}


#Since the file header, checkpoints etc. are 2048 bytes in total, the first
#block starts at 0x800.
def jump_firstBlock(myFile):
    return myFile.seek(2048, 0)

#Jump back to 0x0 and read the complete 2048 bytes of the header.
def parse_fileHeader(myFile):
    myFile.seek(0, 0)
    return myFile.read(2048).encode("hex")

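#Illustrative sketch (not part of the original script): each log block is 512
#bytes -- a 14-byte header (4 + 2 + 2 + 4 + 2, as read in parse_blocks below),
#a 494-byte entry and a 4-byte trailer -- and the first block starts after the
#2048-byte file header, so block n would begin at:
#
#   def block_offset(n):  #hypothetical helper, for illustration only
#       return 2048 + 512 * n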
#Reads the 512 bytes of each block in the given file and parses the block header
#(incl. detailed information like the block no), the block entry, and the block
#trailer. Returns a list of all block elements (no empty blocks). One block
#element is a single list with 3 elements (header, entry, trailer).
def parse_blocks(myFile):
    jump_firstBlock(myFile)
    blocks = []
    i = 0
    while True:
        #Block_Header details
        tmpBlockHeaderNo = myFile.read(4).encode("hex")
        if tmpBlockHeaderNo == "": break  #end of file reached
        tmpBlockNrWrittenBytes = myFile.read(2).encode("hex")
        tmpBlockOffsetOfLRGStart = myFile.read(2).encode("hex")
        tmpBlockNrActiveCheckpoints = myFile.read(4).encode("hex")
        tmpBlockHdrSize = myFile.read(2).encode("hex")
        #Block_Entry details
        tmpBlockEntry = myFile.read(494).encode("hex")
        #Block_Trailer details
        tmpBlockTrailer = myFile.read(4).encode("hex")
        if tmpBlockOffsetOfLRGStart == "0000" and tmpBlockHeaderNo != "00000000" and tmpBlockHeaderNo != "0000": continue
        blockHeader = [tmpBlockHeaderNo, tmpBlockNrWrittenBytes, tmpBlockOffsetOfLRGStart, tmpBlockNrActiveCheckpoints, tmpBlockHdrSize]
        blocks.insert(i, [blockHeader, tmpBlockEntry, tmpBlockTrailer])
        i += 1
        if tmpBlockHeaderNo == "00000000": break
    return blocks

#returns the block header number of a given block.
def get_blockHeaderNo(block):
    return block[0][0]

#returns the number of written bytes of this block.
def get_blockNrWrittenBytes(block):
    return block[0][1]

#returns the offset of the entry block start.
def get_blockOffsetOfLRGStart(block):
    return block[0][2]

#returns the number of active checkpoints of this block.
def get_blockActiveCheckpoints(block):
    return block[0][3]

#returns the header size of this block.
def get_blockHdrSize(block):
    return block[0][4]

#returns the complete block header as a string. MAYBE USELESS
def get_BlockHeaderAsString(block):
    return get_blockHeaderNo(block)+get_blockNrWrittenBytes(block)+get_blockOffsetOfLRGStart(block)+get_blockActiveCheckpoints(block)+get_blockHdrSize(block)

#returns the block entry from the entry start (log entry type) to the end.
#Needs the offset of the log entry type and a single block.
def get_logEntryReconstruction(offset, blockEntry):
    tmpBlock = get_BlockHeaderAsString(blockEntry)+blockEntry[1]
    return tmpBlock[int(offset, 16)*2:]

#references a given block entry (starting from the log entry type) with the header number
def get_mlog_undo_insert_Entry(block):
    reference = get_logEntryReconstruction(get_blockOffsetOfLRGStart(block), block)
    if reference.startswith("94") or reference.startswith("14"):
        return [get_blockHeaderNo(block), reference]

#Collects the entries of all blocks that start with an mlog_undo_insert record.
def set_mlog_undo_insert_list(blocks):
    mlog_undo_list = []
    i = 0
    for block in blocks:
        entry = get_mlog_undo_insert_Entry(block)
        if entry is not None:
            mlog_undo_list.insert(i, entry)
            i += 1
    return mlog_undo_list

#parses the block entry from the first entry to the end, i.e. from 0x94
def parse_mlog_undo_insert_entry(mlog_undo_list):
    detailled_mlog_undo_insert_list = []
    for element in mlog_undo_list:
        tmpString = element[1]
        #BlockNo, log entry type, tablespace ID, page ID, length of entry, data manipulation type, table ID, rest
        detailled_mlog_undo_insert_list.append([element[0], tmpString[:2], tmpString[2:6], tmpString[6:8], tmpString[8:12],
                                                tmpString[12:14], tmpString[14:18], tmpString[18:]])
    return detailled_mlog_undo_insert_list


def get_logEntryType(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[1]

def get_tableSpaceID(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[2]

def get_pageID(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[3]

def get_lengthOfLogEntry(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[4]

def get_dataManipulationType(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[5]

def get_tableID(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[6]

def get_rest(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[7]

def set_insertStatementList(detailled_mlog_undo_insert_list):
    insertList = []
    for element in detailled_mlog_undo_insert_list:
        if get_dataManipulationType(element) == "0b": insertList.append(element)
    return insertList

def set_updateStatementList(detailled_mlog_undo_insert_list):
    updateList = []
    for element in detailled_mlog_undo_insert_list:
        if get_dataManipulationType(element) == "1c": updateList.append(element)
    return updateList

def set_deleteStatementList(detailled_mlog_undo_insert_list):
    deleteList = []
    for element in detailled_mlog_undo_insert_list:
        if get_dataManipulationType(element) == "0e": deleteList.append(element)
    return deleteList

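#Illustrative sketch (made-up hexdump): the slicing in parse_mlog_undo_insert_entry
#above maps a reconstructed entry string onto the record layout like this:
#
#   s = "94" + "0005" + "03" + "002a" + "0b" + "0010" + "..."  #hypothetical values
#   s[:2]     #log entry type ("94")
#   s[12:14]  #data manipulation type ("0b" -> dataManipulationType["0b"] == "Insert")
#   s[14:18]  #table ID ("0010")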
def parse_detailled_update_information(updateList):
    splitList = []
    printList = []
    tableIDs = []
    for element in updateList:
        tmpList = []
        hl = []
        tmp = []
        tmpList.append(element[0])  #BlockNo
        tmpList.append(element[1])  #Log Entry Type
        tmpList.append(element[2])  #Tablespace ID
        tmpList.append(element[3])  #Page ID
        tmpList.append(element[4])  #Length of the Log Entry
        tmpList.append(element[5])  #Data Manipulation Type
        tmpList.append(element[6])  #Table ID
        tableIDs.append(element[6])
        hl = parse_transIDAndRBPFields(element[7], 0)  #last transaction id and rollback pointer
        tmpList += hl[0]
        pointer = hl[1]
        pointer += 2  #unknown, 1 byte
        hl = parse_pkInformation(element[7], pointer)
        tmpList += hl[0]
        pkLength = int(hl[0][0], 16)*2
        pointer = hl[1]
        hl = parse_numberOfUpdatesFields(element[7], pointer)
        if hl is None: continue
        tmpList += hl[0]
        pointer = hl[1]
        tmp += hl[0]
        hl = parse_newUpdateValue(element[7], element[2], pkLength)
        tmpList += hl[0]
        pointer = hl[1]
        tmp += hl[2]
        printList.append(tmp)
        splitList.append(tmpList)
    return splitList, printList, tableIDs


def parse_newUpdateValue(element, tablespaceID, pkLength):
    tmp = []
    hl = []
    printList = []
    pointer = 0
    newStmt = element[element.find("26"+tablespaceID):]
    tmp.append(newStmt[:pointer+2])  #log entry type
    pointer += 2
    tmp.append(newStmt[pointer:pointer+4])  #tablespace ID
    pointer += 4
    tmp.append(newStmt[pointer:pointer+2])  #page ID
    pageID = newStmt[pointer:pointer+2]
    pointer += 2
    hl = parse_mlog_comp_rec_insert(newStmt, pointer, tablespaceID, pageID, pkLength)
    tmp += hl[0]
    pointer = hl[1]
    printList = hl[3]
    return tmp, pointer, printList


def print_updates(printList, tableIDs, tableRef):
    updatedFields = 0
    colsOfStmt = 0
    start = 4
    for i, update in enumerate(printList):
        updatedFields = int(update[0], 16)
        colsOfStmt = update[4]*updatedFields
        start *= updatedFields
        if tableIDs[i] in tableRef: print "UPDATE ", tableRef[tableIDs[i]]
        else: print "UPDATE "
        print "SET column", int(update[1], 16)-1, "=", update[(start+int(update[1], 16)-1)]
        print "WHERE column", int(update[1], 16)-1, "=", update[3], ");"
        print ""

def parse_numberOfUpdatesFields(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+2])  #number of updated fields
    pointer += 2
    if element[pointer-2:pointer] == "": return
    for field in range(int(element[pointer-2:pointer], 16)):
        tmp.append(element[pointer:pointer+2])  #ID of the updated field
        pointer += 2
        length = int(element[pointer:pointer+2], 16)
        tmp.append(length)  #length of the updated field
        pointer += 2
        tmp.append(read_hexdump(element[pointer:pointer+length*2]))  #value of the updated field
        pointer += length*2
    return tmp, pointer

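#For illustration (hypothetical table and values): the reconstruction printed by
#print_updates above has the shape
#
#   UPDATE  <db/table>
#   SET column <n> = <new value>
#   WHERE column <n> = <old value> );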
#returns a list with all insert statements split into their detailed information.
#TODO: I still need to work out how to handle the false positives here;
#maybe just store them somewhere separately.
def parse_detailled_insert_information(insertList):
    splitList = []
    printList = []
    tableIDs = []
    for element in insertList:
        tmpList = []
        metaList = []
        hl = []
        #Metadata
        if (element[0])[:2] != "80": continue
        metaList.append(element[0])  #BlockNo
        metaList.append(element[1])  #Log Entry Type
        metaList.append(element[2])  #Tablespace ID
        metaList.append(element[3])  #Page ID
        metaList.append(element[4])  #Length of the Log Entry
        metaList.append(element[5])  #Data Manipulation Type
        metaList.append(element[6])  #Table ID
        tableIDs.append(element[6])
        #Specific data
        hl = parse_insert_mlog_undo_insert(element[7], 0, element[2], element[3])
        if hl is not None:
            splitList.append([metaList, hl[0]])
            printList.append(hl[3])
    return splitList, printList, tableIDs

#Parses mlog_undo_insert statements
def parse_insert_mlog_undo_insert(element, pointer, tablespaceID, pageID):
    metaList = []
    printList = []
    hl = parse_pkInformation(element, pointer)
    tmpList = hl[0]
    pkLength = int(hl[0][0], 16)*2
    pointer = hl[1]
    pointer += 2  #unknown, 1 byte
    hl = parse_varTablespaceID(element, tablespaceID, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    pointer += 2  #unknown, 1 byte
    hl = parse_mlog_comp_rec_insert(element, pointer, tablespaceID, pageID, pkLength)
    if hl is None: return
    tmpList += hl[0]
    pointer = hl[1]
    metaList += hl[2]
    printList += hl[3]
    tl = split_multipleInserts(element[pointer:], tablespaceID, pageID)
    if tl is not None:
        for sub in tl:
            if sub != "":
                hl = parse_insert_mlog_undo_insert(sub, 10, tablespaceID, pageID)
                if hl is None: continue  #skip sub-entries that cannot be parsed
                tmpList += hl[0]
                pointer = hl[1]
                metaList += hl[2]
                printList += hl[3]
    return tmpList, pointer, metaList, printList

#Parses mlog_comp_rec_insert statements
def parse_mlog_comp_rec_insert(element, pointer, tablespaceID, pageID, pkLength):
    hl = []
    tmpList = []
    parseList = []
    metaList = []
    printList = []
    parseList.append(pkLength)
    hl = parse_fieldCount(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    fieldsInEntry = hl[0][0]  #2 bytes
    parseList.append(fieldsInEntry)
    uniqueFields = hl[0][1]  #number of unique fields, 2 bytes
    parseList.append(uniqueFields)
    printList.append(fieldsInEntry)
    hl = parse_uniqueFieldLength(element, pointer, uniqueFields)
    tmpList += hl[0]
    parseList += hl[0]
    pointer = hl[1]
    hl = parse_transIDAndRBPLength(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    metaList.append(hl[0][0])  #length of the transaction id as metadata to use in delete recovery
    metaList.append(hl[0][1])  #length of the data rollback pointer as metadata to use in delete recovery
    hl = parse_nonUniqueFieldLength(element, pointer, fieldsInEntry-uniqueFields)
    tmpList += hl[0]
    pointer = hl[1]
    parseList += hl[0]
    hl = parse_offset(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    hl = parse_realLengthofData(element, pointer, fieldsInEntry-uniqueFields)
    tmpList += hl[0]
    pointer = hl[1]
    parseList += hl[0]
    pointer += 10  #unknown, 5 bytes
    hl = parse_uniqueFields(element, pointer, uniqueFields, pkLength)
    tmpList += hl[0]
    pointer = hl[1]
    printList += hl[0]
    hl = parse_transIDAndRBPFields(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    totalLength = sort_structure(parseList)
    hl = parse_FieldHexdump(element, totalLength, pointer)
    if hl is None: return
    tmpList += hl[0]
    pointer = hl[1]
    printList += hl[0]
    return tmpList, pointer, metaList, printList

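#For illustration (hypothetical table and values): print_inserts below emits the
#reconstructed statements in the shape
#
#   INSERT INTO <db/table> VALUES (
#   <value 1>, <value 2>, ... ,
#   );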
#Prints all insert statements as SQL statements.
def print_inserts(printList, tableIDs, tables):
    for i, y in enumerate(printList):
        cols = y[0]
        start = 1
        end = cols+1
        if tableIDs[i] in tables: print "INSERT INTO", tables[tableIDs[i]], "VALUES ("
        else: print "INSERT INTO VALUES ("
        for x in range(len(printList[i])):
            if len(printList[i][start:end]) > 0: print ", ".join("%s" % s for s in printList[i][start:end]), ","
            start = end+1
            end += cols+1
        print ");"
        print ""

#Parses the hexdump of the data fields
def parse_FieldHexdump(element, totalLength, pointer):
    if totalLength is None: return
    tmp = []
    for l in totalLength:
        hexdump = element[pointer:pointer+int(l[1], 16)*2]
        pointer += int(l[1], 16)*2
        if l[0] == "var":
            v = read_hexdump(hexdump)
            tmp.append(v)
        if l[0] == "fix":
            f = read_int(hexdump)
            tmp.append(f)
    return tmp, pointer

#Parses the unique fields within the entry
def parse_uniqueFields(element, pointer, numberOfUniFields, pkLength):
    tmp = []
    if numberOfUniFields < 10:  #crude upper bound against false positives
        for i in range(numberOfUniFields):
            value = element[pointer:pointer+pkLength]
            if not value.startswith("8") and not value.startswith("00"):
                pointer += 2
            tmp.append(read_int(element[pointer:pointer+pkLength]))
            pointer += pkLength
    return tmp, pointer

#Parses the real length of the data of the related fields within the entry
def parse_realLengthofData(element, pointer, fieldsInEntry):
    tmp = []
    if fieldsInEntry < 20:  #crude upper bound against false positives
        for i in range(fieldsInEntry):
            tmp.append(element[pointer:pointer+2])
            pointer += 2
    return tmp, pointer

#Parses the offset
def parse_offset(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+4])
    pointer += 4
    for field in range(4):
        tmp.append(element[pointer:pointer+2])
        pointer += 2
    return tmp, pointer

#Parses the transaction ID and the data rollback pointer
def parse_transIDAndRBPFields(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+12])
    pointer += 12
    tmp.append(element[pointer:pointer+14])
    pointer += 14
    return tmp, pointer

#Parses the length of the transaction ID and the data rollback pointer
def parse_transIDAndRBPLength(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+4])
    pointer += 4
    tmp.append(element[pointer:pointer+4])
    pointer += 4
    return tmp, pointer

#Parses the length of the non-unique fields
def parse_nonUniqueFieldLength(element, pointer, numberOfFields):
    tmp = []
    if numberOfFields < 10:  #crude upper bound against false positives
        for i in range(numberOfFields):
            tmp.append(element[pointer:pointer+4])
            pointer += 4
    return tmp, pointer

#Parses the length of the unique fields
def parse_uniqueFieldLength(element, pointer, numberOfUniqueFields):
    tmp = []
    if numberOfUniqueFields < 10:  #crude upper bound against false positives
        for i in range(numberOfUniqueFields):
            tmp.append(element[pointer:pointer+4])
            pointer += 4
    return tmp, pointer

#Parses the number of all data fields in this entry
def parse_fieldCount(element, pointer):
    tmp = []
    tmp.append(int(element[pointer:pointer+4], 16)-2)
    pointer += 4
    tmp.append(int(element[pointer:pointer+4], 16))
    pointer += 4
    return tmp, pointer

#Parses the information of the primary key
def parse_pkInformation(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+2])  #length of the primary key field
    pkLength = int(element[pointer:pointer+2], 16)*2
    pointer += 2
    tmp.append(element[pointer:pointer+pkLength])  #primary key of the inserted field
    pointer += pkLength
    return tmp, pointer

#Parses the variable tablespace ID
def parse_varTablespaceID(element, tablespaceID, pointer):
    tmp = []
    if tablespaceID == element[pointer:pointer+4]:
        tmp.append(element[pointer:pointer+4])
        pointer += 4
    else:
        tmp.append(element[pointer:pointer+2])
        pointer += 2
    return tmp, pointer

#Splits the block into individual insert entries if more than one entry exists
def split_multipleInserts(values, tableID, pageID):
    if values.startswith("94"+tableID+pageID):
        return values.split("94"+tableID+pageID)
    if values.startswith("14"+tableID+pageID):
        return values.split("14"+tableID+pageID)


#Reads the int value of the given hexdump
def read_int(hexdump):
    return int(hexdump[2:], 16)

#Reads the chars of the given hexdump
def read_hexdump(hexdump):
    c = ""
    for i in range(0, len(hexdump), 2):
        c += chr(int(hexdump[i:i+2], 16))
    return c

#Sorts the structure of a given list to get a better output
def sort_structure(parseList):
    pks = []
    dfs = []
    rls = []
    sorts = []
    pkLength = parseList[0]
    nonUnique = parseList[1]-parseList[2]
    pkCount = parseList[2]
    if len(parseList) <= 3: return
    for i in range(pkCount):
        pks.append(parseList[i+3])
    for i in range(nonUnique):
        dfs.append(parseList[i+3+pkCount])
        if not parseList[i+3+pkCount].endswith("0"): nonUnique -= 1
    for i in range(nonUnique):
        rls.append(parseList[i+3+pkCount+len(dfs)])
    i = 0
    for element in dfs:
        tl = []
        if element.endswith("0"):
            tl.append("var")
            tl.append(rls[len(rls)-i-1])
            sorts.append(tl)
            i += 1
        else:
            tl.append("fix")
            tl.append(element[len(element)-2:])
            sorts.append(tl)
    return sorts

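#Doctest-style sketch (made-up hexdumps) for the helpers above: read_hexdump
#decodes hex pairs to characters, read_int skips the leading byte and interprets
#the rest as an integer, and sort_structure tags each field as variable- or
#fixed-length so parse_FieldHexdump knows which decoder to use.
#
#   >>> read_hexdump("6964")
#   'id'
#   >>> read_int("8005")
#   5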
#returns a list with all delete statements split into their detailed information.
def parse_detailled_delete_information(deleteList, lengthOfTransactionID, lengthOfRollbackPointer):
    splitList = []
    lengthOfTransactionID *= 2
    lengthOfRollbackPointer *= 2
    rbpEnd = lengthOfTransactionID+lengthOfRollbackPointer+2
    for element in deleteList:
        tmpList = []
        tmpList.append(element[0])  #BlockNo
        tmpList.append(element[1])  #Log Entry Type
        tmpList.append(element[2])  #Tablespace ID
        tmpList.append(element[3])  #Page ID
        tmpList.append(element[4])  #Length of the Log Entry
        tmpList.append(element[5])  #Data Manipulation Type
        tmpList.append(element[6])  #Table ID
        tmpList.append((element[7])[:lengthOfTransactionID])  #last transaction ID
        tmpList.append((element[7])[lengthOfTransactionID:lengthOfTransactionID+2])  #unknown
        tmpList.append((element[7])[lengthOfTransactionID+2:rbpEnd])  #last data rollback pointer
        tmpList.append((element[7])[rbpEnd:rbpEnd+2])  #length of the primary key
        pkLength = int(((element[7])[rbpEnd:rbpEnd+2]), 16)*2
        tmpList.append((element[7])[rbpEnd+2:(rbpEnd+2+pkLength)])  #affected primary key
        tmpList.append((element[7])[(rbpEnd+8+pkLength):(rbpEnd+10+pkLength)])  #length of the primary key field
        pkFieldLength = int(((element[7])[(rbpEnd+8+pkLength):(rbpEnd+10+pkLength)]), 16)*2
        tmpList.append((element[7])[(rbpEnd+10+pkLength):((rbpEnd+10+pkLength)+pkFieldLength)])  #primary key of the deleted field
        splitList.append(tmpList)
    return splitList

#Prints the delete list
def print_deletes(deleteList, tableRef):
    for delete in deleteList:
        if not delete[0].startswith("80"): continue
        if delete[6] in tableRef: print "DELETE FROM", tableRef[delete[6]]
        else: print "DELETE FROM "
        print "WHERE primaryKey =", read_int(delete[len(delete)-1]), ";"
        print ""

#Gets the length of the transaction ID
def get_LengthOfTransactionID(insertDataList):
    return (insertDataList[0][7])[3:]

#Gets the length of the data rollback pointer
def get_LengthOfRollbackPointer(insertDataList):
    return (insertDataList[0][8])[3:]

def print_statistics(inserts, updates, deletes):
    print "---- Overview ----"
    print ""
    print "Type\t| Block No"
    print "----\t| --------"
    print "Insert\t|"
    x = 0
    y = 0
    z = 0
    for insert in inserts:
        if not insert[0][0].startswith("80"): continue
        x += 1
        print "\t| ", insert[0][0]
    print "----\t| --------"
    print "Update\t|"
    #for update in updates:
    #    if not update[0].startswith("80"): continue
    #    y += 1
    #    print "\t| ", update[0]
    print "OUT OF ORDER :)"
    print ""
    print "----\t| --------"
    print "Delete\t|"
    for delete in deletes:
        if not delete[0].startswith("80"): continue
        z += 1
        print "\t| ", delete[0]
    print "----\t| --------"
    print ""
    print "-- Total number of Insert statements found: ", x
    print "-- Total number of Update statements found: ", y
    print "-- Total number of Delete statements found: ", z
    print ""

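#For illustration (hypothetical table and key): print_deletes above emits
#statements of the shape
#
#   DELETE FROM <db/table>
#   WHERE primaryKey = <int value> ;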
#opens the ib_logfile and drives the whole analysis
def read_ib_logfile(ib_logfile, dbName, ibdata, numberOfColumns):
    inserts = []
    updates = []
    deletes = []
    try:
        with open(ib_logfile, "rb") as f:
            print "Start of analysis:", time.strftime("%d.%m.%Y - %H:%M:%S")
            print "Results of analysing the", dbName, "database:"
            print ""
            detailledList = parse_mlog_undo_insert_entry(set_mlog_undo_insert_list(parse_blocks(f)))
            print ""
            print "---- INSERTS ----"
            hl = parse_detailled_insert_information(set_insertStatementList(detailledList))
            inserts = hl[0]
            tableRef = get_tableName(read_ibdata(ibdata, inserts, dbName))
            print_inserts(hl[1], hl[2], tableRef)
            print ""
            print "---- UPDATES ----"
            #hl = parse_detailled_update_information(set_updateStatementList(detailledList))
            #if hl is not None: print_updates(hl[1], hl[2], tableRef)
            print "OUT OF ORDER :)"
            print ""
            print "---- DELETES ----"
            deletes = parse_detailled_delete_information(set_deleteStatementList(detailledList), 6, 7)  #no specific "printList" like in the insert or update parser
            if deletes is not None: print_deletes(deletes, tableRef)
            print_statistics(inserts, updates, deletes)
            print "End of analysis: ", time.strftime("%d.%m.%Y - %H:%M:%S")
    except IOError:
        print "----- ERROR -----"
        print "ib_logfile not found!"
        print "----- ERROR -----"

#Reads the given ibdata file
def read_ibdata(myFile, splitList, dbName):
    tableIDs = []
    tmp = []
    dbName = dbName.encode("hex")
    for element in splitList:
        tableIDs.append(get_tableID(element[0]))
    try:
        with open(myFile, "rb") as f:
            tmp = split_ibdata(f, tableIDs, dbName)
    except IOError:
        print "----- ERROR -----"
        print "ibdata not found!"
        print "----- ERROR -----"
    return tmp

#Scans the ibdata file to resolve the tableID -> tablename mapping faster
def split_ibdata(myFile, tableIDs, dbName):
    tmp = []
    length = len(dbName)/2
    while True:
        offset = myFile.read(2).encode("hex")
        if offset in tableIDs:
            dbHex = myFile.read(length).encode("hex")
            if dbName in dbHex:
                tmpName = myFile.read(50).encode("hex")
                myFile.seek(-50, 1)
                tmp.append([offset, tmpName])
            myFile.seek(-length, 1)
        if not offset: break
    return tmp

#Gets the name of the table
def get_tableName(ibdataList):
    tableRef = {}
    for element in ibdataList:
        c = ""
        for i in range(0, len(element[1]), 2):
            byte = int((element[1])[i:i+2], 16)
            #keep [0-9A-Za-z] and "/" (the database/table separator); mask everything else
            if (0x30 <= byte <= 0x39) or (0x41 <= byte <= 0x5a) or (0x61 <= byte <= 0x7a) or byte == 0x2f:
                c += chr(byte)
            else:
                c += "--"
        if c[c.find("/")+1:c.find("--")] != "": tableRef[element[0]] = c[c.find("/")+1:c.find("--")]
    return tableRef

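#Illustrative sketch (made-up ID and name): get_tableName above returns a dict
#mapping table IDs to the table part of the recovered "db/table" string, e.g.
#{"0010": "employees"}; this tableRef dict is what print_inserts, print_updates
#and print_deletes use to label the reconstructed statements.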
#Parses the arguments from the command line
def main():
    parser = argparse.ArgumentParser(description="This script interprets the given ib_logfile (ib_logfile0 or ib_logfile1) and the ibdata1 file of a database to reconstruct the insert, update and delete statements that were used.",
                                     epilog="And that's how you should do it ;)")
    parser.add_argument("-l", default="/var/lib/mysql/ib_logfile0", help="The ib_logfile0 file, e.g. /var/lib/mysql/ib_logfile0")
    parser.add_argument("DB", help="The name of the database")
    parser.add_argument("-i", default="/var/lib/mysql/ibdata1", help="The ibdata1 file, e.g. /var/lib/mysql/ibdata1")
    parser.add_argument("-f", default=20, type=int, help="The maximum number of columns within the tables. This is needed for performance. Default value is 20.")
    args = parser.parse_args()
    read_ib_logfile(args.l, args.DB, args.i, args.f)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------