├── README.md
├── frm_parser.py
├── ibdata_parser.py
└── iblogfile_parser.py

/README.md:
--------------------------------------------------------------------------------
mysql_forensics
===============

As part of my Master's thesis I developed some scripts to analyse MySQL database systems.

These scripts will be published in the middle of July.

I hope you enjoy them, and maybe you will improve them :)

You can contact me via Jabber: 404 at jabber dot 3 times c dot de
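A minimal invocation sketch (the scripts target Python 2, since they rely on `print` statements and `str.encode("hex")`; the database name `employees` below is only a placeholder):

    # reconstruct table structures from the .frm files of one database
    python frm_parser.py /var/lib/mysql/employees/

    # reconstruct INSERT/UPDATE/DELETE statements from the redo log
    python iblogfile_parser.py -l /var/lib/mysql/ib_logfile0 -i /var/lib/mysql/ibdata1 employees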
--------------------------------------------------------------------------------
/frm_parser.py:
--------------------------------------------------------------------------------
import argparse
import os

dbType = {"00": "Unknown",
          "01": "DIAB_ISAM",
          "02": "Hash",
          "03": "MISAM",
          "04": "PISAM",
          "05": "RMS_ISAM",
          "06": "Heap",
          "07": "ISAM",
          "08": "MRG_ISAM",
          "09": "MyISAM",
          "0a": "MRG_MyISAM",
          "0b": "Berkeley",
          "0c": "InnoDB",
          "0d": "Gemini",
          "0e": "NDBCluster",
          "0f": "Example_DB",
          "10": "Archive_DB",
          "11": "CSV_DB",
          "12": "Federated_DB",
          "13": "Blackhole_DB",
          "14": "Partition_DB",
          "15": "Binlog",
          "16": "Solid",
          "17": "PBXT",
          "18": "Table_Function",
          "19": "MemCache",
          "1a": "Falcon",
          "1b": "Maria",
          "1c": "Performance_Schema",
          "2a": "First_Dynamic",
          "7f": "Default"}

dataType = {"00": "Decimal",
            "01": "Tiny",
            "02": "Short",
            "03": "Int",
            "04": "Float",
            "05": "Double",
            "06": "Null",
            "07": "Timestamp",
            "08": "Longlong",
            "09": "Int24",
            "0a": "Date",
            "0b": "Time",
            "0c": "Datetime",
            "0d": "Year",
            "0e": "Newdate",
            "0f": "Varchar",
            "10": "Bit",
            "11": "Timestamp2",
            "12": "Datetime2",
            "13": "Time2",
            "f6": "Newdecimal",
            "f7": "Enum",
            "f8": "Set",
            "f9": "Tiny_Blob",
            "fa": "Medium_Blob",
            "fb": "Long_Blob",
            "fc": "Blob",
            "fd": "Var_String",
            "fe": "String",
            "ff": "Geometry"}

keyType = {"1b00": "Primary Key",
           "1b40": "Primary Key Auto_Increment",
           "4b00": "Primary Key Auto_Increment Not NULL",
           "1b80": "Foreign Key"}


def parse_tableInformation(myFile):
    myFile.seek(int("03", 16), 0)  #jump to offset 0x03
    return myFile.read(1).encode("hex")  #storage engine of the table


def parse_keyInformation(myFile, numberOfCols):
    myFile.seek(int("1000", 16), 0)  #jump to the beginning of this block
    info = myFile.read(int("200", 16)).encode("hex")  #read the complete block as a string
    keys = int(info[:2], 16)  #number of keys
    keyFields = int(info[2:4], 16)  #number of fields used as keys (incl. fk)
    startTitles = info.find("ff") + 2  #jump to the start of the column descriptions
    endTitles = info.rfind("ff")  #jump to the end of the column descriptions
    titles = info[startTitles:endTitles].split("ff")
    fields = info[28:startTitles - 2]
    splitfields = []
    for i, c in enumerate(fields):  #parse the key information
        if int(c, 16) <= numberOfCols and fields[i + 1:i + 3] == "80":
            splitfields.append(fields[i:i + 17])
    details = []
    for i, element in enumerate(splitfields):
        tmp = []
        tmp.append(element[:1])  #column
        tmp.append(element[9:13])  #type of key (pk or fk)
        tmp.append(titles[i])
        details.append(tmp)
    return details

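#Illustrative sketch (not part of the original parser): the column titles in the
#.frm file are stored as hex pairs separated by "ff" markers, so a hypothetical
#title hexdump "6964" decodes to the column name "id":
#
#   >>> "".join(chr(int("6964"[i:i + 2], 16)) for i in range(0, 4, 2))
#   'id'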
def parse_fields(myFile):
    completeFields = []
    myFile.seek(int("2101", 16), 0)
    cols = myFile.read(2).encode("hex")  #number of columns in the table
    s = myFile.read().encode("hex")  #read the rest of the file
    i = s.find("00ff") + 2  #last column value byte plus one
    y = 34 * int(cols, 16)  #17 bytes per column, multiplied by the number of columns
    startValues = s[(i - y):]  #from the first column entry to the end
    startTitles = s[i + 2:]
    colValues = []
    x = 0
    for element in range(int(cols, 16)):  #collect the 17 bytes of each column
        colValues.append(startValues[x:(x + 34)])
        x += 34
    titles = startTitles.split("ff")
    titleList = []  #titles of each column
    for element in titles:  #decode the column titles as strings
        if element == "00": continue
        c = ""
        for stri in range(0, len(element), 2):
            c += chr(int(element[stri:(stri + 2)], 16))
        titleList.append(c)
    detColVal = []  #detailed information about each column
    for element in colValues:  #parse the details of each column, like length and datatype
        colValueTuple = []
        colValueTuple.append(element[6:8])  #length of the column
        colValueTuple.append(element[26:28])  #datatype
        detColVal.append(colValueTuple)
    keys = parse_keyInformation(myFile, int(cols, 16))
    for i, title in enumerate(titleList):
        completeFields.append([title, detColVal[i]])
    return completeFields, keys

def print_table(fields, keys, fileName, se):
    print "Reconstruction of table:", fileName[fileName.rfind("/") + 1:fileName.find(".")]
    for i, field in enumerate(fields):
        print "Column ", i + 1, ":", field[0], " ", dataType[field[1][1]], "(", int(field[1][0], 16), ")"
    for key in keys:
        if key[1] in keyType: print "Column ", key[0], " is ", keyType[key[1]]
        else: print "Column ", key[0], " is a key column, but its type is unknown"
    print "Storage Engine: ", dbType[se]
    print ""

def read_frmfile(path):
    try:
        for frm in os.listdir(path):
            if frm.endswith(".frm"):
                with open(path + "/" + frm, "rb") as f:
                    fields = parse_fields(f)
                    fi = fields[0]
                    ke = fields[1]
                    print_table(fi, ke, f.name, parse_tableInformation(f))
    except OSError:
        print "----- ERROR -----"
        print "Path not found!"
        print "----- ERROR -----"

def main():
    parser = argparse.ArgumentParser(description="This script reconstructs the structure of the database tables from .frm files",
                                     epilog="And that's how you should do it ;)")
    parser.add_argument("PATH", help="The path to the .frm files, e.g. /var/lib/mysql/")
    args = parser.parse_args()
    read_frmfile(args.PATH)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/ibdata_parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KasperFridolin/mysql_forensics/7dc9a1bca9791eb70b3dcc10975edb1439e235a4/ibdata_parser.py
--------------------------------------------------------------------------------
/iblogfile_parser.py:
--------------------------------------------------------------------------------
import argparse
import time

dataManipulationType = {"0b": "Insert",
                        "1c": "Update",
                        "0e": "Delete"}


#Since the file header, checkpoints etc. are 2048 bytes in total, the first
#block starts at 0x800.
def jump_firstBlock(myFile):
    return myFile.seek(2048, 0)

#Jump back to 0x0 and read the complete 2048 bytes of the header.
def parse_fileHeader(myFile):
    myFile.seek(0, 0)
    return myFile.read(2048).encode("hex")

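#Illustrative sketch (not part of the original script): each log block is 512
#bytes -- a 14-byte header (4 + 2 + 2 + 4 + 2, as read in parse_blocks below),
#a 494-byte entry and a 4-byte trailer -- and the first block starts after the
#2048-byte file header, so block n would begin at:
#
#   def block_offset(n):  #hypothetical helper, for illustration only
#       return 2048 + 512 * n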
#Reads the 512 bytes of each block in the given file and parses the block header
#(incl. detailed information like the block no), the block entry, and the block
#trailer. Returns a list of all block elements (no empty blocks). One block
#element is a single list with 3 elements (header, entry, trailer).
def parse_blocks(myFile):
    jump_firstBlock(myFile)
    blocks = []
    i = 0
    while True:
        #Block_Header details
        tmpBlockHeaderNo = myFile.read(4).encode("hex")
        if tmpBlockHeaderNo == "": break  #end of file reached
        tmpBlockNrWrittenBytes = myFile.read(2).encode("hex")
        tmpBlockOffsetOfLRGStart = myFile.read(2).encode("hex")
        tmpBlockNrActiveCheckpoints = myFile.read(4).encode("hex")
        tmpBlockHdrSize = myFile.read(2).encode("hex")
        #Block_Entry details
        tmpBlockEntry = myFile.read(494).encode("hex")
        #Block_Trailer details
        tmpBlockTrailer = myFile.read(4).encode("hex")
        if tmpBlockOffsetOfLRGStart == "0000" and tmpBlockHeaderNo != "00000000" and tmpBlockHeaderNo != "0000": continue
        blockHeader = [tmpBlockHeaderNo, tmpBlockNrWrittenBytes, tmpBlockOffsetOfLRGStart, tmpBlockNrActiveCheckpoints, tmpBlockHdrSize]
        blocks.insert(i, [blockHeader, tmpBlockEntry, tmpBlockTrailer])
        i += 1
        if tmpBlockHeaderNo == "00000000": break
    return blocks

#returns the block header number of a given block.
def get_blockHeaderNo(block):
    return block[0][0]

#returns the number of written bytes of this block.
def get_blockNrWrittenBytes(block):
    return block[0][1]

#returns the offset of the entry block start.
def get_blockOffsetOfLRGStart(block):
    return block[0][2]

#returns the number of active checkpoints of this block.
def get_blockActiveCheckpoints(block):
    return block[0][3]

#returns the header size of this block.
def get_blockHdrSize(block):
    return block[0][4]

#returns the complete block header as a string. MAYBE USELESS
def get_BlockHeaderAsString(block):
    return get_blockHeaderNo(block)+get_blockNrWrittenBytes(block)+get_blockOffsetOfLRGStart(block)+get_blockActiveCheckpoints(block)+get_blockHdrSize(block)

#returns the block entry from the entry start (log entry type) to the end.
#Needs the offset of the log entry type and a single block.
def get_logEntryReconstruction(offset, blockEntry):
    tmpBlock = get_BlockHeaderAsString(blockEntry)+blockEntry[1]
    return tmpBlock[int(offset, 16)*2:]

#references a given block entry (starting from the log entry type) with the header number
def get_mlog_undo_insert_Entry(block):
    reference = get_logEntryReconstruction(get_blockOffsetOfLRGStart(block), block)
    if reference.startswith("94") or reference.startswith("14"):
        return [get_blockHeaderNo(block), reference]

#Collects the entries of all blocks that start with an mlog_undo_insert record.
def set_mlog_undo_insert_list(blocks):
    mlog_undo_list = []
    i = 0
    for block in blocks:
        entry = get_mlog_undo_insert_Entry(block)
        if entry is not None:
            mlog_undo_list.insert(i, entry)
            i += 1
    return mlog_undo_list

#parses the block entry from the first entry to the end, i.e. from 0x94
def parse_mlog_undo_insert_entry(mlog_undo_list):
    detailled_mlog_undo_insert_list = []
    for element in mlog_undo_list:
        tmpString = element[1]
        #BlockNo, log entry type, tablespace ID, page ID, length of entry, data manipulation type, table ID, rest
        detailled_mlog_undo_insert_list.append([element[0], tmpString[:2], tmpString[2:6], tmpString[6:8], tmpString[8:12],
                                                tmpString[12:14], tmpString[14:18], tmpString[18:]])
    return detailled_mlog_undo_insert_list


def get_logEntryType(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[1]

def get_tableSpaceID(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[2]

def get_pageID(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[3]

def get_lengthOfLogEntry(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[4]

def get_dataManipulationType(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[5]

def get_tableID(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[6]

def get_rest(detailled_mlog_undo_insert_list_element):
    return detailled_mlog_undo_insert_list_element[7]

def set_insertStatementList(detailled_mlog_undo_insert_list):
    insertList = []
    for element in detailled_mlog_undo_insert_list:
        if get_dataManipulationType(element) == "0b": insertList.append(element)
    return insertList

def set_updateStatementList(detailled_mlog_undo_insert_list):
    updateList = []
    for element in detailled_mlog_undo_insert_list:
        if get_dataManipulationType(element) == "1c": updateList.append(element)
    return updateList

def set_deleteStatementList(detailled_mlog_undo_insert_list):
    deleteList = []
    for element in detailled_mlog_undo_insert_list:
        if get_dataManipulationType(element) == "0e": deleteList.append(element)
    return deleteList

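#Illustrative sketch (made-up hexdump): the slicing in parse_mlog_undo_insert_entry
#above maps a reconstructed entry string onto the record layout like this:
#
#   s = "94" + "0005" + "03" + "002a" + "0b" + "0010" + "..."  #hypothetical values
#   s[:2]     #log entry type ("94")
#   s[12:14]  #data manipulation type ("0b" -> dataManipulationType["0b"] == "Insert")
#   s[14:18]  #table ID ("0010")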
def parse_detailled_update_information(updateList):
    splitList = []
    printList = []
    tableIDs = []
    for element in updateList:
        tmpList = []
        hl = []
        tmp = []
        tmpList.append(element[0])  #BlockNo
        tmpList.append(element[1])  #Log Entry Type
        tmpList.append(element[2])  #Tablespace ID
        tmpList.append(element[3])  #Page ID
        tmpList.append(element[4])  #Length of the Log Entry
        tmpList.append(element[5])  #Data Manipulation Type
        tmpList.append(element[6])  #Table ID
        tableIDs.append(element[6])
        hl = parse_transIDAndRBPFields(element[7], 0)  #last transaction id and rollback pointer
        tmpList += hl[0]
        pointer = hl[1]
        pointer += 2  #unknown, 1 byte
        hl = parse_pkInformation(element[7], pointer)
        tmpList += hl[0]
        pkLength = int(hl[0][0], 16)*2
        pointer = hl[1]
        hl = parse_numberOfUpdatesFields(element[7], pointer)
        if hl is None: continue
        tmpList += hl[0]
        pointer = hl[1]
        tmp += hl[0]
        hl = parse_newUpdateValue(element[7], element[2], pkLength)
        tmpList += hl[0]
        pointer = hl[1]
        tmp += hl[2]
        printList.append(tmp)
        splitList.append(tmpList)
    return splitList, printList, tableIDs


def parse_newUpdateValue(element, tablespaceID, pkLength):
    tmp = []
    hl = []
    printList = []
    pointer = 0
    newStmt = element[element.find("26"+tablespaceID):]
    tmp.append(newStmt[:pointer+2])  #log entry type
    pointer += 2
    tmp.append(newStmt[pointer:pointer+4])  #tablespace ID
    pointer += 4
    tmp.append(newStmt[pointer:pointer+2])  #page ID
    pageID = newStmt[pointer:pointer+2]
    pointer += 2
    hl = parse_mlog_comp_rec_insert(newStmt, pointer, tablespaceID, pageID, pkLength)
    tmp += hl[0]
    pointer = hl[1]
    printList = hl[3]
    return tmp, pointer, printList


def print_updates(printList, tableIDs, tableRef):
    updatedFields = 0
    colsOfStmt = 0
    start = 4
    for i, update in enumerate(printList):
        updatedFields = int(update[0], 16)
        colsOfStmt = update[4]*updatedFields
        start *= updatedFields
        if tableIDs[i] in tableRef: print "UPDATE ", tableRef[tableIDs[i]]
        else: print "UPDATE "
        print "SET column", int(update[1], 16)-1, "=", update[(start+int(update[1], 16)-1)]
        print "WHERE column", int(update[1], 16)-1, "=", update[3], ");"
        print ""

def parse_numberOfUpdatesFields(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+2])  #number of updated fields
    pointer += 2
    if element[pointer-2:pointer] == "": return
    for field in range(int(element[pointer-2:pointer], 16)):
        tmp.append(element[pointer:pointer+2])  #ID of the updated field
        pointer += 2
        length = int(element[pointer:pointer+2], 16)
        tmp.append(length)  #length of the updated field
        pointer += 2
        tmp.append(read_hexdump(element[pointer:pointer+length*2]))  #value of the updated field
        pointer += length*2
    return tmp, pointer

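#For illustration (hypothetical table and values): the reconstruction printed by
#print_updates above has the shape
#
#   UPDATE  <db/table>
#   SET column <n> = <new value>
#   WHERE column <n> = <old value> );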
#returns a list with all insert statements split into their detailed information.
#TODO: I still need to work out how to handle the false positives here;
#maybe just store them somewhere separately.
def parse_detailled_insert_information(insertList):
    splitList = []
    printList = []
    tableIDs = []
    for element in insertList:
        tmpList = []
        metaList = []
        hl = []
        #Metadata
        if (element[0])[:2] != "80": continue
        metaList.append(element[0])  #BlockNo
        metaList.append(element[1])  #Log Entry Type
        metaList.append(element[2])  #Tablespace ID
        metaList.append(element[3])  #Page ID
        metaList.append(element[4])  #Length of the Log Entry
        metaList.append(element[5])  #Data Manipulation Type
        metaList.append(element[6])  #Table ID
        tableIDs.append(element[6])
        #Specific data
        hl = parse_insert_mlog_undo_insert(element[7], 0, element[2], element[3])
        if hl is not None:
            splitList.append([metaList, hl[0]])
            printList.append(hl[3])
    return splitList, printList, tableIDs

#Parses mlog_undo_insert statements
def parse_insert_mlog_undo_insert(element, pointer, tablespaceID, pageID):
    metaList = []
    printList = []
    hl = parse_pkInformation(element, pointer)
    tmpList = hl[0]
    pkLength = int(hl[0][0], 16)*2
    pointer = hl[1]
    pointer += 2  #unknown, 1 byte
    hl = parse_varTablespaceID(element, tablespaceID, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    pointer += 2  #unknown, 1 byte
    hl = parse_mlog_comp_rec_insert(element, pointer, tablespaceID, pageID, pkLength)
    if hl is None: return
    tmpList += hl[0]
    pointer = hl[1]
    metaList += hl[2]
    printList += hl[3]
    tl = split_multipleInserts(element[pointer:], tablespaceID, pageID)
    if tl is not None:
        for sub in tl:
            if sub != "":
                hl = parse_insert_mlog_undo_insert(sub, 10, tablespaceID, pageID)
                if hl is None: continue  #skip sub-entries that cannot be parsed
                tmpList += hl[0]
                pointer = hl[1]
                metaList += hl[2]
                printList += hl[3]
    return tmpList, pointer, metaList, printList

#Parses mlog_comp_rec_insert statements
def parse_mlog_comp_rec_insert(element, pointer, tablespaceID, pageID, pkLength):
    hl = []
    tmpList = []
    parseList = []
    metaList = []
    printList = []
    parseList.append(pkLength)
    hl = parse_fieldCount(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    fieldsInEntry = hl[0][0]  #2 bytes
    parseList.append(fieldsInEntry)
    uniqueFields = hl[0][1]  #number of unique fields, 2 bytes
    parseList.append(uniqueFields)
    printList.append(fieldsInEntry)
    hl = parse_uniqueFieldLength(element, pointer, uniqueFields)
    tmpList += hl[0]
    parseList += hl[0]
    pointer = hl[1]
    hl = parse_transIDAndRBPLength(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    metaList.append(hl[0][0])  #length of the transaction id as metadata to use in delete recovery
    metaList.append(hl[0][1])  #length of the data rollback pointer as metadata to use in delete recovery
    hl = parse_nonUniqueFieldLength(element, pointer, fieldsInEntry-uniqueFields)
    tmpList += hl[0]
    pointer = hl[1]
    parseList += hl[0]
    hl = parse_offset(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    hl = parse_realLengthofData(element, pointer, fieldsInEntry-uniqueFields)
    tmpList += hl[0]
    pointer = hl[1]
    parseList += hl[0]
    pointer += 10  #unknown, 5 bytes
    hl = parse_uniqueFields(element, pointer, uniqueFields, pkLength)
    tmpList += hl[0]
    pointer = hl[1]
    printList += hl[0]
    hl = parse_transIDAndRBPFields(element, pointer)
    tmpList += hl[0]
    pointer = hl[1]
    totalLength = sort_structure(parseList)
    hl = parse_FieldHexdump(element, totalLength, pointer)
    if hl is None: return
    tmpList += hl[0]
    pointer = hl[1]
    printList += hl[0]
    return tmpList, pointer, metaList, printList

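#For illustration (hypothetical table and values): print_inserts below emits the
#reconstructed statements in the shape
#
#   INSERT INTO <db/table> VALUES (
#   <value 1>, <value 2>, ... ,
#   );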
#Prints all insert statements as SQL statements.
def print_inserts(printList, tableIDs, tables):
    for i, y in enumerate(printList):
        cols = y[0]
        start = 1
        end = cols+1
        if tableIDs[i] in tables: print "INSERT INTO", tables[tableIDs[i]], "VALUES ("
        else: print "INSERT INTO VALUES ("
        for x in range(len(printList[i])):
            if len(printList[i][start:end]) > 0: print ", ".join("%s" % s for s in printList[i][start:end]), ","
            start = end+1
            end += cols+1
        print ");"
        print ""

#Parses the hexdump of the data fields
def parse_FieldHexdump(element, totalLength, pointer):
    if totalLength is None: return
    tmp = []
    for l in totalLength:
        hexdump = element[pointer:pointer+int(l[1], 16)*2]
        pointer += int(l[1], 16)*2
        if l[0] == "var":
            v = read_hexdump(hexdump)
            tmp.append(v)
        if l[0] == "fix":
            f = read_int(hexdump)
            tmp.append(f)
    return tmp, pointer

#Parses the unique fields within the entry
def parse_uniqueFields(element, pointer, numberOfUniFields, pkLength):
    tmp = []
    if numberOfUniFields < 10:  #crude upper bound against false positives
        for i in range(numberOfUniFields):
            value = element[pointer:pointer+pkLength]
            if not value.startswith("8") and not value.startswith("00"):
                pointer += 2
            tmp.append(read_int(element[pointer:pointer+pkLength]))
            pointer += pkLength
    return tmp, pointer

#Parses the real length of the data of the related fields within the entry
def parse_realLengthofData(element, pointer, fieldsInEntry):
    tmp = []
    if fieldsInEntry < 20:  #crude upper bound against false positives
        for i in range(fieldsInEntry):
            tmp.append(element[pointer:pointer+2])
            pointer += 2
    return tmp, pointer

#Parses the offset
def parse_offset(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+4])
    pointer += 4
    for field in range(4):
        tmp.append(element[pointer:pointer+2])
        pointer += 2
    return tmp, pointer

#Parses the transaction ID and the data rollback pointer
def parse_transIDAndRBPFields(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+12])
    pointer += 12
    tmp.append(element[pointer:pointer+14])
    pointer += 14
    return tmp, pointer

#Parses the length of the transaction ID and the data rollback pointer
def parse_transIDAndRBPLength(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+4])
    pointer += 4
    tmp.append(element[pointer:pointer+4])
    pointer += 4
    return tmp, pointer

#Parses the length of the non-unique fields
def parse_nonUniqueFieldLength(element, pointer, numberOfFields):
    tmp = []
    if numberOfFields < 10:  #crude upper bound against false positives
        for i in range(numberOfFields):
            tmp.append(element[pointer:pointer+4])
            pointer += 4
    return tmp, pointer

#Parses the length of the unique fields
def parse_uniqueFieldLength(element, pointer, numberOfUniqueFields):
    tmp = []
    if numberOfUniqueFields < 10:  #crude upper bound against false positives
        for i in range(numberOfUniqueFields):
            tmp.append(element[pointer:pointer+4])
            pointer += 4
    return tmp, pointer

#Parses the number of all data fields in this entry
def parse_fieldCount(element, pointer):
    tmp = []
    tmp.append(int(element[pointer:pointer+4], 16)-2)
    pointer += 4
    tmp.append(int(element[pointer:pointer+4], 16))
    pointer += 4
    return tmp, pointer

#Parses the information of the primary key
def parse_pkInformation(element, pointer):
    tmp = []
    tmp.append(element[pointer:pointer+2])  #length of the primary key field
    pkLength = int(element[pointer:pointer+2], 16)*2
    pointer += 2
    tmp.append(element[pointer:pointer+pkLength])  #primary key of the inserted field
    pointer += pkLength
    return tmp, pointer

#Parses the variable tablespace ID
def parse_varTablespaceID(element, tablespaceID, pointer):
    tmp = []
    if tablespaceID == element[pointer:pointer+4]:
        tmp.append(element[pointer:pointer+4])
        pointer += 4
    else:
        tmp.append(element[pointer:pointer+2])
        pointer += 2
    return tmp, pointer

#Splits the block into individual insert entries if more than one entry exists
def split_multipleInserts(values, tableID, pageID):
    if values.startswith("94"+tableID+pageID):
        return values.split("94"+tableID+pageID)
    if values.startswith("14"+tableID+pageID):
        return values.split("14"+tableID+pageID)


#Reads the int value of the given hexdump
def read_int(hexdump):
    return int(hexdump[2:], 16)

#Reads the chars of the given hexdump
def read_hexdump(hexdump):
    c = ""
    for i in range(0, len(hexdump), 2):
        c += chr(int(hexdump[i:i+2], 16))
    return c

#Sorts the structure of a given list to get a better output
def sort_structure(parseList):
    pks = []
    dfs = []
    rls = []
    sorts = []
    pkLength = parseList[0]
    nonUnique = parseList[1]-parseList[2]
    pkCount = parseList[2]
    if len(parseList) <= 3: return
    for i in range(pkCount):
        pks.append(parseList[i+3])
    for i in range(nonUnique):
        dfs.append(parseList[i+3+pkCount])
        if not parseList[i+3+pkCount].endswith("0"): nonUnique -= 1
    for i in range(nonUnique):
        rls.append(parseList[i+3+pkCount+len(dfs)])
    i = 0
    for element in dfs:
        tl = []
        if element.endswith("0"):
            tl.append("var")
            tl.append(rls[len(rls)-i-1])
            sorts.append(tl)
            i += 1
        else:
            tl.append("fix")
            tl.append(element[len(element)-2:])
            sorts.append(tl)
    return sorts

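#Doctest-style sketch (made-up hexdumps) for the helpers above: read_hexdump
#decodes hex pairs to characters, read_int skips the leading byte and interprets
#the rest as an integer, and sort_structure tags each field as variable- or
#fixed-length so parse_FieldHexdump knows which decoder to use.
#
#   >>> read_hexdump("6964")
#   'id'
#   >>> read_int("8005")
#   5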
#returns a list with all delete statements split into their detailed information.
def parse_detailled_delete_information(deleteList, lengthOfTransactionID, lengthOfRollbackPointer):
    splitList = []
    lengthOfTransactionID *= 2
    lengthOfRollbackPointer *= 2
    rbpEnd = lengthOfTransactionID+lengthOfRollbackPointer+2
    for element in deleteList:
        tmpList = []
        tmpList.append(element[0])  #BlockNo
        tmpList.append(element[1])  #Log Entry Type
        tmpList.append(element[2])  #Tablespace ID
        tmpList.append(element[3])  #Page ID
        tmpList.append(element[4])  #Length of the Log Entry
        tmpList.append(element[5])  #Data Manipulation Type
        tmpList.append(element[6])  #Table ID
        tmpList.append((element[7])[:lengthOfTransactionID])  #last transaction ID
        tmpList.append((element[7])[lengthOfTransactionID:lengthOfTransactionID+2])  #unknown
        tmpList.append((element[7])[lengthOfTransactionID+2:rbpEnd])  #last data rollback pointer
        tmpList.append((element[7])[rbpEnd:rbpEnd+2])  #length of the primary key
        pkLength = int(((element[7])[rbpEnd:rbpEnd+2]), 16)*2
        tmpList.append((element[7])[rbpEnd+2:(rbpEnd+2+pkLength)])  #affected primary key
        tmpList.append((element[7])[(rbpEnd+8+pkLength):(rbpEnd+10+pkLength)])  #length of the primary key field
        pkFieldLength = int(((element[7])[(rbpEnd+8+pkLength):(rbpEnd+10+pkLength)]), 16)*2
        tmpList.append((element[7])[(rbpEnd+10+pkLength):((rbpEnd+10+pkLength)+pkFieldLength)])  #primary key of the deleted field
        splitList.append(tmpList)
    return splitList

#Prints the delete list
def print_deletes(deleteList, tableRef):
    for delete in deleteList:
        if not delete[0].startswith("80"): continue
        if delete[6] in tableRef: print "DELETE FROM", tableRef[delete[6]]
        else: print "DELETE FROM "
        print "WHERE primaryKey =", read_int(delete[len(delete)-1]), ";"
        print ""

#Gets the length of the transaction ID
def get_LengthOfTransactionID(insertDataList):
    return (insertDataList[0][7])[3:]

#Gets the length of the data rollback pointer
def get_LengthOfRollbackPointer(insertDataList):
    return (insertDataList[0][8])[3:]

def print_statistics(inserts, updates, deletes):
    print "---- Overview ----"
    print ""
    print "Type\t| Block No"
    print "----\t| --------"
    print "Insert\t|"
    x = 0
    y = 0
    z = 0
    for insert in inserts:
        if not insert[0][0].startswith("80"): continue
        x += 1
        print "\t| ", insert[0][0]
    print "----\t| --------"
    print "Update\t|"
    #for update in updates:
    #    if not update[0].startswith("80"): continue
    #    y += 1
    #    print "\t| ", update[0]
    print "OUT OF ORDER :)"
    print ""
    print "----\t| --------"
    print "Delete\t|"
    for delete in deletes:
        if not delete[0].startswith("80"): continue
        z += 1
        print "\t| ", delete[0]
    print "----\t| --------"
    print ""
    print "-- Total number of Insert statements found: ", x
    print "-- Total number of Update statements found: ", y
    print "-- Total number of Delete statements found: ", z
    print ""

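#For illustration (hypothetical table and key): print_deletes above emits
#statements of the shape
#
#   DELETE FROM <db/table>
#   WHERE primaryKey = <int value> ;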
#opens the ib_logfile and drives the whole analysis
def read_ib_logfile(ib_logfile, dbName, ibdata, numberOfColumns):
    inserts = []
    updates = []
    deletes = []
    try:
        with open(ib_logfile, "rb") as f:
            print "Start of analysis:", time.strftime("%d.%m.%Y - %H:%M:%S")
            print "Results of analysing the", dbName, "database:"
            print ""
            detailledList = parse_mlog_undo_insert_entry(set_mlog_undo_insert_list(parse_blocks(f)))
            print ""
            print "---- INSERTS ----"
            hl = parse_detailled_insert_information(set_insertStatementList(detailledList))
            inserts = hl[0]
            tableRef = get_tableName(read_ibdata(ibdata, inserts, dbName))
            print_inserts(hl[1], hl[2], tableRef)
            print ""
            print "---- UPDATES ----"
            #hl = parse_detailled_update_information(set_updateStatementList(detailledList))
            #if hl is not None: print_updates(hl[1], hl[2], tableRef)
            print "OUT OF ORDER :)"
            print ""
            print "---- DELETES ----"
            deletes = parse_detailled_delete_information(set_deleteStatementList(detailledList), 6, 7)  #no specific "printList" like in the insert or update parser
            if deletes is not None: print_deletes(deletes, tableRef)
            print_statistics(inserts, updates, deletes)
            print "End of analysis: ", time.strftime("%d.%m.%Y - %H:%M:%S")
    except IOError:
        print "----- ERROR -----"
        print "ib_logfile not found!"
        print "----- ERROR -----"

#Reads the given ibdata file
def read_ibdata(myFile, splitList, dbName):
    tableIDs = []
    tmp = []
    dbName = dbName.encode("hex")
    for element in splitList:
        tableIDs.append(get_tableID(element[0]))
    try:
        with open(myFile, "rb") as f:
            tmp = split_ibdata(f, tableIDs, dbName)
    except IOError:
        print "----- ERROR -----"
        print "ibdata not found!"
        print "----- ERROR -----"
    return tmp

#Scans the ibdata file to resolve the tableID -> tablename mapping faster
def split_ibdata(myFile, tableIDs, dbName):
    tmp = []
    length = len(dbName)/2
    while True:
        offset = myFile.read(2).encode("hex")
        if offset in tableIDs:
            dbHex = myFile.read(length).encode("hex")
            if dbName in dbHex:
                tmpName = myFile.read(50).encode("hex")
                myFile.seek(-50, 1)
                tmp.append([offset, tmpName])
            myFile.seek(-length, 1)
        if not offset: break
    return tmp

#Gets the name of the table
def get_tableName(ibdataList):
    tableRef = {}
    for element in ibdataList:
        c = ""
        for i in range(0, len(element[1]), 2):
            byte = int((element[1])[i:i+2], 16)
            #keep [0-9A-Za-z] and "/" (the database/table separator); mask everything else
            if (0x30 <= byte <= 0x39) or (0x41 <= byte <= 0x5a) or (0x61 <= byte <= 0x7a) or byte == 0x2f:
                c += chr(byte)
            else:
                c += "--"
        if c[c.find("/")+1:c.find("--")] != "": tableRef[element[0]] = c[c.find("/")+1:c.find("--")]
    return tableRef

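#Illustrative sketch (made-up ID and name): get_tableName above returns a dict
#mapping table IDs to the table part of the recovered "db/table" string, e.g.
#{"0010": "employees"}; this tableRef dict is what print_inserts, print_updates
#and print_deletes use to label the reconstructed statements.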
#Parses the arguments from the command line
def main():
    parser = argparse.ArgumentParser(description="This script interprets the given ib_logfile (ib_logfile0 or ib_logfile1) and the ibdata1 file of a database to reconstruct the insert, update and delete statements that were used.",
                                     epilog="And that's how you should do it ;)")
    parser.add_argument("-l", default="/var/lib/mysql/ib_logfile0", help="The ib_logfile0 file, e.g. /var/lib/mysql/ib_logfile0")
    parser.add_argument("DB", help="The name of the database")
    parser.add_argument("-i", default="/var/lib/mysql/ibdata1", help="The ibdata1 file, e.g. /var/lib/mysql/ibdata1")
    parser.add_argument("-f", default=20, type=int, help="The maximum number of columns within the tables. This is needed for performance. Default value is 20.")
    args = parser.parse_args()
    read_ib_logfile(args.l, args.DB, args.i, args.f)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------