├── ibd2sql
│   ├── checksum.py
│   ├── super_fast_count.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── charset
│   │   │   ├── __init__.py
│   │   │   ├── swe7.py
│   │   │   ├── dec8.py
│   │   │   ├── geostd8.py
│   │   │   ├── armscii8.py
│   │   │   ├── hp8.py
│   │   │   ├── keybcs2.py
│   │   │   └── tis620.py
│   │   ├── test_time.py
│   │   ├── check_table_old.py
│   │   ├── keyring_file.py
│   │   ├── crc32c.py
│   │   ├── lz4.py
│   │   ├── mysql_json2.py
│   │   ├── aes.py
│   │   ├── mysql_json.py
│   │   └── b2data.py
│   ├── __init__.py
│   ├── modify_lower_case_table_names.py
│   ├── innodb_page
│   │   ├── __init__.py
│   │   ├── lob.py
│   │   ├── xdes.py
│   │   ├── inode.py
│   │   ├── column_type.py
│   │   ├── subpartition.py
│   │   ├── compressed_row.py
│   │   ├── sdi.py
│   │   ├── fsp.py
│   │   ├── page.py
│   │   ├── index_compressed.py
│   │   └── index.py
│   ├── check_block.py
│   ├── web.py
│   └── ibd2sql.py
├── docs
│   ├── innodb_index_page.md
│   ├── CHANGELOG.md
│   └── USAGE.md
├── .github
│   └── ISSUE_TEMPLATE
│       └── bug_report.md
├── README_zh.md
├── README.md
├── tests
│   ├── test.sh
│   └── gen_data.py
└── main.py

--------------------------------------------------------------------------------
/ibd2sql/checksum.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/ibd2sql/super_fast_count.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/charset/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/ibd2sql/__init__.py:
--------------------------------------------------------------------------------
1 | PAGE_SIZE = 16384
2 | 
--------------------------------------------------------------------------------
/ibd2sql/modify_lower_case_table_names.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/ibd2sql/innodb_page/__init__.py:
--------------------------------------------------------------------------------
1 | import struct
2 | 
--------------------------------------------------------------------------------
/docs/innodb_index_page.md:
--------------------------------------------------------------------------------
1 | This document gives a brief description of the structure of an ibd file, mainly the structure of the index pages.
2 | 
3 | ~~It is fairly long, so writing it is slow, and it kept being put off.~~ Most of the material has already been covered in blog posts; it will be consolidated here later.
4 | 
5 | 
6 | 
7 | Blog links:
8 | 
9 | https://cloud.tencent.com/developer/user/1130242
10 | 
11 | https://www.modb.pro/u/17942
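12 | 
13 | In the meantime, a brief orientation. The byte layout below is the standard InnoDB layout for the default 16KB page, summarized here because it is what the parsing code in this repo assumes (the offsets match constants such as FIL_PAGE_DATA = 38 and PAGE_NEW_INFIMUM = 99 used elsewhere in the source):
14 | 
15 | - bytes 0-37: FIL header (checksum, page number, prev/next page, LSN, page type, space id)
16 | - bytes 38-73: index page header (record counts, heap info, page level, index id)
17 | - bytes 74-93: FSEG header
18 | - offset 99: infimum record; offset 112: supremum record (COMPACT row formats)
19 | - user records grow upward after the system records; the page directory grows downward from the page end
20 | - last 8 bytes: FIL trailer (old-style checksum + low 32 bits of the LSN)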
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Environment**
11 | python version:
12 | ibd2sql version:
13 | OS version:
14 | 
15 | 
16 | **Describe the bug**
17 | A clear and concise description of what the bug is.
18 | 
19 | **To Reproduce**
20 | 
21 | **Other**
22 | 
--------------------------------------------------------------------------------
/ibd2sql/check_block.py:
--------------------------------------------------------------------------------
1 | import os
2 | from ibd2sql.utils.crc32c import *
3 | # scan a data file page by page and report every page whose CRC32C checksum fails
4 | def check_block(filename,PAGE_SIZE):
5 |     HAVE_BAD_BLOCK = False
6 |     with open(filename,'rb') as f:
7 |         for pageid in range(os.path.getsize(filename)//PAGE_SIZE):
8 |             if not CHECK_PAGE(f.read(PAGE_SIZE)):
9 |                 HAVE_BAD_BLOCK = True
10 |                 print('BAD PAGE NO:',pageid)
11 |     if not HAVE_BAD_BLOCK:
12 |         print('NO BAD PAGE.')
13 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/test_time.py:
--------------------------------------------------------------------------------
1 | from b2data import *
2 | import sys
3 | def test_time():
4 |     data = [
5 |         [(b'\x80\xc8\xb8\x00\x00\x00',5), '12:34:56.00000'],
6 |         [(b'\x7f7H\x00\x00\x00',5),'-12:34:56.00000'],
7 |         [(b'\x80\xc8\xb8\x01\xe0x',5),'12:34:56.12300'],
8 |         [(b'\x80\xc8\xb8\x0fB6',5),'12:34:56.99999'],
9 |         [(b'\x7f7G\xfe\x1f\x88',5),'-12:34:56.12300'],
10 |         [(b'\x7f7G\xf0\xbd\xca',5),'-12:34:56.99999']
11 |     ]
12 |     for x in data:
13 |         if B2TIME(*x[0]) == repr(x[1]):
14 |             print('PASS')
15 |         else:
16 |             sys.stdout.write('FAILED '+B2TIME(*x[0])+' '+x[1]+'\n')
17 | test_time()
18 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/check_table_old.py:
--------------------------------------------------------------------------------
1 | import struct
2 | # legacy (pre-crc32c) InnoDB page checksum, based on ut_fold hashing
3 | UT_HASH_RANDOM_MASK = 1463735687
4 | UT_HASH_RANDOM_MASK2 = 1653893711
5 | FIL_PAGE_OFFSET = 4
6 | FIL_PAGE_FILE_FLUSH_LSN = 26
7 | FIL_PAGE_DATA = 38
8 | FIL_PAGE_END_LSN_OLD_CHKSUM = 8
9 | def ut_fold_ulint_pair(n1,n2):
10 |     return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) ^ UT_HASH_RANDOM_MASK) + n2)
11 | 
12 | def ut_fold_binary(data):
13 |     fold = 0
14 |     for i in range(len(data)):
15 |         fold = ut_fold_ulint_pair(fold, data[i])
16 |     return fold
17 | 
18 | def calc_page_new_checksum(data):
19 |     # fold bytes 4..26 (after the stored checksum) and 38..-8 (the page body)
20 |     checksum = ut_fold_binary(data[4:26])+ut_fold_binary(data[38:-8])
21 |     return checksum&0xFFFFFFFF
22 | 
23 | def CHECK_PAGE_OLD(data):
24 |     if data[:4] == b'\x00\x00\x00\x00':
25 |         return True
26 |     else:
27 |         return struct.unpack('>L',data[:4])[0] == calc_page_new_checksum(data)
28 | 
--------------------------------------------------------------------------------
/ibd2sql/innodb_page/lob.py:
--------------------------------------------------------------------------------
1 | import struct
2 | 
3 | def FIRST_BLOB(pg,pageno):
4 |     """
5 |     INPUT:
6 |         pg: page reader
7 |         pageno: page number
8 |     RETURN:
9 |         binary data of blob
10 |     """
11 |     firstpageno = pageno
12 |     data = pg.read(firstpageno)
13 |     entry = data[96:96+60]  # LOB index entries start at offset 96; each entry is 60 bytes
14 |     rdata = b''
15 |     while True:
16 |         if len(entry) < 12:
17 |             break
18 |         pageno,datalen,lobversion = struct.unpack('>3L',entry[-12:])
19 |         datalen = datalen>>16
20 |         if pageno == 0:
21 |             break
22 |         elif pageno == firstpageno:
23 |             rdata += data[696:696+datalen]  # the first page stores LOB data after its index, at offset 696
24 |         else:
25 |             rdata += pg.read(pageno)[49:49+datalen]  # continuation pages store data at offset 49
26 |         next_entry_pageno,next_entry_offset = struct.unpack('>LH',entry[6:12])
27 |         if next_entry_pageno >0 and next_entry_pageno < 4294967295:
28 |             entry = pg.read(next_entry_pageno)[next_entry_offset:next_entry_offset+60]
29 |         else:
30 |             break
31 |     return rdata
32 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/keyring_file.py:
-------------------------------------------------------------------------------- 1 | import struct 2 | def READ_KEYRING(data): 3 | offset = 24 4 | kd = {} 5 | xor_str = '*305=Ljt0*!@$Hnm(*-9-w;:'.encode() 6 | while True: 7 | if data[offset:offset+3] == b'EOF': 8 | break 9 | total_length, key_id_length, key_type_length, user_id_length, key_length = struct.unpack_from('Q',data[:8]), 23 | 'XDES_FLST_NODE':struct.unpack('>LHLH',data[8:20]), 24 | 'XDES_STATE':struct.unpack('>L',data[20:24]), 25 | 'XDES_BITMAP':data[24:] 26 | }) 27 | 28 | -------------------------------------------------------------------------------- /ibd2sql/utils/crc32c.py: -------------------------------------------------------------------------------- 1 | import struct 2 | def create_crc32c_table(): 3 | poly = 0x82f63b78 4 | table = [] 5 | for i in range(256): 6 | crc = i 7 | for _ in range(8): 8 | if crc & 1: 9 | crc = (crc >> 1) ^ poly 10 | else: 11 | crc >>= 1 12 | table.append(crc) 13 | return table 14 | crc32_slice_table = create_crc32c_table() 15 | def calculate_crc32c(data): 16 | crc = 0xFFFFFFFF 17 | for byte in data: 18 | crc = crc32_slice_table[(crc ^ byte) & 0xFF] ^ (crc >> 8) 19 | return crc ^ 0xFFFFFFFF 20 | 21 | 22 | def CHECK_PAGE(data): 23 | if data[:4] == b'\x00\x00\x00\x00' and data[26:28] == b'\x00\x00': 24 | return True 25 | checksum_field1 = struct.unpack('>L',data[:4])[0] 26 | checksum_field2 = struct.unpack('>L',data[-8:-4])[0] 27 | c1 = calculate_crc32c(data[4:26]) 28 | c2 = calculate_crc32c(data[38:-8]) 29 | return True if checksum_field1 == checksum_field2 == (c1^c2)&(2**32-1) else False 30 | 31 | def REPACK_PAGE(data): 32 | c1 = calculate_crc32c(data[4:26]) 33 | c2 = calculate_crc32c(data[38:-8]) 34 | c3 = struct.pack('>L',(c1^c2)&(2**32-1)) 35 | return c3 + data[4:-8] + c3 + data[-4:] 36 | -------------------------------------------------------------------------------- /ibd2sql/innodb_page/inode.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | class INODE(object): 4 | """ 5 | INPUT: 6 | pg: page reader 7 | inode no: inode number, default 2 8 | RETURN: 9 | INODE LIST 10 | """ 11 | def __init__(self,pg,inodeno=2): 12 | self.pg = pg 13 | self.inodeno = inodeno 14 | self.seg = [] 15 | self.init() 16 | 17 | def _segment(self,data): 18 | return { 19 | "FSEG_ID":struct.unpack('>Q',data[:8])[0], 20 | "FSEG_NOT_FULL_N_USED":struct.unpack('>L',data[8:12])[0], 21 | "FSEG_FREE":struct.unpack('>LLHLH',data[12:28]), # FLST_BASE_NODE_SIZE 22 | "FSEG_NOT_FULL":struct.unpack('>LLHLH',data[28:44]), # len=4 first: page=4 offset=2 last: page=4 offset=2 23 | "FSEG_FULL":struct.unpack('>LLHLH',data[44:60]), 24 | "FSEG_MAGIC":struct.unpack('>L',data[60:64])[0], 25 | "FSEG_FRAG_ARR":struct.unpack('>32L',data[64:192]) 26 | } 27 | 28 | def init(self): 29 | nextpageno = self.inodeno 30 | while True: 31 | data = self.pg.read(nextpageno) 32 | INODE_PRE = struct.unpack('>LH',data[38:44]) 33 | INODE_NEXT = struct.unpack('>LH',data[44:50]) 34 | nextpageno = INODE_NEXT[0] 35 | offset = 50 36 | for _ in range(self.pg.PAGE_SIZE//192//2): 37 | seg1 = self._segment(data[offset:offset+192]) # root node 38 | offset += 192 39 | seg2 = self._segment(data[offset:offset+192]) # first leaf node 40 | offset += 192 41 | if seg1['FSEG_MAGIC'] == 97937874: 42 | self.seg.append([seg1,seg2]) 43 | else: 44 | break 45 | if nextpageno == 4294967295 or nextpageno == 0: 46 | break 47 | -------------------------------------------------------------------------------- 
/ibd2sql/innodb_page/column_type.py:
--------------------------------------------------------------------------------
1 | COLUMN_TYPE = {
2 |     1:{'name':'DECIMAL','is_var':False,'name2':'decimal'},
3 |     2:{'name':'TINY','is_var':False,'name2':'tinyint'},
4 |     3:{'name':'SHORT','is_var':False,'name2':'smallint'},
5 |     4:{'name':'LONG','is_var':False,'name2':'int'},
6 |     5:{'name':'FLOAT','is_var':False,'name2':'float'},
7 |     6:{'name':'DOUBLE','is_var':False,'name2':'double'},
8 |     7:{'name':'TYPE_NULL','is_var':False,'name2':'unknown'},
9 |     8:{'name':'TIMESTAMP','is_var':False,'name2':'timestamp'},
10 |     9:{'name':'LONGLONG','is_var':False,'name2':'bigint'},
11 |     10:{'name':'INT24','is_var':False,'name2':'mediumint'},
12 |     11:{'name':'DATE','is_var':False,'name2':'date'},
13 |     12:{'name':'TIME','is_var':False,'name2':'time'},
14 |     13:{'name':'DATETIME','is_var':False,'name2':'datetime'},
15 |     14:{'name':'YEAR','is_var':False,'name2':'year'},
16 |     15:{'name':'NEWDATE','is_var':False,'name2':'date'},
17 |     16:{'name':'VARCHAR','is_var':True,'name2':'varchar'},
18 |     17:{'name':'BIT','is_var':False,'name2':'bit'},
19 |     18:{'name':'TIMESTAMP2','is_var':False,'name2':'timestamp'},
20 |     19:{'name':'DATETIME2','is_var':False,'name2':'datetime'},
21 |     20:{'name':'TIME2','is_var':False,'name2':'time'},
22 |     21:{'name':'NEWDECIMAL','is_var':False,'name2':'decimal'},
23 |     22:{'name':'ENUM','is_var':False,'name2':'enum'},
24 |     23:{'name':'SET','is_var':False,'name2':'set'},
25 |     24:{'name':'TINY_BLOB','is_var':True,'name2':'tinytext'},
26 |     25:{'name':'MEDIUM_BLOB','is_var':True,'name2':'mediumtext'},
27 |     26:{'name':'LONG_BLOB','is_var':True,'name2':'longtext'},
28 |     27:{'name':'BLOB','is_var':True,'name2':'text'},
29 |     28:{'name':'VAR_STRING','is_var':True,'name2':'varchar'},
30 |     29:{'name':'STRING','is_var':True,'name2':'char'},
31 |     30:{'name':'GEOMETRY','is_var':True,'name2':'geometry'},
32 |     31:{'name':'JSON','is_var':True,'name2':'json'},
33 |     32:{'name':'VECTOR','is_var':True,'name2':'vector'},
34 | }
35 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/lz4.py:
--------------------------------------------------------------------------------
1 | # write by ddcw @https://github.com/ddcw/ibd2sql
2 | # lz4 decompress (fast)
3 | # references : https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
4 | 
5 | """
6 | An LZ4 compressed block is composed of sequences.
7 | sequence = token + [length literals] + literals + offset + [length match] + match
8 | token: 1 byte, first 4 bits: length of literals
9 |        last 4 bits: length of match
10 | each field ranges from 0 to 15; when it is 15, keep reading 1 more byte and add it to the length
11 | literals: not-compressed bytes
12 | offset : how far back in the already-decompressed output to start copying match-length bytes
13 | match : the length of the data to copy
14 | """
15 | 
16 | # lz4 compress (TODO)
17 | def compress(bdata):
18 |     """
19 |     input: bdata: the data to compress
20 |     return: data: the compressed data
21 |     """
22 |     return bdata
23 | 
24 | 
25 | # lz4 decompress
26 | def decompress(bdata,decompress_size):
27 |     """
28 |     input:
29 |         bdata: compressed data
30 |         decompress_size : decompress size
31 |     return: data of decompressed
32 |     ignore dict & prefix_size
33 |     """
34 |     def read_to_less255(tdata,ip):
35 |         length = 0
36 |         while True:
37 |             t = tdata[ip]
38 |             ip += 1
39 |             length += t
40 |             if t != 255:
41 |                 break
42 |         return length,ip
43 | 
44 |     ip = 0 # input pointer
45 |     op = 0 # output pointer
46 |     data = bytearray(decompress_size)
47 | 
48 |     while True:
49 |         token = bdata[ip]
50 |         ip += 1
51 |         ll = token >> 4 # literals length
52 |         if ll == 15:
53 |             tll,ip = read_to_less255(bdata,ip)
54 |             ll += tll
55 |         data[op:op+ll] = bdata[ip:ip+ll] # literals: the incompressible part
56 |         op += ll
57 |         ip += ll
58 |         if decompress_size-op < 12:
59 |             if op == decompress_size:
60 |                 break
61 |             else:
62 |                 raise ValueError('Invalid lz4 compress data.')
63 |         offset = (bdata[ip+1]<<8) | bdata[ip]
64 |         ip += 2
65 |         ml = token & 15
66 |         if ml == 15:
67 |             tml,ip = read_to_less255(bdata,ip)
68 |             ml += tml
69 |         ml += 4
70 |         match = op - offset
71 |         if offset >= ml:
72 |             data[op:op+ml] = data[match:match+ml]
73 |         else:
74 |             # overlapping match (offset < length, LZ4's run-length trick):
75 |             # must copy byte by byte; a slice copy would read bytes not written yet
76 |             for i in range(ml):
77 |                 data[op+i] = data[match+i]
78 |         op += ml
79 |     return bytes(data)
80 | 
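81 | 
82 | # A tiny self-check of the decoder above (hand-built block, not one produced
83 | # by InnoDB; it respects this decoder's end-of-block rule that the final
84 | # sequence is literals only):
85 | # token 0x48 -> 4 literals ('abcd'), match length 8+4=12; offset 4 copies the
86 | # 'abcd' just written, overlapping itself; token 0x50 -> 5 final literals.
87 | if __name__ == '__main__':
88 |     sample = b'\x48' + b'abcd' + b'\x04\x00' + b'\x50' + b'hello'
89 |     assert decompress(sample, 21) == b'abcdabcdabcdabcdhello'
90 |     print('lz4 decompress OK')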
--------------------------------------------------------------------------------
/docs/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGE LOG
2 | 
3 | ## 2.1 (2025.10.24)
4 | 
5 | 1. Fixed some bugs.
6 | 2. Support parsing fragment pages.
7 | 3. Added <extract as much data as possible from bad blocks>.
8 | 
9 | 
10 | ## 2.0 (2025.08.30)
11 | 
12 | The first release of the 2.x series. Most of the code was rewritten, with a clear performance gain; most importantly, concurrency was added, so throughput scales with the number of workers. It can also parse multiple files at once, and files like mysql.ibd where one file holds several tables.
13 | 
14 | try it !
15 | 
16 | 
17 | 
18 | ## Version history
19 | 
20 | | VERSION | UPDATE | NOTE |
21 | | ------- | ---------- | ------------------------------------------------------------ |
22 | | v0.1 | 2023.4.27 | first version |
23 | | v0.2 | 2023.08.30 | support more data types |
24 | | v0.3 | 2023.10.13 | support parsing files upgraded from 5.x to 8.x |
25 | | v1.0 | 2024.01.05 | add debug and support more data types |
26 | | v1.1 | 2024.04.12 | fix some bugs |
27 | | v1.2 | 2024.04.25 | add support for geometry data types |
28 | | v1.3 | 2024.05.11 | add support for 5.x |
29 | | v1.4 | 2024.05.21 | add support for extra pages and subpartitions |
30 | | v1.5 | 2024.07.10 | add support for vector data types and fix INSTANT bug |
31 | | v1.6 | 2024.09.19 | fix some bugs |
32 | | v1.7 | 2024.10.29 | fix some bugs & support compressed pages & support recovery of **dropped tables** & support ucs2,utf16,utf32 charsets |
33 | | v1.8 | 2024.11.09 | support keyring plugin encryption & all character sets |
34 | | v1.9 | 2025.02.21 | fix some bugs & support direct parsing of 5.x files |
35 | | v1.10 | 2025.04.16 | fix some bugs & add super_fast_count.py |
36 | | v1.11 | 2025.06.13 | fix some bugs & make `--force` scan pages one by one to skip BAD BLOCKs |
37 | | v1.12 | 2025.08.30 | fix some bugs and improve performance by over 20% |
38 | | v2.0 | 2025.08.30 | improved performance and support concurrency |
39 | | v2.1 | 2025.10.24 | fix some bugs & add `--set bad-pages=fast/try/skip` |
40 | 
--------------------------------------------------------------------------------
/ibd2sql/innodb_page/subpartition.py:
--------------------------------------------------------------------------------
1 | def SUB_PARTITION(dd):
2 |     """
3 |     INPUT:
4 |         dd: subpartition data dict
5 |     RETURN:
6 |         ddl: subpartition ddl
7 |     """
8 |     ddl = ""
9 |     if dd['partition_type'] == 1: # HASH
10 |         return ddl
11 |     elif dd['partition_type'] == 3: # KEY
12 |         return ddl
13 |     elif dd['partition_type'] == 7: # RANGE
14 |         ddl += f"/*!50100 PARTITION BY RANGE ({dd['partition_expression_utf8']})"
15 |     elif dd['partition_type'] == 8 : # LIST
16 |         ddl += f"/*!50100 PARTITION BY LIST ({dd['partition_expression_utf8']})"
17 |     else:
18 |         return ddl
19 |     ddl += "\n"
20 |     # sub partition
21 |     if dd['subpartition_type'] == 1: # HASH
22 |         ddl += f"SUBPARTITION BY HASH ({dd['subpartition_expression_utf8']})"
23 |     elif dd['subpartition_type'] == 3: # KEY
24 |         ddl += f"SUBPARTITION BY KEY ({dd['subpartition_expression_utf8']})"
25 |     # when partition/subpartition names follow the p0 / p0sp0 pattern, the short auto form can be used
26 |     ISAUTO = True
27 |     pn = -1
28 |     for p in dd['partitions']:
29 |         pn += 1
30 |         if f"p{pn}" != p['name']:
31 |             ISAUTO = False
32 |             break
33 |         spn = -1
34 |         for sp in p['subpartitions']:
35 |             spn += 1
36 |             if f"p{pn}sp{spn}" != sp['name']:
37 |                 ISAUTO = False
38 |                 break
39 |     if ISAUTO:
40 |         ddl += f"\nSUBPARTITIONS {len(dd['partitions'][0]['subpartitions'])}\n("
41 |         if dd['partition_type'] == 7:
42 |             ddl += ",\n".join([ f"PARTITION {x['name']} VALUES LESS THAN {'('+x['values'][0]['value_utf8']+')' if not x['values'][0]['max_value'] else 'MAXVALUE'}" for x in dd['partitions'] ]) + ")"
43 |         elif dd['partition_type'] == 8:
44 |             ddl += ",\n".join([ f"PARTITION {x['name']} VALUES IN ({','.join([ _x['value_utf8'] for _x in x['values']])})" for x in dd['partitions'] ]) + ")"
45 |         ddl += " */"
46 |     else:
47 |         ddl += "(\n"
48 |         for p in dd['partitions']:
49 |             if dd['partition_type'] == 7:
50 |                 ddl += f"    PARTITION {p['name']} VALUES LESS THAN {'('+p['values'][0]['value_utf8']+')' if not p['values'][0]['max_value'] else 'MAXVALUE'}\n"
51 |             elif dd['partition_type'] == 8:
52 |                 ddl += f"    PARTITION {p['name']} VALUES IN ({','.join([ x['value_utf8'] for x in p['values']])})\n"
53 |             _ddl = ""
54 |             for sp in p['subpartitions']:
55 |                 _ddl += f"        SUBPARTITION {sp['name']} ENGINE = {sp['engine']},\n"
56 |             ddl += "    (" + _ddl[8:-2] + "),\n"
57 | 
58 |         ddl = ddl[:-2] + ")*/"
59 |     return ddl
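60 | 
61 | # A minimal illustration of the dict shape consumed above, hand-built to
62 | # mimic SDI output (field values are illustrative, not dumped from a real
63 | # table): a RANGE partition with auto-named HASH subpartitions.
64 | if __name__ == '__main__':
65 |     dd = {
66 |         'partition_type': 7,                      # RANGE
67 |         'partition_expression_utf8': '`id`',
68 |         'subpartition_type': 1,                   # HASH
69 |         'subpartition_expression_utf8': '`id`',
70 |         'partitions': [
71 |             {'name': 'p0', 'values': [{'value_utf8': '10', 'max_value': False}],
72 |              'subpartitions': [{'name': 'p0sp0', 'engine': 'InnoDB'}]},
73 |             {'name': 'p1', 'values': [{'value_utf8': '', 'max_value': True}],
74 |              'subpartitions': [{'name': 'p1sp0', 'engine': 'InnoDB'}]},
75 |         ],
76 |     }
77 |     # prints the short auto form: PARTITION BY RANGE ... SUBPARTITIONS 1
78 |     print(SUB_PARTITION(dd))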
--------------------------------------------------------------------------------
/ibd2sql/innodb_page/compressed_row.py:
--------------------------------------------------------------------------------
1 | from ibd2sql.innodb_page.page import PAGE
2 | import struct
3 | import zlib
4 | # row_format = compressed
5 | def PAGE_ZIP_DECOMPRESS(data,isleaf=True,ispk=True):
6 |     """
7 |     INPUT: data (compressed page)
8 |     RETURN: data (decompressed page)
9 |     """
10 |     # fil_header + page_header (the first 94 bytes are stored uncompressed)
11 |     unpage = data[:94]
12 |     # number of records on the page
13 |     n_dense = struct.unpack('>H',data[42:44])[0] & 32767
14 |     n_recs = struct.unpack('>H',data[54:56])[0]
15 |     # infimum & supremum (rebuilt by hand, they are not kept in the zip stream)
16 |     unpage += struct.pack('>BBB',0x01,0x00,0x02)
17 |     unpage += data[-2:]
18 |     unpage += struct.pack('>8B',0x69, 0x6e, 0x66, 0x69, 0x6d, 0x75, 0x6d, 0x00)
19 |     unpage += b'\x03'
20 |     unpage += struct.pack('>12B',0x00,0x0b,0x00,0x00,0x73,0x75,0x70,0x72,0x65,0x6d,0x75,0x6d)
21 |     # decompress data
22 |     d = zlib.decompressobj()
23 |     c = d.decompress(data[94:])
24 |     toffset = c.find(b'\x01') + 1
25 |     unpage += c[toffset:]
26 |     compressed_offset = len(unpage) + 120
27 |     # uncompressed data
28 |     unpage += d.unused_data
29 |     dataobj = PAGE()
30 |     dataobj.init(unpage)
31 |     dataobj._offset = len(data)
32 |     dd = []
33 |     page_dir = []
34 |     # page_dir & rec_header: the dense directory is read backwards from the
35 |     # page end, 2 bytes per record; bit 0x4000 marks the record as owned
36 |     for x in range(n_recs): # used record
37 |         slot = struct.unpack('>H',dataobj.read_reverse(2))[0]
38 |         deleted = False
39 |         owned = False
40 |         if slot > 16384:
41 |             owned = True
42 |             slot -= 16384
43 |         page_dir.append(slot)
44 |         dd.append([slot,deleted,owned,slot])
45 |     for j in range(n_dense-n_recs-2): # user record deleted
46 |         slot = struct.unpack('>H',dataobj.read_reverse(2))[0]
47 |         deleted = True
48 |         owned = False
49 |         if slot > 16384:
50 |             owned = True
51 |             slot -= 16384
52 |         page_dir.append(slot)
53 |         dd.append([slot,deleted,owned,slot])
54 |     _ = dd.sort()
55 |     # trxid&rollptr (6+7 bytes per record, stored uncompressed before the dense dir)
56 |     trxid_rollptr = []
57 |     for x in range(n_dense-2):
58 |         trxid_rollptr.append(dataobj.read_reverse(13))
59 |     # big char PASS
60 |     # index
61 |     offset = 0
62 |     dataobj.offset = 0
63 |     rdata = b''
64 |     for x in range(len(dd)):
65 |         c_offset = dd[x][0] - 5
66 |         if compressed_offset < c_offset:
67 |             c_offset += 1 if x < 64 else 2
68 |         r_offset = c_offset - offset
69 |         offset = c_offset
70 |         rdata += dataobj.read(r_offset)
71 |         rdata += b'\x00'*5 # record header
72 |         rdata += b'\x00'*13 # rollptr
73 |     return rdata
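74 | 
75 | # Geometry walked above, as reconstructed from this function (a summary of
76 | # the code, not a normative description of ROW_FORMAT=COMPRESSED):
77 | #   bytes 0..93 : FIL header + page header, stored uncompressed
78 | #   bytes 94..  : zlib stream with record bodies (5-byte record headers stripped)
79 | #   after it    : uncompressed leftovers (d.unused_data)
80 | #   page end    : dense directory read backwards, 2 bytes per record,
81 | #                 preceded by one 13-byte trx_id+roll_ptr slot per record
82 | # The function re-inserts zeroed 5-byte record headers and 13-byte
83 | # trx_id/roll_ptr placeholders so downstream index parsing sees a normal page.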
--------------------------------------------------------------------------------
/README_zh.md:
--------------------------------------------------------------------------------
1 | # ibd2sql
2 | 
3 | [english](https://github.com/ddcw/ibd2sql/blob/ibd2sql-v2.x/README.md)
4 | 
5 | [ibd2sql](https://github.com/ddcw/ibd2sql) is a tool for parsing MySQL data files. ~~Pretty awesome.~~ It is written in Python3 with no dependencies; just download it and run, which is also why no binary packages are provided.
6 | 
7 | When all you have left is an IBD file, or half of one, you can use `ibd2sql` to recover the data inside it.
8 | 
9 | 
10 | 
11 | # DOWNLOAD & USAGE
12 | 
13 | ## download
14 | 
15 | The latest version is ibd2sql-v2.1: [https://github.com/ddcw/ibd2sql/archive/refs/tags/v2.1.tar.gz](https://github.com/ddcw/ibd2sql/archive/refs/tags/v2.1.tar.gz)
16 | 
17 | **Linux**
18 | 
19 | ```shell
20 | wget https://github.com/ddcw/ibd2sql/archive/refs/heads/ibd2sql-v2.x.zip
21 | unzip ibd2sql-v2.x.zip
22 | cd ibd2sql-ibd2sql-v2.x/
23 | ```
24 | 
25 | 
26 | 
27 | **Windows**
28 | 
29 | Click to download: https://github.com/ddcw/ibd2sql/archive/refs/heads/ibd2sql-v2.x.zip
30 | 
31 | 
32 | 
33 | ## usage
34 | 
35 | **Linux**
36 | 
37 | ```shell
38 | python3 main.py your_file.ibd --sql --ddl
39 | ```
40 | 
41 | 
42 | 
43 | **Windows**
44 | 
45 | On Windows, python3 is just called python; a naming quirk and a small gotcha.
46 | 
47 | ```powershell
48 | python main.py your_file.ibd --sql --ddl
49 | ```
50 | 
51 | 
52 | 
53 | ## WEB CONSOLE
54 | 
55 | ```
56 | python3 main.py your_file.ibd --web
57 | # afterwards, open http://your-ip-address:8080 in a browser
58 | ```
59 | 
60 | 
61 | 
62 | Full usage documentation: [docs/USAGE.md](https://github.com/ddcw/ibd2sql/blob/ibd2sql-v2.x/docs/USAGE.md)
63 | 
64 | 
65 | 
66 | # PERFORMANCE
67 | 
68 | Environment: MySQL 8.0.28  Python 3.10.4  CPU MHz: 2688.011
69 | 
70 | | ibd2sql VERSION | PARALLEL | RATE |
71 | | --------------- | -------- | ---------------------- |
72 | | 2.0 | 1 | 50941 rows/s, 25MB/s |
73 | | 2.0 | 8 | 209993 rows/s, 104MB/s |
74 | | 1.12 | - | 12037 rows/s, 6MB/s |
75 | | 0.3 | - | 53998 rows/s, 26MB/s |
76 | 
77 | 20 million rows, 5GB in size: with 16-way parallelism it parses in about two and a half minutes. With few CPUs and a slow disk, the upper limit was never reached -_-
78 | 
79 | 
80 | 
81 | # CHANGE LOG
82 | 
83 | | VERSION | UPDATE | NOTE |
84 | | ---- | -------- | --------------------------------------- |
85 | | 2.x | 2025.8 | wider support, higher performance, and concurrency! |
86 | | 1.x | 2024.1 | supports all data types, plus 5.7 |
87 | | 0.x | 2023.4 | only supports some 8.0 cases |
88 | 
89 | Full update history: [docs/CHANGELOG.md](https://github.com/ddcw/ibd2sql/blob/ibd2sql-v2.x/docs/CHANGELOG.md)
90 | 
91 | 
92 | 
93 | # REQUIRE & SUPPORT
94 | 
95 | require: Python >= 3.6
96 | 
97 | support: MySQL 5.6, MySQL 5.7, MySQL 8.0, MySQL 8.4, MySQL 9.x
98 | 
99 | 
100 | 
101 | **Data backups matter more**; after-the-fact recovery is not a silver bullet.
102 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ibd2sql
2 | 
3 | [中文版介绍](https://github.com/ddcw/ibd2sql/blob/ibd2sql-v2.x/README_zh.md)
4 | 
5 | [ibd2sql](https://github.com/ddcw/ibd2sql) is a tool for transforming MySQL IBD files into SQL (data). Written in Python3.
6 | 
7 | When you only have the IBD data file left, or just a portion of it, you can use `ibd2sql` to parse the data within it.
8 | 
9 | 
10 | 
11 | # DOWNLOAD & USAGE
12 | 
13 | ## download
14 | 
15 | latest version: ibd2sql-v2.1 [https://github.com/ddcw/ibd2sql/archive/refs/tags/v2.1.tar.gz](https://github.com/ddcw/ibd2sql/archive/refs/tags/v2.1.tar.gz)
16 | 
17 | **Linux**
18 | 
19 | ```shell
20 | wget https://github.com/ddcw/ibd2sql/archive/refs/heads/ibd2sql-v2.x.zip
21 | unzip ibd2sql-v2.x.zip
22 | cd ibd2sql-ibd2sql-v2.x/
23 | ```
24 | 
25 | 
26 | 
27 | **Windows**
28 | 
29 | click https://github.com/ddcw/ibd2sql/archive/refs/heads/ibd2sql-v2.x.zip to download
30 | 
31 | 
32 | 
33 | ## usage
34 | 
35 | **Linux**
36 | 
37 | ```shell
38 | python3 main.py your_file.ibd --sql --ddl
39 | ```
40 | 
41 | 
42 | 
43 | **Windows**
44 | 
45 | Python3 is called Python on Windows
46 | 
47 | ```powershell
48 | python main.py your_file.ibd --sql --ddl
49 | ```
50 | 
51 | 
52 | 
53 | ## WEB CONSOLE
54 | 
55 | ```
56 | python3 main.py your_file.ibd --web
57 | # and then, you can visit http://yourip:8080 to view that ibd file
58 | ```
59 | 
60 | 
61 | 
62 | more usage: [docs/USAGE.md](https://github.com/ddcw/ibd2sql/blob/ibd2sql-v2.x/docs/USAGE.md)
63 | 
64 | 
65 | 
66 | # PERFORMANCE
67 | 
68 | env: MySQL 8.0.28  Python 3.10.4  CPU MHz: 2688.011
69 | 
70 | | ibd2sql VERSION | PARALLEL | RATE |
71 | | --------------- | -------- | ---------------------- |
72 | | 2.0 | 1 | 50941 rows/s, 25MB/s |
73 | | 2.0 | 8 | 209993 rows/s, 104MB/s |
74 | | 1.12 | - | 12037 rows/s, 6MB/s |
75 | | 0.3 | - | 53998 rows/s, 26MB/s |
76 | 
77 | 
78 | 
79 | # CHANGE LOG
80 | 
81 | | VERSION | UPDATE | NOTE |
82 | | ------- | ------ | ----------------------------------------------------------- |
83 | | 2.x | 2025.8 | Support for more situations and improved performance |
84 | | 1.x | 2024.1 | Supports all data types and 5.7 |
85 | | 0.x | 2023.4 | Only supports some cases of 8.0 |
86 | 
87 | detail: [docs/CHANGELOG.md](https://github.com/ddcw/ibd2sql/blob/ibd2sql-v2.x/docs/CHANGELOG.md)
88 | 
89 | 
90 | 
91 | # REQUIRE & SUPPORT
92 | 
93 | require: Python >= 3.6
94 | 
95 | support: MySQL 5.6, MySQL 5.7, MySQL 8.0, MySQL 8.4, MySQL 9.x
96 | 
97 | **Data backup is very important**
98 | 
--------------------------------------------------------------------------------
/tests/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | MYSQL_CONN='mysql -h127.0.0.1 -P3314 -uroot -p123456'
3 | #MYSQL_CONN="/data/mysql5096/soft/mysql-5.0.96-linux-x86_64-glibc23/bin/mysql -h192.168.101.21 -P5096 -p123456 -uu1"
4 | SCHEMA_01='t20250828_01'
5 | SCHEMA_02='t20250828_02'
6 | IBD2SQL_COM="python3 ../main.py"
7 | 
8 | # check
9 | if ${MYSQL_CONN} -e "create database if not exists ${SCHEMA_02};create database if not exists ${SCHEMA_01};" >/dev/null 2>&1;then
10 | echo "starting...."
11 | else
12 | echo "failed to connect to mysql."
13 | exit 1 14 | fi 15 | 16 | mysql_version_id=$(${MYSQL_CONN} -NB -e "select @@version" 2>/dev/null|sed -E 's/\.([0-9])\./0\1/g;s/-log//g') 17 | 18 | # init data 19 | echo "init data for ${mysql_version_id}" 20 | python3 gen_data.py ${mysql_version_id} 2>/dev/null | ${MYSQL_CONN} ${SCHEMA_01} 2>/dev/null 21 | $(${MYSQL_CONN} -NB -e "flush tables" 2>/dev/null) 22 | sleep 5 23 | 24 | # check data 25 | TC=0 26 | SC=0 27 | echo "check data" 28 | datadir=$(${MYSQL_CONN} -NB -e "select @@datadir" 2>/dev/null) 29 | for tblname in `${MYSQL_CONN} -NB -e "select table_name from information_schema.tables where table_schema='${SCHEMA_01}' and table_name not like '%_partition_%'" 2>/dev/null`; 30 | do 31 | echo -n "${tblname}" 32 | $(${MYSQL_CONN} ${SCHEMA_02} -NB -e "drop table if exists ${tblname}" 2>/dev/null) 33 | ${IBD2SQL_COM} ${datadir}/${SCHEMA_01}/${tblname}.ibd --ddl --sql --schema ${SCHEMA_02} 2>/dev/null| ${MYSQL_CONN} ${SCHEMA_02} 2>/dev/null 34 | checksum1=$(${MYSQL_CONN} ${SCHEMA_01} -NB -e "checksum table ${tblname}" 2>/dev/null | awk '{print $NF}') 35 | checksum2=$(${MYSQL_CONN} ${SCHEMA_02} -NB -e "checksum table ${tblname}" 2>/dev/null | awk '{print $NF}') 36 | echo -ne "\t${checksum1}\t${checksum2}" 37 | TC=$[ ${TC} + 1 ] 38 | if [ "${checksum1}" == "${checksum2}" ];then 39 | echo -e "\tPASS" 40 | SC=$[ ${SC} + 1 ] 41 | else 42 | echo -e "\tFAILED" 43 | fi 44 | done 45 | 46 | for tblname in `${MYSQL_CONN} -NB -e "select table_name from information_schema.tables where table_schema='${SCHEMA_01}' and table_name like '%_partition_%'" 2>/dev/null`; 47 | do 48 | echo -n "${tblname}" 49 | $(${MYSQL_CONN} ${SCHEMA_02} -NB -e "drop table if exists ${tblname}" 2>/dev/null) 50 | ${IBD2SQL_COM} ${datadir}/${SCHEMA_01}/${tblname}\#*.ibd --ddl --sql --schema ${SCHEMA_02} | ${MYSQL_CONN} 2>/dev/null 51 | checksum1=$(${MYSQL_CONN} ${SCHEMA_01} -NB -e "checksum table ${tblname}" 2>/dev/null | awk '{print $NF}') 52 | checksum2=$(${MYSQL_CONN} ${SCHEMA_02} -NB -e "checksum table ${tblname}" 2>/dev/null | awk '{print $NF}') 53 | echo -ne "\t${checksum1}\t${checksum2}" 54 | TC=$[ ${TC} + 1 ] 55 | if [ "${checksum1}" == "${checksum2}" ];then 56 | echo -e "\tPASS" 57 | SC=$[ ${SC} + 1 ] 58 | else 59 | echo -e "\tFAILED" 60 | fi 61 | done 62 | 63 | echo -e "summary: ${SC}/${TC}" 64 | -------------------------------------------------------------------------------- /ibd2sql/utils/charset/swe7.py: -------------------------------------------------------------------------------- 1 | DD_SWE7 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'\xc3\x89', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'\xc3\x84', 92: b'\xc3\x96', 93: 
b'\xc3\x85', 94: b'\xc3\x9c', 95: b'_', 96: b'\xc3\xa9', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'\xc3\xa4', 124: b'\xc3\xb6', 125: b'\xc3\xa5', 126: b'\xc3\xbc', 127: b'?', 128: b'?', 129: b'?', 130: b'?', 131: b'?', 132: b'?', 133: b'?', 134: b'?', 135: b'?', 136: b'?', 137: b'?', 138: b'?', 139: b'?', 140: b'?', 141: b'?', 142: b'?', 143: b'?', 144: b'?', 145: b'?', 146: b'?', 147: b'?', 148: b'?', 149: b'?', 150: b'?', 151: b'?', 152: b'?', 153: b'?', 154: b'?', 155: b'?', 156: b'?', 157: b'?', 158: b'?', 159: b'?', 160: b'?', 161: b'?', 162: b'?', 163: b'?', 164: b'?', 165: b'?', 166: b'?', 167: b'?', 168: b'?', 169: b'?', 170: b'?', 171: b'?', 172: b'?', 173: b'?', 174: b'?', 175: b'?', 176: b'?', 177: b'?', 178: b'?', 179: b'?', 180: b'?', 181: b'?', 182: b'?', 183: b'?', 184: b'?', 185: b'?', 186: b'?', 187: b'?', 188: b'?', 189: b'?', 190: b'?', 191: b'?', 192: b'?', 193: b'?', 194: b'?', 195: b'?', 196: b'?', 197: b'?', 198: b'?', 199: b'?', 200: b'?', 201: b'?', 202: b'?', 203: b'?', 204: b'?', 205: b'?', 206: b'?', 207: b'?', 208: b'?', 209: b'?', 210: b'?', 211: b'?', 212: b'?', 213: b'?', 214: b'?', 215: b'?', 216: b'?', 217: b'?', 218: b'?', 219: b'?', 220: b'?', 221: b'?', 222: b'?', 223: b'?', 224: b'?', 225: b'?', 226: b'?', 227: b'?', 228: b'?', 229: b'?', 230: b'?', 231: b'?', 232: b'?', 233: b'?', 234: b'?', 235: b'?', 236: b'?', 237: b'?', 238: b'?', 239: b'?', 240: b'?', 241: b'?', 242: b'?', 243: b'?', 244: b'?', 245: b'?', 246: b'?', 247: b'?', 248: b'?', 249: b'?', 250: b'?', 251: b'?', 252: b'?', 253: b'?', 254: b'?', 255: b'?'} 2 | -------------------------------------------------------------------------------- /ibd2sql/innodb_page/sdi.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import json 3 | import zlib 4 | from ibd2sql.innodb_page.page import PAGE 5 | 6 | class SDI(object): 7 | """ 8 | INPUT: 9 | sdino: sdi page id 10 | pg: page reader 11 | row_format: row format 12 | RETURN: 13 | data(json): sdi data 14 | """ 15 | def __init__(self,sdino,pg,row_format): 16 | self.sdino = sdino 17 | self.pg = pg 18 | self.offset = 99 19 | self.rec_header_size = 5 20 | self.row_format = row_format 21 | if row_format == "REDUNDANT": 22 | self.offset = 101 23 | self.rec_header_size = 6 24 | 25 | def get_sdi(self): 26 | pageno = self.sdino 27 | data = PAGE() 28 | n_recs = 0 29 | #dd = {} 30 | dd = [] 31 | while pageno < 4294967295: 32 | data.init(self.pg.read(pageno)) 33 | data.init_fil() 34 | data.init_page_header() 35 | n_recs = data.PAGE_N_RECS 36 | pageno = data.FIL_PAGE_NEXT 37 | if self.row_format == 'COMPRESSED': 38 | data._offset = self.pg.PAGE_SIZE - 2*n_recs 39 | d = zlib.decompressobj() 40 | _dd = d.decompress(data.data[94:]) 41 | data.offset = self.pg.PAGE_SIZE - len(d.unused_data) 42 | self.offset = data.offset 43 | self.offset += struct.unpack('>h',data.data[data.offset-2:data.offset])[0] 44 | while n_recs > 0:# general tablespace 45 | n_recs -= 1 46 | if self.row_format != 'COMPRESSED': 47 | data.offset = self.offset 48 | self.offset += struct.unpack('>h',data.data[data.offset-2:data.offset])[0] 49 | if data.offset > 16384 or (data.offset == 112 and self.row_format != 'COMPRESSED'): 50 | break 51 | is_overflow = True if 
data.data[data.offset-self.rec_header_size-2:data.offset-self.rec_header_size] == b'\x14\xc0' else False
52 |         if self.row_format == 'COMPRESSED':
53 |             _t1,_t2 = struct.unpack('>BB',data.read(2))
54 |             if _t2 >= 128:
55 |                 _t1 += (_t2-128)*256
56 |             else:
57 |                 data.offset -= 1
58 |                 _ = data.read(1)
59 |             sdi_type,sdi_id = struct.unpack('>LQ',data.read(12))
60 |             trx1,trx2,undo1,undo2,undo3 = struct.unpack('>LHLHB',data.read_reverse(13))
61 |         else:
62 |             sdi_type,sdi_id = struct.unpack('>LQ',data.read(12))
63 |             trx1,trx2,undo1,undo2,undo3 = struct.unpack('>LHLHB',data.read(13))
64 |         dunzip_len,dzip_len = struct.unpack('>LL',data.read(8))
65 |         trx = (trx1<<16) + trx2
66 |         undo = (undo1<<24) + (undo2<<8) + undo3
67 |         unzbdata = b''
68 |         if is_overflow: # overflow page
69 |             unzbdata = b''
70 |             SPACE_ID,PAGENO,BLOB_HEADER,REAL_SIZE = struct.unpack('>3LQ',data.read(20))
71 |             if REAL_SIZE != dzip_len:
72 |                 raise ValueError('SDI overflow data: REAL_SIZE does not match dzip_len')
73 |             while True: # follow the chain of overflow pages
74 |                 tdata = self.pg.read(PAGENO)
75 |                 REAL_SIZE,PAGENO = struct.unpack('>LL',tdata[38:46])
76 |                 unzbdata += tdata[46:-8]
77 |                 if PAGENO == 4294967295:
78 |                     break
79 |             unzbdata = zlib.decompress(unzbdata)
80 |         else:
81 |             #unzbdata = zlib.decompress(data[offset+33:offset+33+dzip_len])
82 |             unzbdata = zlib.decompress(data.read(dzip_len))
83 |         dic_info = json.loads(unzbdata.decode())
84 |         #dd[dic_info['dd_object']['schema_ref']+'.'+dic_info['dd_object']['name']] = dic_info
85 |         #dd[dic_info['dd_object']['name']] = dic_info
86 |         dd.append(dic_info)
87 |     return dd
88 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/charset/dec8.py:
--------------------------------------------------------------------------------
1 | DD_DEC8 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'@', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'[', 92: b'\\', 93: b']', 94: b'^', 95: b'_', 96: b'`', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'{', 124: b'|', 125: b'}', 126: b'~', 127: b'\x7f', 128: b'\xc2\x80', 129: b'\xc2\x81', 130: b'\xc2\x82', 131: b'\xc2\x83', 132: b'\xc2\x84', 133: b'\xc2\x85', 134: b'\xc2\x86', 135: b'\xc2\x87', 136: b'\xc2\x88', 137: b'\xc2\x89', 138: b'\xc2\x8a', 139: b'\xc2\x8b', 140: b'\xc2\x8c', 141: b'\xc2\x8d', 142: b'\xc2\x8e', 143: b'\xc2\x8f', 144: b'\xc2\x90', 145: b'\xc2\x91', 146: b'\xc2\x92', 147: b'\xc2\x93', 148: b'\xc2\x94', 149: b'\xc2\x95', 150: b'\xc2\x96', 151: b'\xc2\x97', 152: 
b'\xc2\x98', 153: b'\xc2\x99', 154: b'\xc2\x9a', 155: b'\xc2\x9b', 156: b'\xc2\x9c', 157: b'\xc2\x9d', 158: b'\xc2\x9e', 159: b'\xc2\x9f', 160: b'\xc2\xa0', 161: b'\xc2\xa1', 162: b'\xc2\xa2', 163: b'\xc2\xa3', 164: b'?', 165: b'\xc2\xa5', 166: b'?', 167: b'\xc2\xa7', 168: b'\xc2\xa4', 169: b'\xc2\xa9', 170: b'\xc2\xaa', 171: b'\xc2\xab', 172: b'?', 173: b'?', 174: b'?', 175: b'?', 176: b'\xc2\xb0', 177: b'\xc2\xb1', 178: b'\xc2\xb2', 179: b'\xc2\xb3', 180: b'?', 181: b'\xc2\xb5', 182: b'\xc2\xb6', 183: b'\xc2\xb7', 184: b'?', 185: b'\xc2\xb9', 186: b'\xc2\xba', 187: b'\xc2\xbb', 188: b'\xc2\xbc', 189: b'\xc2\xbd', 190: b'?', 191: b'\xc2\xbf', 192: b'\xc3\x80', 193: b'\xc3\x81', 194: b'\xc3\x82', 195: b'\xc3\x83', 196: b'\xc3\x84', 197: b'\xc3\x85', 198: b'\xc3\x86', 199: b'\xc3\x87', 200: b'\xc3\x88', 201: b'\xc3\x89', 202: b'\xc3\x8a', 203: b'\xc3\x8b', 204: b'\xc3\x8c', 205: b'\xc3\x8d', 206: b'\xc3\x8e', 207: b'\xc3\x8f', 208: b'?', 209: b'\xc3\x91', 210: b'\xc3\x92', 211: b'\xc3\x93', 212: b'\xc3\x94', 213: b'\xc3\x95', 214: b'\xc3\x96', 215: b'\xc5\x92', 216: b'\xc3\x98', 217: b'\xc3\x99', 218: b'\xc3\x9a', 219: b'\xc3\x9b', 220: b'\xc3\x9c', 221: b'\xc5\xb8', 222: b'?', 223: b'\xc3\x9f', 224: b'\xc3\xa0', 225: b'\xc3\xa1', 226: b'\xc3\xa2', 227: b'\xc3\xa3', 228: b'\xc3\xa4', 229: b'\xc3\xa5', 230: b'\xc3\xa6', 231: b'\xc3\xa7', 232: b'\xc3\xa8', 233: b'\xc3\xa9', 234: b'\xc3\xaa', 235: b'\xc3\xab', 236: b'\xc3\xac', 237: b'\xc3\xad', 238: b'\xc3\xae', 239: b'\xc3\xaf', 240: b'?', 241: b'\xc3\xb1', 242: b'\xc3\xb2', 243: b'\xc3\xb3', 244: b'\xc3\xb4', 245: b'\xc3\xb5', 246: b'\xc3\xb6', 247: b'\xc5\x93', 248: b'\xc3\xb8', 249: b'\xc3\xb9', 250: b'\xc3\xba', 251: b'\xc3\xbb', 252: b'\xc3\xbc', 253: b'\xc3\xbf', 254: b'?', 255: b'?'} 2 | -------------------------------------------------------------------------------- /ibd2sql/utils/charset/geostd8.py: -------------------------------------------------------------------------------- 1 | DD_GEOSTD8 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'@', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'[', 92: b'\\', 93: b']', 94: b'^', 95: b'_', 96: b'`', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'{', 124: b'|', 125: b'}', 126: b'~', 127: b'\x7f', 128: b'\xe2\x82\xac', 129: b'?', 130: b'\xe2\x80\x9a', 131: b'?', 132: b'\xe2\x80\x9e', 133: b'\xe2\x80\xa6', 134: b'\xe2\x80\xa0', 135: b'\xe2\x80\xa1', 136: b'?', 137: 
b'\xe2\x80\xb0', 138: b'?', 139: b'\xe2\x80\xb9', 140: b'?', 141: b'?', 142: b'?', 143: b'?', 144: b'?', 145: b'\xe2\x80\x98', 146: b'\xe2\x80\x99', 147: b'\xe2\x80\x9c', 148: b'\xe2\x80\x9d', 149: b'\xe2\x80\xa2', 150: b'\xe2\x80\x93', 151: b'\xe2\x80\x94', 152: b'?', 153: b'?', 154: b'?', 155: b'\xe2\x80\xba', 156: b'?', 157: b'?', 158: b'?', 159: b'?', 160: b'\xc2\xa0', 161: b'\xc2\xa1', 162: b'\xc2\xa2', 163: b'\xc2\xa3', 164: b'\xc2\xa4', 165: b'\xc2\xa5', 166: b'\xc2\xa6', 167: b'\xc2\xa7', 168: b'\xc2\xa8', 169: b'\xc2\xa9', 170: b'\xc2\xaa', 171: b'\xc2\xab', 172: b'\xc2\xac', 173: b'\xc2\xad', 174: b'\xc2\xae', 175: b'\xc2\xaf', 176: b'\xc2\xb0', 177: b'\xc2\xb1', 178: b'\xc2\xb2', 179: b'\xc2\xb3', 180: b'\xc2\xb4', 181: b'\xc2\xb5', 182: b'\xc2\xb6', 183: b'\xc2\xb7', 184: b'\xc2\xb8', 185: b'\xc2\xb9', 186: b'\xc2\xba', 187: b'\xc2\xbb', 188: b'\xc2\xbc', 189: b'\xc2\xbd', 190: b'\xc2\xbe', 191: b'\xc2\xbf', 192: b'\xe1\x83\x90', 193: b'\xe1\x83\x91', 194: b'\xe1\x83\x92', 195: b'\xe1\x83\x93', 196: b'\xe1\x83\x94', 197: b'\xe1\x83\x95', 198: b'\xe1\x83\x96', 199: b'\xe1\x83\xb1', 200: b'\xe1\x83\x97', 201: b'\xe1\x83\x98', 202: b'\xe1\x83\x99', 203: b'\xe1\x83\x9a', 204: b'\xe1\x83\x9b', 205: b'\xe1\x83\x9c', 206: b'\xe1\x83\xb2', 207: b'\xe1\x83\x9d', 208: b'\xe1\x83\x9e', 209: b'\xe1\x83\x9f', 210: b'\xe1\x83\xa0', 211: b'\xe1\x83\xa1', 212: b'\xe1\x83\xa2', 213: b'\xe1\x83\xb3', 214: b'\xe1\x83\xa3', 215: b'\xe1\x83\xa4', 216: b'\xe1\x83\xa5', 217: b'\xe1\x83\xa6', 218: b'\xe1\x83\xa7', 219: b'\xe1\x83\xa8', 220: b'\xe1\x83\xa9', 221: b'\xe1\x83\xaa', 222: b'\xe1\x83\xab', 223: b'\xe1\x83\xac', 224: b'\xe1\x83\xad', 225: b'\xe1\x83\xae', 226: b'\xe1\x83\xb4', 227: b'\xe1\x83\xaf', 228: b'\xe1\x83\xb0', 229: b'\xe1\x83\xb5', 230: b'?', 231: b'?', 232: b'?', 233: b'?', 234: b'?', 235: b'?', 236: b'?', 237: b'?', 238: b'?', 239: b'?', 240: b'?', 241: b'?', 242: b'?', 243: b'?', 244: b'?', 245: b'?', 246: b'?', 247: b'?', 248: b'?', 249: b'?', 250: b'?', 251: b'?', 252: b'?', 253: b'\xe2\x84\x96', 254: b'?', 255: b'?'} 2 | -------------------------------------------------------------------------------- /ibd2sql/utils/charset/armscii8.py: -------------------------------------------------------------------------------- 1 | DD_ARMSCII8 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'@', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'[', 92: b'\\', 93: b']', 94: b'^', 95: b'_', 96: b'`', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: 
b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'{', 124: b'|', 125: b'}', 126: b'~', 127: b'\x7f', 128: b'\xc2\x80', 129: b'\xc2\x81', 130: b'\xc2\x82', 131: b'\xc2\x83', 132: b'\xc2\x84', 133: b'\xc2\x85', 134: b'\xc2\x86', 135: b'\xc2\x87', 136: b'\xc2\x88', 137: b'\xc2\x89', 138: b'\xc2\x8a', 139: b'\xc2\x8b', 140: b'\xc2\x8c', 141: b'\xc2\x8d', 142: b'\xc2\x8e', 143: b'\xc2\x8f', 144: b'\xc2\x90', 145: b'\xc2\x91', 146: b'\xc2\x92', 147: b'\xc2\x93', 148: b'\xc2\x94', 149: b'\xc2\x95', 150: b'\xc2\x96', 151: b'\xc2\x97', 152: b'\xc2\x98', 153: b'\xc2\x99', 154: b'\xc2\x9a', 155: b'\xc2\x9b', 156: b'\xc2\x9c', 157: b'\xc2\x9d', 158: b'\xc2\x9e', 159: b'\xc2\x9f', 160: b'\xc2\xa0', 161: b'\xe2\x9d\x81', 162: b'\xc2\xa7', 163: b'\xd6\x89', 164: b')', 165: b'(', 166: b'\xc2\xbb', 167: b'\xc2\xab', 168: b'\xe2\x80\x94', 169: b'.', 170: b'\xd5\x9d', 171: b',', 172: b'-', 173: b'\xd5\x9f', 174: b'\xe2\x80\xa6', 175: b'\xd5\x9c', 176: b'\xd5\x9b', 177: b'\xd5\x9e', 178: b'\xd4\xb1', 179: b'\xd5\xa1', 180: b'\xd4\xb2', 181: b'\xd5\xa2', 182: b'\xd4\xb3', 183: b'\xd5\xa3', 184: b'\xd4\xb4', 185: b'\xd5\xa4', 186: b'\xd4\xb5', 187: b'\xd5\xa5', 188: b'\xd4\xb6', 189: b'\xd5\xa6', 190: b'\xd4\xb7', 191: b'\xd5\xa7', 192: b'\xd4\xb8', 193: b'\xd5\xa8', 194: b'\xd4\xb9', 195: b'\xd5\xa9', 196: b'\xd4\xba', 197: b'\xd5\xaa', 198: b'\xd4\xbb', 199: b'\xd5\xab', 200: b'\xd4\xbc', 201: b'\xd5\xac', 202: b'\xd4\xbd', 203: b'\xd5\xad', 204: b'\xd4\xbe', 205: b'\xd5\xae', 206: b'\xd4\xbf', 207: b'\xd5\xaf', 208: b'\xd5\x80', 209: b'\xd5\xb0', 210: b'\xd5\x81', 211: b'\xd5\xb1', 212: b'\xd5\x82', 213: b'\xd5\xb2', 214: b'\xd5\x83', 215: b'\xd5\xb3', 216: b'\xd5\x84', 217: b'\xd5\xb4', 218: b'\xd5\x85', 219: b'\xd5\xb5', 220: b'\xd5\x86', 221: b'\xd5\xb6', 222: b'\xd5\x87', 223: b'\xd5\xb7', 224: b'\xd5\x88', 225: b'\xd5\xb8', 226: b'\xd5\x89', 227: b'\xd5\xb9', 228: b'\xd5\x8a', 229: b'\xd5\xba', 230: b'\xd5\x8b', 231: b'\xd5\xbb', 232: b'\xd5\x8c', 233: b'\xd5\xbc', 234: b'\xd5\x8d', 235: b'\xd5\xbd', 236: b'\xd5\x8e', 237: b'\xd5\xbe', 238: b'\xd5\x8f', 239: b'\xd5\xbf', 240: b'\xd5\x90', 241: b'\xd6\x80', 242: b'\xd5\x91', 243: b'\xd6\x81', 244: b'\xd5\x92', 245: b'\xd6\x82', 246: b'\xd5\x93', 247: b'\xd6\x83', 248: b'\xd5\x94', 249: b'\xd6\x84', 250: b'\xd5\x95', 251: b'\xd6\x85', 252: b'\xd5\x96', 253: b'\xd6\x86', 254: b'\xe2\x80\x99', 255: b"'"} 2 | -------------------------------------------------------------------------------- /ibd2sql/utils/charset/hp8.py: -------------------------------------------------------------------------------- 1 | DD_HP8 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'@', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: 
b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'[', 92: b'\\', 93: b']', 94: b'^', 95: b'_', 96: b'`', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'{', 124: b'|', 125: b'}', 126: b'~', 127: b'\x7f', 128: b'\xc2\x80', 129: b'\xc2\x81', 130: b'\xc2\x82', 131: b'\xc2\x83', 132: b'\xc2\x84', 133: b'\xc2\x85', 134: b'\xc2\x86', 135: b'\xc2\x87', 136: b'\xc2\x88', 137: b'\xc2\x89', 138: b'\xc2\x8a', 139: b'\xc2\x8b', 140: b'\xc2\x8c', 141: b'\xc2\x8d', 142: b'\xc2\x8e', 143: b'\xc2\x8f', 144: b'\xc2\x90', 145: b'\xc2\x91', 146: b'\xc2\x92', 147: b'\xc2\x93', 148: b'\xc2\x94', 149: b'\xc2\x95', 150: b'\xc2\x96', 151: b'\xc2\x97', 152: b'\xc2\x98', 153: b'\xc2\x99', 154: b'\xc2\x9a', 155: b'\xc2\x9b', 156: b'\xc2\x9c', 157: b'\xc2\x9d', 158: b'\xc2\x9e', 159: b'\xc2\x9f', 160: b'\xc2\xa0', 161: b'\xc3\x80', 162: b'\xc3\x82', 163: b'\xc3\x88', 164: b'\xc3\x8a', 165: b'\xc3\x8b', 166: b'\xc3\x8e', 167: b'\xc3\x8f', 168: b'\xc2\xb4', 169: b'\xcb\x8b', 170: b'\xcb\x86', 171: b'\xc2\xa8', 172: b'\xcb\x9c', 173: b'\xc3\x99', 174: b'\xc3\x9b', 175: b'\xe2\x82\xa4', 176: b'\xc2\xaf', 177: b'\xc3\x9d', 178: b'\xc3\xbd', 179: b'\xc2\xb0', 180: b'\xc3\x87', 181: b'\xc3\xa7', 182: b'\xc3\x91', 183: b'\xc3\xb1', 184: b'\xc2\xa1', 185: b'\xc2\xbf', 186: b'\xc2\xa4', 187: b'\xc2\xa3', 188: b'\xc2\xa5', 189: b'\xc2\xa7', 190: b'\xc6\x92', 191: b'\xc2\xa2', 192: b'\xc3\xa2', 193: b'\xc3\xaa', 194: b'\xc3\xb4', 195: b'\xc3\xbb', 196: b'\xc3\xa1', 197: b'\xc3\xa9', 198: b'\xc3\xb3', 199: b'\xc3\xba', 200: b'\xc3\xa0', 201: b'\xc3\xa8', 202: b'\xc3\xb2', 203: b'\xc3\xb9', 204: b'\xc3\xa4', 205: b'\xc3\xab', 206: b'\xc3\xb6', 207: b'\xc3\xbc', 208: b'\xc3\x85', 209: b'\xc3\xae', 210: b'\xc3\x98', 211: b'\xc3\x86', 212: b'\xc3\xa5', 213: b'\xc3\xad', 214: b'\xc3\xb8', 215: b'\xc3\xa6', 216: b'\xc3\x84', 217: b'\xc3\xac', 218: b'\xc3\x96', 219: b'\xc3\x9c', 220: b'\xc3\x89', 221: b'\xc3\xaf', 222: b'\xc3\x9f', 223: b'\xc3\x94', 224: b'\xc3\x81', 225: b'\xc3\x83', 226: b'\xc3\xa3', 227: b'\xc3\x90', 228: b'\xc3\xb0', 229: b'\xc3\x8d', 230: b'\xc3\x8c', 231: b'\xc3\x93', 232: b'\xc3\x92', 233: b'\xc3\x95', 234: b'\xc3\xb5', 235: b'\xc5\xa0', 236: b'\xc5\xa1', 237: b'\xc3\x9a', 238: b'\xc5\xb8', 239: b'\xc3\xbf', 240: b'\xc3\x9e', 241: b'\xc3\xbe', 242: b'\xc2\xb7', 243: b'\xc2\xb5', 244: b'\xc2\xb6', 245: b'\xc2\xbe', 246: b'\xe2\x80\x94', 247: b'\xc2\xbc', 248: b'\xc2\xbd', 249: b'\xc2\xaa', 250: b'\xc2\xba', 251: b'\xc2\xab', 252: b'\xe2\x96\xa0', 253: b'\xc2\xbb', 254: b'\xc2\xb1', 255: b'?'} 2 | -------------------------------------------------------------------------------- /ibd2sql/innodb_page/fsp.py: -------------------------------------------------------------------------------- 1 | import struct 2 | from ibd2sql.utils import crc32c 3 | from ibd2sql.innodb_page.xdes import GET_XDES_SIZE_COUNT 4 | from ibd2sql.utils.aes import aes_ecb256_decrypt 5 | 6 | INFO_SIZE = 111 7 | INFO_MAX_SIZE = 115 8 | SDI_VERSION = 1 9 | 10 | def GET_FSP_STATUS_FROM_FLAGS(flags): 11 | logical_size = 16384 if ((flags & 960) >> 6) == 0 else 512 << ((flags & 960) >> 6) 12 | physical_size = logical_size if ((flags & 30) >> 1) == 0 else 512<<((flags & 30) >> 1) 13 | compressed = False if ((flags & 30) >> 1) == 0 else True 14 | return { 15 | 'POST_ANTELOPE':(flags & 1) >> 0, 16 | 
'ZIP_SSIZE':(flags & 30) >> 1,
17 |         'ATOMIC_BLOBS':(flags & 32) >> 5,
18 |         'PAGE_SSIZE':(flags & 960) >> 6,
19 |         'DATA_DIR':(flags & 1024) >> 10,
20 |         'SHARED':(flags & 2048) >> 11,
21 |         'TEMPORARY':(flags & 4096) >> 12,
22 |         'ENCRYPTION':(flags & 8192) >> 13,
23 |         'SDI':(flags & 16384) >> 14,
24 |         'logical_size':logical_size, # logical page size (in memory)
25 |         'physical_size':physical_size, # physical page size (in disk)
26 |         'compressed':compressed
27 |     } # e.g. flags=33 (0b100001): POST_ANTELOPE=1, ATOMIC_BLOBS=1, ZIP_SSIZE=0 -> 16KB logical pages, not compressed
28 | 
29 | def PARSE_ENCRYPTION_INFO(data,kd):
30 |     """
31 |     INPUT:
32 |         data: encryption_data(115 bytes)
33 |         kd: keyring file data dict
34 |     RETURN:
35 |         dict(key,iv...)
36 |     """
37 |     magic = data[:3]
38 |     master_key_id = struct.unpack('>L',data[3:7])[0]
39 |     offset = 7
40 |     if magic == b"lCB":
41 |         offset += 4
42 |     server_uuid = data[offset:offset+36].decode()
43 |     offset += 36
44 |     master_key = kd['INNODBKey'+'-'+server_uuid+'-'+str(master_key_id)]['key']
45 |     key_info = aes_ecb256_decrypt(master_key,data[offset:offset+32*2])
46 |     offset += 32*2
47 |     checksum_1 = crc32c.calculate_crc32c(key_info)
48 |     checksum_2 = struct.unpack('>L',data[offset:offset+4])[0]
49 |     return {'key':key_info[:32],'iv':key_info[32:48],'magic':magic,'server_uuid':server_uuid,'checksum_1':checksum_1,'checksum_2':checksum_2,'status':checksum_1 == checksum_2}
50 | 
51 | class FSP(object):
52 |     def __init__(self,data,page_size=0,compression_ratio=1):
53 |         self.compression_ratio = compression_ratio
54 |         self.data = data
55 |         self.PAGE_SIZE = len(data) if page_size == 0 else page_size
56 |         self.XDES_SIZE,self.XDES_COUNT = GET_XDES_SIZE_COUNT(self.PAGE_SIZE)
57 |         self.XDES_SIZE = self.XDES_SIZE//compression_ratio
58 |         self.init_space_header()
59 |         self.init_xdes()
60 |         self.init_encryption()
61 |         self.init_sdi()
62 |         self.FIL_PAGE_PREV,self.FIL_PAGE_NEXT = struct.unpack('>2L',data[8:16])
63 |         #self.init_fsp_status()
64 | 
65 | 
66 |     def init_space_header(self):
67 |         data = self.data[38:38+112]
68 |         self.FSP_SPACE_ID,self.FSP_NOT_USED,self.FSP_SIZE,self.FSP_FREE_LIMIT,self.FSP_SPACE_FLAGS,self.FSP_FRAG_N_USED = struct.unpack('>6L',data[:6*4])
69 |         self.FSP_FREE = struct.unpack('>LLHLH',data[24:40])
70 |         self.FSP_FREE_FRAG = struct.unpack('>LLHLH',data[40:56])
71 |         self.FSP_FULL_FRAG = struct.unpack('>LLHLH',data[56:72])
72 |         self.FSP_SEG_ID = struct.unpack('>Q',data[72:80])[0]
73 |         self.FSP_SEG_INODES_FULL = struct.unpack('>LLHLH',data[80:96])
74 |         self.FSP_SEG_INODES_FREE = struct.unpack('>LLHLH',data[96:112])
75 | 
76 | 
77 |     def init_xdes(self):
78 |         self.xdes = []
79 |         for i in range(self.XDES_COUNT):
80 |             self.xdes.append(self.data[150+i*self.XDES_SIZE:150+i*self.XDES_SIZE+self.XDES_SIZE])
81 | 
82 |     def init_encryption(self):
83 |         offset = 150+self.XDES_COUNT*self.XDES_SIZE
84 |         self.encryption_info = self.data[offset:offset+INFO_MAX_SIZE]
85 |         self.encryption = True if self.encryption_info != b'\x00'*INFO_MAX_SIZE else False # non-zero encryption info means the tablespace is encrypted
86 | 
87 |     def init_sdi(self):
88 |         offset = 150+self.XDES_COUNT*self.XDES_SIZE+115
89 |         self.SDI_VERSION,self.SDI_PAGE_NO = struct.unpack('>LL',self.data[offset:offset+8])
90 | 
91 | 
--------------------------------------------------------------------------------
/ibd2sql/utils/charset/keybcs2.py:
--------------------------------------------------------------------------------
1 | DD_KEYBCS2 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: 
b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'@', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'[', 92: b'\\', 93: b']', 94: b'^', 95: b'_', 96: b'`', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'{', 124: b'|', 125: b'}', 126: b'~', 127: b'\x7f', 128: b'\xc4\x8c', 129: b'\xc3\xbc', 130: b'\xc3\xa9', 131: b'\xc4\x8f', 132: b'\xc3\xa4', 133: b'\xc4\x8e', 134: b'\xc5\xa4', 135: b'\xc4\x8d', 136: b'\xc4\x9b', 137: b'\xc4\x9a', 138: b'\xc4\xb9', 139: b'\xc3\x8d', 140: b'\xc4\xbe', 141: b'\xc4\xba', 142: b'\xc3\x84', 143: b'\xc3\x81', 144: b'\xc3\x89', 145: b'\xc5\xbe', 146: b'\xc5\xbd', 147: b'\xc3\xb4', 148: b'\xc3\xb6', 149: b'\xc3\x93', 150: b'\xc5\xaf', 151: b'\xc3\x9a', 152: b'\xc3\xbd', 153: b'\xc3\x96', 154: b'\xc3\x9c', 155: b'\xc5\xa0', 156: b'\xc4\xbd', 157: b'\xc3\x9d', 158: b'\xc5\x98', 159: b'\xc5\xa5', 160: b'\xc3\xa1', 161: b'\xc3\xad', 162: b'\xc3\xb3', 163: b'\xc3\xba', 164: b'\xc5\x88', 165: b'\xc5\x87', 166: b'\xc5\xae', 167: b'\xc3\x94', 168: b'\xc5\xa1', 169: b'\xc5\x99', 170: b'\xc5\x95', 171: b'\xc5\x94', 172: b'\xc2\xbc', 173: b'\xc2\xa1', 174: b'\xc2\xab', 175: b'\xc2\xbb', 176: b'\xe2\x96\x91', 177: b'\xe2\x96\x92', 178: b'\xe2\x96\x93', 179: b'\xe2\x94\x82', 180: b'\xe2\x94\xa4', 181: b'\xe2\x95\xa1', 182: b'\xe2\x95\xa2', 183: b'\xe2\x95\x96', 184: b'\xe2\x95\x95', 185: b'\xe2\x95\xa3', 186: b'\xe2\x95\x91', 187: b'\xe2\x95\x97', 188: b'\xe2\x95\x9d', 189: b'\xe2\x95\x9c', 190: b'\xe2\x95\x9b', 191: b'\xe2\x94\x90', 192: b'\xe2\x94\x94', 193: b'\xe2\x94\xb4', 194: b'\xe2\x94\xac', 195: b'\xe2\x94\x9c', 196: b'\xe2\x94\x80', 197: b'\xe2\x94\xbc', 198: b'\xe2\x95\x9e', 199: b'\xe2\x95\x9f', 200: b'\xe2\x95\x9a', 201: b'\xe2\x95\x94', 202: b'\xe2\x95\xa9', 203: b'\xe2\x95\xa6', 204: b'\xe2\x95\xa0', 205: b'\xe2\x95\x90', 206: b'\xe2\x95\xac', 207: b'\xe2\x95\xa7', 208: b'\xe2\x95\xa8', 209: b'\xe2\x95\xa4', 210: b'\xe2\x95\xa5', 211: b'\xe2\x95\x99', 212: b'\xe2\x95\x98', 213: b'\xe2\x95\x92', 214: b'\xe2\x95\x93', 215: b'\xe2\x95\xab', 216: b'\xe2\x95\xaa', 217: b'\xe2\x94\x98', 218: b'\xe2\x94\x8c', 219: b'\xe2\x96\x88', 220: b'\xe2\x96\x84', 221: b'\xe2\x96\x8c', 222: b'\xe2\x96\x90', 223: b'\xe2\x96\x80', 224: b'\xce\xb1', 225: b'\xc3\x9f', 226: b'\xce\x93', 227: b'\xcf\x80', 228: b'\xce\xa3', 229: b'\xcf\x83', 230: b'\xc2\xb5', 231: b'\xcf\x84', 232: b'\xce\xa6', 233: b'\xce\x98', 234: b'\xce\xa9', 235: b'\xce\xb4', 236: b'\xe2\x88\x9e', 237: b'\xcf\x86', 238: b'\xce\xb5', 239: b'\xe2\x88\xa9', 240: b'\xe2\x89\xa1', 241: b'\xc2\xb1', 242: b'\xe2\x89\xa5', 243: b'\xe2\x89\xa4', 244: b'\xe2\x8c\xa0', 245: b'\xe2\x8c\xa1', 246: b'\xc3\xb7', 247: b'\xe2\x89\x88', 248: b'\xc2\xb0', 249: b'\xe2\x88\x99', 
250: b'\xc2\xb7', 251: b'\xe2\x88\x9a', 252: b'\xe2\x81\xbf', 253: b'\xc2\xb2', 254: b'\xe2\x96\xa0', 255: b'\xc2\xa0'} 2 | -------------------------------------------------------------------------------- /ibd2sql/utils/charset/tis620.py: -------------------------------------------------------------------------------- 1 | DD_TIS620 = {0: b'\x00', 1: b'\x01', 2: b'\x02', 3: b'\x03', 4: b'\x04', 5: b'\x05', 6: b'\x06', 7: b'\x07', 8: b'\x08', 9: b'\t', 10: b'\n', 11: b'\x0b', 12: b'\x0c', 13: b'\r', 14: b'\x0e', 15: b'\x0f', 16: b'\x10', 17: b'\x11', 18: b'\x12', 19: b'\x13', 20: b'\x14', 21: b'\x15', 22: b'\x16', 23: b'\x17', 24: b'\x18', 25: b'\x19', 26: b'\x1a', 27: b'\x1b', 28: b'\x1c', 29: b'\x1d', 30: b'\x1e', 31: b'\x1f', 32: b' ', 33: b'!', 34: b'"', 35: b'#', 36: b'$', 37: b'%', 38: b'&', 39: b"'", 40: b'(', 41: b')', 42: b'*', 43: b'+', 44: b',', 45: b'-', 46: b'.', 47: b'/', 48: b'0', 49: b'1', 50: b'2', 51: b'3', 52: b'4', 53: b'5', 54: b'6', 55: b'7', 56: b'8', 57: b'9', 58: b':', 59: b';', 60: b'<', 61: b'=', 62: b'>', 63: b'?', 64: b'@', 65: b'A', 66: b'B', 67: b'C', 68: b'D', 69: b'E', 70: b'F', 71: b'G', 72: b'H', 73: b'I', 74: b'J', 75: b'K', 76: b'L', 77: b'M', 78: b'N', 79: b'O', 80: b'P', 81: b'Q', 82: b'R', 83: b'S', 84: b'T', 85: b'U', 86: b'V', 87: b'W', 88: b'X', 89: b'Y', 90: b'Z', 91: b'[', 92: b'\\', 93: b']', 94: b'^', 95: b'_', 96: b'`', 97: b'a', 98: b'b', 99: b'c', 100: b'd', 101: b'e', 102: b'f', 103: b'g', 104: b'h', 105: b'i', 106: b'j', 107: b'k', 108: b'l', 109: b'm', 110: b'n', 111: b'o', 112: b'p', 113: b'q', 114: b'r', 115: b's', 116: b't', 117: b'u', 118: b'v', 119: b'w', 120: b'x', 121: b'y', 122: b'z', 123: b'{', 124: b'|', 125: b'}', 126: b'~', 127: b'\x7f', 128: b'\xc2\x80', 129: b'\xc2\x81', 130: b'\xc2\x82', 131: b'\xc2\x83', 132: b'\xc2\x84', 133: b'\xc2\x85', 134: b'\xc2\x86', 135: b'\xc2\x87', 136: b'\xc2\x88', 137: b'\xc2\x89', 138: b'\xc2\x8a', 139: b'\xc2\x8b', 140: b'\xc2\x8c', 141: b'\xc2\x8d', 142: b'\xc2\x8e', 143: b'\xc2\x8f', 144: b'\xc2\x90', 145: b'\xc2\x91', 146: b'\xc2\x92', 147: b'\xc2\x93', 148: b'\xc2\x94', 149: b'\xc2\x95', 150: b'\xc2\x96', 151: b'\xc2\x97', 152: b'\xc2\x98', 153: b'\xc2\x99', 154: b'\xc2\x9a', 155: b'\xc2\x9b', 156: b'\xc2\x9c', 157: b'\xc2\x9d', 158: b'\xc2\x9e', 159: b'\xc2\x9f', 160: b'\xef\xbf\xbd', 161: b'\xe0\xb8\x81', 162: b'\xe0\xb8\x82', 163: b'\xe0\xb8\x83', 164: b'\xe0\xb8\x84', 165: b'\xe0\xb8\x85', 166: b'\xe0\xb8\x86', 167: b'\xe0\xb8\x87', 168: b'\xe0\xb8\x88', 169: b'\xe0\xb8\x89', 170: b'\xe0\xb8\x8a', 171: b'\xe0\xb8\x8b', 172: b'\xe0\xb8\x8c', 173: b'\xe0\xb8\x8d', 174: b'\xe0\xb8\x8e', 175: b'\xe0\xb8\x8f', 176: b'\xe0\xb8\x90', 177: b'\xe0\xb8\x91', 178: b'\xe0\xb8\x92', 179: b'\xe0\xb8\x93', 180: b'\xe0\xb8\x94', 181: b'\xe0\xb8\x95', 182: b'\xe0\xb8\x96', 183: b'\xe0\xb8\x97', 184: b'\xe0\xb8\x98', 185: b'\xe0\xb8\x99', 186: b'\xe0\xb8\x9a', 187: b'\xe0\xb8\x9b', 188: b'\xe0\xb8\x9c', 189: b'\xe0\xb8\x9d', 190: b'\xe0\xb8\x9e', 191: b'\xe0\xb8\x9f', 192: b'\xe0\xb8\xa0', 193: b'\xe0\xb8\xa1', 194: b'\xe0\xb8\xa2', 195: b'\xe0\xb8\xa3', 196: b'\xe0\xb8\xa4', 197: b'\xe0\xb8\xa5', 198: b'\xe0\xb8\xa6', 199: b'\xe0\xb8\xa7', 200: b'\xe0\xb8\xa8', 201: b'\xe0\xb8\xa9', 202: b'\xe0\xb8\xaa', 203: b'\xe0\xb8\xab', 204: b'\xe0\xb8\xac', 205: b'\xe0\xb8\xad', 206: b'\xe0\xb8\xae', 207: b'\xe0\xb8\xaf', 208: b'\xe0\xb8\xb0', 209: b'\xe0\xb8\xb1', 210: b'\xe0\xb8\xb2', 211: b'\xe0\xb8\xb3', 212: b'\xe0\xb8\xb4', 213: b'\xe0\xb8\xb5', 214: b'\xe0\xb8\xb6', 215: b'\xe0\xb8\xb7', 216: 
b'\xe0\xb8\xb8', 217: b'\xe0\xb8\xb9', 218: b'\xe0\xb8\xba', 219: b'\xef\xbf\xbd', 220: b'\xef\xbf\xbd', 221: b'\xef\xbf\xbd', 222: b'\xef\xbf\xbd', 223: b'\xe0\xb8\xbf', 224: b'\xe0\xb9\x80', 225: b'\xe0\xb9\x81', 226: b'\xe0\xb9\x82', 227: b'\xe0\xb9\x83', 228: b'\xe0\xb9\x84', 229: b'\xe0\xb9\x85', 230: b'\xe0\xb9\x86', 231: b'\xe0\xb9\x87', 232: b'\xe0\xb9\x88', 233: b'\xe0\xb9\x89', 234: b'\xe0\xb9\x8a', 235: b'\xe0\xb9\x8b', 236: b'\xe0\xb9\x8c', 237: b'\xe0\xb9\x8d', 238: b'\xe0\xb9\x8e', 239: b'\xe0\xb9\x8f', 240: b'\xe0\xb9\x90', 241: b'\xe0\xb9\x91', 242: b'\xe0\xb9\x92', 243: b'\xe0\xb9\x93', 244: b'\xe0\xb9\x94', 245: b'\xe0\xb9\x95', 246: b'\xe0\xb9\x96', 247: b'\xe0\xb9\x97', 248: b'\xe0\xb9\x98', 249: b'\xe0\xb9\x99', 250: b'\xe0\xb9\x9a', 251: b'\xe0\xb9\x9b', 252: b'\xef\xbf\xbd', 253: b'\xef\xbf\xbd', 254: b'\xef\xbf\xbd', 255: b'\xef\xbf\xbd'} 2 | -------------------------------------------------------------------------------- /ibd2sql/innodb_page/page.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import zlib 3 | import struct 4 | from ibd2sql.utils import lz4 5 | from ibd2sql.utils import aes 6 | from ibd2sql.utils.b2data import * 7 | 8 | PAGE_NEW_INFIMUM = 99 9 | PAGE_NEW_SUPREMUM = 112 10 | INFO_SIZE = 111 11 | INFO_MAX_SIZE = 115 12 | SDI_VERSION = 1 13 | 14 | class PAGE_READER(object): 15 | """ 16 | INPUT: 17 | require: 18 | page_size: page size 19 | filename: filename 20 | option: 21 | encryption: True or False 22 | iv (require if encryption is True) 23 | key (require if encryption is True) 24 | compression: True or False 25 | 26 | RETURN: 27 | PAGE: binary data 28 | 29 | """ 30 | def __init__(self,*args,**kwargs): 31 | self.PAGE_SIZE = kwargs['page_size'] 32 | self.filename = kwargs['filename'] 33 | self.f = open(self.filename,'rb') 34 | self.pageid = 0 35 | if 'encryption' in kwargs and kwargs['encryption']: 36 | self.iv = kwargs['iv'] 37 | self.key = kwargs['key'] 38 | self.read = self._read_page_encryption 39 | else: 40 | self.read = self._read_page_compression 41 | # elif 'compression' in kwargs and kwargs['compression']: 42 | # self.read = self._read_page_compression 43 | # else: 44 | # self.read = self._read_page 45 | 46 | def __close__(self): 47 | self.f.close() 48 | 49 | def read(self,n): 50 | pass 51 | 52 | def _read_page_compression(self,*args): 53 | data = self._read_page(*args) 54 | if data[24:26] == b'\x00\x0e': 55 | FIL_PAGE_VERSION,FIL_PAGE_ALGORITHM_V1,FIL_PAGE_ORIGINAL_TYPE_V1,FIL_PAGE_ORIGINAL_SIZE_V1,FIL_PAGE_COMPRESS_SIZE_V1 = struct.unpack('>BBHHH',data[26:34]) 56 | if FIL_PAGE_ALGORITHM_V1 == 1: 57 | data = data[:24] + struct.pack('>H',FIL_PAGE_ORIGINAL_TYPE_V1) + b'\x00'*8 + data[34:38] + zlib.decompress(data[38:38+FIL_PAGE_COMPRESS_SIZE_V1]) 58 | elif FIL_PAGE_ALGORITHM_V1 == 2: 59 | data = data[:24] + struct.pack('>H',FIL_PAGE_ORIGINAL_TYPE_V1) + b'\x00'*8 + data[34:38] + lz4.decompress(data[38:38+FIL_PAGE_COMPRESS_SIZE_V1],FIL_PAGE_ORIGINAL_SIZE_V1) 60 | return data 61 | 62 | def _read_page(self,n=None): 63 | if n is not None: 64 | self.f.seek(n*self.PAGE_SIZE,0) 65 | self.pageid = n 66 | else: 67 | self.pageid += 1 68 | return self.f.read(self.PAGE_SIZE) 69 | 70 | def _read_page_encryption(self,*args): 71 | data = self._read_page(*args) 72 | if data[24:26] == b'\x00\x0f': 73 | FIL_PAGE_VERSION,FIL_PAGE_ALGORITHM_V1,FIL_PAGE_ORIGINAL_TYPE_V1,FIL_PAGE_ORIGINAL_SIZE_V1,FIL_PAGE_COMPRESS_SIZE_V1 = struct.unpack('>BBHHH',data[26:34]) 74 | data = data[:24] + 
struct.pack('>H',FIL_PAGE_ORIGINAL_TYPE_V1) + b'\x00'*8 + data[34:38] + aes.aes_cbc256_decrypt(self.key,data[38:-10],self.iv) + aes.aes_cbc256_decrypt(self.key,data[-32:],self.iv)[-10:] 75 | return data 76 | 77 | 78 | class PAGE(object): 79 | def __init__(self,): 80 | pass 81 | 82 | def init(self,data): 83 | self.data = data 84 | self.offset = 0 85 | self._offset = 0 86 | 87 | def init_fil(self): 88 | # FIL HEADER 0:38 89 | self.FIL_PAGE_SPACE_OR_CHECKSUM,self.FIL_PAGE_OFFSET,self.FIL_PAGE_PREV,self.FIL_PAGE_NEXT,self.FIL_PAGE_LSN,self.FIL_PAGE_TYPE,self.FIL_PAGE_FILE_FLUSH_LSN,self.FIL_PAGE_SPACE_ID = struct.unpack('>4LQHQL',self.data[:38]) 90 | # FIL DATA 38:-8 91 | # FIL TRAILER -8: 92 | self.CHECKSUM,self.FIL_PAGE_LSN2 = struct.unpack('>LL',self.data[-8:]) 93 | 94 | def init_page_header(self): 95 | self.PAGE_N_DIR_SLOTS,self.PAGE_HEAP_TOP,self.PAGE_N_HEAP,self.PAGE_FREE,self.PAGE_GARBAGE,self.PAGE_LAST_INSERT,self.PAGE_DIRECTION,self.PAGE_N_DIRECTION,self.PAGE_N_RECS,self.PAGE_MAX_TRX_ID,self.PAGE_LEVEL,self.PAGE_INDEX_ID = struct.unpack('>9HQHQ',self.data[38:38+36]) 96 | self.PAGE_BTR_SEG_LEAF = struct.unpack('>LLH',self.data[74:84]) 97 | self.PAGE_BTR_SEG_TOP = struct.unpack('>LLH',self.data[84:94]) 98 | self.offset = PAGE_NEW_INFIMUM 99 | self._offset = PAGE_NEW_INFIMUM 100 | 101 | def read(self,n): 102 | data = self.data[self.offset:self.offset+n] 103 | self.offset += n 104 | return data 105 | 106 | def read_reverse(self,n): 107 | data = self.data[self._offset-n:self._offset] 108 | self._offset -= n 109 | return data 110 | -------------------------------------------------------------------------------- /ibd2sql/utils/mysql_json2.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import sys 3 | #sys.setrecursionlimit(20) 4 | 5 | def DEJSONTYPE(t): 6 | isbig = False 7 | signed = False 8 | size = 2 9 | name = '' 10 | _type = 'None' 11 | if t <= 3: 12 | _type = 'obj' if t <= 1 else 'array' 13 | isbig = True if t%2 == 1 else False 14 | name = f"{'large' if isbig else 'small'} JSON object" 15 | size = 4 if isbig else 2 16 | elif t == 4: 17 | _type = 'literal' 18 | name = "literal" 19 | size = 2 20 | elif t <= 10: 21 | _type = 'int' 22 | if t in [7,8]: 23 | size = 4 24 | if t in [9,10]: 25 | size = 8 26 | signed = True if t%2 == 1 else False 27 | name = f"{'' if signed else 'u'}int{size*8}" 28 | elif t == 11: 29 | size = 8 30 | name = 'double' 31 | _type = 'double' 32 | elif t == 12: 33 | name = 'utf8mb4 string' 34 | _type = 'char' 35 | 36 | return { 37 | 'isbig':isbig, 38 | 'signed':signed, 39 | 'size':size, 40 | 'name':name, 41 | 'type':_type, 42 | 't':t 43 | } 44 | 45 | 46 | 47 | class JSON2DICT(object): 48 | def __init__(self,data): 49 | self.data = data 50 | self.offset = 0 51 | self.offset_start = 0 52 | 53 | def read(self,n,offset_start=1,offset=0): 54 | toffset = offset_start + offset 55 | data = self.data[toffset:toffset+n] 56 | #print(offset_start,'OFFSET:',self.offset,'-->',end='') 57 | #print(self.offset) 58 | return offset+n,data 59 | 60 | def init(self,offset_start=1,t=None,offset=0): 61 | if t is None: # first init 62 | t = self.data[0] 63 | dj = DEJSONTYPE(t) 64 | #print(offset_start,dj['type'],dj) 65 | if dj['type'] == 'obj': # json object 66 | offset,data = self.read(dj['size'],offset_start,offset) 67 | element_count = int.from_bytes(data,'little') 68 | #print('ELEMENT OBJ COUNT:',element_count) 69 | offset,data = self.read(dj['size'],offset_start,offset) 70 | size = int.from_bytes(data,'little') 71 | 
offset,key_entry = self._read_key_entry(offset,offset_start,element_count,dj['size']) 72 | offset,value_entry = self._read_value_entry(offset,offset_start,element_count) 73 | offset,key = self._read_key(offset,offset_start,key_entry) 74 | #print(offset_start,'^^^^^^^^^^^^^^^^^^^',len(value_entry)) 75 | offset,value = self._read_value(offset,offset_start,value_entry) 76 | #print(offset_start,'*******************',value,len(value_entry)) 77 | return {k:v for k,v in zip(key,value)} 78 | elif dj['type'] == 'array': # json array 79 | offset,data = self.read(dj['size'],offset_start,offset) 80 | element_count = int.from_bytes(data,'little') 81 | #print('ELEMENT ARRAY COUNT:',element_count) 82 | offset,data = self.read(dj['size'],offset_start,offset) 83 | size = int.from_bytes(data,'little') 84 | offset,value_entry = self._read_value_entry(offset,offset_start,element_count) 85 | offset,value = self._read_value(offset,offset_start,value_entry) 86 | #print(offset_start,'################VALUE,',value) 87 | return value 88 | elif dj['type'] == 'literal': # literal(null/true/false) 89 | offset,data = self.read(2,offset_start,offset) 90 | data, = struct.unpack('d',data) 142 | elif dj['type'] == 'char': 143 | offset,data = self.read(dj['size'],offset_start,offset) 144 | tdata = int.from_bytes(data,'little',signed=False) 145 | elif dj['type'] == 'literal': 146 | offset,data = self.read(dj['size'],offset_start,offset) 147 | tdata = int.from_bytes(data,'little',signed=False) 148 | else: 149 | offset,data = self.read(dj['size'],offset_start,offset) 150 | #print('#########################################',offset_start,offset,data) 151 | tdata = int.from_bytes(data,'little',signed=False) + offset_start 152 | #print(offset_start,tdata,dj) 153 | rdata.append([dj,tdata]) 154 | return offset,rdata 155 | 156 | def _read_value(self,offset,offset_start,value_entry): 157 | rdata = [] 158 | for dj,tdata in value_entry: 159 | if dj['type'] == 'char': 160 | #self.offset = dj['size'] 161 | #print('QQQQQQQQQQQQQQQ',offset_start,offset,tdata) 162 | offset = tdata 163 | offset,data = self._read_vardata(offset,offset_start) 164 | rdata.append(data) 165 | elif dj['type'] in ['array','obj']: 166 | #print('PPPPPPPPPPPPPPP',offset,tdata,dj) 167 | rdata.append(self.init(tdata,dj['t'])) 168 | else: 169 | rdata.append(tdata) 170 | return offset,rdata 171 | 172 | def _read_vardata(self,offset,offset_start): 173 | offset,data = self.read(1,offset_start,offset) 174 | t1, = struct.unpack(' python3 main.py /data/mysql_3414/mysqldata/db1/t20250830_test_ddl.ibd --ddl history 35 | CREATE TABLE IF NOT EXISTS `db1`.`t20250830_test_ddl` ( 36 | `id` int DEFAULT NULL, 37 | `name` varchar(200) DEFAULT NULL 38 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci; 39 | ALTER TABLE `db1`.`t20250830_test_ddl` ADD COLUMN `age` int DEFAULT NULL; 40 | ALTER TABLE `db1`.`t20250830_test_ddl` DROP COLUMN `name`; 41 | ``` 42 | 43 | `--ddl disable-keys`: the DDL contains no indexes other than the primary key 44 | 45 | `--ddl keys-after`: indexes are appended at the end of the output as ALTER TABLE ... ADD statements. 46 | 47 | 48 | 49 | ## --sql (tweaked) 50 | 51 | `--sql` is still the option that prints SQL statements, but to address INSERT speed an extra mode was added: 52 | 53 | `--sql data`: the output is no longer SQL statements but a file format that `LOAD DATA` can import. By default fields are separated by `,` and rows by `\n`; other characters can be chosen with `--set` 54 | 
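To make the `--sql data` format concrete, here is a minimal sketch of producing and importing such a file; the file name, table name and rows below are illustrative assumptions, not output of ibd2sql itself:

```python
# sketch: a "--sql data" style file uses ',' between fields and '\n' between rows
rows = [(1, 'alice'), (2, 'bob')]            # hypothetical decoded rows
with open('rows.txt', 'w') as f:
    for row in rows:
        f.write(','.join(str(col) for col in row) + '\n')
# server-side, such a file would then be imported with something like:
#   LOAD DATA INFILE '/path/rows.txt' INTO TABLE db1.t1
#   FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';
```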
55 | 56 | 57 | ## --delete (tweaked) 58 | 59 | `--delete` still prints the deleted rows, but it now takes a sub-option: 60 | 61 | `--delete only`: print only the deleted rows; this is also the default of --delete 62 | 63 | `--delete with`: print the normal data together with the rows marked as deleted. 64 | 65 | 66 | 67 | ## --complete-insert 68 | 69 | INSERT statements include the column names 70 | 71 | 72 | 73 | ## --multi-value 74 | 75 | Output one INSERT statement per page. 76 | 77 | 78 | 79 | ## --force 80 | 81 | With this option the whole data file is traversed forcibly. (The default is to walk the B+tree leaf nodes) 82 | 83 | 84 | 85 | ## --replace 86 | 87 | Use `REPLACE INTO` instead of `INSERT INTO` 88 | 89 | 90 | 91 | ## --table 92 | 93 | The table name in the output is replaced with this option's value. Only suitable when there is a single table 94 | 95 | 96 | 97 | ## --schema 98 | 99 | The schema of the output table is replaced with this option's value. 100 | 101 | 102 | 103 | ## --sdi-table 104 | 105 | Specifies the table's metadata. The current version detects partitioned tables and frm metadata by default; if nothing is detected, this option's value is used. 106 | 107 | 108 | 109 | ## --limit 110 | 111 | Number of rows output (statement count rather than data-row count; with `--multi-value` it is n pages of data) 112 | 113 | 114 | 115 | ## --keyring-file 116 | 117 | Specify the keyring file 118 | 119 | 120 | 121 | ## --output (new) 122 | 123 | Specify the output directory 124 | 125 | `--output` creates a directory starting with `ibd2sql_auto_dir_` under the current directory as the output directory. 126 | 127 | `--output /tmp` creates a directory starting with `ibd2sql_auto_dir_` under /tmp as the output directory. 128 | 129 | 130 | 131 | ## --output-filesize (new) 132 | 133 | If an output file exceeds this option's value, it is rotated automatically. 134 | 135 | 136 | 137 | ## --print-sdi (new) 138 | 139 | Print the table's metadata, like `ibd2sdi` 140 | 141 | 142 | 143 | ## --count 144 | 145 | Count the table's rows 146 | 147 | 148 | 149 | ## --web 150 | 151 | Enable the web feature; the table's data can be browsed as a B+tree in a browser. Formerly the job of `ibd2sql_web.py`. Supports multiple data files 152 | 153 | 154 | 155 | ## --lctn 156 | 157 | View/modify the `lower_case_table_names` value stored in mysql.ibd. 158 | 159 | `--lctn` shows the lower_case_table_names value in mysql.ibd 160 | 161 | `--lctn 1` sets the lower_case_table_names value in mysql.ibd to 1. Allowed values are 0,1,2 162 | 163 | Formerly the job of `modify_lower_case_table_names.py` 164 | 165 | ## --parallel (new) 166 | 167 | Set the degree of parallelism used while parsing data. For large tables this option speeds up parsing significantly. The suggested value is the number of idle CPUs. 168 | 169 | 170 | 171 | ## --log (new) 172 | 173 | Log output 174 | 175 | `--log` writes the log to stderr 176 | 177 | `--log xxx.log` writes the log to xxx.log 178 | 179 | 180 | 181 | ## --set (new) 182 | 183 | Options that are less important but still handy live here. Usage: 184 | 185 | ```shell 186 | --set 'k1=v,k2;k3=v' --set 'k4=v' 187 | ``` 188 | 189 | `--set='hex'` field values in the output are shown in hexadecimal. 190 | 191 | `--set='leafno=4'` use page 4 as the leaf page 192 | 193 | `--set='schema=db1'` filter the target data files by schema; anything not in db1 is skipped. 194 | 195 | `--set='table=t1'` filter the target data files by table; skipped if not t1 196 | 
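The `--set` grammar above (items separated by `,` or `;`, bare keys acting as flags) can be illustrated with a small parser. This is only a sketch of the documented syntax, not necessarily how ibd2sql parses it internally:

```python
import re

def parse_set_args(set_args):
    """Collapse repeated --set values like 'k1=v,k2;k3=v' into one dict."""
    opts = {}
    for arg in set_args:
        for item in re.split(r'[,;]', arg):  # ',' and ';' both separate items
            if not item:
                continue
            key, _, value = item.partition('=')  # a bare key becomes a flag
            opts[key.strip()] = value.strip() if value else True
    return opts

print(parse_set_args(["k1=v,k2;k3=v", "k4=v"]))
# -> {'k1': 'v', 'k2': True, 'k3': 'v', 'k4': 'v'}
```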
197 | 198 | 199 | 200 | 201 | # Usage examples 202 | 203 | Parse a data file and get the DDL and DML 204 | 205 | ```shell 206 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --sql --ddl 207 | ``` 208 | 209 | 210 | 211 | Parse a data file and get the DDL and DML, parsing with 8 processes in parallel 212 | 213 | ``` 214 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --sql --ddl --parallel 8 215 | ``` 216 | 217 | 218 | 219 | Parse a data file and get the DDL and DML, parsing with 8 processes and writing the output to the '/tmp' directory 220 | 221 | ```shell 222 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --sql --ddl --parallel 8 --output='/tmp' 223 | ``` 224 | 225 | 226 | 227 | Force-parse the target data file 228 | 229 | ```shell 230 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --sql --ddl --force 231 | ``` 232 | 233 | 234 | 235 | Parse multiple data files 236 | 237 | ```shell 238 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest* --sql --ddl 239 | ``` 240 | 241 | 242 | 243 | Parse the rows marked as deleted in a data file 244 | 245 | ```shell 246 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --sql --delete 247 | ``` 248 | 249 | 250 | 251 | Parse a data file and output it in data mode, for easy import with LOAD DATA 252 | 253 | ```shell 254 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --sql data 255 | ``` 256 | 257 | 258 | 259 | View the target file's SDI information 260 | 261 | ```shell 262 | python3 main.py /data/mysql_3314/mysqldata/db1/sbtest2.ibd --print-sdi 263 | ``` 264 | 265 | 266 | 267 | View a specific table inside the target file 268 | 269 | ```shell 270 | python3 main.py /data/mysql_3314/mysqldata/mysql.ibd --set='table=user' --ddl --sql 271 | ``` 272 | 273 | 274 | 275 | Browse in the web console 276 | 277 | ```shell 278 | python3 main.py /data/mysql_3314/mysqldata/mysql.ibd --web 279 | ``` 280 | 281 | 282 | 283 | View the lower_case_table_names value recorded in mysql.ibd 284 | 285 | ```shell 286 | python3 main.py /data/mysql_3314/mysqldata/mysql.ibd --lctn 287 | ``` 288 | 289 | # Fragment pages / bad blocks / damaged ibd files 290 | For fragment pages, bad blocks, damaged or incomplete ibd files and similar cases, the remaining data can still be parsed out. Just use `--set rootno=0 --set leafno=0 --force`; in most cases you will also need `--sdi` to supply the metadata. 291 | ```shell 292 | python3 main.py /tmp/t20250908_test_4_pages.ibd --sdi /data/mysql_3308/mysqldata/db1/sbtest2.frm --sql --set leafno=0 --set rootno=0 --force 293 | ``` 294 | 295 | Parallelism and the other options work here too 296 | ```shell 297 | python3 main.py /tmp/t20250908_test_4_pages.ibd --sdi /data/mysql_3308/mysqldata/db1/sbtest2.frm --sql --set leafno=0 --set rootno=0 --force --parallel 4 298 | ``` 299 | 300 | For page files extracted by undrop-for-innodb, the following approach works 301 | ```shell 302 | python3 main.py /PATH/pages-vda1/ --sdi /PATH/t20250912_2.frm --set indexid=22 --sql 303 | ``` 304 | indexid=22 corresponds to the 22 (the index id) in the file /PATH/pages-vda1/FIL_PAGE_INDEX/0000000000000022.page 305 | 306 | # Forcing data extraction from bad blocks 307 | Three choices are provided for bad blocks. 308 | 1. `--set bad-pages=fast` parse as much of the bad block as possible using the page-directory information; the result may contain extra rows or miss some 309 | 2. `--set bad-pages=try` parse the bad block byte by byte; this yields many extra rows (the simpler the table structure, the more extras) but never misses data (everything parseable is parsed) 310 | 3. `--set bad-pages=skip` skip the data in bad blocks. 311 | Example: 312 | ```shell 313 | python3 main.py /tmp/sbtest2.ibd --sql --force --set bad-pages=fast 314 | ``` 315 | > With bad blocks present, the links between leaf pages are no longer reliable, so --force is needed to traverse the whole data file. Combine the other options as needed. bad-pages is currently only handled in the single-process path. 
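The `--force` and bad-pages modes boil down to a fixed-stride scan of the file instead of following leaf-page pointers. A minimal sketch of that loop, assuming an uncompressed, unencrypted file with the default 16KB page size (the 2-byte page type sits at byte offset 24 of every page, and 17855 = 0x45bf marks a B+tree INDEX page):

```python
import struct

PAGE_SIZE = 16384        # default innodb_page_size
FIL_PAGE_INDEX = 17855   # 0x45bf, a B+tree node page

def scan_index_pages(filename):
    """Yield the page number of every INDEX page found by brute-force scan."""
    with open(filename, 'rb') as f:
        pageno = 0
        while True:
            page = f.read(PAGE_SIZE)
            if len(page) < PAGE_SIZE:
                break
            page_type = struct.unpack('>H', page[24:26])[0]
            if page_type == FIL_PAGE_INDEX:
                yield pageno
            pageno += 1

# usage sketch: print(list(scan_index_pages('/tmp/sbtest2.ibd')))
```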
316 | -------------------------------------------------------------------------------- /ibd2sql/utils/aes.py: -------------------------------------------------------------------------------- 1 | # aes ecb&cbc解密 2 | # references: 3 | # https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf 4 | # https://github.com/ricmoo/pyaes 5 | 6 | # Substitution Box 7 | Sbox = [ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 ] 8 | 9 | # Inverse Substitution Box 10 | I_Sbox = [ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d ] 11 | 12 | # Round Constants 13 | Rcon = [ 0x00000000, 
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x1b000000, 0x36000000, 0x6c000000, 0xd8000000, 0xab000000, 0x4d000000, 0x9a000000, 0x2f000000, 0x5e000000, 0xbc000000, 0x63000000, 0xc6000000, 0x97000000, 0x35000000, 0x6a000000, 0xd4000000, 0xb3000000, 0x7d000000, 0xfa000000, 0xef000000, 0xc5000000, 0x91000000, 0x39000000, ] 14 | 15 | # GF(2^8) 16 | def GMul(a,b): 17 | p = 0 18 | for _ in range(8): 19 | if b & 1: 20 | p ^= a 21 | hi_bit_set = a & 0x80 22 | a <<= 1 23 | a &= 0xff 24 | if hi_bit_set: 25 | a ^= 0x1b 26 | b >>= 1 27 | return p 28 | 29 | # RotWord ([1,2,3,4] --> [2,3,4,1]) 30 | def RotWord(word): 31 | return word[1:] + word[:1] 32 | 33 | # SubWord 34 | def SubWord(word): 35 | return [ Sbox[x] for x in word ] 36 | 37 | def KeyExpansion(key): 38 | key_symbols = [ x for x in key ] 39 | Nb = 4 40 | Nk = 8 41 | Nr = 14 42 | w = [0] * Nb * (Nr + 1) 43 | for x in range(Nk): 44 | w[x] = key_symbols[ 4*x : 4*(x+1) ] 45 | for i in range(Nk, Nb*(Nr+1)): 46 | temp = w[ i-1 ][:] 47 | if i % Nk == 0: 48 | temp = SubWord(RotWord(temp)) 49 | temp[0] ^= Rcon[i // Nk] >> 24 50 | elif Nk > 6 and i % Nk == 4: 51 | temp = SubWord(temp) 52 | w[i] = [w_i ^ t_i for w_i, t_i in zip(w[i - Nk], temp)] 53 | return w 54 | 55 | def AddRoundKey(state,w): 56 | for i in range(4): 57 | for j in range(4): 58 | state[j][i] ^= w[i][j] 59 | return state 60 | 61 | def InvSubBytes(s): 62 | for i in range(4): 63 | for j in range(4): 64 | s[i][j] = I_Sbox[s[i][j]] 65 | return s 66 | 67 | # ([0,1,2,3] --> [3,0,1,2]) 68 | def InvShiftRows(s): 69 | s[1][0], s[1][1], s[1][2], s[1][3] = s[1][3], s[1][0], s[1][1], s[1][2] 70 | s[2][0], s[2][1], s[2][2], s[2][3] = s[2][2], s[2][3], s[2][0], s[2][1] 71 | s[3][0], s[3][1], s[3][2], s[3][3] = s[3][1], s[3][2], s[3][3], s[3][0] 72 | return s 73 | 74 | def InvMixColumns(s): 75 | for i in range(4): 76 | a = s[0][i] 77 | b = s[1][i] 78 | c = s[2][i] 79 | d = s[3][i] 80 | s[0][i] = GMul(a,14) ^ GMul(b,11) ^ GMul(c,13) ^ GMul(d,9) 81 | s[1][i] = GMul(a,9) ^ GMul(b,14) ^ GMul(c,11) ^ GMul(d,13) 82 | s[2][i] = GMul(a,13) ^ GMul(b,9) ^ GMul(c,14) ^ GMul(d,11) 83 | s[3][i] = GMul(a,11) ^ GMul(b,13) ^ GMul(c,9) ^ GMul(d,14) 84 | return s 85 | 
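# note (added for clarity): the constants 14,11,13,9 in InvMixColumns above are the
# {0e,0b,0d,09} coefficients of the inverse MixColumns matrix from FIPS-197; each
# output byte is their GF(2^8) dot product with one state column, e.g.
#   s'[0][i] = 0e*a ^ 0b*b ^ 0d*c ^ 09*d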
86 | # AES decryption of one 16-byte block 87 | def AESDecrypt(block,key_schedule): 88 | Nb = 4 89 | Nr = len(key_schedule) // Nb - 1 90 | #state = [[0]*4]*4 # damn, this is all references (every row would alias the same list)... 91 | state = [[0] * 4 for _ in range(4)] 92 | for i in range(4): 93 | for j in range(4): 94 | state[j][i] = block[i*4 + j] 95 | state = AddRoundKey(state, key_schedule[Nr*Nb : (Nr+1)*Nb]) 96 | for round in range(Nr-1, 0, -1): 97 | state = InvShiftRows(state) 98 | state = InvSubBytes(state) 99 | state = AddRoundKey(state, key_schedule[round*Nb : (round+1)*Nb]) 100 | state = InvMixColumns(state) 101 | # the last round is special (no InvMixColumns) 102 | state = InvShiftRows(state) 103 | state = InvSubBytes(state) 104 | state = AddRoundKey(state, key_schedule[0 : Nb]) 105 | decrypted_block = [] 106 | for i in range(4): 107 | for j in range(4): 108 | decrypted_block.append(state[j][i]) 109 | return bytes(decrypted_block) 110 | 111 | # ecb256 encrypt 112 | def aes_ecb_256_encrypt(key,data): 113 | pass 114 | 115 | # ecb256 decrypt 116 | def aes_ecb256_decrypt(key,data): 117 | """ 118 | INPUT: 119 | key: key 120 | data: encrypted data 121 | RETURN: 122 | rdata: decrypted data 123 | """ 124 | rdata = b'' 125 | expanded_key = KeyExpansion(key) 126 | for i in range(0,len(data),16): 127 | rdata += AESDecrypt(data[i:i+16], expanded_key) 128 | return rdata 129 | 130 | # cbc256 decrypt 131 | def aes_cbc256_decrypt(key,data,iv): 132 | """ 133 | INPUT: 134 | key: key 135 | data: encrypted data 136 | iv: initialization vector 137 | RETURN: 138 | rdata: decrypted data 139 | """ 140 | rdata = b'' 141 | expanded_key = KeyExpansion(key) 142 | pre_block = iv 143 | for i in range(0,int(len(data)/16)*16,16): 144 | block = data[i:i+16] 145 | decrypted_block = AESDecrypt(block, expanded_key) 146 | plaintext_block = bytes([d^p for d,p in zip(decrypted_block,pre_block)]) 147 | pre_block = block 148 | rdata += plaintext_block 149 | return rdata 150 | -------------------------------------------------------------------------------- /ibd2sql/utils/mysql_json.py: -------------------------------------------------------------------------------- 1 | #@mysql sql/json_binary.h 2 | import struct 3 | import sys 4 | 5 | _ = """ 6 | - ----------------- 7 | | JSON OBJECT/ARRAY | 8 | - ----------------- 9 | | 10 | ------------------------------------------------------------------------- 11 | | TYPE | ELEMENT_COUNT | KEY-ENTRY(if object) | VALUE-ENTRY | KEY | VALUE | 12 | ------------------------------------------------------------------------- 13 | | | | 14 | | | -------------- 15 | -------------------------- | | UTF8MB4 DATA | 16 | | KEY-OFFSET | KEY-LENGTH | | -------------- 17 | -------------------------- | 18 | | 19 | -------------------------------- 20 | | TYPE | OFFSET/VALUE(if small) | 21 | -------------------------------- 22 | 23 | small 2 bytes large 4 bytes 24 | --------------------------------------------------- 25 | TYPE 1 byte 26 | COUNT 2/4 bytes 27 | SIZE 2/4 bytes 28 | VALUE VALUE/OBJECT/ARRAY 29 | --------------------------------------------------- 30 | 31 | --------------------------------------------------- 32 | OBJECT VALUE = KEY_ENTRY + VALUE_ENTRY + KEY + VALUE #keys are always strings, so no type byte is needed 33 | ARRAY VALUE = VALUE_ENTRY + VALUE #no KEY needed 34 | 35 | KEY_ENTRY = KEY_OFFSET(2/4bytes) + KEY_LENGTH(2 bytes) 36 | VALUE_ENTRY = TYPE(1byte) + OFFSET(2/4 bytes)/VALUE (small types such as int and literal are stored inline; everything else goes through OFFSET) 37 | --------------------------------------------------- 38 | 39 | """ 40 | 41 | # type ::= 42 | # 0x00 | // small JSON object 43 | # 0x01 | // large JSON object 44 | # 0x02 | // small JSON array 45 | # 0x03 | // large JSON array 46 | # 0x04 | // literal (true/false/null) 47 | # 0x05 | // int16 48 | # 0x06 | // uint16 49 | # 0x07 | // int32 50 | # 0x08 | 
// uint32 51 | # 0x09 | // int64 52 | # 0x0a | // uint64 53 | # 0x0b | // double 54 | # 0x0c | // utf8mb4 string 55 | # 0x0f // custom data (any MySQL data type) 56 | 57 | 58 | # value ::= 59 | # object | 60 | # array | 61 | # literal | 62 | # number | 63 | # string | 64 | # custom-data 65 | 66 | class jsonob(object): 67 | def __init__(self,bdata,t): 68 | """ 69 | bdata = json data 70 | t = the JSON type of bdata 71 | """ 72 | self.bdata = bdata 73 | self.t = t 74 | self.offset = 0 75 | self.ssize = 2 if self.t == 0x00 or self.t == 0x02 else 4 76 | self._type = None 77 | self._bdata = b'' 78 | #print("BEGIN JSON TO B, CURRENT TYPE:",self.t) 79 | 80 | def read_key_entry(self): 81 | """ 82 | read key-entry 83 | """ 84 | #print("READ KEY ENTRY") 85 | key_entry = [] 86 | for x in range(self.element_count): 87 | key_offset = self.read_little() 88 | key_length = self.read_little(2) 89 | key_entry.append((key_offset,key_length)) 90 | self.key_entry = key_entry 91 | 92 | def read_value_entry(self): 93 | #print("READ VALUE ENTRY") 94 | value_entry = [] 95 | for x in range(self.element_count): 96 | t = self.read_little(1) 97 | #print("\t entry: type:",t) 98 | data = None 99 | if t < 0x04: 100 | #print("READ VALUE ENTRY JSON object/array") 101 | data = self.read_little() 102 | elif t == 0x04: #literal 103 | #print("READ VALUE ENTRY literal") 104 | _data = self.read_little() 105 | if _data == 1: 106 | data = True 107 | elif _data == 2: 108 | data = False 109 | elif _data == 0: 110 | data = None 111 | else: 112 | data = '' 113 | elif t >= 0x05 and t <= 0x06: #inline data 114 | #print("READ VALUE ENTRY Inline data for INT",t,0x05,0x0a) 115 | data = self.read_inline_data(t) 116 | else: 117 | data = self.read_little() 118 | #elif t == 0x0b: #double 119 | #print("READ VALUE ENTRY Double") 120 | #data = struct.unpack('d',self.read(8))[0] 121 | # data = self.read_little() 122 | #elif t == 0x0c or t == 0x09 or t == 0x0a: #string 123 | #print("READ DATA ENTRY STRING",self.offset) 124 | # data = self.read_little() #OFFSET 125 | value_entry.append((t,data)) 126 | self.value_entry = value_entry 127 | #print("VALUE ENTRY LIST ---------",self.value_entry) 128 | 129 | def read_key(self): 130 | #print("READ KEY") 131 | key = [] 132 | for x in self.key_entry: 133 | key.append(self.bdata[x[0]:x[0]+x[1]].decode() ) 134 | self.key = key 135 | 136 | def read_value(self): 137 | #print("READ VALUE") 138 | value = [] 139 | for x in self.value_entry: 140 | #print("VALUE TYPE:xxxxxxx",x[0]) 141 | if x[0] == 0x0c: #string 142 | _s,size = self.read_var(x[1]) 143 | #size = int.from_bytes(self.bdata[x[1]:x[1]+1],'little') #assume a 1-byte length for now 144 | value.append(self.bdata[x[1]+_s:x[1]+_s+size].decode()) 145 | elif x[0] == 0x0b: 146 | value.append(struct.unpack('d',self.bdata[x[1]:x[1]+8])[0]) 147 | elif x[0] == 0x07: # int32 148 | value.append(int.from_bytes(self.bdata[x[1]:x[1]+4],'little')) 149 | elif x[0] == 0x08: # uint32 150 | value.append(int.from_bytes(self.bdata[x[1]:x[1]+4],'little')) 151 | elif x[0] == 0x09: # int64 152 | value.append(int.from_bytes(self.bdata[x[1]:x[1]+8],'little')) 153 | elif x[0] == 0x0a: # uint64 154 | value.append(int.from_bytes(self.bdata[x[1]:x[1]+8],'little',signed=False)) 155 | elif x[0] <= 0x03: #a nested json object, recurse again 156 | s = self.ssize 157 | size = int.from_bytes(self.bdata[x[1]+s: x[1]+s+s ], 'little') 158 | data = self.bdata[x[1]:x[1]+size] 159 | _aa = jsonob(data,x[0]) 160 | value.append(_aa.init()) 161 | else: 162 | value.append(x[1]) 163 | self.value = value 164 | 
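# worked example of the variable-length size prefix that read_var (below) decodes:
#   length < 128 uses one byte:             b',' (0x2c)  -> 44
#   high bit set means two bytes are used:  b'\xac\x02'  -> 0x02*128 + (0xac-128) = 300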
165 | def read_var(self,offset): 166 | """ 167 | read the length prefix of a mysql varchar value, returning the prefix size in bytes and the length itself 168 | if the first bit is 1, two bytes are used: 169 | the second byte counts how many blocks of 128 bytes there are; adding the first byte's data (0-127, without the flag bit) gives the final length 170 | ----------------------------------------------------- 171 | | 1 bit flag | 7 bit data | if flag, 8 bit data*128 | 172 | ----------------------------------------------------- 173 | """ 174 | _s = int.from_bytes(self.bdata[offset:offset+1],'little') 175 | size = 1 176 | if _s & (1<<7): 177 | size += 1 178 | _s = self.bdata[offset:offset+2] 179 | _t = int.from_bytes(_s[1:2],'little')*128 + int.from_bytes(_s[:1],'little')-128 180 | else: 181 | _t = _s 182 | 183 | return size,_t 184 | 185 | 186 | def init(self,): 187 | #print(self.bdata) 188 | self.element_count = self.read_little() 189 | #print("ELEMENT COUNT:",self.element_count) 190 | #print(self.read_little()) 191 | self._size = self.read_little() 192 | #print(f"THIS OBJECT SIZE:",self._size, "ACTUAL SIZE:",len(self.bdata)) 193 | if self._size != len(self.bdata): 194 | return None 195 | #print("WILL INIT") 196 | if self.t == 0x00 or self.t == 0x01: #object 197 | self._type = "JSON Object" 198 | #print(f"THIS TYPE IS {self._type}") 199 | self.data = {} 200 | self.read_key_entry() 201 | self.read_value_entry() 202 | self.read_key() 203 | self.read_value() 204 | self.data = {k:v for k,v in zip(self.key,self.value)} 205 | 206 | elif self.t == 0x02 or self.t == 0x03: #array 207 | self._type = "JSON Array" 208 | #print(f"THIS TYPE IS {self._type}") 209 | self.data = [] 210 | self.read_value_entry() 211 | self.read_value() 212 | self.data = self.value 213 | return self.data 214 | 215 | 216 | def read_little(self,ssize=None): 217 | ssize = self.ssize if ssize is None else ssize 218 | s = int.from_bytes(self.read(ssize),'little') 219 | #print(f"READ LITTLE SIZE: {ssize} bytes bdata:{self._bdata} value:{s} ") 220 | return s 221 | 222 | def read(self,n): 223 | _t = self.bdata[self.offset:self.offset+n] 224 | self.offset += n 225 | self._bdata = _t 226 | return _t 227 | 228 | def _read_int(self,n): 229 | data = self.read(n) 230 | return int.from_bytes(data,'big') 231 | 232 | def read_uint(self,n,is_unsigned=True): 233 | _t = self._read_int(n) 234 | _s = n*8 - 1 235 | #print("read uint",self._bdata,_t,_s) 236 | return (_t&((1<<_s)-1))-2**_s if _t < 2**_s and not is_unsigned else (_t&((1<<_s)-1)) 237 | 238 | def read_int(self,n): 239 | return self.read_uint(n,False) 240 | 241 | def read_inline_data(self,t): 242 | n = 0 243 | is_unsigned = True 244 | #print("\tread_inline_data TYPE:",t) 245 | if t == 0x05: #int16 246 | n = 2 247 | elif t == 0x06: #uint16 248 | n = 2 249 | is_unsigned = True 250 | elif t == 0x07: #int32 251 | n = 4 252 | elif t == 0x08: #uint32 253 | n = 4 254 | is_unsigned = True 255 | elif t == 0x09: #int64 256 | n = 8 257 | elif t == 0x0a: #uint64 258 | n = 8 259 | is_unsigned = True 260 | #return self.read_uint(n,is_unsigned) 261 | signed = False if is_unsigned else True 262 | rs = int.from_bytes(self.read(n),'little',signed=signed) 263 | #print("\tINLINE DATA:",rs) 264 | return rs 265 | 266 | 267 | 268 | 269 | #aa = btojson(b'\x00\x01\x00\r\x00\x0b\x00\x02\x00\x05{\x00t1') 270 | #aa = btojson(b'\x00\x01\x00,\x00\x0b\x00\x02\x00\x0c\r\x00t1\x1eAAAAAAAAAAAAAAAAACBBBBBBBBBBBB') 271 | #aa = btojson(b'\x00\x02\x00)\x00\x12\x00\x02\x00\x14\x00\x02\x00\x00\x16\x00\x0c&\x00a1a2\x01\x00\x10\x00\x0b\x00\x02\x00\x0c\r\x00b1\x02b1\x02a6') 272 | #aa = jsonob(b'\x01\x00\r\x00\x0b\x00\x02\x00\x05{\x00t1',0x00) 273 | #aa = jsonob(b'\x01\x00,\x00\x0b\x00\x02\x00\x0c\r\x00t1\x1eAAAAAAAAAAAAAAAAACBBBBBBBBBBBB',0x00) 274 | 
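# worked decode of the first jsonob example above (type 0x00 = small object):
#   b'\x01\x00' element_count=1, b'\r\x00' size=13 (== len(bdata)),
#   key entry b'\x0b\x00\x02\x00' -> key at offset 11, length 2 -> 't1',
#   value entry b'\x05' (int16) with inline b'{\x00' -> 0x007b -> 123,
#   so .init() returns {'t1': 123}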
#aa = jsonob(b'\x02\x00)\x00\x12\x00\x02\x00\x14\x00\x02\x00\x00\x16\x00\x0c&\x00a1a2\x01\x00\x10\x00\x0b\x00\x02\x00\x0c\r\x00b1\x02b1\x02a6',0x00) 275 | #aa = jsonob(b'\x03\x00T\x00\x00\r\x00\x007\x00\x00G\x00\x01\x00*\x00\x0b\x00\x02\x00\x00\r\x0013\x01\x00\x1d\x00\x0b\x00\x02\x00\x00\r\x00CC\x01\x00\x10\x00\x0b\x00\x02\x00\x0c\r\x00DD\x02DD\x01\x00\x10\x00\x0b\x00\x02\x00\x0c\r\x00BB\x02BB\x01\x00\r\x00\x0b\x00\x02\x00\x05\x02\x00FF',0x02) 276 | #print(aa.init()) 277 | -------------------------------------------------------------------------------- /ibd2sql/utils/b2data.py: -------------------------------------------------------------------------------- 1 | # convert binary data to data(int/str/json) 2 | # future: geom 3 | 4 | import struct 5 | import json 6 | import time 7 | from .charset.armscii8 import DD_ARMSCII8 8 | from .charset.dec8 import DD_DEC8 9 | from .charset.geostd8 import DD_GEOSTD8 10 | from .charset.hp8 import DD_HP8 11 | from .charset.keybcs2 import DD_KEYBCS2 12 | from .charset.swe7 import DD_SWE7 13 | from .charset.tis620 import DD_TIS620 14 | from .mysql_json import jsonob 15 | from .mysql_json2 import JSON2DICT 16 | 17 | # Format Big Unsigned Int 4 bytes 18 | _F_B_U_INT4 = struct.Struct('>L') 19 | 20 | # Format Double 21 | _F_D = struct.Struct('d') 22 | 23 | def map_decimal(n): 24 | # [[bytes, length], [],... ] 25 | return [ [4,9] for _ in range(n//9) ] + ([[ ((n%9)+1)//2 if n%9 < 7 else 4,n%9 ]] if n%9 > 0 else []) 26 | 27 | # convert binary data to unsigned int 28 | def B2UINT4(data): 29 | return struct.unpack('>L',data)[0] 30 | 31 | def B2INT4(data): 32 | return struct.unpack('>L',data)[0] - 2147483648 33 | 34 | def B2UINT3(data): 35 | return struct.unpack('>L',data+b'\x00')[0]>>8 36 | 37 | def B2INT3(data): 38 | return (struct.unpack('>L',data+b'\x00')[0]>>8) - 8388608 39 | 40 | def B2UINT2(data): 41 | return struct.unpack('>H',data)[0] 42 | 43 | def B2INT2(data): 44 | return struct.unpack('>H',data)[0] - 32768 45 | 46 | def B2UINT1(data): 47 | return struct.unpack('>B',data)[0] 48 | 49 | def B2INT1(data): 50 | return struct.unpack('>B',data)[0] - 128 51 | 52 | def B2UINT8(data): 53 | return struct.unpack('>Q',data)[0] 54 | 55 | def B2INT8(data): 56 | return struct.unpack('>Q',data)[0] - 9223372036854775808 57 | 58 | def B2UINT6(data): 59 | return (B2UINT4(data[:4])<<16) + B2UINT2(data[4:6]) 60 | 61 | def B2UINT7(data): 62 | return (B2UINT4(data[:4])<<24) + B2UINT3(data[4:7]) 63 | 64 | def B2BIT(data): # unsigned big int 65 | return '0x' + data.hex() 66 | 67 | B2INT = B2INT4 68 | 69 | def B2DOUBLE(data): 70 | return struct.unpack('d',data)[0] 71 | 72 | def B2FLOAT(data): 73 | return struct.unpack('f',data)[0] 74 | 75 | def B2YEAR(data): 76 | return B2UINT1(data) + 1900 77 | 78 | def __year(data): 79 | return str(data).zfill(4) 80 | 81 | def __month(data): 82 | return str(data).zfill(2) 83 | 84 | def __day(data): 85 | return str(data).zfill(2) 86 | 87 | def __hour(data): 88 | return str(data).zfill(2) 89 | 90 | def __minute(data): 91 | return str(data).zfill(2) 92 | 93 | def __second(data): 94 | return str(data).zfill(2) 95 | 96 | def B2DATE(data): 97 | t = B2UINT3(data) 98 | signed = t&8388608 99 | year = (t&8388096)>>9 100 | month = (t&480)>>5 101 | day = t&31 102 | return repr(__year(year)+'-'+__month(month)+'-'+__day(day)) 103 | 104 | def B2TIME(data,pad): #(data,with fsp) 105 | t = B2INT3(data[:3]) 106 | signed = '' 107 | fractional = '' 108 | if t < 0: 109 | signed = '-' 110 | hour= 2047 - ((8384512&t)>>12) 111 | minute = 63 - ((4032&t)>>6) 112 | second = 
63 - (63&t) 113 | if pad > 0: 114 | fr = int.from_bytes(data[3:],'big') 115 | if fr == 0: 116 | second += 1 117 | fractional = 0 118 | else: 119 | fractional = 2**((pad+1)//2*8) - fr 120 | else: 121 | hour = (8384512&t)>>12 122 | minute = (4032&t)>>6 123 | second = 63&t 124 | if pad > 0: 125 | fractional = int.from_bytes(data[3:],'big') 126 | if fractional != '': 127 | fractional = "." + str(fractional).zfill(pad)[:pad] 128 | return repr(signed + __hour(hour) + ':' + __minute(minute) + ':' + __second(second) 129 | + fractional) 130 | 131 | def B2DATETIME(data,pad): 132 | t = (B2UINT4(data[:4])<<8) + B2UINT1(data[4:5]) 133 | months = (t&549751619584)>>22 134 | year = months//13 135 | month = months%13 136 | day = (t&4063232)>>17 137 | hour = (t&126976)>>12 138 | minute = (t&4032)>>6 139 | second = t&63 140 | fractional = '' 141 | if pad > 0: 142 | fractional = "." + str(int.from_bytes(data[5:],'big')).zfill(pad)[:pad] 143 | return repr(__year(year)+'-'+__month(month)+'-'+__day(day)+' ' 144 | + __hour(hour) + ':' + __minute(minute) + ':' + __second(second) 145 | + fractional) 146 | 147 | def B2DATETIME_OLD(data,pad=0): 148 | dt, = struct.unpack('>Q',data) 149 | if dt > 9223372036854775808: 150 | dt -= 9223372036854775808 151 | else: 152 | dt = '00000000000000' 153 | dt = str(dt) 154 | return repr(dt[:4] + '-' + dt[4:6] + '-' + dt[6:8] + ' ' 155 | + dt[8:10] + ':' + dt[10:12] + ':' + dt[12:14]) 156 | 157 | def B2TIMESTAMP(data,pad): 158 | t = time.localtime(B2UINT4(data[:4])) 159 | year = t.tm_year 160 | month = t.tm_mon 161 | day = t.tm_mday 162 | hour = t.tm_hour 163 | minute = t.tm_min 164 | second = t.tm_sec 165 | fractional = '' 166 | if pad > 0: 167 | fractional = "." + str(int.from_bytes(data[4:],'big')).zfill(pad)[:pad] 168 | return repr(__year(year)+'-'+__month(month)+'-'+__day(day)+' ' 169 | + __hour(hour) + ':' + __minute(minute) + ':' + __second(second) 170 | + fractional) 171 | 172 | def B2TIME_OLD(data,pad=0): 173 | dt = B2INT3(data) 174 | sign = '' 175 | if dt < 0: 176 | sign = '-' 177 | dt = str(dt)[1:].zfill(6) 178 | else: 179 | dt = str(dt).zfill(6) 180 | if len(dt) > 6: 181 | sign += dt[:len(dt)-6] 182 | dt = dt[len(dt)-6:] 183 | return repr(sign+dt[:2]+':'+dt[2:4]+':'+dt[4:6]) 184 | 185 | 186 | # decode binary data 187 | def B2STR_armscii8(data): 188 | return repr(b''.join([ DD_ARMSCII8[x] for x in data ]).decode()) 189 | 190 | def B2STR_ascii(data): 191 | return repr(data.decode('utf-8')) 192 | 193 | def B2STR_big5(data): 194 | return repr(data.decode('big5')) 195 | 196 | def B2STR_binary(data): 197 | return '0x'+data.hex() 198 | 199 | def B2STR_cp1250(data): 200 | return repr(data.decode('cp1250')) 201 | 202 | def B2STR_cp1251(data): 203 | return repr(data.decode('cp1251')) 204 | 205 | def B2STR_cp1256(data): 206 | return repr(data.decode('cp1256')) 207 | 208 | def B2STR_cp1257(data): 209 | return repr(data.decode('cp1257')) 210 | 211 | def B2STR_cp850(data): 212 | return repr(data.decode('cp850')) 213 | 214 | def B2STR_cp852(data): 215 | return repr(data.decode('cp852')) 216 | 217 | def B2STR_cp866(data): 218 | return repr(data.decode('cp866')) 219 | 220 | def B2STR_cp932(data): 221 | return repr(data.decode('cp932')) 222 | 223 | def B2STR_dec8(data): 224 | return repr(b''.join([ DD_DEC8[x] for x in data ]).decode()) 225 | 226 | def B2STR_eucjpms(data): 227 | return repr(data.decode('euc-jp')) 228 | 229 | def B2STR_euckr(data): 230 | return repr(data.decode('euc-kr')) 231 | 232 | def B2STR_gb18030(data): 233 | return repr(data.decode('gb18030')) 234 | 235 | def 
B2STR_gb2312(data): 236 | return repr(data.decode('gb2312')) 237 | 238 | def B2STR_gbk(data): 239 | return repr(data.decode('gbk')) 240 | 241 | def B2STR_geostd8(data): 242 | return repr(b''.join([ DD_GEOSTD8[x] for x in data ]).decode()) 243 | 244 | def B2STR_greek(data): 245 | return repr(data.decode('iso8859-7')) 246 | 247 | def B2STR_hebrew(data): 248 | return repr(data.decode('iso8859-8')) 249 | 250 | def B2STR_hp8(data): 251 | return repr(b''.join([ DD_HP8[x] for x in data ]).decode()) 252 | 253 | def B2STR_keybcs2(data): 254 | return repr(b''.join([ DD_KEYBCS2[x] for x in data ]).decode()) 255 | 256 | def B2STR_koi8r(data): 257 | return repr(data.decode('koi8-r')) 258 | 259 | def B2STR_koi8u(data): 260 | return repr(data.decode('koi8-u')) 261 | 262 | def B2STR_latin1(data): 263 | return repr(data.decode('latin1')) 264 | 265 | def B2STR_latin2(data): 266 | return repr(data.decode('iso8859-2')) 267 | 268 | def B2STR_latin5(data): 269 | return repr(data.decode('iso8859-9')) 270 | 271 | def B2STR_latin7(data): 272 | return repr(data.decode('iso8859-13')) 273 | 274 | def B2STR_macce(data): 275 | return repr(data.decode('mac-latin2')) 276 | 277 | def B2STR_macroman(data): 278 | return repr(data.decode('macroman')) 279 | 280 | def B2STR_sjis(data): 281 | return repr(data.decode('shift-jis')) 282 | 283 | def B2STR_swe7(data): 284 | return repr(b''.join([ DD_SWE7[x] for x in data ]).decode()) 285 | 286 | def B2STR_tis620(data): 287 | #return repr(data.decode('tis-620')) 288 | return repr(b''.join([ DD_TIS620[x] for x in data ]).decode()) 289 | 290 | def B2STR_ucs2(data): 291 | return repr(data.decode('utf-16-be')) 292 | 293 | def B2STR_ujis(data): 294 | return repr(data.decode('euc_jp')) 295 | 296 | def B2STR_utf16(data): 297 | return repr(data.decode('utf-16-be')) 298 | 299 | def B2STR_utf16le(data): 300 | return repr(data.decode('utf-16-le')) 301 | 302 | def B2STR_utf32(data): 303 | return repr(data.decode('utf-32-be')) 304 | 305 | def B2STR_utf8(data): 306 | return repr(data.decode('utf-8')) 307 | 308 | def B2STR_utf8mb4(data): 309 | return repr(data.decode('utf-8')) 310 | 311 | B2STR = B2STR_utf8 312 | 313 | def _READ_DECIMAL(data,p,offset): 314 | rdata = "" 315 | for psize,pzfill in p: 316 | tdata = int.from_bytes(data[offset:offset+psize],'big',signed=False) 317 | rdata += str(tdata).zfill(pzfill) 318 | offset += psize 319 | return rdata,offset 320 | 321 | # decimal 322 | # b'\x7f\xff\xff\xff\xff\xff\xff\xfc\xff\xeb' -3.0020 323 | # b'\x80\x00\x00\x00\x00\x00\x00\x03\x00\x14' 3.0020 324 | def B2DECIMAL(data,p1,p2): 325 | p1_data = "" 326 | signed = False if struct.unpack('>B',data[:1])[0] & 128 else True 327 | if signed: 328 | data = bytes((~b&0xff) for b in data) 329 | data = struct.pack('>B',data[0]-128)+data[1:] 330 | offset = 0 331 | p1_data,offset = _READ_DECIMAL(data,p1,offset) 332 | p2_data,offset = _READ_DECIMAL(data,p2,offset) 333 | p1_data = str(int(p1_data)) 334 | return f"{'-' if signed else ''}{p1_data}.{p2_data}" 335 | 336 | p1_data = "".join([ toint(bdata).zfill(zfill) for zfill,bdata in data[0] ]) 337 | p2_data = "".join([ toint(bdata).zfill(zfill) for zfill,bdata in data[1] ]) 338 | return str(int(p1_data)) + str(p2_data) 339 | 340 | # ENUM 341 | def B2ENUM(data,elements): 342 | return repr(elements[int.from_bytes(data,'big',signed=False)]) 343 | 344 | # SET 345 | def B2SET(data,elements): 346 | data = int.from_bytes(data,'big',signed=False) 347 | rdata = "" 348 | n = 0 349 | for x in elements: 350 | if 1<H',self.data[42:44])[0] & 32767 112 | n_recs = 
struct.unpack('>H',self.data[54:56])[0] 113 | all_row = [] 114 | d = zlib.decompressobj() 115 | c = d.decompress(self.data[94:]) 116 | toffset = c.find(b'\x01') + 1 117 | data = c[toffset:] # col type, nullable 118 | compressed_offset = len(data) 119 | data += d.unused_data 120 | self.data = data 121 | self.offset = 0 122 | self._offset = len(data) 123 | page_dir = [] # [offset,deleted] 124 | for x in range(n_recs): # used record 125 | slot = struct.unpack('>H',self.read_reverse(2))[0] & 16383 126 | page_dir.append([slot,False]) 127 | for j in range(n_dense-n_recs-2): # user record deleted 128 | slot = struct.unpack('>H',self.read_reverse(2))[0] & 16383 129 | page_dir.append([slot,True]) 130 | _ = page_dir.sort() 131 | trxid_rollptr = [ self.read(13) for x in range(n_dense-2) ] 132 | for x in range(n_dense-2): 133 | if self.offset >= compressed_offset: 134 | if self.read(1) == b'\x80': 135 | _ = self.read(1) 136 | row,extra = self.read_row() 137 | all_row.append({'data':row,'extra':extra}) 138 | # nullable 139 | # varsize 140 | # data 141 | 142 | def _get_sql(self): 143 | sql_list = [] 144 | for data in self.read_all_rows(): 145 | data = data['data'] 146 | v = '' 147 | for colname,coldefault in self.table.column_order: 148 | v += f"{coldefault if colname not in data else data[colname]['data']}," 149 | sql_list.append(f"{self.sqlpre}({v[:-1]})") 150 | return sql_list 151 | 152 | def _get_sql_multi(self): 153 | sql = f"{self.sqlpre}" 154 | for data in self.read_all_rows(): 155 | data = data['data'] 156 | v = '' 157 | for colname,coldefault in self.table.column_order: 158 | v += f"{coldefault if colname not in data else data[colname]['data']}," 159 | sql += f"{v[:-1]}," 160 | return sql[:-1] 161 | 162 | def get_sql(self): 163 | pass 164 | 165 | def read_row(self): 166 | return None,None 167 | 168 | #def _read_row(self,null_list,size_list): 169 | def _read_row(self,colid_list,null_count): 170 | null_list,size_list = self.read_nullbitmask_varsize(colid_list,null_count) 171 | row = {} 172 | for colid in colid_list: 173 | col = self.table.column[colid] 174 | colname = col['name'] 175 | vsize = size_list.pop(0) 176 | nullable = null_list.pop(0) 177 | if colname in ['DB_TRX_ID','DB_ROLL_PTR'] and self.row_format == "COMPRESSED": 178 | pass 179 | start_offset = self.offset 180 | data = None 181 | if nullable: 182 | data = "null" 183 | elif vsize == 16384 and not self.dep: 184 | data = self.read_extra_column() 185 | else: 186 | data = self.read(vsize) 187 | #print(data) 188 | row[colname] = { 189 | 'data':col['decode'](data,*col['args']) if self.decode and not nullable else data, 190 | 'start_offset':start_offset, 191 | 'size':vsize 192 | } 193 | return row 194 | 195 | def _read_row_pk_leaf(self): 196 | #self.read_rec_header() 197 | row_version = self.read_row_version() 198 | colid_list = self.table.pk + self.table.pkmr[row_version]['colid'] 199 | null_count = self.table.pkmr[row_version]['null_count'] 200 | row = self._read_row(colid_list, null_count) 201 | return row,0 202 | 203 | def _read_row_pk_non_leaf(self): 204 | row = self._read_row(self.table.pk,0) 205 | return row,struct.unpack('>L',self.read(4))[0] 206 | 207 | 208 | def _read_row_key_leaf(self): 209 | row = self._read_row(self.colid_list+self.pk,self.null_count) 210 | return row,0 211 | 212 | def _read_row_key_non_leaf(self): 213 | row = self._read_row(self.colid_list,self.null_count) 214 | return row,struct.unpack('>L',self.read(4))[0] 215 | 216 | def read_row_version(self): 217 | return struct.unpack('>B',self.read_reverse(1))[0] if 
self.rec_header['REC_INFO_INSTANT'] or self.rec_header['REC_INFO_VERSION'] else 0 218 | 219 | def read_row_version_compressed(self): 220 | return 0 221 | 222 | def _read_nullbitmask_varsize_old(self,colid_list,null_count=0): 223 | null_list = [] 224 | size_list = [] # size_list.append(), size_list.pop(0) 225 | size_null_format = '>H' 226 | size_null_size = 2 227 | nmask = 32768 228 | if self.rec_header['REC_SHORT']: 229 | size_null_format = '>B' 230 | size_null_size = 1 231 | nmask = 128 232 | lastoffset = 0 233 | for colid in colid_list: 234 | #col = self.table.column[colid] 235 | size_null = struct.unpack(size_null_format,self.read_reverse(size_null_size))[0] 236 | isnull = True if nmask&size_null else False 237 | vsize = (nmask-1)&size_null 238 | t = vsize 239 | vsize -= lastoffset 240 | lastoffset = t 241 | size_list.append(vsize) 242 | null_list.append(isnull) 243 | return null_list,size_list 244 | 245 | def _read_null_new(self,null_count): 246 | return int.from_bytes(self.read_reverse((null_count+7)//8),'big') if null_count > 0 else 0 247 | 248 | def _read_null_compressed(self,null_count): 249 | return int.from_bytes(self.read((null_count+7)//8),'big') if null_count > 0 else 0 250 | 251 | def _read_varsize_new(self,): 252 | return self.read_reverse(1) 253 | 254 | def _read_varsize_compressed(self,): 255 | return self.read(1) 256 | 257 | def _read_nullbitmask_varsize_new(self,colid_list,null_count): 258 | null_list = [] 259 | size_list = [] 260 | #nullvalue = int.from_bytes(self.read_reverse((null_count+7)//8),'big') if null_count > 0 else 0 261 | nullvalue = self._read_null_new(null_count) 262 | n = 0 263 | for colid in colid_list: 264 | col = self.table.column[colid] 265 | if col['is_nullable']: 266 | null_list.append(True if nullvalue&(1<B',self.read_reverse(1))[0] 274 | tsize = struct.unpack('>B',self._read_varsize_new())[0] 275 | if tsize > REC_N_FIELDS_ONE_BYTE_MAX: 276 | #vsize = struct.unpack('>B',self.read_reverse(1))[0] + (tsize-128)*256 277 | vsize = struct.unpack('>B',self._read_varsize_new())[0] + (tsize-128)*256 278 | else: 279 | vsize = tsize 280 | else: 281 | #vsize = struct.unpack('>B',self.read_reverse(1))[0] 282 | vsize = struct.unpack('>B',self._read_varsize_new())[0] 283 | size_list.append(vsize) 284 | return null_list,size_list 285 | 286 | def read_nullbitmask_varsize(self,row_version,rec_header): 287 | pass 288 | 289 | def _read_rec_header_old(self): 290 | data = self.read_reverse(6) 291 | rec,rec_next = struct.unpack('>LH',data) 292 | REC_TYPE = REC_STATUS_ORDINARY if self.data[64:66] == b'\x00\x00' else REC_STATUS_NODE_PTR 293 | if self.offset == 101: 294 | REC_TYPE = REC_STATUS_INFIMUM 295 | if rec_next == 0: 296 | REC_TYPE = REC_STATUS_SUPREMUM 297 | self.rec_header = { 298 | "REC_INFO_INSTANT": True if rec&2147483648 > 0 else False, 299 | "REC_INFO_VERSION": True if rec&1073741824 > 0 else False, 300 | "REC_INFO_DELETED": True if rec&536870912 > 0 else False, 301 | "REC_INFO_MIN_REC": True if rec&268435456 > 0 else False, 302 | "REC_N_OWNED" : (rec&251658240)>>24, 303 | "REC_HEAP_NO" : (rec&16775168)>>11, 304 | "REC_N_FIELDS": (rec&2046)>>1, 305 | "REC_SHORT" : True if rec&1 == 1 else False, 306 | "REC_TYPE" : REC_TYPE, 307 | "REC_NEXT" : rec_next 308 | } 309 | 310 | def _read_rec_header_new(self): 311 | data = self.read_reverse(5) 312 | rec1,rec2,rec_next = struct.unpack('>HBh',data) 313 | rec = (rec1<<8)+rec2 314 | self.rec_header = { 315 | "REC_INFO_INSTANT": True if rec&8388608 > 0 else False, 316 | "REC_INFO_VERSION": True if rec&4194304 > 0 else 
False, 317 | "REC_INFO_DELETED": True if rec&2097152 > 0 else False, 318 | "REC_INFO_MIN_REC": True if rec&1048576 > 0 else False, 319 | "REC_N_OWNED" : (rec&983040)>>16, 320 | "REC_HEAP_NO" : (rec&65528)>>3, 321 | "REC_TYPE" : rec&7, 322 | "REC_NEXT" : rec_next + self.offset 323 | } 324 | 325 | def read_rec_header(self): 326 | pass 327 | 328 | def _read_extra_column_with_768(self,): 329 | return self.read(768)+self._read_extra_column() 330 | 331 | def _read_overpage_20_new(self,): 332 | return self.read(20) 333 | 334 | def _read_overpage_20_compressed(self,): 335 | return self.read_reverse(20) 336 | 337 | def _read_extra_column(self,): 338 | #SPACE_ID,PAGENO,BLOB_HEADER,REAL_SIZE = struct.unpack('>3LQ',self.read(20)) 339 | SPACE_ID,PAGENO,BLOB_HEADER,REAL_SIZE = struct.unpack('>3LQ',self._read_overpage_20_new()) 340 | data = b'' 341 | if self.table.mysqld_version_id > 50744: 342 | data = FIRST_BLOB(self.f,PAGENO) 343 | else: 344 | while True: 345 | _ndata = self.pg.read(PAGENO) 346 | REAL_SIZE,PAGENO = struct.unpack('>LL',_ndata[38:46]) 347 | data += _ndata[46:46+REAL_SIZE] 348 | if PAGENO == 4294967295: 349 | break 350 | return data 351 | 352 | def read_extra_column(self,): 353 | pass 354 | 355 | -------------------------------------------------------------------------------- /tests/gen_data.py: -------------------------------------------------------------------------------- 1 | import random 2 | import json 3 | import sys 4 | from datetime import datetime, timedelta 5 | 6 | 7 | _STRINGS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 8 | DATE_FORMAT = "%Y-%m-%d" 9 | TIME_FORMAT = "%H:%M:%S" 10 | DATETIME_FORMAT = f"{DATE_FORMAT} {TIME_FORMAT}" 11 | 12 | def getgen_int(start=-127,stop=127): 13 | return random.randint(start,stop) 14 | 15 | def _getgen_en(start=1,stop=200): 16 | return ''.join(random.choice(_STRINGS) for _ in range(random.randint(start,stop))) 17 | 18 | def getgen_en(start=1,stop=200): 19 | return repr(_getgen_en(start,stop)) 20 | 21 | def getgen_zh(start=1,stop=100): 22 | return repr(''.join(chr(random.randint(0x4E00, 0x9FFF)) for _ in range(random.randint(start,stop)))) 23 | 24 | def getgen_emoji(start=1,stop=100): 25 | return repr(''.join(chr(random.randint(0x1F600, 0x1F64F)) for _ in range(random.randint(start,stop)))) 26 | 27 | def getgen_datetime(start='1970-01-01 11:11:11',stop='2025-11-11 11:11:11'): 28 | start = datetime.strptime(start, DATETIME_FORMAT) 29 | stop = datetime.strptime(stop, DATETIME_FORMAT) 30 | rangesecond = int((stop - start).total_seconds()) 31 | return repr((start + timedelta(seconds=random.randint(0,rangesecond))).strftime(DATETIME_FORMAT)) 32 | 33 | def getgen_date(start='1970-01-01 11:11:11',stop='2025-11-11 11:11:11'): 34 | return repr(datetime.strptime(getgen_datetime()[1:-1],DATETIME_FORMAT).strftime(DATE_FORMAT)) 35 | 36 | def getgen_time(start='1970-01-01 11:11:11',stop='2025-11-11 11:11:11'): 37 | return repr(datetime.strptime(getgen_datetime()[1:-1],DATETIME_FORMAT).strftime(TIME_FORMAT)) 38 | 39 | def getgen_float(start=-100000,stop=100000): 40 | return random.random()*random.randint(start,stop) 41 | 42 | def getgen_decimal(p0=20,p1=4): 43 | return f"{'-' if random.choice((True,False)) else ''}{str(round(10**random.randint(0,p0-p1)*random.random(),p1))}" 44 | 45 | def getgen_json(start=1,stop=10): 46 | data = {'author':'ddcw','url':'https://github.com/ddcw/ibd2sql'} 47 | for i in range(4): 48 | data[_getgen_en()] = _getgen_en() 49 | if random.choice((True,False)): 50 | data['test_int2'] = getgen_int(-2**15,2**15) 51 | if 
random.choice((True,False)): 52 | data['test_int4'] = getgen_int(-2**31,2**31) 53 | if random.choice((True,False)): 54 | data['test_int8'] = getgen_int(-2**63,2**63) 55 | if random.choice((True,False)): 56 | data['test_boolean'] = random.choice((True,False)) 57 | if start < stop and random.choice((True,False)): 58 | data[_getgen_en()] = getgen_json(start,stop-1) if random.choice((True,False)) else [ getgen_json(start,stop-1) for _ in range(4) ] 59 | return data 60 | 61 | 62 | def _ft(tablename): 63 | return f"`test_ibd2sql_by_ddcw_{tablename}`",f"DROP TABLE IF EXISTS `test_ibd2sql_by_ddcw_{tablename}`;\nCREATE TABLE IF NOT EXISTS `test_ibd2sql_by_ddcw_{tablename}`" 64 | 65 | def test_int(): 66 | name,ddl = _ft('int') 67 | ddl += """( 68 | id int, 69 | c_tinyint_01 tinyint, 70 | c_tinyint_02 tinyint unsigned, 71 | c_smallint_01 smallint, 72 | c_smallint_02 smallint unsigned, 73 | c_mediumint_01 mediumint, 74 | c_mediumint_02 mediumint unsigned, 75 | c_int_01 int, 76 | c_int_02 int unsigned, 77 | c_bigint_01 bigint, 78 | c_bigint_02 bigint unsigned 79 | ) engine=innodb; 80 | """ 81 | print(ddl) 82 | for i in range(200): 83 | print(f"insert into {name} values({i},{getgen_int(-127,127)},{getgen_int(0,255)},{getgen_int(-32767,32767)},{getgen_int(0,65535)},{getgen_int(-8388607,8388607)},{getgen_int(0,16777215)},{getgen_int(-2147483647,2147483647)},{getgen_int(0,4294967295)},{getgen_int(-9223372036854775807,9223372036854775807)},{getgen_int(0,18446744073709551615)});") 84 | 85 | def test_char(): # char,varchar 86 | name,ddl = _ft('char') 87 | ddl += """( 88 | id int, 89 | c_char_01 char(20), 90 | c_char_02 char(127), 91 | c_char_03 char(255), 92 | c_varchar_01 varchar(20), 93 | c_varchar_02 varchar(127), 94 | c_varchar_03 varchar(255), 95 | c_varchar_04 varchar(300) 96 | ) engine=innodb; 97 | """ 98 | print(ddl) 99 | for i in range(200): 100 | print(f"insert into {name} values({i},{getgen_en(0,18)},{getgen_en(0,120)},{getgen_en(0,255)},{getgen_en(0,20)},{getgen_en(0,126)},{getgen_en(0,255)},{getgen_en(0,300)});") 101 | 102 | def test_text(): # tinytext,mediumtext,text,longtext 103 | name,ddl = _ft('text') 104 | ddl += """( 105 | id int, 106 | c_tinytext tinytext, 107 | c_mediumtext mediumtext, 108 | c_text text, 109 | c_longtext longtext 110 | ) engine=innodb; 111 | """ 112 | print(ddl) 113 | for i in range(2): 114 | #print(f"insert into {name} values({i},{getgen_en(0,127)},{getgen_en(0,8388607)},{getgen_en(0,8388607)},{getgen_en(0,8388607)});") 115 | print(f"insert into {name} values({i},{getgen_en(0,127)},{getgen_en(0,127)},{getgen_en(0,255)},{getgen_en(0,255)});") 116 | 117 | def test_blob(): # tinyblob,mediumblob,blob,longblob 118 | name,ddl = _ft('blob') 119 | ddl += """( 120 | id int, 121 | c_tinyblob tinyblob, 122 | c_mediumblob mediumblob, 123 | c_blob blob, 124 | c_longblob longblob 125 | ) engine=innodb; 126 | """ 127 | print(ddl) 128 | for i in range(200): 129 | #print(f"insert into {name} values({i},{getgen_en(0,127)},{getgen_en(0,8388607)},{getgen_en(0,8388607)},{getgen_en(0,8388607)});") 130 | print(f"insert into {name} values({i},{getgen_en(0,127)},{getgen_en(0,127)},{getgen_en(0,255)},{getgen_en(0,255)});") 131 | 132 | def test_binary(): # binary,varbinary,bit 133 | name,ddl = _ft('binary') 134 | ddl += """( 135 | id int, 136 | c_binary_01 binary(20), 137 | c_binary_02 binary(127), 138 | c_binary_03 binary(255), 139 | c_varbinary_01 varbinary(20), 140 | c_varbinary_02 varbinary(127), 141 | c_varbinary_03 varbinary(300), 142 | c_bit_01 bit(20), 143 | c_bit_02 bit(31), 144 | 
c_bit_03 bit(64) 145 | ) engine=innodb; 146 | """ 147 | print(ddl) 148 | for i in range(200): 149 | print(f"insert into {name} values({i},{getgen_en(0,20)},{getgen_en(0,127)},{getgen_en(0,255)},{getgen_en(0,20)},{getgen_en(0,127)},{getgen_en(0,300)},{getgen_en(0,2)},{getgen_en(0,3)},{getgen_en(0,8)});") 150 | 151 | def test_set(): # enum,set 152 | name,ddl = _ft('set') 153 | ddl += """( 154 | id int, 155 | c_set set('X','Y','Z'), 156 | c_enum enum('A','B','C') 157 | ) engine=innodb; 158 | """ 159 | print(ddl) 160 | for i in range(200): 161 | print(f"insert into {name} values({i},{getgen_int(0,3)},{repr(random.choice(('A','B','C')))} );") 162 | 163 | def test_time(): # date,time,datetime,timestamp,year 164 | name,ddl = _ft('time') 165 | ddl += """( 166 | id int, 167 | c_date date, 168 | c_time time, 169 | c_datetime datetime, 170 | c_timestamp timestamp, 171 | c_year year 172 | ) engine=innodb; 173 | """ 174 | print(ddl) 175 | for i in range(20): 176 | print(f"insert into {name} values({i},{getgen_date()},{getgen_time()},{getgen_datetime()},{getgen_datetime()},{getgen_int(1901,2025)});") 177 | 178 | def test_json(): # json 179 | name,ddl = _ft('json') 180 | ddl += """( 181 | id int, 182 | c_json json 183 | ) engine=innodb; 184 | """ 185 | print(ddl) 186 | for i in range(200): 187 | print(f"insert into {name} values({i},{repr(json.dumps(getgen_json()))});") 188 | 189 | def test_spatial(): # geometry,point,linestring,polygon,multipoint,multilinestring,multipolygon,geometrycollection 190 | name,ddl = _ft('spatial') 191 | ddl += """( 192 | id int, 193 | c_geometry geometry, 194 | c_point point /*!80003 SRID 4326 */, 195 | c_linestring linestring, 196 | c_polygon polygon, 197 | c_geometrycollection geometrycollection, 198 | c_multipoint multipoint, 199 | c_multilinestring multilinestring, 200 | c_multipolygon multipolygon 201 | ) engine=innodb; 202 | """ 203 | print(ddl) 204 | for x in range(2): 205 | print(f"insert into {name} values({x},ST_GeomFromText('point({x} {x})'), ST_GeomFromText('point({x} {x})', 4326), ST_GeomFromText('linestring({x} {x}, {x} {x}, {x} {x}, {x} {x})'), ST_GeomFromText('polygon((0 0,0 3,3 3,3 0,0 0),(1 1,1 2,2 2,2 1,1 1))'), ST_GeomFromText('GeometryCollection(Point(1 1),LineString(2 2, 3 3))'), ST_GeomFromText('MULTIPOINT((60 -24),(28 -77))'), ST_GeomFromText('MultiLineString((1 1,2 2,3 3),(4 4,5 5))'), ST_GeomFromText('MultiPolygon(((0 0,0 3,3 3,3 0,0 0),(1 1,1 2,2 2,2 1,1 1)))') );") 206 | 207 | def test_vector(): # vector 208 | name,ddl = _ft('vector') 209 | ddl += """( 210 | id int, 211 | c_vector vector 212 | ) engine=innodb; 213 | """ 214 | print(ddl) 215 | for x in range(200): 216 | print(f"insert into {name} values({x},TO_VECTOR('[{x},{x}]'));") 217 | 218 | def test_instant(): # add column 219 | name,ddl = _ft('instant') 220 | ddl += """( 221 | id int, 222 | name varchar(20) 223 | ) engine=innodb; 224 | """ 225 | print(ddl) 226 | for x in range(200): 227 | print(f"insert into {name} values({x},{getgen_en(0,20)});") 228 | print(f'ALTER TABLE {name} ADD COLUMN test varchar(20),ALGORITHM=INSTANT;') 229 | for x in range(200): 230 | print(f"insert into {name} values({x},{getgen_en(0,20)},{getgen_en(0,20)});") 231 | 232 | def test_row_version(): # add/drop column 233 | name,ddl = _ft('row_version') 234 | ddl += """( 235 | id int, 236 | name varchar(20) 237 | ) engine=innodb; 238 | """ 239 | print(ddl) 240 | for x in range(200): 241 | print(f"insert into {name} values({x},{getgen_en(0,20)});") 242 | print(f'ALTER TABLE {name} ADD COLUMN test 
varchar(20),ALGORITHM=INSTANT;') 243 | for x in range(200): 244 | print(f"insert into {name} values({x},{getgen_en(0,20)},{getgen_en(0,20)});") 245 | print(f'ALTER TABLE {name} DROP COLUMN name,ALGORITHM=INSTANT;') 246 | for x in range(200): 247 | print(f"insert into {name} values({x},{getgen_en(0,20)});") 248 | 249 | def test_partition(): # partition 250 | # range 251 | name,ddl = _ft('partition_range') 252 | ddl += """(id int,name varchar(200)) engine=innodb PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (10), PARTITION p1 VALUES LESS THAN (100), PARTITION p2 VALUES LESS THAN (200),PARTITION p3 VALUES LESS THAN (10000));""" 253 | print(ddl) 254 | for x in range(200): 255 | print(f"insert into {name} values({x},{getgen_int(0,10000)});") 256 | 257 | # hash 258 | name,ddl = _ft('partition_hash') 259 | ddl += """(id int, name varchar(200), age_y datetime) engine=innodb PARTITION BY HASH (year(age_y)) partitions 4 ;""" 260 | print(ddl) 261 | for x in range(200): 262 | print(f"insert into {name} values({x},{getgen_en()},{getgen_datetime()});") 263 | 264 | # list 265 | name,ddl = _ft('partition_list') 266 | ddl += """(id int, aa varchar(200)) engine=innodb PARTITION BY list(id)(PARTITION p1 VALUES IN (0,1,2,3,4), PARTITION p2 VALUES IN (5,6,7,8) );""" 267 | print(ddl) 268 | for x in range(8): 269 | print(f"insert into {name} values({x},{getgen_en()});") 270 | 271 | # key 272 | name,ddl = _ft('partition_key') 273 | ddl += """(id int primary key, aa varchar(200)) engine=innodb PARTITION BY KEY() PARTITIONS 2;""" 274 | print(ddl) 275 | for x in range(200): 276 | print(f"insert into {name} values({x},{getgen_en()});") 277 | 278 | def test_subpartition(): # subpartition 279 | name,ddl = _ft('sub_partition_rangehash') 280 | ddl += """(id INT, purchased DATE) engine=innodb 281 | PARTITION BY RANGE( YEAR(purchased) ) 282 | SUBPARTITION BY HASH( TO_DAYS(purchased) ) 283 | SUBPARTITIONS 2 ( 284 | PARTITION p0 VALUES LESS THAN (1990), 285 | PARTITION p1 VALUES LESS THAN (2000), 286 | PARTITION p2 VALUES LESS THAN MAXVALUE 287 | );""" 288 | print(ddl) 289 | for x in range(200): 290 | print(f"insert into {name} values({x},{getgen_date()});") 291 | 292 | 293 | def test_char_maxlen1(): # char&latin1 294 | name,ddl = _ft('char_maxlen1') 295 | ddl += """(id int, name varchar(200)) engine=innodb default charset=latin1;""" 296 | print(ddl) 297 | for x in range(200): 298 | print(f"insert into {name} values({x},{getgen_en()});") 299 | 300 | def test_char_emoji(): # emoji 301 | name,ddl = _ft('char_emoji') 302 | ddl += """(id int, name varchar(200)) engine=innodb default charset=utf8mb4;""" 303 | print(ddl) 304 | for x in range(200): 305 | print(f"insert into {name} values({x},{getgen_emoji()});") 306 | 307 | def test_foreign_key(): # CONSTRAINT FOREIGN KEY 308 | pass 309 | 310 | def test_check(): # CONSTRAINT CHECK 311 | pass 312 | 313 | def test_gen(): # GENERATED ALWAYS VIRTUAL/STORED 314 | pass 315 | 316 | def test_ddl_pk(): # primary key 317 | pass 318 | 319 | def test_ddl_unique_key(): # unique key 320 | pass 321 | 322 | def test_ddl_key(): # key 323 | pass 324 | 325 | def test_ddl_spatial_key(): # spatial key 326 | pass 327 | 328 | def test_ddl_pre_key(): # pre-key 329 | pass 330 | 331 | def test_ddl_comp_key(): # key(id,name) 332 | pass 333 | 334 | def test_ddl_comp_pre_key(): # key(id,name(20)) 335 | pass 336 | 337 | def test_ddl_fulltext_key(): # fulltext 338 | pass 339 | 340 | def test_ddl_invisible(): # key(col) INVISIBLE 341 | pass 342 | 343 | def test_on_update(): # on update 344 | pass 345 | 
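# (editor's note, not in the original file) Every generator above follows the
# same contract: print a DROP/CREATE pair, then plain INSERT statements, so the
# whole script can be piped straight into a server. A hypothetical one-off run,
# assuming a local test instance and a database named `test`:
#
#     python tests/gen_data.py 80032 | mysql -uroot test
#
# The numeric argument becomes MYSQL_VERSION_ID (see __main__ below) and gates
# the version-dependent generators such as test_vector() and test_row_version().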
346 | def test_hentai_ddl():
347 |     ddl = """
348 | create table if not exists test_ibd2sql_ddl_00(
349 | id bigint unsigned not null primary key auto_increment,
350 | name varchar(200)
351 | );
352 | 
353 | create table if not exists test_ibd2sql_ddl_01(
354 | `id` serial primary key auto_increment, -- serial: bigint unsigned not null
355 | `id_default` int default 0,
356 | `id_unsigned_zerofill` int unsigned zerofill,
357 | `int_col` int DEFAULT NULL,
358 | `id_invisible` int /*!80023 INVISIBLE */,
359 | `tinyint_col` tinyint DEFAULT '1',
360 | `boolean_col` boolean, -- tinyint(1)
361 | `smallint_col` smallint DEFAULT NULL,
362 | `mediumint_col` mediumint DEFAULT NULL,
363 | `bigint_col` bigint DEFAULT NULL,
364 | `float_col` float DEFAULT NULL,
365 | `double_col` double DEFAULT NULL,
366 | `decimal_col` decimal(10,2) DEFAULT NULL,
367 | `date_col` date DEFAULT NULL,
368 | `datetime_col` datetime(6),
369 | `timestamp_col` timestamp DEFAULT CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
370 | `time_col` time(4) DEFAULT NULL,
371 | `year_col` year DEFAULT NULL,
372 | `char_col` char(100) CHARACTER SET utf8 COLLATE utf8_danish_ci DEFAULT NULL,
373 | `nchar_col` nchar(10), -- same as char(10)
374 | `varchar_col` varchar(100),
375 | `nvarchar_col` nvarchar(10), -- same as varchar(10)
376 | `binary_col` binary(10) DEFAULT NULL,
377 | `varbinary_col` varbinary(20) DEFAULT NULL,
378 | `bit_col` bit(4) DEFAULT NULL,
379 | `enum_col` enum('A','B','C'),
380 | `set_col` set('X','Y','Z'),
381 | `json_type_col` json DEFAULT NULL,
382 | `tinyblob_col` tinyblob,
383 | `mediumblob_col` mediumblob,
384 | `blob_col` blob,
385 | `longblob_col` longblob,
386 | `tinytext_col` tinytext,
387 | `mediumtext_col` mediumtext,
388 | `text_col` text,
389 | `longtext_col` longtext,
390 | `gen_stored` INT GENERATED ALWAYS AS (int_col + 1) STORED,
391 | `gen_virtual` INT GENERATED ALWAYS AS (id_default + 1) virtual,
392 | `spatial_geometry` geometry,
393 | `spatial_point` point not null /*!80003 SRID 4326 */,
394 | `spatial_linestring` linestring,
395 | `spatial_polygon` polygon,
396 | `spatial_geometrycollection` geometrycollection,
397 | `spatial_multipoint` multipoint,
398 | `spatial_multilinestring` multilinestring,
399 | `spatial_multipolygon` multipolygon,
400 | `concat_char` varchar(201) as (concat(char_col,' ',varchar_col)),
401 | unique key(int_col),
402 | key(bigint_col),
403 | key(concat_char),
404 | key(varchar_col desc),
405 | key(int_col,time_col),
406 | key(int_col) /*!80000 INVISIBLE */,
407 | fulltext(varchar_col,text_col),
408 | spatial index(spatial_point),
409 | check (int_col>0 and tinyint_col>0),
410 | foreign key(id) references test_ibd2sql_ddl_00(id)
411 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
412 | """
413 |     print(ddl)
414 | 
415 | def test_1000_column():
416 |     ddl = 'drop table if exists test_ibd2sql_ddl_1000;create table if not exists test_ibd2sql_ddl_1000( id int'
417 |     for i in range(1000):
418 |         ddl += f",c{i} varchar(1) null default 'a'"
419 |     ddl += ");"
420 |     print(ddl)
421 | 
422 | if __name__ == '__main__':
423 |     MYSQL_VERSION_ID = int(sys.argv[1]) if len(sys.argv) == 2 else 0
424 |     test_int()
425 |     test_char()
426 |     test_set()
427 |     test_time()
428 |     test_char_maxlen1()
429 |     test_1000_column()
430 |     #test_text()
431 |     #test_blob()
432 |     #test_binary()
433 |     #if MYSQL_VERSION_ID >= 50708:
434 |     #    test_json()
435 |     #if MYSQL_VERSION_ID >= 50706:
436 |     #    test_spatial()
437 |     if MYSQL_VERSION_ID >= 50719:
438 |         test_partition()
439 |     if MYSQL_VERSION_ID >= 90001:
440 |         test_vector()
441 |     if MYSQL_VERSION_ID >= 80013 and MYSQL_VERSION_ID <= 80028:
442 |         test_instant()
443 |     if MYSQL_VERSION_ID > 80028:
444 |         test_row_version()
445 |     if MYSQL_VERSION_ID >= 80028:
446 |         test_hentai_ddl()
447 |     test_char_emoji()
448 |     test_subpartition()
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # written by ddcw @https://github.com/ddcw/ibd2sql
3 | # ibd2sql: parse ibd file to sql
4 | 
5 | from ibd2sql.ibd2sql import FORMAT_IBD_FILE
6 | from ibd2sql.ibd2sql import IBD2SQL_SINGLE
7 | from ibd2sql.ibd2sql import IBD2SQL_MULTI
8 | from ibd2sql.ibd2sql import FIND_LEAF_PAGE_FROM_ROOT
9 | from ibd2sql.innodb_page.lob import FIRST_BLOB
10 | from ibd2sql.utils.crc32c import REPACK_PAGE
11 | from ibd2sql.utils.crc32c import CHECK_PAGE
12 | from ibd2sql.web import RUN_IBD2SQL_WEB
13 | import datetime
14 | import argparse
15 | import time
16 | import json
17 | import glob
18 | import sys
19 | import os
20 | 
21 | from ibd2sql.innodb_page.sdi import SDI
22 | from ibd2sql.innodb_page.page import PAGE_READER
23 | from ibd2sql.innodb_page.fsp import FSP
24 | from ibd2sql.innodb_page.fsp import GET_FSP_STATUS_FROM_FLAGS
25 | from ibd2sql.innodb_page.inode import INODE
26 | from ibd2sql.innodb_page.index import INDEX
27 | from ibd2sql.innodb_page.table import TABLE
28 | import struct
29 | 
30 | def print_error_and_exit(msg,exit_code=1):
31 |     msg += "\n"
32 |     sys.stdout.write(msg)
33 |     sys.exit(exit_code)
34 | 
35 | def MODIFY_PAGE_INPLACE(filename,pageno,pagesize,offset,new_value):
36 |     with open(filename,'r+b') as f:
37 |         f.seek(pageno*pagesize,0)
38 |         data = f.read(pagesize)
39 |         newdata = data[:offset] + new_value + data[offset+len(new_value):]
40 |         data = REPACK_PAGE(newdata)
41 |         f.seek(pageno*pagesize,0)
42 |         f.write(data)
43 |     return True
44 | 
45 | def _argparse():
46 |     parser = argparse.ArgumentParser(add_help=False,description="parse mysql ibd file. https://github.com/ddcw/ibd2sql")
47 |     parser.add_argument(
48 |         "--help", "-h", "-H",
49 |         action="store_true",
50 |         dest="HELP",
51 |         default=False,
52 |         help="show help"
53 |     )
54 |     parser.add_argument(
55 |         "--version", "-v", "-V",
56 |         action="store_true",
57 |         dest="VERSION",
58 |         default=False,
59 |         help="show version"
60 |     )
61 |     parser.add_argument(
62 |         "--ddl",
63 |         nargs='?',
64 |         choices=['history','disable-keys','keys-after'],
65 |         const=True,
66 |         dest="DDL",
67 |         default=False,
68 |         help="print ddl"
69 |     )
70 |     parser.add_argument(
71 |         "--sql",
72 |         nargs='?',
73 |         choices=['sql','data'],
74 |         const=True,
75 |         dest="SQL",
76 |         default=False,
77 |         help="print data(default sql)"
78 |     )
79 |     parser.add_argument(
80 |         "--delete",
81 |         nargs='?',
82 |         choices=['only','with'],
83 |         const=True, # only
84 |         dest="DELETED",
85 |         default=False,
86 |         help="deleted flag(default only)"
87 |     )
88 |     parser.add_argument(
89 |         "--complete-insert",
90 |         action="store_true",
91 |         dest="COMPLETE_INSERT",
92 |         default=False,
93 |         help="sql with column name"
94 |     )
95 |     parser.add_argument(
96 |         "--multi-value",
97 |         action="store_true",
98 |         dest="MULTI_VALUE",
99 |         default=False,
100 |         help="one multi-value sql per page"
101 |     )
102 |     parser.add_argument(
103 |         "--force","-f",
104 |         action="store_true",
105 |         dest="FORCE",
106 |         default=False,
107 |         help="force parse all cluster index pages"
108 |     )
109 |     parser.add_argument(
110 |         "--replace",
111 |         action="store_true",
112 |         dest="REPLACE", # --replace conflicts with --multi-value
113 |         default=False,
114 |         help='use "REPLACE INTO" instead of "INSERT INTO"'
115 |     )
116 |     parser.add_argument(
117 |         "--table",
118 |         dest="TABLE_NAME",
119 |         help="replace table name"
120 |     )
121 |     parser.add_argument(
122 |         "--schema",
123 |         dest="SCHEMA_NAME",
124 |         help="replace schema name"
125 |     )
126 |     parser.add_argument(
127 |         '--sdi-table', # for compatibility
128 |         '--sdi',
129 |         '--sdi-file',
130 |         dest="SDI_FILE",
131 |         help='read SDI from this file(ibd/sdi/frm)'
132 |     )
133 |     parser.add_argument(
134 |         "--limit",
135 |         dest="LIMIT",
136 |         type=int,
137 |         default=17592186044416,
138 |         help="limit rows"
139 |     )
140 |     parser.add_argument(
141 |         "--keyring-file",
142 |         action="store",
143 |         dest="KEYRING_FILE",
144 |         help="keyring filename"
145 |     )
146 |     parser.add_argument(
147 |         "--output",#"-o","-O",
148 |         nargs='?',
149 |         const=True,
150 |         dest="OUTPUT_FILEDIR",
151 |         help="output dir(auto create), stdout if not set" # {schema}.{table}{_partition_}{pid}_{rotateno}{_gen}.sql
152 |     )
153 |     parser.add_argument(
154 |         "--output-filesize",
155 |         action="store",
156 |         type=int,
157 |         dest="OUTPUT_FILESIZE",
158 |         default=17592186044416,
159 |         help="rotate output file if its size(bytes) is greater than this"
160 |     )
161 |     parser.add_argument(
162 |         "--print-sdi",
163 |         action="store_true",
164 |         dest="PRINT_SDI",
165 |         help="only print sdi info(json)"
166 |     )
167 |     parser.add_argument(
168 |         "--count",
169 |         action="store_true",
170 |         dest="SUPER_FAST_COUNT",
171 |         help="print total rows of cluster index(super_fast)"
172 |     )
173 |     # parser.add_argument(
174 |     #     "--checksum",
175 |     #     action="store_true",
176 |     #     dest="CHECKSUM",
177 |     #     help="like: CHECKSUM TABLE tablename"
178 |     # )
179 |     parser.add_argument(
180 |         "--web",
181 |         action="store_true",
182 |         dest="WEB",
183 |         help="web console to browse data in ibd file"
184 |     )
185 |     parser.add_argument(
186 |         "--lctn",
187 |         nargs='?',
188 |         const=True,
189 |         type=int,
190 |         dest="LCTN",
191 |         choices=[0,1,2],
192 |         help="show/set lower_case_table_name in mysql.ibd"
193 |     )
194 |     parser.add_argument(
195 |         "--parallel",
196 |         type=int,
197 |         dest="PARALLEL",
198 |         default=1,
199 |         help="run multiple processes to parse the ibd file"
200 |     )
201 | 
202 |     parser.add_argument(
203 |         "--log",
204 |         nargs='?',
205 |         const=True,
206 |         dest="LOG_FILE",
207 |         help="log file"
208 |     )
209 | 
210 | 
211 |     # fields-terminated-by/fields-enclosed-by/lines-terminated-by for --sql=data
212 |     # table/schema: filter table/schema
213 |     # disable-extra-pages: disable extra pages
214 |     # leafno/rootno: pk leaf/root pageno
215 |     # trim_trailing_space: trim trailing space for CHAR
216 |     # hex: show fields data in hex
217 |     # foreign-keys-after: alter table add foreign-keys after insert
218 |     # disable-foreign-keys: ddl without foreign-keys
219 |     # host: listen host for WEB, default '0.0.0.0'
220 |     # port: listen port for WEB, default '8080'
221 |     # bad-pages: skip/try/fast
222 |     # check-table-old: check-table-old
223 |     parser.add_argument(
224 |         "--set",
225 |         dest="SET_OPTIONS",
226 |         action='append',
227 |         help="set some options:fields-terminated-by,fields-enclosed-by,lines-terminated-by,schema(filter),table,disable-extra-pages,leafno,rootno,trim_trailing_space(only for char),hex,foreign-keys-after,disable-foreign-keys,host,port,bad-pages,check-table-old\n example:--set='rootno=4;hex'"
228 |     )
229 | 
230 |     #parser.add_argument(dest='FILENAME', help='ibd filename or dirname with ibd file', nargs='?')
231 |     parser.add_argument(dest='FILENAME', help='ibd filename or dirname with ibd file', nargs='*')
232 | 
233 |     if parser.parse_args().VERSION:
234 |         print('ibd2sql v2.1-20251024')
235 |         sys.exit(0)
236 | 
237 |     if parser.parse_args().HELP or parser.parse_args().FILENAME == []:
238 |         parser.print_help()
239 |         print('\nOpen a new issue if you have questions: https://github.com/ddcw/ibd2sql/issues\n')
240 |         sys.exit(0)
241 | 
242 |     parser = parser.parse_args()
243 | 
244 |     if parser.MULTI_VALUE and parser.REPLACE:
245 |         print_error_and_exit('conflict between --replace and --multi-value')
246 | 
247 |     return parser
248 | 
249 | 
250 | class LOG(object):
251 |     def __init__(self,filename=None):
252 |         self.filename = filename
253 |         if self.filename is not None:
254 |             if self.filename is True:
255 |                 self.f = sys.stderr
256 |             else:
257 |                 self.f = open(self.filename,'a')
258 |         else:
259 |             self._write = self._write_nothing
260 | 
261 |     def _write_nothing(self,msg):
262 |         pass
263 | 
264 |     def _write(self,msg):
265 |         self.f.write(msg)
266 | 
267 |     def info(self,*args):
268 |         msg = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] [INFO] {' '.join([ str(x) for x in args ])}\n"
269 |         return self._write(msg)
270 | 
271 |     def error(self,*args):
272 |         msg = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] [ERROR] {' '.join([ str(x) for x in args ])}\n"
273 |         return self._write(msg)
274 | 
275 |     def warning(self,*args):
276 |         msg = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] [WARNING] {' '.join([ str(x) for x in args ])}\n"
277 |         return self._write(msg)
278 | 
279 |     def __close__(self):
280 |         if self.filename is not None:
281 |             self.f.close()
282 | 
283 | if __name__ == '__main__':
284 |     parser = _argparse()
285 |     opt = {}
286 |     # --set="a=1,b=2" --set 'c=3;d=4' --set 'a=5'
287 |     if parser.SET_OPTIONS is not None:
288 |         for x in parser.SET_OPTIONS:
289 |             for y in x.split(';'):
290 |                 for z in y.split(','):
291 |                     kv = z.split('=')
292 |                     if len(kv) == 2:
293 |                         opt[kv[0]] = kv[1]
294 |                     elif len(kv) == 1 and z != '':
295 |                         opt[kv[0]] = True
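    # (editor's example) the loops above flatten repeated --set values into one
    # dict, splitting first on ';' and then on ','; later values win:
    #     --set "a=1,b=2" --set "c=3;d=4" --set "a=5"
    #       -> opt == {'a': '5', 'b': '2', 'c': '3', 'd': '4'}
    # and a bare token becomes a boolean flag:
    #     --set "hex"  -> opt == {'hex': True}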
296 |     disable_foreign_key = True if 'disable-foreign-keys' in opt or 'foreign-keys-after' in opt else False
297 |     foreign_keys_after = True if 'foreign-keys-after' in opt else False
298 |     # init log
299 |     log = LOG(parser.LOG_FILE)
300 |     log.info('SET:',opt)
301 |     log.info('INIT FILENAME')
302 | 
303 |     # for fragmented page
304 |     FRAGMENT_PAGE = False
305 |     FRAGMENT_FILENAME = ''
306 |     FRAGMENT_FILENAME_PRE = ''
307 |     if os.path.isdir(parser.FILENAME[0]):
308 |         if os.path.exists(os.path.join(parser.FILENAME[0],'FIL_PAGE_INDEX')) and os.path.exists(os.path.join(parser.FILENAME[0],'FIL_PAGE_TYPE_BLOB')):
309 |             log.info('for fragmented page')
310 |             if 'indexid' not in opt:
311 |                 print_error_and_exit('--set indexid=xx is required when parsing fragmented pages')
312 |             filename = os.path.join(parser.FILENAME[0],'FIL_PAGE_INDEX',opt['indexid'].zfill(16)) + ".page"
313 |             if not os.path.exists(filename):
314 |                 print_error_and_exit(f"indexid={opt['indexid']} file {filename} does not exist")
315 |             if parser.SDI_FILE is None:
316 |                 print_error_and_exit(f"--sdi xx is required when parsing fragmented pages")
317 |             if not os.path.exists(parser.SDI_FILE):
318 |                 print_error_and_exit(f"sdi file {parser.SDI_FILE} does not exist")
319 |             FRAGMENT_FILENAME = filename
320 |             FRAGMENT_PAGE = True
321 |             opt['leafno'] = 0
322 |             opt['rootno'] = 0
323 |             parser.FORCE = True
324 |             FRAGMENT_FILENAME_PRE = os.path.join(parser.FILENAME[0],'FIL_PAGE_TYPE_BLOB')
325 |     # init filename
326 |     filename_list = [] if not FRAGMENT_PAGE else [FRAGMENT_FILENAME]
327 |     for x in parser.FILENAME:
328 |         for filename in glob.glob(x):
329 |             if os.path.isfile(filename):
330 |                 filename_list.append(filename)
331 |             elif os.path.isdir(filename):
332 |                 for n in os.listdir(filename):
333 |                     nfilename = os.path.join(filename,n)
334 |                     if os.path.isfile(nfilename):
335 |                         filename_list.append(nfilename)
336 |             else:
337 |                 log.warning('file',filename,'does not exist. [skip it]')
338 |     if len(filename_list) == 0:
339 |         print(*parser.FILENAME,'not found')
340 |         sys.exit(1)
341 |     if len(filename_list) > 1 and parser.SDI_FILE is not None:
342 |         print_error_and_exit(f'there are multiple files({len(filename_list)}), but with --sdi-file {parser.SDI_FILE}',2)
343 | 
344 | 
345 |     # init data file/sdi(json)/keyringfile(if)
346 |     _file_list = FORMAT_IBD_FILE(filename_list,parser.SDI_FILE,parser.KEYRING_FILE,log)
347 |     # filter table/schema
348 |     file_list = []
349 |     for file_base in _file_list:
350 |         if 'table' in opt and opt['table'] != file_base['sdi']['dd_object']['name']:
351 |             log.info(f"table name: {opt['table']} != {file_base['sdi']['dd_object']['name']}, skip it")
352 |             continue
353 |         if 'schema' in opt and opt['schema'] != file_base['sdi']['dd_object']['schema_ref']:
354 |             log.info(f"schema name: {opt['schema']} != {file_base['sdi']['dd_object']['schema_ref']}, skip it")
355 |             continue
356 |         file_list.append(file_base)
357 |     if len(file_list) == 0:
358 |         print_error_and_exit('no tables matched',4)
359 |     if len(file_list) > 1 and parser.TABLE_NAME is not None:
360 |         print_error_and_exit(f'there are multiple tables({len(file_list)}), but with --table',3)
361 | 
362 |     # web
363 |     if parser.WEB:
364 |         RUN_IBD2SQL_WEB(file_list,opt,log)
365 |     elif parser.PRINT_SDI:
366 |         sdi = []
367 |         for x in file_list:
368 |             sdi.append(x['sdi'])
369 |         print(json.dumps(sdi))
370 |     elif parser.LCTN or parser.LCTN == 0: # show/set lower_case_table_name
371 |         log.info('modify lower_case_table_name')
372 |         if len(filename_list) == 1 and len(file_list) > 60: # shared tablespace
373 |             log.info('get table(dd_properties) info...',)
374 |             table = None
375 |             file_base = None
376 |             rootno = 0
377 |             for i in range(len(file_list)):
378 |                 file_base = file_list[i]
379 |                 if file_base['sdi']['dd_object']['name'] == 'dd_properties':
380 |                     table = TABLE(file_base['sdi'])
381 |                     rootno = int(file_base['sdi']['dd_object']['indexes'][0]['root'])
382 |             if table is None:
383 |                 print_error_and_exit('no dd_properties')
384 |             pg = PAGE_READER(page_size=file_base['pagesize'],filename=file_base['filename'],encryption=file_base['encryption'],key=file_base['key'],iv=file_base['iv'])
385 |             inode = INODE(pg)
386 |             #rootno = inode.seg[0][0]['FSEG_FRAG_ARR'][0] if file_base['fsp_flags']['SDI'] == 0 else inode.seg[1][0]['FSEG_FRAG_ARR'][0]
387 |             leafno = FIND_LEAF_PAGE_FROM_ROOT(pg,rootno,table)
388 |             pageid = leafno
389 |             log.info('leaf no',leafno)
390 |             data = pg.read(pageid)
391 |             offset = 99
392 |             offset += struct.unpack('>h',data[97:99])[0]
393 |             log.info(f'first row: pageid:{pageid} offset:{offset}')
394 |             LCTN_PAGENO = 0
395 |             LCTN_OFFSET = 0
396 |             current_lctn = -1
397 |             if data[offset-2-5-1:offset-5-1] == b'\x14\xc0': #
398 |                 offset += 6 + 13
399 |                 SPACE_ID,PAGENO,BLOB_HEADER,REAL_SIZE = struct.unpack('>3LQ',data[offset:offset+20])
400 |                 log.info(f'SPACE_ID:{SPACE_ID} PAGENO:{PAGENO} BLOB_HEADER:{BLOB_HEADER} REAL_SIZE:{REAL_SIZE}')
401 |                 data = pg.read(PAGENO)
402 |                 entry = data[96:96+60]
403 |                 while True:
404 |                     if len(entry) < 12:
405 |                         break
406 |                     LCTN_PAGENO,datalen,lobversion = struct.unpack('>3L',entry[-12:])
407 |                     datalen = datalen>>16
408 |                     if LCTN_PAGENO == 0:
409 |                         break
410 |                     elif LCTN_PAGENO == PAGENO:
411 |                         rdata = data
412 |                     else:
413 |                         rdata = pg.read(LCTN_PAGENO)
414 |                     LCTN_OFFSET = rdata.find(b';LCTN=')
415 |                     if LCTN_OFFSET > 0:
416 |                         LCTN_OFFSET += 6
17 |                         current_lctn = rdata[LCTN_OFFSET:][:1].decode()
418 |                         break
419 |                     next_entry_pageno,next_entry_offset = struct.unpack('>LH',entry[6:12])
420 |                     if next_entry_pageno >0 and next_entry_pageno < 4294967295:
421 |                         entry = pg.read(next_entry_pageno)[next_entry_offset:next_entry_offset+60]
422 |                     else:
423 |                         break
424 | 
425 |             print('current lower_case_table_name =',current_lctn)
426 |             print('pageno:',LCTN_PAGENO,'offset:',LCTN_OFFSET)
427 |             if type(parser.LCTN) is int and parser.LCTN != int(current_lctn):
428 |                 log.info('will set lower_case_table_name =',parser.LCTN)
429 |                 # danger! this writes the new value into mysql.ibd in place
430 |                 if MODIFY_PAGE_INPLACE(filename,LCTN_PAGENO,file_base['pagesize'],LCTN_OFFSET,str(parser.LCTN).encode()):
431 |                     print('set lower_case_table_name =',parser.LCTN,'success')
432 |                 else:
433 |                     print('set lower_case_table_name =',parser.LCTN,'failed')
434 |             else:
435 |                 print_error_and_exit('can not support lctn')
436 |         else:
437 |             print(f'there are {len(filename_list)} files and {len(file_list)} tables; this is not mysql.ibd')
438 |     elif parser.SUPER_FAST_COUNT: # super_fast_count
439 |         for file_base in file_list:
440 |             time_1 = time.time()
441 |             table = TABLE(file_base['sdi'])
442 |             count_1 = 0
443 |             count_2 = 0 # with deleted
444 |             pg = PAGE_READER(page_size=file_base['pagesize'],filename=file_base['filename'],encryption=file_base['encryption'],key=file_base['key'],iv=file_base['iv'])
445 |             inode = INODE(pg)
446 |             if 'rootno' in opt:
447 |                 rootno = opt['rootno']
448 |             elif file_base['fsp_flags']['SHARED']:
449 |                 rootno = int(file_base['sdi']['dd_object']['indexes'][0]['root'])
450 |             else:
451 |                 rootno = inode.seg[0][0]['FSEG_FRAG_ARR'][0] if file_base['fsp_flags']['SDI'] == 0 else inode.seg[1][0]['FSEG_FRAG_ARR'][0]
452 |             leafno = FIND_LEAF_PAGE_FROM_ROOT(pg,rootno,table)
453 |             pageid = leafno
454 |             log.info(file_base['filename'],file_base['sdi']['dd_object']['name'],f'rootno:{rootno} leafno:{leafno}')
455 |             while pageid < 4294967295:
456 |                 data = pg.read(pageid)
457 |                 if data == b'':
458 |                     break
459 |                 pageid = struct.unpack('>L',data[12:16])[0]
460 |                 count_1 += struct.unpack('>H',data[54:56])[0]
461 |                 count_2 += (struct.unpack('>H',data[42:44])[0] & 32767) - 2
462 |             time_2 = time.time()
463 |             print(f"{file_base['filename']}\t{file_base['sdi']['dd_object']['name']}\tROWS:{count_1}\tROWS(with deleted):{count_2}\tTIME:{round((time_2-time_1),2)} seconds\tFILE SIZE:{round(os.path.getsize(file_base['filename'])/1024/1024,2)} MB")
464 |     else: # ddl/sql
465 |         filename_pre = ''
466 |         if parser.OUTPUT_FILEDIR:
467 |             if parser.OUTPUT_FILEDIR is True:
468 |                 filename_pre = f"ibd2sql_auto_dir_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
469 |             else:
470 |                 filename_pre = parser.OUTPUT_FILEDIR
471 |             print('output dir:',filename_pre)
472 |         if filename_pre != '':
473 |             os.makedirs(filename_pre,exist_ok=True)
474 |             log.info('output dir:',filename_pre)
475 |         if parser.DELETED:
476 |             parser.SQL = True
477 |         if not parser.SQL and not parser.DDL:
478 |             parser.DDL = True
479 |         for x in file_list:
480 |             table = TABLE(x['sdi'])
481 |             if parser.SCHEMA_NAME is not None:
482 |                 table.schema = parser.SCHEMA_NAME
483 |             if parser.TABLE_NAME is not None:
484 |                 table.name = parser.TABLE_NAME
485 |             if filename_pre != '':
486 |                 ddl_filename = os.path.join(filename_pre,f'{table.schema}.{table.name}{x["partition_name"] if x["partition_name"] is not None else ""}_{os.getpid()}')+'_ddl.sql'
487 |                 f = open(ddl_filename,'a')
488 |                 print('DDL filename:',ddl_filename)
489 |             else:
490 |                 f = sys.stdout
491 |             if parser.DDL:
492 |                 ddl = ''
493 |                 if parser.DDL == 'history':
494 |                     ddl = table.get_ddl_history(True,False,disable_foreign_key)
495 |                 elif parser.DDL in ['disable-keys','keys-after']:
496 |                     ddl = table.get_ddl(False,True,disable_foreign_key)
497 |                 else:
498 |                     ddl = table.get_ddl(False,False,disable_foreign_key)
499 |                 f.write(ddl+"\n")
500 | 
501 |             # sql/data
502 |             if parser.SQL:
503 |                 IBD2SQL_SINGLE(table,x,opt,filename_pre,log,parser,FRAGMENT_FILENAME_PRE)
504 | 
505 |             if parser.DDL == 'keys-after':
506 |                 f.write(table.get_ddl_key()+"\n")
507 |             if foreign_keys_after:
508 |                 f.write(table.get_ddl_reference()+"\n")
509 |             if filename_pre != '':
510 |                 f.close()
511 | 
--------------------------------------------------------------------------------
/ibd2sql/web.py:
--------------------------------------------------------------------------------
1 | from http.server import HTTPServer, BaseHTTPRequestHandler
2 | from ibd2sql.ibd2sql import FIND_LEAF_PAGE_FROM_ROOT
3 | from ibd2sql.innodb_page.page import PAGE_READER
4 | from ibd2sql.innodb_page.inode import INODE
5 | from ibd2sql.innodb_page.index import INDEX
6 | from ibd2sql.innodb_page.table import TABLE
7 | import urllib.parse
8 | import datetime
9 | import signal
10 | import struct
11 | import json
12 | import sys
13 | import os
14 | 
15 | 
16 | """
[lines 17-445: the body of web.py was garbled in extraction (the HTML inside the template strings was stripped as markup) and is omitted. What survives shows a module docstring followed by a MY_HANDLER(BaseHTTPRequestHandler) class that assembles an `html_content` page for the web console, including one <option> per parsed table built from self.IDX (line 389) and the navigation controls used by RUN_IBD2SQL_WEB below.]
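# (editor's sketch) A stand-in with roughly the shape of the lost template,
# judging only from the surviving fragments (line 389 emits one <option> per
# parsed table from self.IDX, line 447 writes the finished page); every name
# below is an assumption, not the original markup:
#
#     options = "\n".join(f"<option value='{i}'>{x['schema']}.{x['name']}</option>"
#                         for i, x in enumerate(self.IDX))
#     html_content = ("<html><body>"
#                     f"<select id='tables'>{options}</select>"
#                     "<div id='rows'></div>"
#                     "</body></html>")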
446 | """
447 |         self.wfile.write(html_content.encode('utf-8'))
448 | 
449 | 
450 | class IBD2SQL_WEB(INDEX):
451 |     def read_page_id_next(self,):
452 |         return struct.unpack('>L',self.data[12:16])[0]
453 |     def read_page_id_pre(self,):
454 |         return struct.unpack('>L',self.data[8:12])[0]
455 |     def read_page(self,pageid):
456 |         data = self.pg.read(pageid)
457 |         if data != b'':
458 |             self.init_data(data)
459 |             return True
460 |         else:
461 |             return False
462 |     def read_page_next(self,):
463 |         return self.read_page(self.read_page_id_next())
464 |     def read_page_pre(self):
465 |         return self.read_page(self.read_page_id_pre())
466 | 
467 | 
468 | def RUN_IBD2SQL_WEB(file_list,opt,log,server_class=HTTPServer, handler_class=MY_HANDLER):
469 |     BIND_HOST = '0.0.0.0' if 'host' not in opt else opt['host']
470 |     BIND_PORT = 8080 if 'port' not in opt else opt['port']
471 |     log.info(f'listen {BIND_HOST}:{BIND_PORT}')
472 |     IDX = []
473 |     for file_base in file_list:
474 |         table = TABLE(file_base['sdi'])
475 |         pg = PAGE_READER(page_size=file_base['pagesize'],filename=file_base['filename'],encryption=file_base['encryption'],key=file_base['key'],iv=file_base['iv'])
476 |         log.info('init',file_base['filename'])
477 |         inode = INODE(pg)
478 |         inode = inode.seg[1:] if file_base['fsp_flags']['SDI'] == 1 else inode.seg[0:]
479 |         idx = []
480 |         for x in table.index:
481 |             idxname = table.index[x]['name']
482 |             log.info('init',file_base['filename'],'idxname:',idxname)
483 |             is_primary = True if idxname == 'PRIMARY' else False
484 |             rootno = inode[x][0]['FSEG_FRAG_ARR'][0] if table.mysql_version_id <= 50744 else int(table.index[x]['root'])
485 |             try:
486 |                 leafno = FIND_LEAF_PAGE_FROM_ROOT(pg,rootno,table,'PK_NON_LEAF' if is_primary else 'KEY_NON_LEAF',x)
487 |             except Exception as e:
488 |                 log.info(e,'skip it')
489 |                 continue
490 |             rootdata = pg.read(rootno)
491 |             leafdata = pg.read(leafno)
492 |             idx_root = IBD2SQL_WEB()
493 |             idx_root.init_index(table=table,idxid=x,pg=pg,page_type='PK_NON_LEAF' if is_primary else 'KEY_NON_LEAF' )
494 |             idx_root.init_data(rootdata)
495 |             idx_leaf = IBD2SQL_WEB()
496 |             idx_leaf.init_index(table=table,idxid=x,pg=pg,page_type='PK_LEAF' if is_primary else 'KEY_LEAF' )
497 |             idx_leaf.init_data(leafdata)
498 |             #if rootno == leafno:
499 |             #    idx_root = idx_leaf
500 |             #key = [ table.column[y['column_opx']]['name'] for y in table.index[x]['elements'] ]
501 |             key = []
502 |             for y in table.index[x]['elements']:
503 |                 kname = table.column[y['column_opx']]['name']
504 |                 if kname in ['DB_TRX_ID','DB_ROLL_PTR']:
505 |                     break
506 |                 key.append(kname)
507 |             idx.append({
508 |                 'rootno':rootno,
509 |                 'leafno':leafno,
510 |                 'root':idx_root,
511 |                 'leaf':idx_leaf,
512 |                 'primary':is_primary,
513 |                 'name':idxname,
514 |                 'idxno':x,
515 |                 'level':struct.unpack('>H',rootdata[64:66])[0],
516 |                 'key':key
517 |             })
518 |         filename = file_base['filename']
519 |         IDX.append({
520 |             'filename':file_base['filename'],
521 |             'idx':idx,
522 |             'schema':table.schema,
523 |             'name':table.name,
524 |             'ddl':table.get_ddl(False,False,False)
525 |         })
526 | 
527 |     # for i in range(10):
528 |     #     if IDX[0]['idx'][0]['leaf'].read_page_next():
529 |     #         print(IDX[0]['idx'][0]['leaf'].get_all_rows())
530 |     #     else:
531 |     #         break
532 | 
533 |     handler_class.IDX = IDX
534 | 
535 |     server_address = (BIND_HOST,BIND_PORT)
536 |     httpd = server_class(server_address, handler_class)
537 | 
538 |     msg = f'''
539 | ###############################
540 | #     ibd2sql web console     #
541 | ###############################
542 | 
543 |  http://{BIND_HOST}:{BIND_PORT}
544 | 
545 | '''
546 |     print(msg)
547 |     httpd.serve_forever()
548 | 
549 | 
--------------------------------------------------------------------------------
/ibd2sql/ibd2sql.py:
--------------------------------------------------------------------------------
1 | import struct
2 | import json
3 | import glob
4 | import sys
5 | import os
6 | from ibd2sql.innodb_page.sdi import SDI
7 | from ibd2sql.innodb_page.page import PAGE
8 | from ibd2sql.innodb_page.page import PAGE_READER
9 | from ibd2sql.innodb_page.fsp import GET_FSP_STATUS_FROM_FLAGS
10 | from ibd2sql.innodb_page.fsp import FSP
11 | from ibd2sql.innodb_page.fsp import PARSE_ENCRYPTION_INFO
12 | from ibd2sql.utils.keyring_file import READ_KEYRING
13 | from ibd2sql.frm.frm2sdi import MYSQLFRM
14 | 
15 | from ibd2sql.innodb_page.inode import INODE
16 | from ibd2sql.innodb_page.index import INDEX
17 | from ibd2sql.innodb_page.table import TABLE
18 | 
19 | import ctypes
20 | from multiprocessing import Process
21 | from multiprocessing import Value
22 | from multiprocessing import Lock
23 | 
24 | from ibd2sql.utils.crc32c import CHECK_PAGE
25 | from ibd2sql.utils.check_table_old import CHECK_PAGE_OLD
26 | 
27 | class PAGE_READER_FRAGMENT(object):
28 |     def __init__(self,filename_pre):
29 |         self.filename_pre = filename_pre
30 |     def read(self,n):
31 |         filename = os.path.join(self.filename_pre,str(n).zfill(16)) + ".page"
32 |         data = b''
33 |         if os.path.exists(filename):
34 |             with open(filename,'rb') as f:
35 |                 data = f.read(16384)
36 |         return data
37 | 
38 | def GET_LEAF_PAGE_NO_FROM_SDI(pg,pageid):
39 |     while True:
40 |         data = pg.read(pageid)
41 |         if data[64:66] == b'\x00\x00':
42 |             break
43 |         else:
44 |             offset = 99
45 |             offset += struct.unpack('>h',data[offset-2:offset])[0]
46 |             dtype,table_id,pageid = struct.unpack('>LQL',data[offset:offset+16])
47 |     return pageid
48 | 
49 | class IBDBASE(object):
50 |     def __init__(self,filename,log,kd):
51 |         self.status = False
52 |         f = open(filename,'rb')
53 |         data = f.read(1024)
54 |         if len(data) != 1024:
55 |             log.error(filename,'is too small ...')
56 |             return None
57 |         if data[24:26] != b'\x00\x08':
58 |             log.info(f'{filename} version may be too low') # 5.0 ?
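            # (editor's note) the offsets used here follow the InnoDB FIL header:
            # bytes 24-26 hold FIL_PAGE_TYPE (0x0008 = FSP header page, tested
            # above) and page data starts at FIL_PAGE_DATA (byte 38), which is
            # where the struct.unpack('>6L',data[38:62]) below reads the FSP
            # header fields from.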
59 |             self.status = False
60 |         FSP_SPACE_ID,FSP_NOT_USED,FSP_SIZE,FSP_FREE_LIMIT,FSP_SPACE_FLAGS,FSP_FRAG_N_USED = struct.unpack('>6L',data[38:62])
61 |         self.fsp_flags = GET_FSP_STATUS_FROM_FLAGS(FSP_SPACE_FLAGS)
62 | 
63 |         self.logical_size = self.fsp_flags['logical_size']
64 |         self.physical_size = self.fsp_flags['physical_size']
65 |         self.page_size = self.physical_size
66 |         self.compressed = self.fsp_flags['compressed']
67 |         self.compression_ratio = self.logical_size//self.physical_size
68 | 
69 |         self.ENCRYPTION = True if self.fsp_flags['ENCRYPTION'] == 1 else False
70 |         self.SDI = True if self.fsp_flags['SDI'] == 1 else False # True: >=8.0 False: <=5.7
71 |         self.SHARED = True if self.fsp_flags['SHARED'] == 1 else False # shared tablespace
72 |         self.POST_ANTELOPE = self.fsp_flags['POST_ANTELOPE'] # innodb file format 0:compact/redundant 1:dynamic/compressed
73 | 
74 |         log.info(filename,'logical_size',self.logical_size)
75 |         log.info(filename,'physical_size',self.physical_size)
76 |         log.info(filename,'compressed',self.compressed)
77 |         log.info(filename,'ENCRYPTION',self.ENCRYPTION)
78 |         log.info(filename,'SDI',self.SDI)
79 |         log.info(filename,'SHARED',self.SHARED)
80 |         log.info(filename,'POST_ANTELOPE',self.POST_ANTELOPE)
81 | 
82 |         if os.path.getsize(filename)%self.physical_size != 0: # not fatal
83 |             log.warning(filename,'maybe have been damaged')
84 | 
85 |         f.seek(0,0)
86 |         self.fsp = FSP(f.read(self.physical_size),self.logical_size,self.compression_ratio)
87 |         f.close()
88 | 
89 |         self.key = None
90 |         self.iv = None
91 |         if self.ENCRYPTION:
92 |             try:
93 |                 t = PARSE_ENCRYPTION_INFO(self.fsp.encryption_info,kd)
94 |                 self.key = t['key']
95 |                 self.iv = t['iv']
96 |                 log.info('key',self.key.hex())
97 |                 log.info('iv',self.iv.hex())
98 |             except Exception as e:
99 |                 log.error(filename,'master_key not in',kd,'exception:',e)
100 |                 return None
101 | 
102 |         self.pg = PAGE_READER(page_size=self.physical_size,filename=filename,encryption=self.ENCRYPTION,key=self.key,iv=self.iv)
103 |         self.sdi = None
104 |         if self.SDI:
105 |             #sdi = SDI(self.fsp.SDI_PAGE_NO,self.pg,'COMPRESSED' if self.compressed == 1 else '1')
106 |             sdi = SDI(GET_LEAF_PAGE_NO_FROM_SDI(self.pg,self.fsp.SDI_PAGE_NO),self.pg,'COMPRESSED' if self.compressed == 1 else '1')
107 |             try:
108 |                 self.sdi = sdi.get_sdi()
109 |             except Exception as e:
110 |                 log.error(filename,'get sdi failed',e)
111 |                 return None
112 | 
113 |         self.status = True
114 | 
115 |     def test(self,):
116 |         pass
117 | 
118 | 
119 | def GET_PARTITION_TABLE_SDIDATA(filename_t,log,kd):
120 |     filename_re = filename_t.split('#')[0] + "#" + "*.ibd"
121 |     for filename in glob.glob(filename_re):
122 |         if filename == filename_t:
123 |             continue
124 |         if os.path.isfile(filename):
125 |             ibdbase = IBDBASE(filename,log,kd)
126 |             if ibdbase.status:
127 |                 if ibdbase.sdi is not None and len(ibdbase.sdi) == 2:
128 |                     return ibdbase.sdi[0]
129 |     return None
130 | 
131 | def FORMAT_IBD_FILE(filename_list,sdi_file,keyring_file,log):
132 |     """
133 |     INPUT:
134 |         filename_list: ibd/frm/sdi file list
135 |         sdi_file: sdi file(ibd/frm/sdi)
136 |         keyring_file: keyring filename
137 |         log: log
138 | 
139 |     RETURN:
140 |     [
141 |         {
142 |             'filename':xx,
143 |             'sdi':sdi info (dict),
144 |             'key':key,
145 |             'iv':'iv',
146 |             'pagesize': page size(phy),
147 |             'partition_name':''
148 |         },
149 |     ]
150 |     """
151 | 
152 |     kd = {}
153 |     if keyring_file is not None:
154 |         with open(keyring_file,'rb') as f:
155 |             kd = READ_KEYRING(f.read())
156 | 
157 |     global_sdi_info = None
158 |     if sdi_file is not None:
159 |         if sdi_file.endswith('.frm'):
160 |             log.info(sdi_file,'maybe frm')
161 |             global_sdi_info = sdidata = json.loads(MYSQLFRM(sdi_file)._get_sdi_json())
162 |             log.info(sdi_file,'global_sdi_info is frm file')
163 |         elif sdi_file.endswith('.sdi'):
164 |             log.info(sdi_file,'maybe sdi')
165 |             with open(sdi_file,'r') as f:
166 |                 global_sdi_info = json.load(f)
167 |             if len(global_sdi_info) == 3:
168 |                 global_sdi_info = global_sdi_info[1]
169 |             log.info(sdi_file,'global_sdi_info is sdi file')
170 |         elif sdi_file.endswith('.ibd'):
171 |             log.info(sdi_file,'maybe ibd')
172 |             ibdbase = IBDBASE(sdi_file,log,kd)
173 |             global_sdi_info = ibdbase.sdi[0]
174 | 
175 |     file_list = []
176 |     for filename in filename_list:
177 |         if not (filename.endswith('.ibd') or filename.endswith('.page')):
178 |             log.error(filename,'not endswith .ibd, skip it')
179 |             continue
180 |         ibdbase = IBDBASE(filename,log,kd)
181 |         if (ibdbase is None or not ibdbase.status) and len(filename_list) > 1:
182 |             log.error('skip file:',filename)
183 |             continue
184 | 
185 |         partition_name = None
186 |         sdidata = None
187 |         if not ibdbase.status or not ibdbase.SDI: #and ibdbase.fsp.FIL_PAGE_PREV in [0,4294967295] and ibdbase.fsp.FIL_PAGE_NEXT in [0,4294967295]: # 5.x
188 |             log.info(filename,'is mysql 5, will get sdi....',)
189 |             partition_offset = filename.find('#')
190 |             frm_filename = ''
191 |             if partition_offset > 0:
192 |                 frm_filename = filename[:partition_offset]+'.frm'
193 |                 partition_name = filename[partition_offset:-4]
194 |                 log.info(filename,'is partition table')
195 |             else:
196 |                 frm_filename = filename[:-4]+'.frm'
197 |             if os.path.exists(frm_filename):
198 |                 log.info(filename,'will use frm file:',frm_filename)
199 |                 sdidata = json.loads(MYSQLFRM(frm_filename)._get_sdi_json())
200 |                 log.info(filename,'ADD TABLE',sdidata['dd_object']['schema_ref'],sdidata['dd_object']['name'])
201 |             else:
202 |                 log.warning('frm file',frm_filename,'does not exist')
203 |                 if global_sdi_info is not None:
204 |                     log.warning(filename,'use global_sdi_info',sdi_file)
205 |                     sdidata = global_sdi_info
206 |                 else:
207 |                     log.error(filename,'has no sdi info, skip it')
208 |                     continue
209 |         elif ibdbase.fsp.FIL_PAGE_PREV > 80000 and ibdbase.fsp.FIL_PAGE_NEXT == 1: # 8.x
210 |             log.info(filename,'mysql version:',ibdbase.fsp.FIL_PAGE_PREV)
211 |             if ibdbase.SHARED: # such as : mysql.ibd
212 |                 log.info(filename,'is shared')
213 |                 for x in ibdbase.sdi:
214 |                     if 'dd_object' not in x or 'schema_ref' not in x['dd_object']:
215 |                         continue
216 |                     log.info(filename,'ADD TABLE',x['dd_object']['schema_ref'],x['dd_object']['name'])
217 |                     file_list.append({
218 |                         'filename':filename,
219 |                         'sdi':x,
220 |                         'encryption':ibdbase.ENCRYPTION,
221 |                         'key':ibdbase.key,
222 |                         'iv':ibdbase.iv,
223 |                         'pagesize':ibdbase.physical_size,
224 |                         'partition_name':partition_name,
225 |                         'fsp_flags':ibdbase.fsp_flags,
226 |                     })
227 |                 continue
228 |             else:
229 |                 sdi_count = len(ibdbase.sdi)
230 |                 if sdi_count == 1: # partition
231 |                     log.info(filename,'is partition table',)
232 |                     sdidata = GET_PARTITION_TABLE_SDIDATA(filename,log,kd)
233 |                     if sdidata is None:
234 |                         if global_sdi_info is None:
235 |                             log.error(filename,'can not find sdi info, skip it')
236 |                             continue
237 |                         else:
238 |                             sdidata = global_sdi_info
239 |                             log.info(filename,'use global sdi',sdi_file)
240 |                     else:
241 |                         log.info(filename,'ADD TABLE',sdidata['dd_object']['schema_ref'],sdidata['dd_object']['name'])
242 |                 elif sdi_count == 2:
243 |                     if 'schema_ref' in ibdbase.sdi[0]['dd_object']:
244 |                         sdidata = ibdbase.sdi[0]
245 |                     else:
246 |                         sdidata = ibdbase.sdi[1]
247 | 248 | log.info(filename,'ADD TABLE',sdidata['dd_object']['schema_ref'],sdidata['dd_object']['name']) 249 | else: 250 | log.error('unknown error when read sdi',sdi_count) 251 | continue 252 | else: 253 | log.error('skip file',filename,ibdbase.SDI,ibdbase.fsp.FIL_PAGE_PREV,ibdbase.fsp.FIL_PAGE_NEXT) 254 | continue 255 | 256 | file_list.append({ 257 | 'filename':filename, 258 | 'sdi':sdidata, 259 | 'encryption':ibdbase.ENCRYPTION, 260 | 'key':ibdbase.key, 261 | 'iv':ibdbase.iv, 262 | 'pagesize':ibdbase.physical_size, 263 | 'partition_name':partition_name, 264 | 'fsp_flags':ibdbase.fsp_flags, 265 | }) 266 | return file_list 267 | 268 | class IBD2SQL(object): 269 | def __init__(self,pg,pageid,force=False,v=None): 270 | self.pg = pg 271 | self.pageid = pageid 272 | self.pageid = pageid 273 | self.force = force 274 | self.v = v 275 | if force and v is None: 276 | self._read_page = self._read_page_add1 277 | elif force and v is not None: 278 | self._read_page = self._read_page_share_add1 279 | elif not force and v is None: 280 | pass 281 | elif not force and v is not None: 282 | self._read_page = self._read_page_share 283 | 284 | def read(self): 285 | data = self._read_page() 286 | if len(data) != self.pg.PAGE_SIZE: 287 | return False,data 288 | else: 289 | return True,data 290 | 291 | def _read_page(self,): 292 | data = self.pg.read(self.pageid) 293 | self.pageid = struct.unpack('>L',data[12:16])[0] 294 | return data 295 | 296 | def _read_page_share(self,): # parallel 297 | pass 298 | 299 | def _read_page_add1(self,): # force 300 | data = self.pg.read(self.pageid) 301 | self.pageid += 1 302 | return data 303 | 304 | def _read_page_share_add1(self,): # force & parallel 305 | pass 306 | 307 | def FIND_LEAF_PAGE_FROM_ROOT(pg,pageid,table,page_type='PK_NON_LEAF',idxid=0): 308 | idx = INDEX() 309 | idx.init_index(table=table,idxid=idxid,pg=pg,page_type=page_type) 310 | while True: 311 | data = pg.read(pageid) 312 | if data[64:66] == b'\x00\x00': 313 | break 314 | idx.init_data(data) 315 | pageid = idx.get_all_rows()[0]['pageid'] 316 | return pageid 317 | 318 | def ROTAED_FILE(f,log,action='w'): 319 | filename = f.name 320 | findex = filename.find('.p0') 321 | f.close() 322 | if findex > 0 and len(filename[findex:]) == 10: 323 | newfilename = filename[:findex+2] + str(int(filename[findex+2:])+1).zfill(8) 324 | else: 325 | newfilename = filename + ".p00000001" 326 | os.rename(filename,filename + ".p00000000") 327 | log.info('rotate new file, name:',newfilename) 328 | newf = open(newfilename,action) 329 | return newf 330 | 331 | def IBD2SQL_SINGLE(table,file_base,opt,filename_pre,log,parser,FRAGMENT_FILENAME_PRE): 332 | writed_size = 0 # rotaed 333 | writed_rows = 0 334 | usehex = True if 'hex' in opt else False 335 | if 'lines-terminated-by' in opt: 336 | enclosed_by = opt['lines-terminated-by'] 337 | elif parser.SQL == 'data': 338 | enclosed_by = '\n' 339 | else: 340 | enclosed_by = ';\n' 341 | fields_terminated = opt['fields-terminated-by'] if 'fields-terminated-by' in opt else ',' 342 | BAD_PAGES_ACTION = '' if 'bad-pages' not in opt else opt['bad-pages'] 343 | CHECK_TABLE = CHECK_PAGE_OLD if 'check-table-old' in opt else CHECK_PAGE 344 | LIMIT = parser.LIMIT if parser.LIMIT is not None else -1 # limit 345 | OUTPUT_FILESIZE = parser.OUTPUT_FILESIZE 346 | FORCE = parser.FORCE 347 | HAVE_DATA = True 348 | HAVE_DELETED = False 349 | if parser.DELETED == 'only' or parser.DELETED == True: 350 | HAVE_DELETED = True 351 | HAVE_DATA = False 352 | if parser.DELETED == 'with': 353 | HAVE_DELETED = True 
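    # (editor's note) net effect of --delete on the two switches above:
    #   (default)          HAVE_DATA=True,  HAVE_DELETED=False  -> live rows only
    #   --delete / 'only'  HAVE_DATA=False, HAVE_DELETED=True   -> deleted rows only
    #   --delete 'with'    HAVE_DATA=True,  HAVE_DELETED=True   -> both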
354 | PAGE_INDEX_ID = b'\x00'*8 355 | pg = PAGE_READER(page_size=file_base['pagesize'],filename=file_base['filename'],encryption=file_base['encryption'],key=file_base['key'],iv=file_base['iv']) 356 | # inode 357 | inode = INODE(pg) if 'rootno' not in opt else None 358 | if 'rootno' in opt: 359 | rootno = int(opt['rootno']) 360 | elif file_base['fsp_flags']['SHARED']: 361 | rootno = int(file_base['sdi']['dd_object']['indexes'][0]['root']) 362 | else: 363 | rootno = inode.seg[0][0]['FSEG_FRAG_ARR'][0] if file_base['fsp_flags']['SDI'] == 0 else inode.seg[1][0]['FSEG_FRAG_ARR'][0] 364 | #if file_base['fsp_flags']['SDI'] == 1: # 8.x 365 | # rootno = inode.seg[1][0]['FSEG_FRAG_ARR'][0] 366 | log.info(file_base['filename'],file_base['sdi']['dd_object']['name'],'ROOT PAGEID:',rootno) 367 | # FIND LEAF PAGE 368 | if 'leafno' in opt: 369 | leafno = int(opt['leafno']) 370 | else: 371 | leafno = FIND_LEAF_PAGE_FROM_ROOT(pg,rootno,table) 372 | log.info(file_base['filename'],'LEAF PAGEID:',leafno) 373 | leaf_page_data = pg.read(leafno) 374 | PAGE_INDEX_ID = leaf_page_data[66:74] if 'indexid' not in opt else struct.pack('>Q',int(opt['indexid'])) 375 | if parser.PARALLEL <= 1: # single 376 | # f write 377 | if filename_pre != '': 378 | filename = os.path.join(filename_pre,f'{table.schema}.{table.name}{file_base["partition_name"] if file_base["partition_name"] is not None else ""}_{os.getpid()}')+'_sql.sql' 379 | if parser.SQL == 'data': 380 | print(f"-- LOAD DATA INFILE {repr(filename)} INTO TABLE `{table.schema}`.`{table.name}` FIELDS TERMINATED BY {repr(fields_terminated)} OPTIONALLY ENCLOSED BY \"'\" LINES TERMINATED BY '\\n';") 381 | f = open(filename,'a') 382 | print('SQL filename,',filename) 383 | else: 384 | f = sys.stdout 385 | log.info('output is stdout') 386 | 387 | # parser the rest data 388 | pageid = leafno 389 | idx = INDEX() 390 | idx_bad = INDEX() # for bad-pages 391 | pg2 = PAGE_READER_FRAGMENT(FRAGMENT_FILENAME_PRE) 392 | idx.init_index(table=table,idxid=0,pg=pg if FRAGMENT_FILENAME_PRE == '' else pg2,page_type='PK_LEAF',replace=parser.REPLACE,complete=parser.COMPLETE_INSERT,multi=parser.MULTI_VALUE,fields_terminated=fields_terminated,decode=not usehex,POST_ANTELOPE=file_base['fsp_flags']['POST_ANTELOPE']) 393 | idx_bad.init_index(table=table,idxid=0,pg=pg if FRAGMENT_FILENAME_PRE == '' else pg2,page_type='PK_LEAF',replace=parser.REPLACE,complete=parser.COMPLETE_INSERT,multi=parser.MULTI_VALUE,fields_terminated=fields_terminated,decode=not usehex,POST_ANTELOPE=file_base['fsp_flags']['POST_ANTELOPE'],BAD_PAGES=BAD_PAGES_ACTION) 394 | if parser.SQL == 'data': 395 | idx.get_sql = idx.get_data 396 | idx_bad.get_sql = idx_bad.get_data 397 | if FORCE: 398 | pages = os.path.getsize(file_base['filename'])//file_base['pagesize'] 399 | pg.pageid = -1 400 | for _ in range(pages): 401 | log.info('READ PAGE ID:',pg.pageid) 402 | data = pg.read(_) 403 | check_status = True if BAD_PAGES_ACTION == '' else CHECK_TABLE(data) 404 | if check_status: 405 | if data[24:26] != b'E\xbf' or data[64:66] != b'\x00\x00' or PAGE_INDEX_ID != data[66:74]: 406 | continue 407 | elif BAD_PAGES_ACTION == 'skip': 408 | continue 409 | idx.init_data(data) 410 | idx_bad.init_data(data) 411 | row = [] 412 | if HAVE_DATA: 413 | row += idx.get_sql(False) if check_status else idx_bad.get_sql(False) 414 | if HAVE_DELETED: 415 | row += idx.get_sql(True) if check_status else idx_bad.get_sql(True) 416 | for sql in row: 417 | if LIMIT > 0: 418 | f.write(sql+enclosed_by) 419 | LIMIT -= 1 420 | else: 421 | return None 422 | else: 423 | 
while pageid < 4294967295:
424 |             data = pg.read(pageid)
425 |             log.info('READ PAGE ID:',pageid)
426 |             if data == b'':
427 |                 log.error(f'read page({pageid}) failed, will exit')
428 |                 break
429 |             pageid = struct.unpack('>L',data[12:16])[0]
430 |             idx.init_data(data)
431 |             row = []
432 |             if HAVE_DATA:
433 |                 row += idx.get_sql(False)
434 |             if HAVE_DELETED:
435 |                 row += idx.get_sql(True)
436 |             for sql in row:
437 |                 if LIMIT > 0:
438 |                     writed_size += f.write(sql+enclosed_by)
439 |                     LIMIT -= 1
440 |                     if writed_size >= OUTPUT_FILESIZE:
441 |                         f = ROTAED_FILE(f,log)
442 |                         writed_size = 0
443 |                 else:
444 |                     return None
445 |         if filename_pre != '':
446 |             f.close()
447 |     else: # multi
448 |         log.info('PARALLEL:',parser.PARALLEL)
449 |         pageid = Value(ctypes.c_uint32, 0)
450 |         pageid.value = 0 if parser.FORCE else leafno
451 |         lock = Lock()
452 |         worker = {}
453 |         for x in range(parser.PARALLEL):
454 |             worker[x] = Process(target=IBD2SQL_WORKER,args=(x,pageid,lock,log,filename_pre,HAVE_DATA,HAVE_DELETED,table,parser,file_base,PAGE_INDEX_ID,enclosed_by,fields_terminated,FRAGMENT_FILENAME_PRE))
455 |         for x in range(parser.PARALLEL):
456 |             worker[x].start()
457 |         for x in range(parser.PARALLEL):
458 |             worker[x].join()
459 |     return
460 | 
461 | def IBD2SQL_WORKER(p,pageid,lock,log,filename_pre,HAVE_DATA,HAVE_DELETED,table,parser,file_base,PAGE_INDEX_ID,enclosed_by,fields_terminated,FRAGMENT_FILENAME_PRE):
462 |     infopre = f'PROCESS {p} (pid:{os.getpid()}):'
463 |     writed_size = 0
464 |     log.info(infopre,'START')
465 |     if filename_pre != '':
466 |         filename = os.path.join(filename_pre,f'{table.schema}.{table.name}{file_base["partition_name"] if file_base["partition_name"] is not None else ""}_p{p}_{os.getpid()}')+'_sql.sql'
467 |         f = open(filename,'a')
468 |         print(infopre,'SQL filename,',filename)
469 |     else:
470 |         f = sys.stdout
471 |         log.info(infopre,'output is stdout')
472 |     idx = INDEX()
473 |     pg = PAGE_READER(page_size=file_base['pagesize'],filename=file_base['filename'],encryption=file_base['encryption'],key=file_base['key'],iv=file_base['iv'])
474 |     pg2 = PAGE_READER_FRAGMENT(FRAGMENT_FILENAME_PRE)
475 |     idx.init_index(table=table,idxid=0,pg=pg if FRAGMENT_FILENAME_PRE == '' else pg2,page_type='PK_LEAF',replace=parser.REPLACE,complete=parser.COMPLETE_INSERT,multi=parser.MULTI_VALUE,fields_terminated=fields_terminated,POST_ANTELOPE=file_base['fsp_flags']['POST_ANTELOPE'])
476 |     if parser.SQL == 'data':
477 |         idx.get_sql = idx.get_data
478 |     pages = os.path.getsize(file_base['filename'])//file_base['pagesize']
479 |     data = b'\x00'*file_base['pagesize']
480 |     pgid = 0
481 |     while True:
482 |         with lock:
483 |             pgid = pageid.value
484 |             if pgid > pages or pgid == 4294967295:
485 |                 break
486 |             data = pg.read(pgid)
487 |             if parser.FORCE:
488 |                 pageid.value = pgid + 1
489 |             else:
490 |                 pageid.value = struct.unpack('>L',data[12:16])[0]
491 |         if data[24:26] != b'E\xbf' or data[64:66] != b'\x00\x00' or PAGE_INDEX_ID != data[66:74]:
492 |             continue
493 |         idx.init_data(data)
494 |         row = []
495 |         if HAVE_DATA:
496 |             row += idx.get_sql(False)
497 |         if HAVE_DELETED:
498 |             row += idx.get_sql(True)
499 |         for sql in row:
500 |             writed_size += f.write(sql+enclosed_by)
501 |             if writed_size >= parser.OUTPUT_FILESIZE:
502 |                 f = ROTAED_FILE(f,log)
503 |                 writed_size = 0
504 |     if filename_pre != '':
505 |         f.close()
506 | 
507 |     log.info(infopre,'FINISH')
508 | 
509 | 
510 | def IBD2SQL_MULTI(table,file_base,opt,filename_pre,log,parser):
511 |     pass
512 | 
--------------------------------------------------------------------------------
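(editor's aside) Both the --count loop in main.py and the page loop in IBD2SQL_SINGLE above walk the linked list of leaf pages through the FIL header's next-page pointer at byte offset 12. A minimal, self-contained sketch of that walk, assuming an unencrypted, uncompressed tablespace with the default 16KB pages; walk_leaf_chain and its names are illustrative, not part of ibd2sql:

    import struct

    PAGE_SIZE = 16384
    FIL_PAGE_NEXT = 12           # offset of the 4-byte "next page" pointer
    FIL_PAGE_NONE = 4294967295   # 0xFFFFFFFF marks the end of the chain

    def walk_leaf_chain(path, first_leaf_no):
        """Yield (pageno, page bytes) by following FIL_PAGE_NEXT from the first leaf."""
        with open(path, 'rb') as f:
            pageno = first_leaf_no
            while pageno != FIL_PAGE_NONE:
                f.seek(pageno * PAGE_SIZE)
                data = f.read(PAGE_SIZE)
                if len(data) != PAGE_SIZE:   # truncated or damaged file
                    break
                yield pageno, data
                pageno = struct.unpack('>L', data[FIL_PAGE_NEXT:FIL_PAGE_NEXT+4])[0]
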
/ibd2sql/innodb_page/index.py:
--------------------------------------------------------------------------------
1 | from ibd2sql.innodb_page.page import PAGE
2 | from ibd2sql.innodb_page.lob import FIRST_BLOB
3 | from ibd2sql.utils.b2data import B2UINT6
4 | from ibd2sql.utils.b2data import B2UINT7
5 | import struct
6 | import zlib
7 | REC_STATUS_ORDINARY = 0 # leaf
8 | REC_STATUS_NODE_PTR = 1 # non-leaf
9 | REC_STATUS_INFIMUM = 2 # INFIMUM
10 | REC_STATUS_SUPREMUM = 3 # SUPREMUM
11 | REC_N_FIELDS_ONE_BYTE_MAX = 0x7F
12 | 
13 | class INDEX(PAGE):
14 |     """
15 |     init_index INPUT:
16 |         table:
17 |         idxid:
18 |         colid_list:
19 |         null_count:
20 |         pg:
21 |         page_type:
22 |         disable_extra_pages:
23 |         decode:
24 |         row_format:
25 |         replace:
26 |         complete:
27 |         multi: # for sql
28 |         fields_terminated: # for load data
29 |         fields_enclosed:
30 |         lines_terminated:
31 | 
32 |     USAGE:
33 |         init_index: init index obj
34 |         init_data : init data for next page
35 |         get_sql : return sql list # only for pk leaf
36 |         get_data : return data # for load data
37 |         get_all_rows : return all rows list[dict]
38 |     """
39 | 
40 |     def init_index(self,**kwargs):
41 |         # required
42 |         self.table = kwargs['table']
43 |         self.idxid = kwargs['idxid']
44 |         self.pg = kwargs['pg']
45 |         self.page_type = kwargs['page_type'] # pk_leaf,pk_non_leaf...
46 | 
47 |         # other
48 |         self.foffset = 99
49 |         self.offset = 99
50 |         self._offset = 99
51 |         self.offset_start = 120
52 |         self.offset_end = 0
53 |         self.rec_header = {}
54 | 
55 |         # options
56 |         self.disable_extra_pages = kwargs['disable_extra_pages'] if 'disable_extra_pages' in kwargs else False
57 |         self.decode = kwargs['decode'] if 'decode' in kwargs else True
58 |         self.replace = kwargs['replace'] if 'replace' in kwargs else False
59 |         self.complete = kwargs['complete'] if 'complete' in kwargs else False
60 |         self.multi = kwargs['multi'] if 'multi' in kwargs else False
61 |         self.fields_terminated = kwargs['fields_terminated'] if 'fields_terminated' in kwargs else '\t'
62 |         self.fields_enclosed = kwargs['fields_enclosed'] if 'fields_enclosed' in kwargs else '' # unused
63 |         self.lines_terminated = kwargs['lines_terminated'] if 'lines_terminated' in kwargs else '\n'
64 | 
65 |         # derived
66 |         self.row_format = self.table.row_format
67 |         self.colid_list = self.table.index[self.idxid]['colid_list']
68 |         self.colid_list_pk = self.colid_list + self.table.pk if self.table.mysql_version_id <= 50744 else self.colid_list
69 |         self.null_count = self.table.index[self.idxid]['null_count']
70 |         self.sqlpre = "REPLACE " if self.replace and not self.multi else "INSERT "
71 |         self.sqlpre += f"INTO {self.table._enclosed}{self.table.schema}{self.table._enclosed}.{self.table._enclosed}{self.table.name}{self.table._enclosed}"
72 |         if self.complete:
73 |             self.sqlpre += "(" + ','.join([ self.table._enclosed+colname+self.table._enclosed for colname,coldefault in self.table.column_order ]) + ")"
74 |         self.sqlpre += " VALUES "
75 |         if self.multi:
76 |             self.get_sql = self._get_sql_multi
77 |         else:
78 |             self.get_sql = self._get_sql_single
79 | 
80 |         if self.page_type == "PK_LEAF":
81 |             self._read_row = self._read_row_pk_leaf
82 |         elif self.page_type == "PK_NON_LEAF":
83 |             self._read_row = self._read_row_pk_non_leaf
84 |             self.null_count = self.table.pk_null_count
85 |         elif self.page_type == "KEY_LEAF":
86 |             self._read_row = self._read_row_key_leaf
87 |         elif self.page_type == "KEY_NON_LEAF":
88 |             self._read_row = self._read_row_key_non_leaf
89 | 
90 |         if self.table.mysql_version_id <= 80028:
91 |             self._read_row_version = self._read_row_count
92 | 
93 |         # bind row-format specific readers (the defaults above assume COMPACT/DYNAMIC)
94 |         if self.row_format == "REDUNDANT":
95 |             self._read_extra_column = self._read_extra_column_with_768
96 |             self._read_rec_header_new = self._read_rec_header_old
97 |             self._read_nullbitmask_varsize_new = self._read_nullbitmask_varsize_old
98 |             self.foffset = 101
99 |         elif self.row_format == "COMPACT":
100 |             self._read_extra_column = self._read_extra_column_with_768
101 |         elif self.row_format == "COMPRESSED":
102 |             self.get_all_rows = self._get_all_rows_compressed
103 |             self._read_extra_20 = self._read_extra_20_compressed
104 |             self._read_trx_id_rollptr = self._read_trx_id_rollptr_compressed
105 |             #self._read_nullbitmask_varsize_new = self._read_nullbitmask_varsize_compressed
106 | 
107 |         self.off_page_flag = 16404
108 |         if 'POST_ANTELOPE' in kwargs and kwargs['POST_ANTELOPE'] == 0 and self.table.mysql_version_id<80000:
109 |             self._read_extra_column = self._read_extra_column_with_768
110 |             self.off_page_flag = 17172
111 | 
112 |         if 'BAD_PAGES' in kwargs:
113 |             if kwargs['BAD_PAGES'] == 'fast':
114 |                 self.get_all_rows = self.get_all_rows_fast
115 |             elif kwargs['BAD_PAGES'] == 'try':
116 |                 self.get_all_rows = self.get_all_rows_try
117 | 
118 |     def init_data(self,data):
119 |         self.data = data
120 |         self.offset = self.foffset
121 |         self._offset = self.foffset
122 | 
123 |     def get_data(self,deleted=False):
124 |         data_list = []
125 |         for data in self.get_all_rows(deleted):
126 |             data = data['data']
127 |             v = ''
128 |             for colname,coldefault in self.table.column_order:
129 |                 v += f"{coldefault if colname not in data else data[colname]['data']}"+self.fields_terminated
130 |             data_list.append(v[:-1])
131 |         return data_list
132 | 
133 |     def get_sql(self):
134 |         pass # placeholder; rebound to _get_sql_single/_get_sql_multi in init_index
135 | 
136 |     def _get_sql_single(self,deleted=False):
137 |         sql_list = []
138 |         for data in self.get_all_rows(deleted):
139 |             data = data['data']
140 |             v = ''
141 |             for colname,coldefault in self.table.column_order:
142 |                 v += f"{coldefault if colname not in data else data[colname]['data']},"
143 |             sql_list.append(f"{self.sqlpre}({v[:-1]})")
144 |         return sql_list
145 | 
146 |     def _get_sql_multi(self,deleted=False):
147 |         sql = f"{self.sqlpre}"
148 |         for data in self.get_all_rows(deleted):
149 |             data = data['data']
150 |             v = ''
151 |             for colname,coldefault in self.table.column_order:
152 |                 v += f"{coldefault if colname not in data else data[colname]['data']},"
153 |             sql += f"({v[:-1]}),"
154 |         return [sql[:-1]] if sql != self.sqlpre else [] # no rows: avoid emitting a bare INSERT ... VALUES
155 | 
156 |     def get_all_rows(self,deleted=False):
157 |         all_row = []
158 |         row_count = 0
159 |         if deleted:
160 |             deleted_offset = struct.unpack('>H',self.data[44:46])[0]
161 |             self.offset = deleted_offset
162 |             self._offset = deleted_offset
163 |             row_count = (struct.unpack('>H',self.data[42:44])[0] & 32767) - struct.unpack('>H',self.data[54:56])[0] - 2
164 |         else:
165 |             row_count = struct.unpack('>H',self.data[54:56])[0] + 2
166 | 
167 |         for _ in range(row_count):
168 |             self._read_rec_header_new()
169 |             if self.rec_header['REC_TYPE'] <= 1:
170 |                 row,pageid = self._read_row()
171 |                 all_row.append({'data':row,'pageid':pageid,'deleted':self.rec_header['REC_INFO_DELETED']})
172 |             # next record
173 |             self.offset = self._offset = self.rec_header['REC_NEXT']
174 |         return all_row
175 | 
176 |     def get_all_rows_fast(self,deleted=False): # from page directory
177 |         all_row = []
178 |         pagedirs, = struct.unpack('>H',self.data[38:40])
179 |         pagedirs = min(1000,pagedirs)
180 |         pagedirs_offset = list(struct.unpack(f'>{pagedirs}H',self.data[-2*pagedirs-8:-8]))
181 |         _ = pagedirs_offset.reverse()
182 |         for offset in pagedirs_offset:
183 |             owned_count = 0
184 |             max_count = 16 # guard against an infinite loop
185 |             while owned_count <= 1 and max_count > 0:
186 |                 max_count -= 1
187 |                 try:
188 |                     self.offset = self._offset = offset
189 |                     self._read_rec_header_new()
190 |                     owned_count += 1 if self.rec_header['REC_N_OWNED'] > 0 else 0
191 |                     if self.rec_header['REC_TYPE'] == 3:
192 |                         break
193 |                     if self.rec_header['REC_TYPE'] <= 1 and self.rec_header['REC_INFO_DELETED'] == deleted and owned_count <= 1:
194 |                         row,pageid = self._read_row()
195 |                         all_row.append({'data':row,'pageid':pageid,'deleted':self.rec_header['REC_INFO_DELETED']})
196 |                     offset = self.rec_header['REC_NEXT']
197 |                 except Exception:
198 |                     pass
199 |         return all_row
200 | 
201 |     def get_all_rows_try(self,deleted=False):
202 |         all_row = []
203 |         for offset in range(99,len(self.data)-8):
204 |             try:
205 |                 self.offset = self._offset = offset
206 |                 self._read_rec_header_new()
207 |                 row,pageid = self._read_row()
208 |                 if self.rec_header['REC_TYPE'] <= 1 and self.rec_header['REC_INFO_DELETED'] == deleted:
209 |                     all_row.append({'data':row,'pageid':pageid,'deleted':self.rec_header['REC_INFO_DELETED']})
210 |             except Exception:
211 |                 pass
212 |         return all_row
213 | 
214 |     def _get_all_rows_compressed(self,deleted=False):
215 |         all_row = []
216 |         n_dense = struct.unpack('>H',self.data[42:44])[0] & 32767
217 |         n_recs = struct.unpack('>H',self.data[54:56])[0]
218 |         d = zlib.decompressobj()
219 |         c = d.decompress(self.data[94:])
220 |         toffset = c.find(b'\x01') + 1
221 |         data = self.data[:94]
222 |         data += struct.pack('>BBB',0x01,0x00,0x02)
223 |         data += self.data[-2:]
224 |         data += struct.pack('>8B',0x69, 0x6e, 0x66, 0x69, 0x6d, 0x75, 0x6d, 0x00)
225 |         data += b'\x03'
226 |         data += struct.pack('>12B',0x00,0x0b,0x00,0x00,0x73,0x75,0x70,0x72,0x65,0x6d,0x75,0x6d)
227 |         data += c[toffset:]
228 |         compressed_offset = len(data)
229 |         data += d.unused_data
230 |         old_data = self.data
231 |         self.data = data
232 |         self.offset = 120
233 |         self.offset_end = len(data)
234 |         self.offset_start = self.offset
235 |         page_dir = []
236 |         for i in range(n_recs):
237 |             slot = struct.unpack('>H',self._read_compressed_end(2))[0] & 16383 # ignore owned
238 |             page_dir.append([slot,False])
239 |         for j in range(n_dense-n_recs-2): # user record deleted
240 |             slot = struct.unpack('>H',self._read_compressed_end(2))[0] & 16383
241 |             page_dir.append([slot,True])
242 |         _ = page_dir.sort()
243 |         if self.page_type == "PK_LEAF":
244 |             self.trxid_rollptr = [ self._read_compressed_end(13) for x in range(n_dense-2) ]
245 |         self.c_offset = 0 # compressed offset
246 |         have_compressed = False # set once a record offset falls beyond the decompressed data
247 |         have_compressed_offset = 0
248 |         for x in range(n_dense-2):
249 |             self.offset_start = self.offset
250 |             offset,is_deleted = page_dir[x]
251 |             self.offset = offset - 5*(x+1) - 13*x
252 |             if self.offset > compressed_offset:
253 |                 #print(x,have_compressed_offset,have_compressed,self.offset_start,page_dir[x],data[1037:1037+14])
254 |                 have_compressed_offset += 1 if x <= 62 else 2
255 |                 have_compressed = True
256 |                 self.offset_start += 1 if x <= 62 else 2
257 |             if have_compressed:
258 |                 self.offset += have_compressed_offset
259 |             if deleted != is_deleted:
260 |                 continue
261 |             self._offset = self.offset
262 |             self.rec_header = {
263 |                 "REC_INFO_INSTANT":False,
264 |                 "REC_INFO_VERSION":False,
265 |                 "REC_INFO_DELETED":is_deleted,
266 |                 "REC_INFO_MIN_REC":True if x == 0 else False,
267 |                 "REC_N_OWNED":False,
268 |                 "REC_HEAP_NO":0,
269 |                 "REC_TYPE": 0 if self.page_type in ["PK_LEAF","KEY_LEAF"] else 1,
["PK_LEAF","KEY_LEAF"] else 1, 270 | "REC_NEXT":self.offset, 271 | "is_compressed": not have_compressed 272 | } 273 | row,pageid = self._read_row() 274 | all_row.append({'data':row,'pageid':pageid,'deleted':self.rec_header['REC_INFO_DELETED']}) 275 | self.data = old_data 276 | return all_row 277 | 278 | 279 | def _read_id_comprssed(self): 280 | b1 = self._read_compressed_start(1) 281 | if b1 == b'\x80': 282 | b1 += self._read_compressed_start(1) 283 | return b1 284 | 285 | def _read_nullbitmask_varsize_old(self,colid_list,null_count,compressed=True): 286 | null_list = [] 287 | size_list = [] 288 | size_null_format = '>H' 289 | size_null_size = 2 290 | nmask = 32768 291 | if self.rec_header['REC_SHORT']: 292 | size_null_format = '>B' 293 | size_null_size = 1 294 | nmask = 128 295 | lastoffset = 0 296 | for colid in colid_list: 297 | size_null = struct.unpack(size_null_format,self.read_reverse(size_null_size))[0] 298 | isnull = True if nmask&size_null else False 299 | vsize = (nmask-1)&size_null 300 | t = vsize 301 | vsize -= lastoffset 302 | lastoffset = t 303 | if self.table.column[colid]['name'] in ['DB_TRX_ID','DB_ROLL_PTR']: 304 | continue 305 | size_list.append(vsize) 306 | null_list.append(isnull) 307 | return null_list,size_list 308 | 309 | def _read_nullbitmask_varsize_new(self,colid_list,null_count,compressed=True): 310 | null_list = [] 311 | size_list = [] 312 | nullvalue = 0 313 | compressed = self.rec_header['is_compressed'] 314 | #compressed = True if 'is_compressed' in self.rec_header and self.rec_header['is_compressed'] else False 315 | if compressed: 316 | nullvalue = int.from_bytes(self.read_reverse((null_count+7)//8),'big') if null_count > 0 else 0 317 | else: 318 | nullvalue = int.from_bytes(self._read_compressed_start((null_count+7)//8),'big') if null_count > 0 else 0 319 | n = 0 320 | for colid in colid_list: 321 | col = self.table.column[colid] 322 | vsize = col['size'] 323 | null = False 324 | if col['is_nullable']: 325 | null = True if nullvalue&(1<B',self.read_reverse(1) if compressed else self._read_compressed_start(1))[0] 333 | if tsize > REC_N_FIELDS_ONE_BYTE_MAX: 334 | vsize = struct.unpack('>B',self.read_reverse(1) if compressed else self._read_compressed_start(1) )[0] + (tsize-128)*256 335 | else: 336 | vsize = tsize 337 | else: 338 | vsize = struct.unpack('>B',self.read_reverse(1))[0] if compressed else self._read_compressed_start(1)[0] 339 | null_list.append(null) 340 | size_list.append(vsize) 341 | return null_list,size_list 342 | 343 | def _read_nullbitmask_varsize_compressed(self,colid_list,null_count,compressed=True): 344 | pass # do nothing 345 | 346 | def _read_trx_id_rollptr(self): 347 | offset = self.offset 348 | trxid = self.read(6) 349 | rollptr = self.read(7) 350 | return self._read_trx_id_format(trxid,rollptr,offset) 351 | 352 | def _read_trx_id_format(self,trxid,rollptr,offset): 353 | return { 354 | 'DB_TRX_ID':{ 355 | 'data':B2UINT6(trxid), 356 | 'offset':offset, 357 | 'size':6 358 | }, 359 | 'DB_ROLL_PTR':{ 360 | 'data':B2UINT7(rollptr), 361 | 'offset':offset+6, 362 | 'size':7 363 | } 364 | } 365 | 366 | def _read_trx_id_rollptr_compressed(self): 367 | offset = self.offset_end 368 | data = self.trxid_rollptr.pop(0) 369 | trxid = data[:6] 370 | rollptr = data[6:13] 371 | return self._read_trx_id_format(trxid,rollptr,0) 372 | 373 | def _read_row(self): 374 | pass # do nothing. 
375 | 
376 |     def _read_row_pk_leaf(self):
377 |         # varsize,null_bitmask,row_version,record_header,pk,[pk,field]
378 |         row_version = self._read_row_version()
379 |         colid_list = self.table.pk + self.table.pkmr[row_version]['colid']
380 |         null_count = self.table.pkmr[row_version]['null_count']
381 |         null_list,size_list = self._read_nullbitmask_varsize_new(colid_list,null_count)
382 |         row = self._read_field(self.table.pk,[ null_list.pop(0) for _ in range(len(self.table.pk)) ],[ size_list.pop(0) for _ in range(len(self.table.pk))]) # key, nullable,varsize
383 |         row.update(self._read_trx_id_rollptr())
384 |         the_rest_of_field = self.table.pkmr[row_version]['colid'][2:] if self.row_format == 'REDUNDANT' else self.table.pkmr[row_version]['colid']
385 |         row.update(self._read_field(the_rest_of_field,null_list,size_list))
386 |         return row,0
387 | 
388 |     def _read_row_pk_non_leaf(self):
389 |         # varsize,null_bitmask,record_header,pk,child_pageid
390 |         null_list,size_list = self._read_nullbitmask_varsize_new(self.table.pk,self.null_count)
391 |         row = self._read_field(self.table.pk,null_list,size_list)
392 |         return row,struct.unpack('>L',self.read(4))[0]
393 | 
394 |     def _read_row_key_leaf(self):
395 |         # varsize,null_bitmask,record_header,key,pk
396 |         null_list,size_list = self._read_nullbitmask_varsize_new(self.colid_list+self.table.pk,self.null_count)
397 |         row = self._read_field(self.colid_list,null_list,size_list)
398 |         return row,0
399 | 
400 |     def _read_row_key_non_leaf(self):
401 |         # varsize,null_bitmask,key,record_header,key,pk,child_pageid
402 |         null_list,size_list = self._read_nullbitmask_varsize_new(self.colid_list_pk,self.null_count)
403 |         row = self._read_field(self.colid_list_pk,null_list,size_list)
404 |         return row,struct.unpack('>L',self.read(4))[0]
405 | 
406 | 
407 |     def _read_field(self,colid_list,null_list,size_list):
408 |         row = {}
409 |         for colid in colid_list:
410 |             col = self.table.column[colid]
411 |             colname = col['name']
412 |             vsize = size_list.pop(0)
413 |             null = null_list.pop(0)
414 |             offset = self.offset
415 |             data = None
416 |             if null:
417 |                 data = 'null'
418 |             elif vsize == self.off_page_flag:
419 |                 if self.disable_extra_pages:
420 |                     data = 'null'
421 |                     null = True
422 |                 else:
423 |                     data = self._read_extra_column()
424 |             else:
425 |                 data = self.read(vsize)
426 |             if not null:
427 |                 if self.decode:
428 |                     data = col['decode'](data,*col['args'])
429 |                 else:
430 |                     data = '0x' + data.hex()
431 |             row[colname] = {
432 |                 'data':data,
433 |                 'offset':offset,
434 |                 'size':vsize
435 |             }
436 |         return row
437 | 
438 |     def _read_row_version(self):
439 |         return struct.unpack('>B',self.read_reverse(1))[0] if self.rec_header['REC_INFO_INSTANT'] or self.rec_header['REC_INFO_VERSION'] else 0
440 | 
441 |     def _read_row_count(self,): # <=8.0.28
442 |         rdata = 0
443 |         if self.rec_header['REC_INFO_INSTANT']:
444 |             t1 = struct.unpack('>B',self.read_reverse(1))[0]
445 |             if t1 >= 128:
446 |                 t2 = struct.unpack('>B',self.read_reverse(1))[0]
447 |                 t1 = t2 + (t1-128)*256
448 |             rdata = t1
449 |         return rdata
450 | 
451 |     def _read_rec_header_old(self):
452 |         data = self.read_reverse(6)
453 |         rec,rec_next = struct.unpack('>LH',data)
454 |         REC_TYPE = REC_STATUS_ORDINARY if self.data[64:66] == b'\x00\x00' else REC_STATUS_NODE_PTR
455 |         if self.offset == 101:
456 |             REC_TYPE = REC_STATUS_INFIMUM
457 |         if rec_next == 0:
458 |             REC_TYPE = REC_STATUS_SUPREMUM
459 |         self.rec_header = {
460 |             "REC_INFO_INSTANT": True if rec&2147483648 > 0 else False,
461 |             "REC_INFO_VERSION": True if rec&1073741824 > 0 else False,
462 |             "REC_INFO_DELETED": True if rec&536870912 > 0 else False,
463 |             "REC_INFO_MIN_REC": True if rec&268435456 > 0 else False,
464 |             "REC_N_OWNED" : (rec&251658240)>>24,
465 |             "REC_HEAP_NO" : (rec&16775168)>>11,
466 |             "REC_N_FIELDS": (rec&2046)>>1,
467 |             "REC_SHORT" : True if rec&1 == 1 else False,
468 |             "REC_TYPE" : REC_TYPE,
469 |             "REC_NEXT" : rec_next,
470 |             "is_compressed":True,
471 |         }
472 | 
473 |     def _read_rec_header_new(self):
474 |         data = self.read_reverse(5)
475 |         rec1,rec2,rec_next = struct.unpack('>HBh',data)
476 |         rec = (rec1<<8)+rec2
477 |         self.rec_header = {
478 |             "REC_INFO_INSTANT": True if rec&8388608 > 0 else False,
479 |             "REC_INFO_VERSION": True if rec&4194304 > 0 else False,
480 |             "REC_INFO_DELETED": True if rec&2097152 > 0 else False,
481 |             "REC_INFO_MIN_REC": True if rec&1048576 > 0 else False,
482 |             "REC_N_OWNED" : (rec&983040)>>16,
483 |             "REC_HEAP_NO" : (rec&65528)>>3,
484 |             "REC_TYPE" : rec&7,
485 |             "REC_NEXT" : rec_next + self._offset + 5,
486 |             "is_compressed":True,
487 |         }
488 | 
489 |     def _read_extra_column_with_768(self):
490 |         return self.read(768) + self._read_extra_column_1()
491 | 
492 |     def _read_extra_column(self):
493 |         return self._read_extra_column_1()
494 | 
495 |     def _read_extra_column_1(self):
496 |         SPACE_ID,PAGENO,BLOB_HEADER,REAL_SIZE = struct.unpack('>3LQ',self._read_extra_20())
497 |         data = b''
498 |         if self.table.mysql_version_id > 50744:
499 |             data = FIRST_BLOB(self.pg,PAGENO)
500 |         else:
501 |             while True:
502 |                 _ndata = self.pg.read(PAGENO)
503 |                 REAL_SIZE,PAGENO = struct.unpack('>LL',_ndata[38:46])
504 |                 data += _ndata[46:46+REAL_SIZE]
505 |                 if PAGENO == 4294967295:
506 |                     break
507 |         return data
508 | 
509 |     def _read_extra_20(self):
510 |         return self.read(20)
511 | 
512 |     def _read_extra_20_compressed(self):
513 |         return self._read_compressed_end(20)
514 | 
515 |     def _read_compressed_start(self,n):
516 |         data = self.data[self.offset_start:self.offset_start+n]
517 |         self.offset_start += n
518 |         return data
519 | 
520 |     def _read_compressed_end(self,n):
521 |         data = self.data[self.offset_end-n:self.offset_end]
522 |         self.offset_end -= n
523 |         return data
524 | 
525 | 
--------------------------------------------------------------------------------
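For orientation, _read_rec_header_new above decodes the 5 bytes that sit immediately before each record's origin: 24 bits of flags and bookkeeping followed by a signed 2-byte offset to the next record, relative to this record's origin. Here is a standalone sketch of the same bit layout; the dict keys mirror the class's rec_header, while decode_rec_header and its arguments are illustrative names, not part of the module:

import struct

def decode_rec_header(page, origin):
    """Decode the COMPACT/DYNAMIC record header stored at page[origin-5:origin]."""
    rec1, rec2, rec_next = struct.unpack('>HBh', page[origin-5:origin])
    rec = (rec1 << 8) + rec2                        # 24 flag/bookkeeping bits
    return {
        'REC_INFO_INSTANT': bool(rec & 0x800000),   # instant ADD COLUMN flag
        'REC_INFO_VERSION': bool(rec & 0x400000),   # row-version flag
        'REC_INFO_DELETED': bool(rec & 0x200000),   # delete-marked
        'REC_INFO_MIN_REC': bool(rec & 0x100000),
        'REC_N_OWNED':      (rec & 0x0F0000) >> 16, # records owned by this directory slot
        'REC_HEAP_NO':      (rec & 0x00FFF8) >> 3,  # heap number within the page
        'REC_TYPE':         rec & 0x7,              # 0 leaf, 1 node-ptr, 2 infimum, 3 supremum
        'REC_NEXT':         origin + rec_next,      # absolute origin of the next record
    }

On a COMPACT page the infimum record's origin is at offset 99, so decode_rec_header(page, 99) reads bytes 94..99 and its REC_NEXT leads to the first user record, which is how get_all_rows starts its walk from foffset = 99.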