├── crodump ├── __init__.py ├── kodump.py ├── hexdump.py ├── koddecoder.py ├── readers.py ├── dumpdbfields.py ├── croconvert.py ├── Datamodel.py ├── Database.py ├── crodump.py └── Datafile.py ├── test_data └── all_field_types │ ├── CroIndex.tad │ ├── Voc │ ├── CroIndex.tad │ ├── CroBank.dat │ ├── CroBank.tad │ ├── CroStru.dat │ ├── CroStru.tad │ └── CroIndex.dat │ ├── CroBank.dat │ ├── CroBank.tad │ ├── CroIndex.dat │ ├── CroStru.dat │ └── CroStru.tad ├── bin ├── crodump └── croconvert ├── .gitignore ├── templates ├── postgres.j2 └── html.j2 ├── LICENSE ├── setup.py ├── README.md └── docs └── cronos-research.md /crodump/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_data/all_field_types/CroIndex.tad: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_data/all_field_types/Voc/CroIndex.tad: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bin/crodump: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | BINPATH=`dirname $0` 4 | export PYTHONPATH="$BINPATH/.." 5 | python3 -mcrodump.crodump "$@" 6 | -------------------------------------------------------------------------------- /bin/croconvert: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | BINPATH=`dirname $0` 4 | export PYTHONPATH="$BINPATH/.." 
5 | python3 -mcrodump.croconvert "$@" 6 | -------------------------------------------------------------------------------- /test_data/all_field_types/CroBank.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroBank.dat -------------------------------------------------------------------------------- /test_data/all_field_types/CroBank.tad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroBank.tad -------------------------------------------------------------------------------- /test_data/all_field_types/CroIndex.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroIndex.dat -------------------------------------------------------------------------------- /test_data/all_field_types/CroStru.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroStru.dat -------------------------------------------------------------------------------- /test_data/all_field_types/CroStru.tad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroStru.tad -------------------------------------------------------------------------------- /test_data/all_field_types/Voc/CroBank.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroBank.dat -------------------------------------------------------------------------------- /test_data/all_field_types/Voc/CroBank.tad: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroBank.tad -------------------------------------------------------------------------------- /test_data/all_field_types/Voc/CroStru.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroStru.dat -------------------------------------------------------------------------------- /test_data/all_field_types/Voc/CroStru.tad: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroStru.tad -------------------------------------------------------------------------------- /test_data/all_field_types/Voc/CroIndex.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroIndex.dat -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | share/python-wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | 26 | # Installer logs 27 | pip-log.txt 28 | pip-delete-this-directory.txt 29 | 30 | # PyBuilder 31 | .pybuilder/ 32 | target/ 33 | 34 | # Jupyter Notebook 35 | .ipynb_checkpoints 36 | 37 | # IPython 38 | profile_default/ 39 | ipython_config.py 40 | 41 | # Environments 42 | .env 43 | .venv 44 | env/ 45 | venv/ 46 | ENV/ 47 | env.bak/ 48 | 
venv.bak/ 49 | -------------------------------------------------------------------------------- /templates/postgres.j2: -------------------------------------------------------------------------------- 1 | {% for table in db.enumerate_tables(files=False) %} 2 | 3 | CREATE TABLE "{{ table.tablename | replace('"', '_') }}" ( 4 | {%- for field in table.fields %} 5 | "{{ field.name | replace('"', '_') }}" {{ field.sqltype() -}} 6 | {{- ", " if not loop.last else "" -}} 7 | {%- endfor %} 8 | ); 9 | 10 | INSERT INTO "{{ table.tablename | replace('"', '_') }}" VALUES 11 | {%- for record in db.enumerate_records( table ) %} 12 | ( {%- for field in record.fields -%} 13 | '{{ field.content | replace("'", "''") }}' {{- ", " if not loop.last else "" -}} 14 | {%- endfor -%} 15 | ) 16 | {{- ", " if not loop.last else "" -}} 17 | {%- endfor %} 18 | ; 19 | 20 | {% endfor %} 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Organized Crime and Corruption Reporting Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | setup( 3 | name = "cronodump", 4 | version = "1.1.0", 5 | entry_points = { 6 | 'console_scripts': [ 7 | 'croconvert=crodump.croconvert:main', 8 | 'crodump=crodump.crodump:main', 9 | ], 10 | }, 11 | packages = ['crodump'], 12 | author = "Willem Hengeveld, Dirk Engling", 13 | author_email = "itsme@xs4all.nl, erdgeist@erdgeist.org", 14 | description = "Tool and library for extracting data from Cronos databases.", 15 | long_description_content_type='text/markdown', 16 | long_description = """ 17 | The cronodump utility can parse most of the databases created by the [CronosPro](https://www.cronos.ru/) database software 18 | and dump it to several output formats. 19 | 20 | The software is popular among Russian public offices, companies and police agencies. 21 | 22 | Example usage: 23 | 24 | croconvert --csv 25 | 26 | Will create a .csv dump of all records in your database. 27 | 28 | or: 29 | 30 | crodump strudump 31 | 32 | Will print details on the internal definitions of the tables present in your database. 33 | 34 | For more details see the [README.md](https://github.com/alephdata/cronodump/blob/master/README.md) file. 
35 | """, 36 | license = "MIT", 37 | keywords = "cronos dataconversion databaseexport", 38 | url = "https://github.com/alephdata/cronodump/", 39 | classifiers = [ 40 | 'Environment :: Console', 41 | 'Intended Audience :: End Users/Desktop', 42 | 'Intended Audience :: Developers', 43 | 'License :: OSI Approved :: MIT License', 44 | 'Operating System :: OS Independent', 45 | 'Programming Language :: Python :: 3.7', 46 | 'Topic :: Utilities', 47 | 'Topic :: Database', 48 | ], 49 | python_requires = '>=3.7', 50 | extras_require={ 'templates': ['Jinja2'] }, 51 | ) 52 | -------------------------------------------------------------------------------- /templates/html.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Cronos Database Dump 6 | 7 | 8 | {% for table in db.enumerate_tables(files=True) %} 9 | 10 | 11 | 12 | 13 | {%- for field in table.fields %} 14 | 15 | {%- endfor %} 16 | 17 | 18 | 19 | 20 | {% for system_number, file in db.enumerate_files(table) %} 21 | 22 | 23 | 24 | 25 | {% endfor %} 26 | 27 |
{{ table.tablename | e }}
{{ field.name | e }}Data
{{ system_number | e }}File content
28 | {% endfor %} 29 | {% for table in db.enumerate_tables(files=False) %} 30 | {%- if table.tableimage -%} 31 | 32 | {%- endif -%} 33 | 34 | 35 | 36 | 37 | {%- for field in table.fields %} 38 | 39 | {%- endfor %} 40 | 41 | 42 | 43 | {%- for record in db.enumerate_records( table ) %} 44 | 45 | {%- for field in record.fields %} 46 | {%- if field.typ == 6 and field.content -%} 47 | 48 | {%- else -%} 49 | 50 | {%- endif -%} 51 | {%- endfor %} 52 | 53 | {%- endfor %} 54 | 55 |
{{ table.tablename | e }}
{{ field.name | e }}
{{ field.filename | e }}.{{ field.extname | e }}{{ field.content | e }}
56 | {% endfor %} 57 | 58 | 59 | -------------------------------------------------------------------------------- /crodump/kodump.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module has the functions for the 'kodump' subcommand from the 'crodump' script. 3 | """ 4 | from .hexdump import unhex, toout, hexdump 5 | import io 6 | import struct 7 | 8 | 9 | def decode_kod(kod, args, data): 10 | """ 11 | various methods of hexdumping KOD decoded data. 12 | """ 13 | if args.nokod: 14 | # plain hexdump, no KOD decode 15 | hexdump(args.offset, data, args) 16 | 17 | elif args.shift: 18 | # explicitly specified shift. 19 | args.shift = int(args.shift, 0) 20 | enc = kod.decode(args.shift, data) 21 | hexdump(args.offset, enc, args) 22 | elif args.increment: 23 | 24 | def incdata(data, s): 25 | """ 26 | add 's' to each byte. 27 | This is useful for finding the correct shift from an incorrectly shifted chunk. 28 | """ 29 | return b"".join(struct.pack(" read from stdin. 74 | import sys 75 | 76 | data = sys.stdin.buffer.read() 77 | if args.unhex: 78 | data = unhex(data) 79 | decode_kod(kod, args, data) 80 | 81 | 82 | -------------------------------------------------------------------------------- /crodump/hexdump.py: -------------------------------------------------------------------------------- 1 | """ 2 | Several functions for converting bytes to readable text or hex bytes. 3 | """ 4 | import struct 5 | from binascii import b2a_hex, a2b_hex 6 | 7 | 8 | def unhex(data): 9 | """ 10 | convert a possibly space separated list of 2-digit hex values to a byte-array 11 | """ 12 | if type(data) == bytes: 13 | data = data.decode("ascii") 14 | data = data.replace(" ", "") 15 | data = data.strip() 16 | return a2b_hex(data) 17 | 18 | 19 | def ashex(line): 20 | """ 21 | convert a byte-array to a space separated list of 2-digit hex values. 
22 | """ 23 | return " ".join("%02x" % _ for _ in line) 24 | 25 | 26 | def aschr(b): 27 | """ 28 | convert a CP-1251 byte to a unicode character. 29 | This will make both cyrillic and latin text readable. 30 | """ 31 | if 32 <= b < 0x7F: 32 | return "%c" % b 33 | elif 0x80 <= b <= 0xFF: 34 | try: 35 | c = struct.pack(" b[0]..b[n-1] 40 | b[i] = KOD[a[i]]- (i+shift) 41 | """ 42 | return bytes((self.kod[b] - i - o) % 256 for i, b in enumerate(data)) 43 | 44 | def encode(self, o, data): 45 | """ 46 | encode : shift, b[0]..b[n-1] -> a[0]..a[n-1] 47 | a[i] = INV[b[i]+ (i+shift)] 48 | """ 49 | return bytes(self.inv[(b + i + o) % 256] for i, b in enumerate(data)) 50 | 51 | 52 | def new(*args): 53 | """ 54 | create a KODcoding object with the specified arguments. 55 | """ 56 | return KODcoding(*args) 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /crodump/readers.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | 4 | class ByteReader: 5 | """ 6 | The ByteReader object is used when decoding various variable sized structures. 7 | all functions raise EOFError when attempting to read beyond the end of the buffer. 8 | 9 | functions starting with `read` advance the current position. 
10 | """ 11 | def __init__(self, data): 12 | self.data = data 13 | self.o = 0 14 | 15 | def readbyte(self): 16 | """ 17 | Reads a single byte 18 | """ 19 | if self.o + 1 > len(self.data): 20 | raise EOFError() 21 | self.o += 1 22 | return struct.unpack_from(" len(self.data): 29 | raise EOFError() 30 | return self.data[self.o] == bytevalue 31 | 32 | def readword(self): 33 | """ 34 | Reads a 16 bit unsigned little endian value 35 | """ 36 | if self.o + 2 > len(self.data): 37 | raise EOFError() 38 | self.o += 2 39 | return struct.unpack_from(" len(self.data): 46 | raise EOFError() 47 | self.o += 4 48 | return struct.unpack_from(" len(self.data): 58 | raise EOFError() 59 | self.o += n 60 | return self.data[self.o-n:self.o] 61 | 62 | def readlongstring(self): 63 | """ 64 | Reads a cp1251 encoded string prefixed with a dword sized length 65 | """ 66 | namelen = self.readdword() 67 | return self.readbytes(namelen).decode("cp1251") 68 | 69 | def readname(self): 70 | """ 71 | Reads a cp1251 encoded string prefixed with a byte sized length 72 | """ 73 | namelen = self.readbyte() 74 | return self.readbytes(namelen).decode("cp1251") 75 | 76 | def readtoseperator(self, sep): 77 | """ 78 | reads bytes upto a bytes sequence matching `sep`. 79 | when no `sep` is found, return the remaining bytes in the buffer. 80 | """ 81 | if self.o > len(self.data): 82 | raise EOFError() 83 | oldoff = self.o 84 | off = self.data.find(sep, self.o) 85 | if off >= 0: 86 | self.o = off + len(sep) 87 | return self.data[oldoff:off] 88 | else: 89 | self.o = len(self.data) 90 | return self.data[oldoff:] 91 | 92 | def eof(self): 93 | """ 94 | return True when the current position is at or beyond the end of the buffer. 
95 | """ 96 | return self.o >= len(self.data) 97 | -------------------------------------------------------------------------------- /crodump/dumpdbfields.py: -------------------------------------------------------------------------------- 1 | """ 2 | `dumpdbfields` demonstrates how to enumerate tables and records. 3 | """ 4 | import os 5 | import os.path 6 | from .Database import Database 7 | from .crodump import strucrack, dbcrack 8 | from .hexdump import unhex 9 | 10 | 11 | def processargs(args): 12 | for dbpath in args.dbdirs: 13 | if args.recurse: 14 | for path, _, files in os.walk(dbpath): 15 | # check if there is a crostru file in this directory. 16 | if any(_ for _ in files if _.lower() == "crostru.dat"): 17 | yield path 18 | else: 19 | yield dbpath 20 | 21 | 22 | def main(): 23 | import argparse 24 | 25 | parser = argparse.ArgumentParser(description="db field dumper") 26 | parser.add_argument("--kod", type=str, help="specify custom KOD table") 27 | parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat") 28 | parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroIndex.dat+CroBank.dat") 29 | parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode") 30 | parser.add_argument("--maxrecs", "-m", type=int, default=100) 31 | parser.add_argument("--recurse", "-r", action="store_true") 32 | parser.add_argument("--verbose", "-v", action="store_true") 33 | parser.add_argument("dbdirs", type=str, nargs='*') 34 | args = parser.parse_args() 35 | 36 | for path in processargs(args): 37 | try: 38 | import crodump.koddecoder 39 | if args.kod: 40 | if len(args.kod)!=512: 41 | raise Exception("--kod should have a 512 hex digit argument") 42 | kod = crodump.koddecoder.new(list(unhex(args.kod))) 43 | elif args.nokod: 44 | kod = None 45 | elif args.strucrack: 46 | class Cls: pass 47 | cargs = Cls() 48 | cargs.dbdir = path 49 | cargs.sys = False 50 | cargs.silent = True 51 | 
cracked = strucrack(None, cargs) 52 | if not cracked: 53 | return 54 | kod = crodump.koddecoder.new(cracked) 55 | elif args.dbcrack: 56 | class Cls: pass 57 | cargs = Cls() 58 | cargs.dbdir = path 59 | cargs.sys = False 60 | cargs.silent = True 61 | cracked = dbcrack(None, cargs) 62 | if not cracked: 63 | return 64 | kod = crodump.koddecoder.new(cracked) 65 | else: 66 | kod = crodump.koddecoder.new() 67 | 68 | db = Database(path, kod) 69 | for tab in db.enumerate_tables(): 70 | tab.dump(args) 71 | print("nr of records: %d" % db.bank.nrofrecords) 72 | i = 0 73 | for rec in db.enumerate_records(tab): 74 | for field, fielddef in zip(rec.fields, tab.fields): 75 | print(">> %s -- %s" % (fielddef, field.content)) 76 | i += 1 77 | if i > args.maxrecs: 78 | break 79 | except Exception as e: 80 | print("ERROR: %s" % e) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /crodump/croconvert.py: -------------------------------------------------------------------------------- 1 | """ 2 | Commandline tool which convert a cronos database to .csv, .sql or .html. 3 | 4 | python3 croconvert.py -t html chechnya_proverki_ul_2012/ 5 | """ 6 | from .Database import Database 7 | from .crodump import strucrack, dbcrack 8 | from .hexdump import unhex 9 | from sys import exit, stdout 10 | from os.path import dirname, abspath, join 11 | from os import mkdir, chdir 12 | from datetime import datetime 13 | import base64 14 | import csv 15 | 16 | 17 | def template_convert(kod, args): 18 | """looks up template to convert to, parses the database and passes it to jinja2""" 19 | try: 20 | from jinja2 import Environment, FileSystemLoader 21 | except ImportError: 22 | exit( 23 | "Fatal: Jinja templating engine not found. 
Install using pip install jinja2" 24 | ) 25 | 26 | db = Database(args.dbdir, args.compact, kod) 27 | 28 | template_dir = join(dirname(dirname(abspath(__file__))), "templates") 29 | j2_env = Environment(loader=FileSystemLoader(template_dir)) 30 | j2_templ = j2_env.get_template(args.template + ".j2") 31 | j2_templ.stream(db=db, base64=base64).dump(stdout) 32 | 33 | 34 | def safepathname(name): 35 | return name.replace(':', '_').replace('/', '_').replace('\\', '_') 36 | 37 | 38 | def csv_output(kod, args): 39 | """creates a directory with the current timestamp and in it a set of CSV or TSV 40 | files with all the tables found and an extra directory with all the files""" 41 | db = Database(args.dbdir, args.compact, kod) 42 | 43 | mkdir(args.outputdir) 44 | chdir(args.outputdir) 45 | 46 | filereferences = [] 47 | 48 | # first dump all non-file tables 49 | for table in db.enumerate_tables(files=False): 50 | tablesafename = safepathname(table.tablename) + ".csv" 51 | 52 | with open(tablesafename, 'w', encoding='utf-8') as csvfile: 53 | writer = csv.writer(csvfile, delimiter=args.delimiter, escapechar='\\') 54 | writer.writerow([field.name for field in table.fields]) 55 | 56 | # Record should be iterable over its fields, so we could use writerows 57 | for record in db.enumerate_records(table): 58 | writer.writerow([field.content for field in record.fields]) 59 | 60 | filereferences.extend([field for field in record.fields if field.typ == 6]) 61 | 62 | # Write all files from the file table. 
This is useful for unreferenced files 63 | for table in db.enumerate_tables(files=True): 64 | filedir = "Files-" + table.abbrev 65 | mkdir(filedir) 66 | 67 | for system_number, content in db.enumerate_files(table): 68 | with open(join(filedir, str(system_number)), "wb") as binfile: 69 | binfile.write(content) 70 | 71 | if len(filereferences): 72 | filedir = "Files-Referenced" 73 | mkdir(filedir) 74 | 75 | # Write all referenced files with their filename and extension intact 76 | for reffile in filereferences: 77 | if reffile.content: # only print when file is not NULL 78 | filesafename = safepathname(reffile.filename) + "." + safepathname(reffile.extname) 79 | content = db.get_record(reffile.filedatarecord) 80 | with open(join("Files-Referenced", filesafename), "wb") as binfile: 81 | binfile.write(content) 82 | 83 | 84 | def main(): 85 | import argparse 86 | 87 | parser = argparse.ArgumentParser(description="CRONOS database converter") 88 | parser.add_argument("--template", "-t", type=str, default="html", 89 | help="output template to use for conversion") 90 | parser.add_argument("--csv", "-c", action='store_true', help="create output in .csv format") 91 | parser.add_argument("--delimiter", "-d", default=",", help="delimiter used in csv output") 92 | parser.add_argument("--outputdir", "-o", type=str, help="directory to create the dump in") 93 | parser.add_argument("--kod", type=str, help="specify custom KOD table") 94 | parser.add_argument("--compact", action="store_true", help="save memory by not caching the index, note: increases convert time by factor 1.15") 95 | parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat") 96 | parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroIndex.dat+CroBank.dat") 97 | parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode") 98 | parser.add_argument("dbdir", type=str) 99 | args = parser.parse_args() 100 | 101 | import 
crodump.koddecoder 102 | if args.kod: 103 | if len(args.kod)!=512: 104 | raise Exception("--kod should have a 512 hex digit argument") 105 | kod = crodump.koddecoder.new(list(unhex(args.kod))) 106 | elif args.nokod: 107 | kod = None 108 | elif args.strucrack: 109 | class Cls: pass 110 | cargs = Cls() 111 | cargs.dbdir = args.dbdir 112 | cargs.sys = False 113 | cargs.silent = True 114 | cracked = strucrack(None, cargs) 115 | if not cracked: 116 | return 117 | kod = crodump.koddecoder.new(cracked) 118 | elif args.dbcrack: 119 | class Cls: pass 120 | cargs = Cls() 121 | cargs.dbdir = args.dbdir 122 | cargs.sys = False 123 | cargs.silent = True 124 | cracked = dbcrack(None, cargs) 125 | if not cracked: 126 | return 127 | kod = crodump.koddecoder.new(cracked) 128 | else: 129 | kod = crodump.koddecoder.new() 130 | 131 | if args.csv: 132 | if not args.outputdir: 133 | args.outputdir = "cronodump"+datetime.now().strftime("-%Y-%m-%d-%H-%M-%S-%f") 134 | csv_output(kod, args) 135 | else: 136 | template_convert(kod, args) 137 | 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cronodump 2 | 3 | The cronodump utility can parse most of the databases created by the [CronosPro](https://www.cronos.ru/) database software 4 | and dump it to several output formats. 5 | 6 | The software is popular among Russian public offices, companies and police agencies. 
7 | 8 | 9 | # Quick start 10 | 11 | In its simplest form, without any dependencies, the croconvert command creates a [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) representation of all the database's tables and a copy of all files contained in the database: 12 | 13 | ```bash 14 | bin/croconvert --csv test_data/all_field_types 15 | ``` 16 | 17 | By default it creates a `cronodump-YYYY-mm-DD-HH-MM-SS-ffffff/` directory containing CSV files for each table found. It will under this directory also create a `Files-FL/` directory containing all the files stored in the Database, regardless of whether they are (still) referenced in any data table. All files that are actually referenced (and thus are known by their filename) will be stored under the `Files-Referenced` directory. With the `--outputdir` option you can choose your own dump location. 18 | 19 | When you get an error message, or just unreadable data, chances are your database is protected. You may need to look into the `--dbcrack` or `--strucrack` options, explained below. 20 | 21 | 22 | # Templates 23 | 24 | The croconvert command can use the powerful [jinja templating framework](https://jinja.palletsprojects.com/en/3.0.x/) to render more file formats like PostgreSQL and HTML. 25 | The default action for `croconvert` is to convert the database using the `html` template. 26 | Use 27 | 28 | ```bash 29 | python3 -m venv ./venv 30 | . venv/bin/activate 31 | pip install jinja2 32 | bin/croconvert test_data/all_field_types > test_data.html 33 | ``` 34 | 35 | to dump an HTML file with all tables found in the database, files listed and ready for download as inlined [data URI](https://en.wikipedia.org/wiki/Data_URI_scheme) and all table images inlined as well. Note that the resulting HTML file can be huge for large databases, causing a lot of load on browsers when trying to open them.
36 | 37 | 38 | The `-t postgres` command will dump the table schemes and records as valid `CREATE TABLE` and `INSERT INTO` statements to stdout. This dump can then be imported in a PostgreSQL database. Note that the backslash character is not escaped and thus the [`standard_conforming_strings`](https://www.postgresql.org/docs/current/runtime-config-compatible.html#GUC-STANDARD-CONFORMING-STRINGS) option should be on (the PostgreSQL default), so that backslashes in string literals are taken literally. 39 | 40 | Pull requests for [more templates supporting other output types](/templates) are welcome. 41 | 42 | 43 | # Inspection 44 | 45 | There's a `bin/crodump` tool to further investigate databases. This might be useful for extracting metadata like path names of table image files or input and output forms. Not all metadata has yet been completely reverse engineered, so some experience with understanding binary dumps might be required. 46 | 47 | The crodump script has a plethora of options but in its most basic form the `strudump` subcommand will provide a rich variety of metadata to look further: 48 | 49 | ```bash 50 | bin/crodump strudump -v -a test_data/all_field_types/ 51 | ``` 52 | The `-a` option tells strudump to output ascii instead of a hexdump. 53 | 54 | For a low level dump of the database contents, use: 55 | ```bash 56 | bin/crodump crodump -v test_data/all_field_types/ 57 | ``` 58 | The `-v` option tells crodump to include all unused byte ranges, this may be useful when identifying deleted records. 59 | 60 | For a bit higher level dump of the database contents, use: 61 | ```bash 62 | bin/crodump recdump test_data/all_field_types/ 63 | ``` 64 | This will print a hexdump of all records for all tables. 65 | 66 | 67 | ## decoding password protected databases 68 | 69 | Cronos v4 and higher are able to password protect databases, the protection works 70 | by modifying the KOD sbox.
`cronodump` has two methods of deriving the KOD sbox from 71 | a database: 72 | 73 | Both of these methods are statistics-based, so they may not always 74 | yield the correct KOD sbox. 75 | 76 | 77 | ### 1. strudump 78 | 79 | When the database has a sufficiently large CroStru.dat file, 80 | it is easy to derive the modified KOD-sbox from the CroStru file, the `--strucrack` option 81 | will do this. 82 | 83 | crodump --strucrack recdump 84 | 85 | ### 2. dbdump 86 | 87 | When the Bank and Index files are compressed, we can derive the KOD sbox by inspecting 88 | the fourth byte of each record, which should decode to a zero. 89 | 90 | The `--dbcrack` option will do this. 91 | 92 | crodump --dbcrack recdump 93 | 94 | 95 | # Installing 96 | 97 | `cronodump` requires python 3.7 or later. It has been tested on Linux, MacOS and Windows. 98 | There is one optional requirement: the `Jinja2` templating engine, but it will install fine without. 99 | 100 | There are several ways of installing `cronodump`: 101 | 102 | * You can run `cronodump` directly from the cloned git repository, by using the shell scripts in the `bin` subdirectory. 103 | * You can install `cronodump` in your python environment by running: `python setup.py build install`. 104 | * You can install `cronodump` from the public [pypi repository](https://pypi.org/project/cronodump/) with `pip install cronodump`. 105 | * You can install `cronodump` with the `Jinja2` templating engine from the public [pypi repository](https://pypi.org/project/cronodump/) with `pip install cronodump[templates]`. 106 | 107 | 108 | # Terminology 109 | 110 | We decided to use the more common terminology for database, tables, records, etc.
111 | Here is a table showing how cronos calls these: 112 | 113 | | what | cronos english | cronos russian 114 | |:------ |:------ |:------ 115 | | Database | Bank | Банк 116 | | Table | Base | Базы 117 | | Record | Record | Записи 118 | | Field | Field | поля 119 | | recid | System Number | Системный номер 120 | 121 | 122 | # License 123 | 124 | cronodump is released under the [MIT license](LICENSE). 125 | 126 | 127 | # References 128 | 129 | cronodump builds upon [documentation of the file format found in older versions of Cronos](http://sergsv.narod.ru/cronos.htm) and 130 | the [subsequent implementation of a parser for the old file format](https://github.com/occrp/cronosparser) but dropped the heuristic 131 | approach to guess offsets and obfuscation parameters for a more rigid parser. Refer to [the docs](docs/cronos-research.md) for further 132 | details. 133 | -------------------------------------------------------------------------------- /crodump/Datamodel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .hexdump import tohex, ashex 3 | from .readers import ByteReader 4 | 5 | 6 | class FieldDefinition: 7 | """ 8 | Contains the properties for a single field in a record. 
9 | """ 10 | def __init__(self, data): 11 | self.decode(data) 12 | 13 | def decode(self, data): 14 | self.defdata = data 15 | 16 | rd = ByteReader(data) 17 | self.typ = rd.readword() 18 | self.idx1 = rd.readdword() 19 | self.name = rd.readname() 20 | self.flags = rd.readdword() 21 | self.minval = rd.readbyte() # Always 1 22 | if self.typ: 23 | self.idx2 = rd.readdword() 24 | self.maxval = rd.readdword() # max value or length 25 | self.unk4 = rd.readdword() # Always 0x00000009 or 0x0001000d 26 | else: 27 | self.idx2 = 0 28 | self.maxval = self.unk4 = None 29 | self.remaining = rd.readbytes() 30 | 31 | def __str__(self): 32 | if self.typ: 33 | return "Type: %2d (%2d/%2d) %04x,(%d-%4d),%04x - %-40s -- %s" % ( 34 | self.typ, self.idx1, self.idx2, 35 | self.flags, self.minval, self.maxval, self.unk4, 36 | "'%s'" % self.name, tohex(self.remaining)) 37 | else: 38 | return "Type: %2d %2d %d,%d - '%s'" % ( 39 | self.typ, self.idx1, self.flags, self.minval, self.name) 40 | 41 | def sqltype(self): 42 | return { 0: "INTEGER PRIMARY KEY", 43 | 1: "INTEGER", 44 | 2: "VARCHAR(" + str(self.maxval) + ")", 45 | 3: "TEXT", # dictionaray 46 | 4: "DATE", 47 | 5: "TIMESTAMP", 48 | 6: "TEXT", # file reference 49 | }.get(self.typ, "TEXT") 50 | 51 | 52 | class TableImage: 53 | def __init__(self, data): 54 | self.decode(data) 55 | 56 | def decode(self, data): 57 | if not len(data): 58 | self.filename = "none" 59 | self.data = b'' 60 | return 61 | 62 | rd = ByteReader(data) 63 | 64 | _ = rd.readbyte() 65 | namelen = rd.readdword() 66 | self.filename = rd.readbytes(namelen).decode("cp1251", 'ignore') 67 | 68 | imagelen = rd.readdword() 69 | self.data = rd.readbytes(imagelen) 70 | 71 | 72 | class TableDefinition: 73 | def __init__(self, data, image=''): 74 | self.decode(data, image) 75 | 76 | def decode(self, data, image): 77 | """ 78 | decode the 'base' / table definition 79 | """ 80 | rd = ByteReader(data) 81 | 82 | self.unk1 = rd.readword() 83 | self.version = rd.readbyte() 84 | if 
self.version > 1: 85 | _ = rd.readbyte() # always 0 anyway 86 | 87 | # if this is not 5 (but 9), there's another 4 bytes inserted, this could be a length-byte. 88 | self.unk2 = rd.readbyte() 89 | 90 | self.unk3 = rd.readbyte() 91 | if self.unk2 > 5: # seen only 5 and 9 for now with 9 implying an extra dword 92 | _ = rd.readdword() 93 | self.unk4 = rd.readdword() 94 | 95 | self.tableid = rd.readdword() 96 | 97 | self.tablename = rd.readname() 98 | self.abbrev = rd.readname() 99 | self.unk7 = rd.readdword() 100 | nrfields = rd.readdword() 101 | 102 | self.headerdata = data[: rd.o] 103 | 104 | # There's (at least) two blocks describing fields, ended when encountering ffffffff 105 | self.fields = [] 106 | for _ in range(nrfields): 107 | deflen = rd.readword() 108 | fielddef = rd.readbytes(deflen) 109 | self.fields.append(FieldDefinition(fielddef)) 110 | 111 | # Between the first and the second block, there's some byte strings inbetween, count 112 | # given in first dword 113 | self.extraunkdatastrings = rd.readdword() 114 | 115 | for _ in range(self.extraunkdatastrings): 116 | datalen = rd.readword() 117 | skip = rd.readbytes(datalen) 118 | 119 | try: 120 | # Then there's another unknow dword and then (probably section indicator) 02 byte 121 | self.unk8_ = rd.readdword() 122 | if rd.readbyte() != 2: 123 | print("Warning: FieldDefinition Section 2 not marked with a 2") 124 | self.unk9 = rd.readdword() 125 | 126 | # Then there's the amount of extra fields in the second section 127 | nrextrafields = rd.readdword() 128 | 129 | for _ in range(nrextrafields): 130 | deflen = rd.readword() 131 | fielddef = rd.readbytes(deflen) 132 | self.fields.append(FieldDefinition(fielddef)) 133 | except Exception as e: 134 | print("Warning: Error '%s' parsing FieldDefinitions" % e) 135 | 136 | try: 137 | self.terminator = rd.readdword() 138 | except EOFError: 139 | print("Warning: FieldDefinition section not terminated") 140 | except Exception as e: 141 | print("Warning: Error '%s' parsing 
class Field:
    """
    Holds a single decoded field value.

    `typ` is copied from the field definition, `data` is the raw value and
    `content` is the printable text derived from it.
    """
    def __init__(self, fielddef, data):
        self.decode(fielddef, data)

    def decode(self, fielddef, data):
        """
        Store `data` and derive `self.content` from it, depending on `fielddef.typ`.
        """
        kind = fielddef.typ
        self.typ = kind
        self.data = data

        if not data:
            # no value stored
            self.content = ""
            return

        if kind == 0:
            # typ 0 is the recno, or as cronos calls this: Системный номер, systemnumber.
            # it is only converted to a string for presentation
            self.content = str(data)
            return

        if kind == 4:
            # typ 4 is DATE: the last four digits are month and day,
            # the digits before that are the year relative to 1900.
            text = data
            try:
                text = text.rstrip(b"\x00")
                year, month, day = 1900 + int(text[:-4]), int(text[-4:-2]), int(text[-2:])
                self.content = "%04d-%02d-%02d" % (year, month, day)
            except ValueError:
                # not numeric: fall back to the python representation of the value
                self.content = str(text)
            return

        if kind == 5:
            # typ 5 is TIME: the last four digits are hours and minutes.
            text = data
            try:
                text = text.rstrip(b"\x00")
                hours, minutes = int(text[-4:-2]), int(text[-2:])
                self.content = "%02d:%02d" % (hours, minutes)
            except ValueError:
                self.content = str(text)
            return

        if kind == 6:
            # internal file reference: two dwords, then three
            # 0x1e separated cp1251 strings.
            reader = ByteReader(data)
            self.flag = reader.readdword()
            self.remlen = reader.readdword()
            for attr in ("filename", "extname", "filedatarecord"):
                setattr(self, attr, reader.readtoseperator(b"\x1e").decode("cp1251", 'ignore'))
            self.content = " ".join([self.filename, self.extname, self.filedatarecord])
            return

        if kind in (7, 8, 9):
            # just hexdump foreign keys
            self.content = ashex(data)
            return

        # currently assuming everything else to be strings, which is wrong
        self.content = data.rstrip(b"\x00").decode("cp1251", 'ignore')
243 | self.fields = [ Field(tabledef[0], str(recno)) ] 244 | 245 | rd = ByteReader(data) 246 | for fielddef in tabledef[1:]: 247 | if not rd.eof() and rd.testbyte(0x1b): 248 | # read complex record indicated by b"\x1b" 249 | rd.readbyte() 250 | size = rd.readdword() 251 | fielddata = rd.readbytes(size) 252 | else: 253 | fielddata = rd.readtoseperator(b"\x1e") 254 | 255 | self.fields.append(Field(fielddef, fielddata)) 256 | -------------------------------------------------------------------------------- /crodump/Database.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import re 4 | from sys import stderr 5 | from binascii import b2a_hex 6 | from .readers import ByteReader 7 | from .hexdump import strescape, toout, ashex 8 | from .Datamodel import TableDefinition, Record 9 | from .Datafile import Datafile 10 | import base64 11 | import struct 12 | import crodump.koddecoder 13 | 14 | import sys 15 | if sys.version_info[0] == 2: 16 | sys.exit("cronodump needs python3") 17 | 18 | 19 | class Database: 20 | """represent the entire database, consisting of Stru, Index and Bank files""" 21 | 22 | def __init__(self, dbdir, compact, kod=crodump.koddecoder.new()): 23 | """ 24 | `dbdir` is the directory containing the Cro*.dat and Cro*.tad files. 25 | `compact` if set, the .tad file is not cached in memory, making dumps 15 % slower 26 | `kod` is optionally a KOD coder object. 27 | by default the v3 KOD coding will be used. 28 | """ 29 | self.dbdir = dbdir 30 | self.compact = compact 31 | self.kod = kod 32 | 33 | # Stru+Index+Bank for the components for most databases 34 | self.stru = self.getfile("Stru") 35 | self.index = self.getfile("Index") 36 | self.bank = self.getfile("Bank") 37 | 38 | # the Sys file resides in the "Program Files\Cronos" directory, and 39 | # contains an index of all known databases. 
40 | self.sys = self.getfile("Sys") 41 | 42 | def getfile(self, name): 43 | """ 44 | Returns a Datafile object for `name`. 45 | this function expects a `Cro.dat` and a `Cro.tad` file. 46 | When no such files exist, or only one, then None is returned. 47 | 48 | `name` is matched case insensitively 49 | """ 50 | try: 51 | datname = self.getname(name, "dat") 52 | tadname = self.getname(name, "tad") 53 | if datname and tadname: 54 | return Datafile(name, open(datname, "rb"), open(tadname, "rb"), self.compact, self.kod) 55 | except IOError: 56 | return 57 | 58 | def getname(self, name, ext): 59 | """ 60 | Get a case-insensitive filename match for 'name.ext'. 61 | Returns None when no matching file was not found. 62 | """ 63 | basename = "Cro%s.%s" % (name, ext) 64 | for fn in os.listdir(self.dbdir): 65 | if basename.lower() == fn.lower(): 66 | return os.path.join(self.dbdir, fn) 67 | 68 | def dump(self, args): 69 | """ 70 | Calls the `dump` method on all database components. 71 | """ 72 | if self.stru: 73 | self.stru.dump(args) 74 | if self.index: 75 | self.index.dump(args) 76 | if self.bank: 77 | self.bank.dump(args) 78 | if self.sys: 79 | self.sys.dump(args) 80 | 81 | def strudump(self, args): 82 | """ 83 | prints all info found in the CroStru file. 
84 | """ 85 | if not self.stru: 86 | print("missing CroStru file") 87 | return 88 | self.dump_db_table_defs(args) 89 | 90 | def decode_db_definition(self, data): 91 | """ 92 | decode the 'bank' / database definition 93 | """ 94 | rd = ByteReader(data) 95 | 96 | d = dict() 97 | while not rd.eof(): 98 | keyname = rd.readname() 99 | if keyname in d: 100 | print("WARN: duplicate key: %s" % keyname) 101 | 102 | index_or_length = rd.readdword() 103 | if index_or_length >> 31: 104 | d[keyname] = rd.readbytes(index_or_length & 0x7FFFFFFF) 105 | else: 106 | refdata = self.stru.readrec(index_or_length) 107 | if refdata[:1] != b"\x04": 108 | print("WARN: expected refdata to start with 0x04") 109 | d[keyname] = refdata[1:] 110 | return d 111 | 112 | def dump_db_definition(self, args, dbdict): 113 | """ 114 | decode the 'bank' / database definition 115 | """ 116 | for k, v in dbdict.items(): 117 | if re.search(b"[^\x0d\x0a\x09\x20-\x7e\xc0-\xff]", v): 118 | print("%-20s - %s" % (k, toout(args, v))) 119 | else: 120 | print('%-20s - "%s"' % (k, strescape(v))) 121 | 122 | def dump_db_table_defs(self, args): 123 | """ 124 | decode the table defs from recid #1, which always has table-id #3 125 | Note that I don't know if it is better to refer to this by recid, or by table-id. 
126 | 127 | other table-id's found in CroStru: 128 | #4 -> large values referenced from tableid#3 129 | """ 130 | dbinfo = self.stru.readrec(1) 131 | if dbinfo[:1] != b"\x03": 132 | print("WARN: expected dbinfo to start with 0x03") 133 | dbdef = self.decode_db_definition(dbinfo[1:]) 134 | self.dump_db_definition(args, dbdef) 135 | 136 | for k, v in dbdef.items(): 137 | if k.startswith("Base") and k[4:].isnumeric(): 138 | print("== %s ==" % k) 139 | tbdef = TableDefinition(v, dbdef.get("BaseImage" + k[4:], b'')) 140 | tbdef.dump(args) 141 | elif k == "NS1": 142 | self.dump_ns1(v) 143 | 144 | def dump_ns1(self, data): 145 | if len(data)<2: 146 | print("NS1 is unexpectedly short") 147 | return 148 | unk1, sh, = struct.unpack_from(" %6d, %d, %d:'%s'" % (unk1, sh, serial, unk2, pwlen, password)) 162 | 163 | def enumerate_tables(self, files=False): 164 | """ 165 | yields a TableDefinition object for all `BaseNNN` entries found in CroStru 166 | """ 167 | dbinfo = self.stru.readrec(1) 168 | if dbinfo[:1] != b"\x03": 169 | print("WARN: expected dbinfo to start with 0x03") 170 | try: 171 | dbdef = self.decode_db_definition(dbinfo[1:]) 172 | except Exception as e: 173 | print("ERROR decoding db definition: %s" % e) 174 | print("This could possibly mean that you need to try with the --strucrack option") 175 | return 176 | 177 | for k, v in dbdef.items(): 178 | if k.startswith("Base") and k[4:].isnumeric(): 179 | if files and k[4:] == "000": 180 | yield TableDefinition(v) 181 | if not files and k[4:] != "000": 182 | yield TableDefinition(v, dbdef.get("BaseImage" + k[4:], b'')) 183 | 184 | def enumerate_records(self, table): 185 | """ 186 | Yields a Record object for all records in CroBank matching 187 | the tableid from `table` 188 | 189 | usage: 190 | for tab in db.enumerate_tables(): 191 | for rec in db.enumerate_records(tab): 192 | print(sqlformatter(tab, rec)) 193 | """ 194 | for i in range(self.bank.nrofrecords): 195 | data = self.bank.readrec(i + 1) 196 | if data and 
data[0] == table.tableid: 197 | try: 198 | yield Record(i + 1, table.fields, data[1:]) 199 | except EOFError: 200 | print("Record %d too short: -- %s" % (i+1, ashex(data)), file=stderr) 201 | except Exception as e: 202 | print("Record %d broken: ERROR '%s' -- %s" % (i+1, e, ashex(data)), file=stderr) 203 | 204 | def enumerate_files(self, table): 205 | """ 206 | Yield all file contents found in CroBank for `table`. 207 | This is most likely the table with id 0. 208 | """ 209 | for i in range(self.bank.nrofrecords): 210 | data = self.bank.readrec(i + 1) 211 | if data and data[0] == table.tableid: 212 | yield i + 1, data[1:] 213 | 214 | def get_record(self, index, asbase64=False): 215 | """ 216 | Retrieve a single record from CroBank with record number `index`. 217 | """ 218 | data = self.bank.readrec(int(index)) 219 | if asbase64: 220 | return base64.b64encode(data[1:]).decode('utf-8') 221 | else: 222 | return data[1:] 223 | 224 | def recdump(self, args): 225 | """ 226 | Function for outputing record contents of the various .dat files. 227 | 228 | This function is mostly useful for reverse-engineering the database format. 
def destruct_sys3_def(rd):
    """
    Placeholder for decoding type 3 of the records found in CroSys.

    Not implemented: `rd` is not used, and None is returned.
    """
    # todo
    pass
def destruct(kod, args):
    """
    decode the index#1 structure information record
    Takes hex input from stdin.

    `args.type` selects how the data is interpreted:
      1 - a 'bank' / database definition
      2 - a 'base' / table definition
      3 - a 'sys' / dbindex definition
    Other values produce no output. `kod` is not used.
    """
    import sys

    data = sys.stdin.buffer.read()
    data = unhex(data)

    if args.type == 1:
        # create a dummy db object
        db = Database(".", args.compact)
        # dump_db_definition expects the decoded dictionary, not the raw bytes.
        dbdef = db.decode_db_definition(data)
        db.dump_db_definition(args, dbdef)
    elif args.type == 2:
        tbdef = TableDefinition(data)
        tbdef.dump(args)
    elif args.type == 3:
        destruct_sys_definition(args, data)
def main():
    """
    Command line entry point.

    Builds the argument parser with its sub commands, selects the KOD coder
    from the global options (--kod, --nokod, --strucrack, --dbcrack, or the
    default table), and then calls the handler of the chosen sub command
    with `(kod, args)`.
    """
    import argparse

    parser = argparse.ArgumentParser(description="CRO hexdumper")
    subparsers = parser.add_subparsers(title='commands',
                                       help='Use the --help option for the individual sub commands for more details')
    parser.set_defaults(handler=lambda *args: parser.print_help())
    parser.add_argument("--debug", action="store_true", help="break on exceptions")
    parser.add_argument("--kod", type=str, help="specify custom KOD table")
    parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat")
    parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroBank.dat + CroIndex.dat")
    parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
    parser.add_argument("--compact", action="store_true", help="save memory by not caching the index, note: increases convert time by factor 1.15")

    p = subparsers.add_parser("kodump", help="KOD/hex dumper")
    p.add_argument("--offset", "-o", type=str, default="0")
    p.add_argument("--length", "-l", type=str)
    p.add_argument("--width", "-w", type=str)
    p.add_argument("--endofs", "-e", type=str)
    p.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
    p.add_argument("--unhex", "-x", action="store_true", help="assume the input contains hex data")
    p.add_argument("--shift", "-s", type=str, help="KOD decode with the specified shift")
    p.add_argument("--increment", "-i", action="store_true",
                   help="assume data is already KOD decoded, but with wrong shift -> dump alternatives.")
    p.add_argument("--ascdump", "-a", action="store_true", help="CP1251 asc dump of the data")
    p.add_argument("--invkod", "-I", action="store_true", help="KOD encode")
    p.add_argument("filename", type=str, nargs="?", help="dump either stdin, or the specified file")
    p.set_defaults(handler=kod_hexdump)

    p = subparsers.add_parser("crodump", help="CROdumper")
    p.add_argument("--verbose", "-v", action="store_true")
    p.add_argument("--ascdump", "-a", action="store_true")
    p.add_argument("--maxrecs", "-m", type=str, help="max nr or recots to output")
    p.add_argument("--nodecompress", action="store_false", dest="decompress", default="true")
    p.add_argument("dbdir", type=str)
    p.set_defaults(handler=cro_dump)

    p = subparsers.add_parser("sysdump", help="SYSdumper")
    p.add_argument("--verbose", "-v", action="store_true")
    p.add_argument("--ascdump", "-a", action="store_true")
    p.add_argument("--nodecompress", action="store_false", dest="decompress", default="true")
    p.add_argument("dbdir", type=str)
    p.set_defaults(handler=sys_dump)

    p = subparsers.add_parser("recdump", help="record dumper")
    p.add_argument("--verbose", "-v", action="store_true")
    p.add_argument("--ascdump", "-a", action="store_true")
    p.add_argument("--maxrecs", "-m", type=str, help="max nr or recots to output")
    p.add_argument("--find1d", action="store_true", help="Find records with 0x1d in it")
    p.add_argument("--stats", action="store_true", help="calc table stats from the first byte of each record",)
    p.add_argument("--index", action="store_true", help="dump CroIndex")
    # this option selects the CroStru file (see Database.recdump)
    p.add_argument("--stru", action="store_true", help="dump CroStru")
    p.add_argument("--bank", action="store_true", help="dump CroBank")
    p.add_argument("--sys", action="store_true", help="dump CroSys")
    p.add_argument("dbdir", type=str)
    p.set_defaults(handler=rec_dump)

    p = subparsers.add_parser("strudump", help="STRUdumper")
    p.add_argument("--verbose", "-v", action="store_true")
    p.add_argument("--ascdump", "-a", action="store_true")
    p.add_argument("dbdir", type=str)
    p.set_defaults(handler=stru_dump)

    p = subparsers.add_parser("destruct", help="Stru dumper")
    p.add_argument("--verbose", "-v", action="store_true")
    p.add_argument("--ascdump", "-a", action="store_true")
    p.add_argument("--type", "-t", type=int, help="what type of record to destruct")
    p.set_defaults(handler=destruct)

    p = subparsers.add_parser("strucrack", help="Crack v4 KOD encrypion, bypassing the need for the database password.")
    p.add_argument("--sys", action="store_true", help="Use CroSys for cracking")
    p.add_argument("--silent", action="store_true", help="no output")
    p.add_argument("dbdir", type=str)
    p.set_defaults(handler=strucrack)

    p = subparsers.add_parser("dbcrack", help="Crack v4 KOD encrypion, bypassing the need for the database password.")
    p.add_argument("--silent", action="store_true", help="no output")
    p.add_argument("dbdir", type=str)
    p.set_defaults(handler=dbcrack)

    args = parser.parse_args()

    def crack_args():
        # strucrack() and dbcrack() read dbdir, compact, sys and silent from
        # their args object. `compact` must be included, since both pass it
        # on to the Database constructor.
        return argparse.Namespace(dbdir=args.dbdir, compact=args.compact,
                                  sys=False, silent=True)

    import crodump.koddecoder
    if args.kod:
        if len(args.kod)!=512:
            raise Exception("--kod should have a 512 hex digit argument")
        kod = crodump.koddecoder.new(list(unhex(args.kod)))
    elif args.nokod:
        kod = None
    elif args.strucrack:
        cracked = strucrack(None, crack_args())
        if not cracked:
            return
        kod = crodump.koddecoder.new(cracked)
    elif args.dbcrack:
        cracked = dbcrack(None, crack_args())
        if not cracked:
            return
        kod = crodump.koddecoder.new(cracked)
    else:
        kod = crodump.koddecoder.new()

    if args.handler:
        args.handler(kod, args)
42 | return self.version in (b'01.19',) 43 | 44 | def readdathdr(self): 45 | """ 46 | Read the .dat file header. 47 | Note that the 19 byte header if followed by 0xE9 random bytes, generated by 48 | 'srand(time())' followed by 0xE9 times obfuscate(rand()) 49 | """ 50 | self.dat.seek(0) 51 | hdrdata = self.dat.read(19) 52 | 53 | ( 54 | magic, # +00 8 bytes 55 | self.hdrunk, # +08 uint16 56 | self.version, # +0a 5 bytes 57 | self.encoding, # +0f uint16 58 | self.blocksize, # +11 uint16 59 | ) = struct.unpack("<8sH5sHH", hdrdata) 60 | 61 | if magic != b"CroFile\x00": 62 | print("unknown magic: ", magic) 63 | raise Exception("not a Crofile") 64 | self.use64bit = self.version in (b"01.03", b"01.05", b"01.11") 65 | 66 | # blocksize 67 | # 0040 -> Bank 68 | # 0400 -> Index or Sys 69 | # 0200 -> Stru or Sys 70 | 71 | # encoding 72 | # bit0 = 'KOD encoded' 73 | # bit1 = compressed 74 | 75 | def readtad(self): 76 | """ 77 | read and decode the .tad file. 78 | """ 79 | self.tad.seek(0) 80 | if self.isv3(): 81 | hdrdata = self.tad.read(2 * 4) 82 | self.nrdeleted, self.firstdeleted = struct.unpack("<2L", hdrdata) 83 | elif self.isv4(): 84 | hdrdata = self.tad.read(4 * 4) 85 | unk1, self.nrdeleted, self.firstdeleted, unk2 = struct.unpack("<4L", hdrdata) 86 | else: 87 | raise Exception("unsupported .tad version") 88 | 89 | self.tadhdrlen = self.tad.tell() 90 | self.tadentrysize = 16 if self.use64bit else 12 91 | if self.compact: 92 | self.tad.seek(0, io.SEEK_END) 93 | else: 94 | self.idxdata = self.tad.read() 95 | self.tadsize = self.tad.tell() - self.tadhdrlen 96 | self.nrofrecords = self.tadsize // self.tadentrysize 97 | if self.tadsize % self.tadentrysize: 98 | print("WARN: leftover data in .tad") 99 | 100 | def tadidx(self, idx): 101 | """ 102 | If we're not supposed to be more compact but slower, lookup from a cached .tad 103 | """ 104 | if self.compact: 105 | return self.tadidx_seek(idx) 106 | 107 | if self.use64bit: 108 | # 01.03 and 01.11 have 64 bit file offsets 109 | 
return struct.unpack_from("> 24 149 | ln &= 0xFFFFFFF 150 | elif self.isv4(): 151 | flags = ofs >> 56 152 | ofs &= (1<<56)-1 153 | 154 | dat = self.readdata(ofs, ln) 155 | 156 | if not dat: 157 | # empty record 158 | encdat = dat 159 | elif not flags: 160 | if self.use64bit: 161 | extofs, extlen = struct.unpack(" o: 202 | yield o, start - o 203 | o = end 204 | if o < filesize: 205 | yield o, filesize - o 206 | 207 | def dump(self, args): 208 | """ 209 | Dump decodes all data referenced from the .tad file. 210 | And optionally print out all unreferenced byte ranges in the .dat file. 211 | 212 | This function is mostly useful for reverse-engineering the database format. 213 | 214 | the `args` object controls how data is decoded. 215 | """ 216 | print("hdr: %-6s dat: %04x %s enc:%04x bs:%04x, tad: %08x %08x" % ( 217 | self.name, self.hdrunk, self.version, 218 | self.encoding, self.blocksize, 219 | self.nrdeleted, self.firstdeleted)) 220 | 221 | ranges = [] # keep track of used bytes in the .dat file. 222 | 223 | for i in range(self.nrofrecords): 224 | (ofs, ln, chk) = self.tadidx(i) 225 | idx = i + 1 226 | if args.maxrecs and i==args.maxrecs: 227 | break 228 | if ln == 0xFFFFFFFF: 229 | print("%5d: %08x %08x %08x" % (idx, ofs, ln, chk)) 230 | continue 231 | 232 | if self.isv3(): 233 | flags = ln >> 24 234 | ln &= 0xFFFFFFF 235 | elif self.isv4(): 236 | flags = ofs >> 56 237 | # 04 --> data, v3compdata 238 | # 02,03 --> deleted 239 | # 00 --> extrec 240 | ofs &= (1<<56)-1 241 | 242 | dat = self.readdata(ofs, ln) 243 | ranges.append((ofs, ofs + ln, "item #%d" % i)) 244 | decflags = [" ", " "] 245 | infostr = "" 246 | tail = b"" 247 | 248 | if not dat: 249 | # empty record 250 | encdat = dat 251 | elif not flags: 252 | if self.use64bit: 253 | extofs, extlen = struct.unpack("HH", data, o) 311 | if flag != 0x800 and flag != 0x008: 312 | return 313 | o += size + 2 314 | return True 315 | 316 | def decompress(self, data): 317 | """ 318 | Decompress a record. 
319 | 320 | Compressed records can have several chunks of compressed data. 321 | Note that the compression header uses a mix of big-endian and little numbers. 322 | 323 | each chunk has the following format: 324 | size - big endian uint16, size of flag + crc + compdata 325 | flag - big endian uint16 - always 0x800 326 | crc - little endian uint32, crc32 of the decompressed data 327 | the final chunk has only 3 bytes: a zero size followed by a 2. 328 | 329 | the crc algorithm is the one labeled 'crc-32' on this page: 330 | http://crcmod.sourceforge.net/crcmod.predefined.html 331 | """ 332 | result = b"" 333 | o = 0 334 | while o < len(data) - 3: 335 | # note the mix of bigendian and little endian numbers here. 336 | size, flag = struct.unpack_from(">HH", data, o) 337 | storedcrc, = struct.unpack_from(" large record 71 | uint32 checksum // but sometimes just 0x00000000, 0x00000001 or 0x00000002 72 | 73 | versions `01.03`, `01.05` and `01.11` use 64 bit offsets: 74 | 75 | uint64 offset 76 | uint32 size // with flag in upper bit, 0 -> large record 77 | uint32 checksum // but sometimes just 0x00000000, 0x00000001 or 0x00000002 78 | 79 | where size can be 0xffffffff (indicating a free/deleted block). 80 | Bit 31 of the size indicates that this is an extended record. 81 | 82 | Extended records start with plaintext: { uint32 offset, uint32 size } or { uint64 offset, uint32 size } 83 | 84 | 85 | ## the 'old format' 86 | 87 | The original description made it look like there were different formats for the block references. 88 | 89 | This was found in previously existing documentation, but no sample databases with this format were found so far. 
90 | 91 | If the .dat file has a version of 01.03 or later, the corresponding .tad file looks like this: 92 | 93 | uint32_t offset 94 | uint32_t size // with flag in upper bit, 0 -> large record 95 | uint32_t checksum // but sometimes just 0x00000000, 0x00000001 or 0x00000002 96 | uint32_t unknown // mostly 0 97 | 98 | The old description also assumed 12-byte reference blocks, but laid out as a packed struct, probably if the CroFile version is 01.01. 99 | 100 | uint32 offset1 101 | uint16 size1 102 | uint32 offset2 103 | uint16 size2 104 | 105 | with the first chunk read from offset1 with length size1 and potentially more parts with total length of size2 starting at file offset offset2 with the first `uint32` of the 256 byte chunk being the next chunk's offset and a maximum of 252 bytes being actual data. 106 | 107 | However, I never found files with .tad like that. Also the original description insisted on those chunks needing the decode-magic outlined below, but the python implementation only does that for CroStru files and still seems to produce results. 108 | 109 | ## CroStru 110 | 111 | Interesting files are CroStru.dat containing metadata on the database within blocks whose offset and size are found in CroStru.tad. These blocks are rotated byte wise using an sbox found in the cro2sql sources and then each byte is incremented by a one byte counter which is initialised by a per block offset.
The sbox looks like this: 112 | 113 | unsigned char kod[256] = { 114 | 0x08, 0x63, 0x81, 0x38, 0xa3, 0x6b, 0x82, 0xa6, 115 | 0x18, 0x0d, 0xac, 0xd5, 0xfe, 0xbe, 0x15, 0xf6, 116 | 0xa5, 0x36, 0x76, 0xe2, 0x2d, 0x41, 0xb5, 0x12, 117 | 0x4b, 0xd8, 0x3c, 0x56, 0x34, 0x46, 0x4f, 0xa4, 118 | 0xd0, 0x01, 0x8b, 0x60, 0x0f, 0x70, 0x57, 0x3e, 119 | 0x06, 0x67, 0x02, 0x7a, 0xf8, 0x8c, 0x80, 0xe8, 120 | 0xc3, 0xfd, 0x0a, 0x3a, 0xa7, 0x73, 0xb0, 0x4d, 121 | 0x99, 0xa2, 0xf1, 0xfb, 0x5a, 0xc7, 0xc2, 0x17, 122 | 0x96, 0x71, 0xba, 0x2a, 0xa9, 0x9a, 0xf3, 0x87, 123 | 0xea, 0x8e, 0x09, 0x9e, 0xb9, 0x47, 0xd4, 0x97, 124 | 0xe4, 0xb3, 0xbc, 0x58, 0x53, 0x5f, 0x2e, 0x21, 125 | 0xd1, 0x1a, 0xee, 0x2c, 0x64, 0x95, 0xf2, 0xb8, 126 | 0xc6, 0x33, 0x8d, 0x2b, 0x1f, 0xf7, 0x25, 0xad, 127 | 0xff, 0x7f, 0x39, 0xa8, 0xbf, 0x6a, 0x91, 0x79, 128 | 0xed, 0x20, 0x7b, 0xa1, 0xbb, 0x45, 0x69, 0xcd, 129 | 0xdc, 0xe7, 0x31, 0xaa, 0xf0, 0x65, 0xd7, 0xa0, 130 | 0x32, 0x93, 0xb1, 0x24, 0xd6, 0x5b, 0x9f, 0x27, 131 | 0x42, 0x85, 0x07, 0x44, 0x3f, 0xb4, 0x11, 0x68, 132 | 0x5e, 0x49, 0x29, 0x13, 0x94, 0xe6, 0x1b, 0xe1, 133 | 0x7d, 0xc8, 0x2f, 0xfa, 0x78, 0x1d, 0xe3, 0xde, 134 | 0x50, 0x4e, 0x89, 0xb6, 0x30, 0x48, 0x0c, 0x10, 135 | 0x05, 0x43, 0xce, 0xd3, 0x61, 0x51, 0x83, 0xda, 136 | 0x77, 0x6f, 0x92, 0x9d, 0x74, 0x7c, 0x04, 0x88, 137 | 0x86, 0x55, 0xca, 0xf4, 0xc1, 0x62, 0x0e, 0x28, 138 | 0xb7, 0x0b, 0xc0, 0xf5, 0xcf, 0x35, 0xc5, 0x4c, 139 | 0x16, 0xe0, 0x98, 0x00, 0x9b, 0xd9, 0xae, 0x03, 140 | 0xaf, 0xec, 0xc9, 0xdb, 0x6d, 0x3b, 0x26, 0x75, 141 | 0x3d, 0xbd, 0xb2, 0x4a, 0x5d, 0x6c, 0x72, 0x40, 142 | 0x7e, 0xab, 0x59, 0x52, 0x54, 0x9c, 0xd2, 0xe9, 143 | 0xef, 0xdd, 0x37, 0x1e, 0x8f, 0xcb, 0x8a, 0x90, 144 | 0xfc, 0x84, 0xe5, 0xf9, 0x14, 0x19, 0xdf, 0x6e, 145 | 0x23, 0xc4, 0x66, 0xeb, 0xcc, 0x22, 0x1c, 0x5c, 146 | }; 147 | 148 | 149 | given the `shift`, the encoded data: `a[0]..a[n-1]` and the decoded data: `b[0]..b[n-1]`, the encoding works as follows: 150 | 151 | decode: b[i] = KOD[a[i]] - (i+shift) 152 | 
encode: a[i] = INV[b[i] + (i+shift)] 153 | 154 | 155 | The original description of an older database format called the per-block counter start offset 'sistN', which seems to imply that it is constant for certain entries. These offsets correspond to a "system number" of meta entries visible in the database software. For encoded records this is their primary key. 156 | 157 | I noticed that the first 256 bytes of CroStru.dat look nearly identical (except for the first 16 bytes) to those of CroBank.dat. 158 | 159 | The toplevel table-id for CroStru and CroSys is #3, while referenced records have tableid #4. 160 | 161 | ## CroBank 162 | 163 | CroBank.dat contains the actual database entries for multiple tables as described in the CroStru file. Each chunk is first re-assembled (and potentially decoded, with the per-block offset being the record number in the .tad file). 164 | 165 | After re-assembly, its first byte defines which table it belongs to. It is encoded in cp1251 (or possibly IBM866) with actual column data separated by 0x1e. 166 | 167 | There is an extra concept of sub fields in those columns, indicated by a 0x1d byte. 168 | 169 | Fields of field types 6 and 9 start with a 0x1b byte, followed by a uint32 size of the actual fields. It may then contain further 0x1e bytes which indicate sub field separators. 170 | 171 | If used for field type 6, the field begins with two uint32 (the first one mostly 0x00000001, the second one the size of the next strings) followed by three 0x1e separated strings containing file name, file extension and system number of the actual file record data referred to by this record. 172 | 173 | ## structure definitions 174 | 175 | records start numbering at '1'. 176 | Names are stored as: `byte strlen + char value[strlen]` 177 | 178 | The first entry contains: 179 | 180 | uint8 181 | array { 182 | Name keyname 183 | uint32 index_or_size; // size when bit31 is set.
184 | uint8 data[size] 185 | } 186 | 187 | this results in a dictionary, with keys like: `Bank`, `BankId`, `BankTable`, `Base`nnn, etc. 188 | 189 | the `Base000` entry contains the record number for the table definition of the first table. 190 | 191 | ## table definitions 192 | 193 | uint16 unk1 194 | union { 195 | uint8 shortversion; // 1 196 | uint16 version; // >1 197 | } 198 | uint8 somelen; // 5 or 9 199 | struct { 200 | uint8 unk3 201 | uint32 unk4 // not there when 'somelen'==5 202 | uint32 unk5 203 | } 204 | uint32 tableid 205 | Name tablename 206 | Name abbreviation 207 | uint32 unk7 208 | uint32 nrfields 209 | 210 | array { 211 | uint16 entrysize -- total nr of bytes in this entry. 212 | uint16 fieldtype // see below 213 | uint32 fieldindex1 // presentation index (i.e. where in the UI it shows) 214 | Name fieldname 215 | uint32 flags 216 | uint8 alwaysone // maybe the 'minvalue' 217 | uint32 fieldindex2 // serialization index (i.e. where in the record in the .dat it appears) 218 | uint32 fieldsize // max fieldsize 219 | uint32 unk4 220 | ... 221 | followed by remaining unknown bytes 222 | } fields[nrfields] 223 | 224 | uint32 extradatstr // amount of unknown length indexed data strings between field definition blocks 225 | array { 226 | uint16 datalen 227 | uint8[datalen] 228 | } datastrings[extradatstr] 229 | 230 | uint32 unk8 231 | uint8 fielddefblock // always 2, probably the number of this block of field definitions 232 | uint32 unk9 233 | 234 | uint32 nrextrafields 235 | array { 236 | ... as above 237 | } extrafields[nrextrafields] 238 | 239 | followed by remaining unknown bytes 240 | ... 241 | 242 | 243 | In order to have field definitions for all the fields in a record from the .dat for that table, 244 | fields.append(extrafields) must be sorted by their fieldindex2. 
245 | 246 | ## field types 247 | 248 | The interface gives a list of field types I can select for table columns: 249 | 250 | * 0 - Системный номер = Primary Key ID 251 | * 1 - Числовое = Numeric 252 | * 2 - Текстовое = Text 253 | * 3 - Словарное = Dictionary 254 | * 4 - Дата = Date 255 | * 5 - Время = Time 256 | * 6 - Файл = File (internal) 257 | * 29 - Внешний файл = File (external) 258 | * 7 - Прямая ссылка = Direct link 259 | * 8 - Обратная ссылка = Back link 260 | * 9 - Прямая-Обратная ссылка = Direct-Reverse link 261 | * 17 - Связь по полю = Field communication 262 | 263 | Other unassigned values in the table entry definition are 264 | 265 | * Dictionary Base (defaults to 0) 266 | * номер в записи = number in the record 267 | * Длина Поля = Field size 268 | * Flags: 269 | * (0x2000) Множественное = Multiple 270 | * (0x0800) Информативное = Informative 271 | * (0x0040) Некорректируемое = Uncorrectable 272 | * (0x1000) поиск на вводе = input search 273 | * (?) символьное = symbolic 274 | * (?) Лемматизировать = Lemmatize 275 | * (?) поиск по значениям = search by values 276 | * (0x0200) замена непустого значения = replacement of a non-empty value 277 | * (0x0100) замена значения = value replacement 278 | * (0x0004) автозаполнения = autocomplete 279 | * (?) корневая связь = root connection 280 | * (?) допускать дубли = allow duplicates 281 | * (0x0002) обязательное = obligatory 282 | 283 | ## compressed records 284 | 285 | Some records are compressed; the format is like this: 286 | 287 | multiple-chunks { 288 | uint16 size; // stored in bigendian format. 289 | uint8 head[2] = { 8, 0 } 290 | uint32 crc32 291 | uint8 compdata[size-6] 292 | } 293 | uint8 tail[3] = { 0, 0, 2 } 294 | 295 | 296 | # v4 format 297 | 298 | The header version 01.11 indicates a database created with cronos v4.x.
299 | 300 | ## .tad 301 | 302 | A 4 dword header: 303 | 304 | dword -2 305 | dword nr deleted 306 | dword first deleted 307 | dword 0 308 | 309 | 16 byte records: 310 | qword offset, with flags in upper 8 bits. 311 | dword size 312 | dword unk 313 | 314 | flags: 315 | 02,03 - deleted record. 316 | 04 - compressed { int16be size; int16be flag; int32le crc; byte data[size-6]; } 00 00 02 317 | 00 - extended record 318 | 319 | ## .dat 320 | 321 | The .dat file of a 01.11 database has 64-bit offsets, like the 01.03 file format. 322 | 323 | --------------------------------------------------------------------------------