├── crodump
    ├── __init__.py
    ├── kodump.py
    ├── hexdump.py
    ├── koddecoder.py
    ├── readers.py
    ├── dumpdbfields.py
    ├── croconvert.py
    ├── Datamodel.py
    ├── Database.py
    ├── crodump.py
    └── Datafile.py
├── test_data
    └── all_field_types
    │   ├── CroIndex.tad
    │   ├── Voc
    │       ├── CroIndex.tad
    │       ├── CroBank.dat
    │       ├── CroBank.tad
    │       ├── CroStru.dat
    │       ├── CroStru.tad
    │       └── CroIndex.dat
    │   ├── CroBank.dat
    │   ├── CroBank.tad
    │   ├── CroIndex.dat
    │   ├── CroStru.dat
    │   └── CroStru.tad
├── bin
    ├── crodump
    └── croconvert
├── .gitignore
├── templates
    ├── postgres.j2
    └── html.j2
├── LICENSE
├── setup.py
├── README.md
└── docs
    └── cronos-research.md


/crodump/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test_data/all_field_types/CroIndex.tad:
--------------------------------------------------------------------------------
1 |         


--------------------------------------------------------------------------------
/test_data/all_field_types/Voc/CroIndex.tad:
--------------------------------------------------------------------------------
1 |         


--------------------------------------------------------------------------------
/bin/crodump:
--------------------------------------------------------------------------------
#!/bin/sh
# Launcher which runs the `crodump.crodump` module directly from a source checkout.

# "$0" is quoted so that a checkout path containing whitespace still works.
BINPATH=$(dirname "$0")
# make the `crodump` package in the parent directory importable.
export PYTHONPATH="$BINPATH/.."
python3 -mcrodump.crodump "$@"
6 | 


--------------------------------------------------------------------------------
/bin/croconvert:
--------------------------------------------------------------------------------
#!/bin/sh
# Launcher which runs the `crodump.croconvert` module directly from a source checkout.

# "$0" is quoted so that a checkout path containing whitespace still works.
BINPATH=$(dirname "$0")
# make the `crodump` package in the parent directory importable.
export PYTHONPATH="$BINPATH/.."
python3 -mcrodump.croconvert "$@"
6 | 


--------------------------------------------------------------------------------
/test_data/all_field_types/CroBank.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroBank.dat


--------------------------------------------------------------------------------
/test_data/all_field_types/CroBank.tad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroBank.tad


--------------------------------------------------------------------------------
/test_data/all_field_types/CroIndex.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroIndex.dat


--------------------------------------------------------------------------------
/test_data/all_field_types/CroStru.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroStru.dat


--------------------------------------------------------------------------------
/test_data/all_field_types/CroStru.tad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/CroStru.tad


--------------------------------------------------------------------------------
/test_data/all_field_types/Voc/CroBank.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroBank.dat


--------------------------------------------------------------------------------
/test_data/all_field_types/Voc/CroBank.tad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroBank.tad


--------------------------------------------------------------------------------
/test_data/all_field_types/Voc/CroStru.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroStru.dat


--------------------------------------------------------------------------------
/test_data/all_field_types/Voc/CroStru.tad:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroStru.tad


--------------------------------------------------------------------------------
/test_data/all_field_types/Voc/CroIndex.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alephdata/cronodump/HEAD/test_data/all_field_types/Voc/CroIndex.dat


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Distribution / packaging
 7 | .Python
 8 | build/
 9 | develop-eggs/
10 | dist/
11 | downloads/
12 | eggs/
13 | .eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | wheels/
20 | share/python-wheels/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | MANIFEST
25 | 
26 | # Installer logs
27 | pip-log.txt
28 | pip-delete-this-directory.txt
29 | 
30 | # PyBuilder
31 | .pybuilder/
32 | target/
33 | 
34 | # Jupyter Notebook
35 | .ipynb_checkpoints
36 | 
37 | # IPython
38 | profile_default/
39 | ipython_config.py
40 | 
41 | # Environments
42 | .env
43 | .venv
44 | env/
45 | venv/
46 | ENV/
47 | env.bak/
48 | venv.bak/
49 | 


--------------------------------------------------------------------------------
/templates/postgres.j2:
--------------------------------------------------------------------------------
 1 | {% for table in db.enumerate_tables(files=False) %}
 2 | 
 3 | CREATE TABLE "{{ table.tablename | replace('"', '_') }}" (
 4 |     {%- for field in table.fields %}
 5 |         "{{ field.name | replace('"', '_') }}" {{ field.sqltype() -}}
 6 |         {{- ", " if not loop.last else "" -}}
 7 |     {%- endfor %}
 8 | );
 9 | 
10 | INSERT INTO "{{ table.tablename | replace('"', '_') }}" VALUES
11 |     {%- for record in db.enumerate_records( table ) %}
12 |         ( {%- for field in record.fields -%}
13 |             '{{ field.content | replace("'", "''") }}' {{- ", " if not loop.last else "" -}}
14 |         {%- endfor -%}
15 |         )
16 |         {{- ", " if not loop.last else "" -}}
17 |     {%- endfor %}
18 | ;
19 | 
20 | {% endfor %}
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2021 Organized Crime and Corruption Reporting Project
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | setup(
 3 |     name = "cronodump",
 4 |     version = "1.1.0",
 5 |     entry_points = {
 6 |         'console_scripts': [
 7 |             'croconvert=crodump.croconvert:main',
 8 |             'crodump=crodump.crodump:main',
 9 |         ],
10 |     },
11 |     packages = ['crodump'],
12 |     author = "Willem Hengeveld, Dirk Engling",
13 |     author_email = "itsme@xs4all.nl, erdgeist@erdgeist.org",
14 |     description = "Tool and library for extracting data from Cronos databases.",
15 |     long_description_content_type='text/markdown',
16 |     long_description = """
17 | The cronodump utility can parse most of the databases created by the [CronosPro](https://www.cronos.ru/) database software
18 | and dump it to several output formats.
19 | 
20 | The software is popular among Russian public offices, companies and police agencies.
21 | 
22 | Example usage:
23 | 
24 |     croconvert --csv <yourdbpath>
25 | 
26 | Will create a .csv dump of all records in your database.
27 | 
28 | or:
29 | 
30 |     crodump strudump <yourdbpath>
31 | 
32 | Will print details on the internal definitions of the tables present in your database.
33 | 
34 | For more details see the [README.md](https://github.com/alephdata/cronodump/blob/master/README.md) file.
35 | """,
36 |     license = "MIT",
37 |     keywords = "cronos dataconversion databaseexport",
38 |     url = "https://github.com/alephdata/cronodump/",
39 |     classifiers = [
40 |         'Environment :: Console',
41 |         'Intended Audience :: End Users/Desktop',
42 |         'Intended Audience :: Developers',
43 |         'License :: OSI Approved :: MIT License',
44 |         'Operating System :: OS Independent',
45 |         'Programming Language :: Python :: 3.7',
46 |         'Topic :: Utilities',
47 |         'Topic :: Database',
48 |     ],
49 |     python_requires = '>=3.7',
50 |     extras_require={ 'templates': ['Jinja2'] },
51 | )
52 | 


--------------------------------------------------------------------------------
/templates/html.j2:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Cronos Database Dump</title>
 6 |   </head>
 7 |   <body>
 8 |   {% for table in db.enumerate_tables(files=True) %}
 9 |     <table>
10 |       <caption>{{ table.tablename | e }}</caption>
11 |       <thead>
12 |         <tr>
13 |         {%- for field in table.fields %}
14 |           <th>{{ field.name | e }}</th>
15 |         {%- endfor %}
16 |           <th>Data</th>
17 |         </tr>
18 |       </thead>
19 |       <tbody>
20 |       {% for system_number, file in db.enumerate_files(table) %}
21 |         <tr>
22 |           <td>{{ system_number | e }}</td>
23 |           <td><a href="data:application/x-binary;base64,{{ base64.b64encode( file ).decode('utf-8') }}">File content</a></td>
24 |         </tr>
25 |       {% endfor %}
26 |       </tbody>
27 |     </table>
28 |   {% endfor %}
29 |   {% for table in db.enumerate_tables(files=False) %}
30 |     {%- if table.tableimage -%}
31 |       <img src="data:image;base64,{{ base64.b64encode( table.tableimage.data ).decode('utf-8') }}"/>
32 |     {%- endif -%}
33 |     <table>
34 |       <caption>{{ table.tablename | e }}</caption>
35 |       <thead>
36 |         <tr>
37 |         {%- for field in table.fields %}
38 |           <th>{{ field.name | e }}</th>
39 |         {%- endfor %}
40 |         </tr>
41 |       </thead>
42 |       <tbody>
43 |         {%- for record in db.enumerate_records( table ) %}
44 |         <tr>
45 |           {%- for field in record.fields %}
46 |             {%- if field.typ == 6 and field.content -%}
47 |             <td><a download="{{ field.filename }}.{{ field.extname }}" href="data:application/x-binary;base64,{{ db.get_record( field.filedatarecord, True ) }}">{{ field.filename | e }}.{{ field.extname | e }}</a></td>
48 |             {%- else -%}
49 |             <td>{{ field.content | e }}</td>
50 |             {%- endif -%}
51 |           {%- endfor %}
52 |         </tr>
53 |         {%- endfor %}
54 |       </tbody>
55 |     </table>
56 |     {% endfor %}
57 |   </body>
58 | </html>
59 | 


--------------------------------------------------------------------------------
/crodump/kodump.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module has the functions for the 'kodump' subcommand from the 'crodump' script.
 3 | """
 4 | from .hexdump import unhex, toout, hexdump
 5 | import io
 6 | import struct
 7 | 
 8 | 
 9 | def decode_kod(kod, args, data):
10 |     """
11 |     various methods of hexdumping KOD decoded data.
12 |     """
13 |     if args.nokod:
14 |         # plain hexdump, no KOD decode
15 |         hexdump(args.offset, data, args)
16 | 
17 |     elif args.shift:
18 |         # explicitly specified shift.
19 |         args.shift = int(args.shift, 0)
20 |         enc = kod.decode(args.shift, data)
21 |         hexdump(args.offset, enc, args)
22 |     elif args.increment:
23 | 
24 |         def incdata(data, s):
25 |             """
26 |             add 's' to each byte.
27 |             This is useful for finding the correct shift from an incorrectly shifted chunk.
28 |             """
29 |             return b"".join(struct.pack("<B", (_ + s) & 0xFF) for _ in data)
30 | 
31 |         # explicitly specified shift.
32 |         for s in range(256):
33 |             enc = incdata(data, s)
34 |             print("%02x: %s" % (s, toout(args, enc)))
35 |     else:
36 |         # output with all possible 'shift' values.
37 |         for s in range(256):
38 |             if args.invkod:
39 |                 enc = kod.encode(s, data)
40 |             else:
41 |                 enc = kod.decode(s, data)
42 |             print("%02x: %s" % (s, toout(args, enc)))
43 | 
44 | 
def kod_hexdump(kod, args):
    """
    Handle the `kodump` subcommand: KOD decode a section of a data file.

    The numeric options in `args` (`offset`, `length`, `endofs`, `width`) are
    passed as text and are converted in place using `int(.., 0)`.
    The data is read from `args.filename`, or from stdin when no filename
    was specified. The actual output is produced by `decode_kod`.

    This function is mostly useful for reverse-engineering the database format.
    """
    args.offset = int(args.offset, 0)

    # an explicit length takes precedence over an end offset.
    if args.length:
        args.length = int(args.length, 0)
    elif args.endofs:
        args.endofs = int(args.endofs, 0)
        args.length = args.endofs - args.offset

    if args.width:
        args.width = int(args.width, 0)
    elif args.ascdump:
        args.width = 64
    else:
        args.width = 16

    if not args.filename:
        # no filename -> read from stdin.
        # note that offset and length are not used to select a part of the stdin data.
        import sys

        data = sys.stdin.buffer.read()
        if args.unhex:
            data = unhex(data)
        decode_kod(kod, args, data)
        return

    with open(args.filename, "rb") as fh:
        if args.length is None:
            # neither length nor end offset: dump from `offset` until the end of the file.
            args.length = fh.seek(0, io.SEEK_END) - args.offset
        fh.seek(args.offset)
        decode_kod(kod, args, fh.read(args.length))
80 | 
81 | 
82 | 


--------------------------------------------------------------------------------
/crodump/hexdump.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Several functions for converting bytes to readable text or hex bytes.
 3 | """
 4 | import struct
 5 | from binascii import b2a_hex, a2b_hex
 6 | 
 7 | 
 8 | def unhex(data):
 9 |     """
10 |     convert a possibly space separated list of 2-digit hex values to a byte-array
11 |     """
12 |     if type(data) == bytes:
13 |         data = data.decode("ascii")
14 |     data = data.replace(" ", "")
15 |     data = data.strip()
16 |     return a2b_hex(data)
17 | 
18 | 
def ashex(line):
    """
    Render each byte of `line` as two lowercase hex digits,
    with a single space between the values.
    """
    hexpairs = [format(value, "02x") for value in line]
    return " ".join(hexpairs)
24 | 
25 | 
def aschr(b):
    """
    Convert a single CP-1251 byte value to a printable unicode character.

    Printable ascii is returned as-is, values from 0x80 up to 0xFF are
    decoded as CP-1251, which makes both latin and cyrillic text readable.
    Everything else is represented by a '.'.
    """
    if 32 <= b < 0x7F:
        return chr(b)

    if 0x80 <= b <= 0xFF:
        try:
            decoded = struct.pack("<B", b).decode("cp1251")
        except UnicodeDecodeError:
            # 0x98 is the only invalid cp1251 character.
            decoded = ""
        if decoded:
            return decoded

    return "."
42 | 
43 | 
def asasc(line):
    """
    Convert a CP-1251 encoded byte-array to text,
    by translating each byte using `aschr`.
    """
    characters = map(aschr, line)
    return "".join(characters)
49 | 
50 | 
def hexdump(ofs, data, args):
    """
    Print `data` in lines of `args.width` bytes, each prefixed with its offset.

    `ofs` is the offset printed for the first byte of `data`.
    With `args.ascdump` only the text representation is printed,
    otherwise the hex values followed by the text representation.
    """
    width = args.width
    # each byte takes two hex digits plus a separating space, except the last one.
    hexcolumn = 3 * width - 1

    for pos in range(0, len(data), width):
        chunk = data[pos:pos + width]
        if args.ascdump:
            line = "%08x: %s" % (pos + ofs, asasc(chunk))
        else:
            # the hex column is left aligned and padded, so that the
            # text column lines up for a short last line.
            line = "%08x: %-*s  %s" % (pos + ofs, hexcolumn, ashex(chunk), asasc(chunk))
        print(line)
65 | 
66 | 
def tohex(data):
    """
    Return the bytes in `data` as one string of 2-digit lowercase hex values,
    without any separators.
    """
    hexbytes = b2a_hex(data)
    return str(hexbytes, "ascii")
72 | 
73 | 
def toout(args, data):
    """
    Convert `data` to text: using `asasc` when the `args.ascdump` flag
    is set, and using `tohex` when it is not.
    """
    render = asasc if args.ascdump else tohex
    return render(data)
82 | 
83 | 
# maps each character which needs escaping to its c-style escape sequence.
_STRESCAPE_TABLE = str.maketrans({
    "\\": "\\\\",
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    '"': '\\"',
})


def strescape(txt):
    """
    Convert bytes or text to a c-style escaped string.

    `bytes` and `bytearray` input is first decoded as CP-1251.
    Backslash, newline, carriage return, tab and double quote are
    replaced by their backslash escape, all other characters are kept.
    """
    if isinstance(txt, (bytes, bytearray)):
        txt = txt.decode("cp1251")
    # a single translate pass can not escape a backslash twice, so the
    # order of the replacements does not matter here.
    return txt.translate(_STRESCAPE_TABLE)
96 | 


--------------------------------------------------------------------------------
/crodump/koddecoder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Decode CroStru KOD encoding.
 3 | """
# The default KOD substitution table: 256 byte values, which KODcoding.decode
# indexes with the bytes of the encoded data.
# KODcoding also builds the inverse of this table, so presumably every value
# from 0x00 to 0xFF occurs exactly once -- TODO confirm.
INITIAL_KOD = [
    0x08, 0x63, 0x81, 0x38, 0xA3, 0x6B, 0x82, 0xA6, 0x18, 0x0D, 0xAC, 0xD5, 0xFE, 0xBE, 0x15, 0xF6,
    0xA5, 0x36, 0x76, 0xE2, 0x2D, 0x41, 0xB5, 0x12, 0x4B, 0xD8, 0x3C, 0x56, 0x34, 0x46, 0x4F, 0xA4,
    0xD0, 0x01, 0x8B, 0x60, 0x0F, 0x70, 0x57, 0x3E, 0x06, 0x67, 0x02, 0x7A, 0xF8, 0x8C, 0x80, 0xE8,
    0xC3, 0xFD, 0x0A, 0x3A, 0xA7, 0x73, 0xB0, 0x4D, 0x99, 0xA2, 0xF1, 0xFB, 0x5A, 0xC7, 0xC2, 0x17,
    0x96, 0x71, 0xBA, 0x2A, 0xA9, 0x9A, 0xF3, 0x87, 0xEA, 0x8E, 0x09, 0x9E, 0xB9, 0x47, 0xD4, 0x97,
    0xE4, 0xB3, 0xBC, 0x58, 0x53, 0x5F, 0x2E, 0x21, 0xD1, 0x1A, 0xEE, 0x2C, 0x64, 0x95, 0xF2, 0xB8,
    0xC6, 0x33, 0x8D, 0x2B, 0x1F, 0xF7, 0x25, 0xAD, 0xFF, 0x7F, 0x39, 0xA8, 0xBF, 0x6A, 0x91, 0x79,
    0xED, 0x20, 0x7B, 0xA1, 0xBB, 0x45, 0x69, 0xCD, 0xDC, 0xE7, 0x31, 0xAA, 0xF0, 0x65, 0xD7, 0xA0,
    0x32, 0x93, 0xB1, 0x24, 0xD6, 0x5B, 0x9F, 0x27, 0x42, 0x85, 0x07, 0x44, 0x3F, 0xB4, 0x11, 0x68,
    0x5E, 0x49, 0x29, 0x13, 0x94, 0xE6, 0x1B, 0xE1, 0x7D, 0xC8, 0x2F, 0xFA, 0x78, 0x1D, 0xE3, 0xDE,
    0x50, 0x4E, 0x89, 0xB6, 0x30, 0x48, 0x0C, 0x10, 0x05, 0x43, 0xCE, 0xD3, 0x61, 0x51, 0x83, 0xDA,
    0x77, 0x6F, 0x92, 0x9D, 0x74, 0x7C, 0x04, 0x88, 0x86, 0x55, 0xCA, 0xF4, 0xC1, 0x62, 0x0E, 0x28,
    0xB7, 0x0B, 0xC0, 0xF5, 0xCF, 0x35, 0xC5, 0x4C, 0x16, 0xE0, 0x98, 0x00, 0x9B, 0xD9, 0xAE, 0x03,
    0xAF, 0xEC, 0xC9, 0xDB, 0x6D, 0x3B, 0x26, 0x75, 0x3D, 0xBD, 0xB2, 0x4A, 0x5D, 0x6C, 0x72, 0x40,
    0x7E, 0xAB, 0x59, 0x52, 0x54, 0x9C, 0xD2, 0xE9, 0xEF, 0xDD, 0x37, 0x1E, 0x8F, 0xCB, 0x8A, 0x90,
    0xFC, 0x84, 0xE5, 0xF9, 0x14, 0x19, 0xDF, 0x6E, 0x23, 0xC4, 0x66, 0xEB, 0xCC, 0x22, 0x1C, 0x5C,
]
22 | 
23 | 
class KODcoding:
    """
    KOD encoder and decoder.

    Uses the default INITIAL_KOD table, unless the caller
    specifies its own KOD table.
    """
    def __init__(self, initial=INITIAL_KOD):
        # keep a private copy of the table.
        self.kod = list(initial)

        # the inverse table: inv[kod[i]] == i
        self.inv = [0] * len(self.kod)
        for position, value in enumerate(self.kod):
            self.inv[value] = position

    def decode(self, o, data):
        """
        decode : shift, a[0]..a[n-1] -> b[0]..b[n-1]
            b[i] = KOD[a[i]] - (i+shift)

        `o` is the shift, the result is calculated modulo 256.
        """
        table = self.kod
        decoded = [(table[value] - index - o) % 256 for index, value in enumerate(data)]
        return bytes(decoded)

    def encode(self, o, data):
        """
        encode : shift, b[0]..b[n-1] -> a[0]..a[n-1]
            a[i] = INV[b[i] + (i+shift)]

        `o` is the shift, the table index is calculated modulo 256.
        """
        table = self.inv
        encoded = [table[(value + index + o) % 256] for index, value in enumerate(data)]
        return bytes(encoded)
50 | 
51 | 
def new(*args):
    """
    Create a KODcoding object with the specified arguments.

    All positional arguments are passed on unchanged to the KODcoding
    constructor. Without arguments the default KOD table is used.
    """
    return KODcoding(*args)
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/crodump/readers.py:
--------------------------------------------------------------------------------
 1 | import struct
 2 | 
 3 | 
 4 | class ByteReader:
 5 |     """
 6 |     The ByteReader object is used when decoding various variable sized structures.
 7 |     all functions raise EOFError when attempting to read beyond the end of the buffer.
 8 | 
 9 |     functions starting with `read` advance the current position.
10 |     """
11 |     def __init__(self, data):
12 |         self.data = data
13 |         self.o = 0
14 | 
15 |     def readbyte(self):
16 |         """
17 |         Reads a single byte
18 |         """
19 |         if self.o + 1 > len(self.data):
20 |             raise EOFError()
21 |         self.o += 1
22 |         return struct.unpack_from("<B", self.data, self.o - 1)[0]
23 | 
24 |     def testbyte(self, bytevalue):
25 |         """
26 |         returns True when the current bytes matches `bytevalue`.
27 |         """
28 |         if self.o + 1 > len(self.data):
29 |             raise EOFError()
30 |         return self.data[self.o] == bytevalue
31 | 
32 |     def readword(self):
33 |         """
34 |         Reads a 16 bit unsigned little endian value
35 |         """
36 |         if self.o + 2 > len(self.data):
37 |             raise EOFError()
38 |         self.o += 2
39 |         return struct.unpack_from("<H", self.data, self.o - 2)[0]
40 | 
41 |     def readdword(self):
42 |         """
43 |         Reads a 32 bit unsigned little endian value
44 |         """
45 |         if self.o + 4 > len(self.data):
46 |             raise EOFError()
47 |         self.o += 4
48 |         return struct.unpack_from("<L", self.data, self.o - 4)[0]
49 | 
50 |     def readbytes(self, n=None):
51 |         """
52 |         Reads the specified number of bytes, or
53 |         when no size was specified, the remaining bytes in the buffer
54 |         """
55 |         if n is None:
56 |             n = len(self.data) - self.o
57 |         if self.o + n > len(self.data):
58 |             raise EOFError()
59 |         self.o += n
60 |         return self.data[self.o-n:self.o]
61 | 
62 |     def readlongstring(self):
63 |         """
64 |         Reads a cp1251 encoded string prefixed with a dword sized length
65 |         """
66 |         namelen = self.readdword()
67 |         return self.readbytes(namelen).decode("cp1251")
68 | 
69 |     def readname(self):
70 |         """
71 |         Reads a cp1251 encoded string prefixed with a byte sized length
72 |         """
73 |         namelen = self.readbyte()
74 |         return self.readbytes(namelen).decode("cp1251")
75 | 
76 |     def readtoseperator(self, sep):
77 |         """
78 |         reads bytes upto a bytes sequence matching `sep`.
79 |         when no `sep` is found, return the remaining bytes in the buffer.
80 |         """
81 |         if self.o > len(self.data):
82 |             raise EOFError()
83 |         oldoff = self.o
84 |         off = self.data.find(sep, self.o)
85 |         if off >= 0:
86 |             self.o = off + len(sep)
87 |             return self.data[oldoff:off]
88 |         else:
89 |             self.o = len(self.data)
90 |             return self.data[oldoff:]
91 | 
92 |     def eof(self):
93 |         """
94 |         return True when the current position is at or beyond the end of the buffer.
95 |         """
96 |         return self.o >= len(self.data)
97 | 


--------------------------------------------------------------------------------
/crodump/dumpdbfields.py:
--------------------------------------------------------------------------------
 1 | """
 2 | `dumpdbfields` demonstrates how to enumerate tables and records.
 3 | """
 4 | import os
 5 | import os.path
 6 | from .Database import Database
 7 | from .crodump import strucrack, dbcrack
 8 | from .hexdump import unhex
 9 | 
10 | 
def processargs(args):
    """
    Generate the database directories to process.

    Without `args.recurse` these are exactly the paths in `args.dbdirs`.
    With `args.recurse` each path is searched recursively, and every
    directory containing a file named `crostru.dat` (in any case) is returned.
    """
    for dbpath in args.dbdirs:
        if not args.recurse:
            yield dbpath
            continue

        for dirpath, _subdirs, filenames in os.walk(dbpath):
            # check if there is a crostru file in this directory.
            if "crostru.dat" in (name.lower() for name in filenames):
                yield dirpath
20 | 
21 | 
def main():
    """
    Commandline entry point: for each database directory, dump the table
    definitions followed by at most `--maxrecs` records of each table.

    The KOD table is selected per database directory, in this order of
    precedence: `--kod`, `--nokod`, `--strucrack`, `--dbcrack`, and
    otherwise the default table.

    An error in one database directory is reported on stdout, after which
    processing continues with the next directory.
    """
    import argparse
    import crodump.koddecoder

    parser = argparse.ArgumentParser(description="db field dumper")
    parser.add_argument("--kod", type=str, help="specify custom KOD table")
    parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat")
    parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroIndex.dat+CroBank.dat")
    parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
    parser.add_argument("--maxrecs", "-m", type=int, default=100)
    parser.add_argument("--recurse", "-r", action="store_true")
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("dbdirs", type=str, nargs='*')
    args = parser.parse_args()

    for path in processargs(args):
        try:
            if args.kod:
                if len(args.kod) != 512:
                    raise Exception("--kod should have a 512 hex digit argument")
                kod = crodump.koddecoder.new(list(unhex(args.kod)))
            elif args.nokod:
                kod = None
            elif args.strucrack or args.dbcrack:
                # --strucrack takes precedence when both options are specified.
                cracker = strucrack if args.strucrack else dbcrack
                # the crack functions expect the arguments of the crodump script.
                cargs = argparse.Namespace(dbdir=path, sys=False, silent=True)
                cracked = cracker(None, cargs)
                if not cracked:
                    # a database which can not be cracked should not prevent
                    # the remaining databases from being processed.
                    print("ERROR: could not infer the KOD table for %s" % path)
                    continue
                kod = crodump.koddecoder.new(cracked)
            else:
                kod = crodump.koddecoder.new()

            # NOTE(review): croconvert.py calls Database(dbdir, compact, kod) with
            # three arguments -- verify that `kod` ends up in the right parameter here.
            db = Database(path, kod)
            for tab in db.enumerate_tables():
                tab.dump(args)
                print("nr of records: %d" % db.bank.nrofrecords)
                for i, rec in enumerate(db.enumerate_records(tab)):
                    # stop before printing record number maxrecs+1.
                    if i >= args.maxrecs:
                        break
                    for field, fielddef in zip(rec.fields, tab.fields):
                        print(">> %s -- %s" % (fielddef, field.content))
        except Exception as e:
            print("ERROR: %s" % e)
81 | 
82 | 
83 | if __name__ == "__main__":
84 |     main()
85 | 


--------------------------------------------------------------------------------
/crodump/croconvert.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Command-line tool which converts a Cronos database to .csv, .sql or .html.
  3 | 
  4 | python3 croconvert.py -t html chechnya_proverki_ul_2012/
  5 | """
  6 | from .Database import Database
  7 | from .crodump import strucrack, dbcrack
  8 | from .hexdump import unhex
  9 | from sys import exit, stdout
 10 | from os.path import dirname, abspath, join
 11 | from os import mkdir, chdir
 12 | from datetime import datetime
 13 | import base64
 14 | import csv
 15 | 
 16 | 
 17 | def template_convert(kod, args):
 18 |     """looks up template to convert to, parses the database and passes it to jinja2"""
 19 |     try:
 20 |         from jinja2 import Environment, FileSystemLoader
 21 |     except ImportError:
 22 |         exit(
 23 |             "Fatal: Jinja templating engine not found. Install using pip install jinja2"
 24 |         )
 25 | 
 26 |     db = Database(args.dbdir, args.compact, kod)
 27 | 
 28 |     template_dir = join(dirname(dirname(abspath(__file__))), "templates")
 29 |     j2_env = Environment(loader=FileSystemLoader(template_dir))
 30 |     j2_templ = j2_env.get_template(args.template + ".j2")
 31 |     j2_templ.stream(db=db, base64=base64).dump(stdout)
 32 | 
 33 | 
 34 | def safepathname(name):
 35 |     return name.replace(':', '_').replace('/', '_').replace('\\', '_')
 36 | 
 37 | 
 38 | def csv_output(kod, args):
 39 |     """creates a directory with the current timestamp and in it a set of CSV or TSV
 40 |        files with all the tables found and an extra directory with all the files"""
 41 |     db = Database(args.dbdir, args.compact, kod)
 42 | 
 43 |     mkdir(args.outputdir)
 44 |     chdir(args.outputdir)
 45 | 
 46 |     filereferences = []
 47 | 
 48 |     # first dump all non-file tables
 49 |     for table in db.enumerate_tables(files=False):
 50 |         tablesafename = safepathname(table.tablename) + ".csv"
 51 | 
 52 |         with open(tablesafename, 'w', encoding='utf-8') as csvfile:
 53 |             writer = csv.writer(csvfile, delimiter=args.delimiter, escapechar='\\')
 54 |             writer.writerow([field.name for field in table.fields])
 55 | 
 56 |             # Record should be iterable over its fields, so we could use writerows
 57 |             for record in db.enumerate_records(table):
 58 |                 writer.writerow([field.content for field in record.fields])
 59 | 
 60 |                 filereferences.extend([field for field in record.fields if field.typ == 6])
 61 | 
 62 |     # Write all files from the file table. This is useful for unreferenced files
 63 |     for table in db.enumerate_tables(files=True):
 64 |         filedir = "Files-" + table.abbrev
 65 |         mkdir(filedir)
 66 | 
 67 |         for system_number, content in db.enumerate_files(table):
 68 |             with open(join(filedir, str(system_number)), "wb") as binfile:
 69 |                 binfile.write(content)
 70 | 
 71 |     if len(filereferences):
 72 |         filedir = "Files-Referenced"
 73 |         mkdir(filedir)
 74 | 
 75 |     # Write all referenced files with their filename and extension intact
 76 |     for reffile in filereferences:
 77 |         if reffile.content:             # only print when file is not NULL
 78 |             filesafename = safepathname(reffile.filename) + "." + safepathname(reffile.extname)
 79 |             content = db.get_record(reffile.filedatarecord)
 80 |             with open(join("Files-Referenced", filesafename), "wb") as binfile:
 81 |                 binfile.write(content)
 82 | 
 83 | 
 84 | def main():
 85 |     import argparse
 86 | 
 87 |     parser = argparse.ArgumentParser(description="CRONOS database converter")
 88 |     parser.add_argument("--template", "-t", type=str, default="html",
 89 |                         help="output template to use for conversion")
 90 |     parser.add_argument("--csv", "-c", action='store_true', help="create output in .csv format")
 91 |     parser.add_argument("--delimiter", "-d", default=",", help="delimiter used in csv output")
 92 |     parser.add_argument("--outputdir", "-o", type=str, help="directory to create the dump in")
 93 |     parser.add_argument("--kod", type=str, help="specify custom KOD table")
 94 |     parser.add_argument("--compact", action="store_true", help="save memory by not caching the index, note: increases convert time by factor 1.15")
 95 |     parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat")
 96 |     parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroIndex.dat+CroBank.dat")
 97 |     parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
 98 |     parser.add_argument("dbdir", type=str)
 99 |     args = parser.parse_args()
100 | 
101 |     import crodump.koddecoder
102 |     if args.kod:
103 |         if len(args.kod)!=512:
104 |             raise Exception("--kod should have a 512 hex digit argument")
105 |         kod = crodump.koddecoder.new(list(unhex(args.kod)))
106 |     elif args.nokod:
107 |         kod = None
108 |     elif args.strucrack:
109 |         class Cls: pass
110 |         cargs = Cls()
111 |         cargs.dbdir = args.dbdir
112 |         cargs.sys = False
113 |         cargs.silent = True
114 |         cracked = strucrack(None, cargs)
115 |         if not cracked:
116 |             return
117 |         kod = crodump.koddecoder.new(cracked)
118 |     elif args.dbcrack:
119 |         class Cls: pass
120 |         cargs = Cls()
121 |         cargs.dbdir = args.dbdir
122 |         cargs.sys = False
123 |         cargs.silent = True
124 |         cracked = dbcrack(None, cargs)
125 |         if not cracked:
126 |             return
127 |         kod = crodump.koddecoder.new(cracked)
128 |     else:
129 |         kod = crodump.koddecoder.new()
130 | 
131 |     if args.csv:
132 |         if not args.outputdir:
133 |             args.outputdir = "cronodump"+datetime.now().strftime("-%Y-%m-%d-%H-%M-%S-%f")
134 |         csv_output(kod, args)
135 |     else:
136 |         template_convert(kod, args)
137 | 
138 | 
# Run the converter when this module is executed as a script,
# e.g. through `python3 -mcrodump.croconvert`.
if __name__ == "__main__":
    main()
141 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # cronodump
  2 | 
  3 | The cronodump utility can parse most of the databases created by the [CronosPro](https://www.cronos.ru/) database software
  4 | and dump it to several output formats.
  5 | 
  6 | The software is popular among Russian public offices, companies and police agencies.
  7 | 
  8 | 
  9 | # Quick start
 10 | 
 11 | In its simplest form, without any dependencies, the croconvert command creates a [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) representation of all the database's tables and a copy of all files contained in the database:
 12 | 
 13 | ```bash
 14 | bin/croconvert --csv test_data/all_field_types
 15 | ```
 16 | 
 17 | By default it creates a `cronodump-YYYY-mm-DD-HH-MM-SS-ffffff/` directory containing a CSV file for each table found. Under this directory it will also create a `Files-FL/` directory containing all the files stored in the database, regardless of whether they are (still) referenced in any data table. All files that are actually referenced (and thus are known by their filename) will be stored under the `Files-Referenced` directory. With the `--outputdir` option you can choose your own dump location.
 18 | 
 19 | When you get an error message, or just unreadable data, chances are your database is protected. You may need to look into the `--dbcrack` or `--strucrack` options, explained below.
 20 | 
 21 | 
 22 | # Templates
 23 | 
 24 | The croconvert command can use the powerful [jinja templating framework](https://jinja.palletsprojects.com/en/3.0.x/) to render more file formats like PostgreSQL and HTML.
 25 | The default action for `croconvert` is to convert the database using the `html` template.
 26 | Use
 27 | 
 28 | ```bash
 29 | python3 -m venv ./venv
 30 | . venv/bin/activate
 31 | pip install jinja2
 32 | bin/croconvert test_data/all_field_types > test_data.html
 33 | ```
 34 | 
 35 | to dump an HTML file with all tables found in the database, files listed and ready for download as inlined [data URI](https://en.wikipedia.org/wiki/Data_URI_scheme) and all table images inlined as well. Note that the resulting HTML file can be huge for large databases, causing a lot of load on browsers when trying to open them.
 36 | 
 37 | 
 38 | The `-t postgres` command will dump the table schemes and records as valid `CREATE TABLE` and `INSERT INTO` statements to stdout. This dump can then be imported in a PostgreSQL database. Note that the backslash character is not escaped and thus the [`standard_conforming_strings`](https://www.postgresql.org/docs/current/runtime-config-compatible.html#GUC-STANDARD-CONFORMING-STRINGS) option should be off.
 39 | 
 40 | Pull requests for [more templates supporting other output types](/templates) are welcome.
 41 | 
 42 | 
 43 | # Inspection
 44 | 
 45 | There's a `bin/crodump` tool to further investigate databases. This might be useful for extracting metadata like path names of table image files or input and output forms. Not all metadata has yet been completely reverse engineered, so some experience with understanding binary dumps might be required.
 46 | 
 47 | The crodump script has a plethora of options, but in its most basic form the `strudump` sub command will provide a rich variety of metadata to investigate further:
 48 | 
 49 | ```bash
 50 | bin/crodump strudump -v -a test_data/all_field_types/
 51 | ```
 52 | The `-a` option tells strudump to output ascii instead of a hexdump.
 53 | 
 54 | For a low level dump of the database contents, use:
 55 | ```bash
 56 | bin/crodump crodump -v  test_data/all_field_types/
 57 | ```
 58 | The `-v` option tells crodump to include all unused byte ranges, this may be useful when identifying deleted records.
 59 | 
 60 | For a bit higher level dump of the database contents, use:
 61 | ```bash
 62 | bin/crodump recdump  test_data/all_field_types/
 63 | ```
 64 | This will print a hexdump of all records for all tables.
 65 | 
 66 | 
 67 | ## decoding password protected databases
 68 | 
 69 | Cronos v4 and higher are able to password protect databases, the protection works
 70 | by modifying the KOD sbox. `cronodump` has two methods of deriving the KOD sbox from
 71 | a database:
 72 | 
 73 | Both of these methods are statistics based operations, so they may not always
 74 | yield the correct KOD sbox.
 75 | 
 76 | 
 77 | ### 1. strudump
 78 | 
 79 | When the database has a sufficiently large CroStru.dat file,
 80 | it is easy to derive the modified KOD-sbox from the CroStru file; the `--strucrack` option
 81 | will do this. 
 82 | 
 83 |     crodump --strucrack  recdump <dbpath>
 84 | 
 85 | ### 2. dbdump
 86 | 
 87 | When the Bank and Index files are compressed, we can derive the KOD sbox by inspecting
 88 | the fourth byte of each record, which should decode to a zero.
 89 | 
 90 | The `--dbcrack` option will do this.
 91 | 
 92 |     crodump --dbcrack  recdump <dbpath>
 93 | 
 94 | 
 95 | # Installing
 96 | 
 97 | `cronodump` requires python 3.7 or later. It has been tested on Linux, MacOS and Windows.
 98 | There is one optional requirement: the `Jinja2` templating engine, but it will install fine without.
 99 | 
100 | There are several ways of installing `cronodump`:
101 | 
102 |  * You can run `cronodump` directly from the cloned git repository, by using the shell scripts in the `bin` subdirectory.
103 |  * You can install `cronodump` in your python environment by running: `python setup.py  build install`.
104 |  * You can install `cronodump` from the public [pypi repository](https://pypi.org/project/cronodump/) with `pip install cronodump`.
105 |  * You can install `cronodump` with the `Jinja2` templating engine from the public [pypi repository](https://pypi.org/project/cronodump/) with `pip install cronodump[templates]`.
106 | 
107 | 
108 | # Terminology
109 | 
110 | We decided to use the more common terminology for database, tables, records, etc.
111 | Here is a table showing how cronos calls these:
112 | 
113 | | what | cronos english | cronos russian
114 | |:------ |:------ |:------ 
115 | | Database  |  Bank   | Банк 
116 | | Table     |  Base   | Базы
117 | | Record    |  Record | Записи
118 | | Field     |  Field  | поля
119 | | recid     |  System Number | Системный номер
120 | 
121 | 
122 | # License
123 | 
124 | cronodump is released under the [MIT license](LICENSE).
125 | 
126 | 
127 | # References
128 | 
129 | cronodump builds upon [documentation of the file format found in older versions of Cronos](http://sergsv.narod.ru/cronos.htm) and
130 | the [subsequent implementation of a parser for the old file format](https://github.com/occrp/cronosparser) but dropped the heuristic
131 | approach to guess offsets and obfuscation parameters for a more rigid parser. Refer to [the docs](docs/cronos-research.md) for further
132 | details.
133 | 


--------------------------------------------------------------------------------
/crodump/Datamodel.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from .hexdump import tohex, ashex
  3 | from .readers import ByteReader
  4 | 
  5 | 
  6 | class FieldDefinition:
  7 |     """
  8 |     Contains the properties for a single field in a record.
  9 |     """
 10 |     def __init__(self, data):
 11 |         self.decode(data)
 12 | 
 13 |     def decode(self, data):
 14 |         self.defdata = data
 15 | 
 16 |         rd = ByteReader(data)
 17 |         self.typ = rd.readword()
 18 |         self.idx1 = rd.readdword()
 19 |         self.name = rd.readname()
 20 |         self.flags = rd.readdword()
 21 |         self.minval = rd.readbyte()  # Always 1
 22 |         if self.typ:
 23 |             self.idx2 = rd.readdword()
 24 |             self.maxval = rd.readdword()  # max value or length
 25 |             self.unk4 = rd.readdword()  # Always 0x00000009 or 0x0001000d
 26 |         else:
 27 |             self.idx2 = 0
 28 |             self.maxval = self.unk4 = None
 29 |         self.remaining = rd.readbytes()
 30 | 
 31 |     def __str__(self):
 32 |         if self.typ:
 33 |             return "Type: %2d (%2d/%2d) %04x,(%d-%4d),%04x - %-40s -- %s" % (
 34 |                     self.typ, self.idx1, self.idx2,
 35 |                     self.flags, self.minval, self.maxval, self.unk4,
 36 |                     "'%s'" % self.name, tohex(self.remaining))
 37 |         else:
 38 |             return "Type: %2d %2d    %d,%d       - '%s'" % (
 39 |                     self.typ, self.idx1, self.flags, self.minval, self.name)
 40 | 
 41 |     def sqltype(self):
 42 |         return { 0: "INTEGER PRIMARY KEY",
 43 |                  1: "INTEGER",
 44 |                  2: "VARCHAR(" + str(self.maxval) + ")",
 45 |                  3: "TEXT",          # dictionaray
 46 |                  4: "DATE",
 47 |                  5: "TIMESTAMP",
 48 |                  6: "TEXT",          # file reference
 49 |         }.get(self.typ, "TEXT")
 50 | 
 51 | 
 52 | class TableImage:
 53 |     def __init__(self, data):
 54 |         self.decode(data)
 55 | 
 56 |     def decode(self, data):
 57 |         if not len(data):
 58 |             self.filename = "none"
 59 |             self.data = b''
 60 |             return
 61 | 
 62 |         rd = ByteReader(data)
 63 | 
 64 |         _ = rd.readbyte()
 65 |         namelen = rd.readdword()
 66 |         self.filename = rd.readbytes(namelen).decode("cp1251", 'ignore')
 67 | 
 68 |         imagelen = rd.readdword()
 69 |         self.data = rd.readbytes(imagelen)
 70 | 
 71 | 
 72 | class TableDefinition:
 73 |     def __init__(self, data, image=''):
 74 |         self.decode(data, image)
 75 | 
 76 |     def decode(self, data, image):
 77 |         """
 78 |         decode the 'base' / table definition
 79 |         """
 80 |         rd = ByteReader(data)
 81 | 
 82 |         self.unk1 = rd.readword()
 83 |         self.version = rd.readbyte()
 84 |         if self.version > 1:
 85 |             _ = rd.readbyte()  # always 0 anyway
 86 | 
 87 |         # if this is not 5 (but 9), there's another 4 bytes inserted, this could be a length-byte.
 88 |         self.unk2 = rd.readbyte()
 89 | 
 90 |         self.unk3 = rd.readbyte()
 91 |         if self.unk2 > 5:  # seen only 5 and 9 for now with 9 implying an extra dword
 92 |             _ = rd.readdword()
 93 |         self.unk4 = rd.readdword()
 94 | 
 95 |         self.tableid = rd.readdword()
 96 | 
 97 |         self.tablename = rd.readname()
 98 |         self.abbrev = rd.readname()
 99 |         self.unk7 = rd.readdword()
100 |         nrfields = rd.readdword()
101 | 
102 |         self.headerdata = data[: rd.o]
103 | 
104 |         # There's (at least) two blocks describing fields, ended when encountering ffffffff
105 |         self.fields = []
106 |         for _ in range(nrfields):
107 |             deflen = rd.readword()
108 |             fielddef = rd.readbytes(deflen)
109 |             self.fields.append(FieldDefinition(fielddef))
110 | 
111 |         # Between the first and the second block, there's some byte strings inbetween, count
112 |         # given in first dword
113 |         self.extraunkdatastrings = rd.readdword()
114 | 
115 |         for _ in range(self.extraunkdatastrings):
116 |             datalen = rd.readword()
117 |             skip = rd.readbytes(datalen)
118 | 
119 |         try:
120 |             # Then there's another unknow dword and then (probably section indicator) 02 byte
121 |             self.unk8_ = rd.readdword()
122 |             if rd.readbyte() != 2:
123 |                 print("Warning: FieldDefinition Section 2 not marked with a 2")
124 |             self.unk9 = rd.readdword()
125 | 
126 |             # Then there's the amount of extra fields in the second section
127 |             nrextrafields = rd.readdword()
128 | 
129 |             for _ in range(nrextrafields):
130 |                 deflen = rd.readword()
131 |                 fielddef = rd.readbytes(deflen)
132 |                 self.fields.append(FieldDefinition(fielddef))
133 |         except Exception as e:
134 |             print("Warning: Error '%s' parsing FieldDefinitions" % e)
135 | 
136 |         try:
137 |             self.terminator = rd.readdword()
138 |         except EOFError:
139 |             print("Warning: FieldDefinition section not terminated")
140 |         except Exception as e:
141 |             print("Warning: Error '%s' parsing Tabledefinition" % e)
142 | 
143 |         self.fields.sort(key=lambda field: field.idx2)
144 | 
145 |         self.remainingdata = rd.readbytes()
146 | 
147 |         self.tableimage = TableImage(image)
148 | 
149 |     def __str__(self):
150 |         return "%d,%d<%d,%d,%d>%d  %d,%d '%s'  '%s'  [TableImage(%d bytes): %s]" % (
151 |                 self.unk1, self.version, self.unk2, self.unk3, self.unk4, self.tableid,
152 |                 self.unk7, len(self.fields),
153 |                 self.tablename, self.abbrev, len(self.tableimage.data), self.tableimage.filename)
154 | 
155 |     def dump(self, args):
156 |         if args.verbose:
157 |             print("table: %s" % tohex(self.headerdata))
158 | 
159 |         print(str(self))
160 | 
161 |         for i, field in enumerate(self.fields):
162 |             if args.verbose:
163 |                 print("field#%2d: %04x - %s" % (
164 |                     i, len(field.defdata), tohex(field.defdata)))
165 |             print(str(field))
166 |         if args.verbose:
167 |             print("remaining: %s" % tohex(self.remainingdata))
168 | 
169 | 
class Field:
    """
    Contains a single fully decoded value.

    `typ` is the field type taken from the field definition, `data` the
    undecoded value and `content` its textual representation.
    File references (typ 6) with a non-empty value additionally get the
    `flag`, `remlen`, `filename`, `extname` and `filedatarecord` attributes.
    """
    def __init__(self, fielddef, data):
        self.decode(fielddef, data)

    def decode(self, fielddef, data):
        """
        Decode `data` according to the type of `fielddef`.
        An empty value always results in an empty `content` string.
        """
        self.typ = fielddef.typ
        self.data = data

        if not data:
            self.content = ""

        elif self.typ == 0:
            # typ 0 is the recno, or as cronos calls this: Системный номер, systemnumber.
            # just convert this to string for presentation
            self.content = str(data)

        elif self.typ == 4:
            # typ 4 is DATE, formatted like: <year-1900:signedNumber><month:2digits><day:2digits>
            data = data.rstrip(b"\x00")
            try:
                y, m, d = 1900+int(data[:-4]), int(data[-4:-2]), int(data[-2:])
                self.content = "%04d-%02d-%02d" % (y, m, d)
            except ValueError:
                # Not a well formed date: present the stored text as is,
                # instead of the python repr of the bytes object.
                self.content = data.decode("cp1251", 'ignore')

        elif self.typ == 5:
            # typ 5 is TIME, formatted like: <hour:2digits><minute:2digits>
            data = data.rstrip(b"\x00")
            try:
                h, m = int(data[-4:-2]), int(data[-2:])
                self.content = "%02d:%02d" % (h, m)
            except ValueError:
                # Not a well formed time: present the stored text as is,
                # instead of the python repr of the bytes object.
                self.content = data.decode("cp1251", 'ignore')

        elif self.typ == 6:
            # decode internal file reference:
            # two dwords followed by three b"\x1e" separated strings.
            rd = ByteReader(data)
            self.flag = rd.readdword()
            self.remlen = rd.readdword()
            self.filename = rd.readtoseperator(b"\x1e").decode("cp1251", 'ignore')
            self.extname = rd.readtoseperator(b"\x1e").decode("cp1251", 'ignore')
            self.filedatarecord = rd.readtoseperator(b"\x1e").decode("cp1251", 'ignore')
            self.content = " ".join([self.filename, self.extname, self.filedatarecord])

        elif self.typ in (7, 8, 9):
            # just hexdump foreign keys
            self.content = ashex(data)

        else:
            # currently assuming everything else to be strings, which is wrong
            self.content = data.rstrip(b"\x00").decode("cp1251", 'ignore')
224 | 
225 | 
class Record:
    """
    Contains a single fully decoded record.
    """
    def __init__(self, recno, tabledef, data):
        self.decode(recno, tabledef, data)

    @staticmethod
    def _next_fielddata(reader):
        """
        Return the undecoded bytes of the next field value in `reader`.
        """
        if reader.eof() or not reader.testbyte(0x1b):
            # plain value, running up to the b"\x1e" separator
            return reader.readtoseperator(b"\x1e")

        # complex value indicated by b"\x1b": a dword size followed by the data
        reader.readbyte()
        size = reader.readdword()
        return reader.readbytes(size)

    def decode(self, recno, tabledef, data):
        """
        decode the fields in a record.

        `tabledef` is the sequence of field definitions, its first item
        being the definition used for the record number.
        """
        self.data = data
        self.recno = recno
        self.table = tabledef

        # start with the record number, or as Cronos calls this:
        # the system number, in russian: Системный номер.
        self.fields = [Field(tabledef[0], str(recno))]

        reader = ByteReader(data)
        for fielddef in tabledef[1:]:
            fielddata = self._next_fielddata(reader)
            self.fields.append(Field(fielddef, fielddata))
256 | 


--------------------------------------------------------------------------------
/crodump/Database.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function, division
  2 | import os
  3 | import re
  4 | from sys import stderr
  5 | from binascii import b2a_hex
  6 | from .readers import ByteReader
  7 | from .hexdump import strescape, toout, ashex
  8 | from .Datamodel import TableDefinition, Record
  9 | from .Datafile import Datafile
 10 | import base64
 11 | import struct
 12 | import crodump.koddecoder
 13 | 
 14 | import sys
# Refuse to run under python 2: exit right away with an explanatory message.
if sys.version_info[0] == 2:
    sys.exit("cronodump needs python3")
 17 | 
 18 | 
 19 | class Database:
 20 |     """represent the entire database, consisting of Stru, Index and Bank files"""
 21 | 
 22 |     def __init__(self, dbdir, compact, kod=crodump.koddecoder.new()):
 23 |         """
 24 |         `dbdir` is the directory containing the Cro*.dat and Cro*.tad files.
 25 |         `compact` if set, the .tad file is not cached in memory, making dumps 15 % slower
 26 |         `kod` is optionally a KOD coder object.
 27 |               by default the v3 KOD coding will be used.
 28 |         """
 29 |         self.dbdir = dbdir
 30 |         self.compact = compact
 31 |         self.kod = kod
 32 | 
 33 |         # Stru+Index+Bank for the components for most databases
 34 |         self.stru = self.getfile("Stru")
 35 |         self.index = self.getfile("Index")
 36 |         self.bank = self.getfile("Bank")
 37 | 
 38 |         # the Sys file resides in the "Program Files\Cronos" directory, and
 39 |         # contains an index of all known databases.
 40 |         self.sys = self.getfile("Sys")
 41 | 
 42 |     def getfile(self, name):
 43 |         """
 44 |         Returns a Datafile object for `name`.
 45 |         this function expects a `Cro<name>.dat` and a `Cro<name>.tad` file.
 46 |         When no such files exist, or only one, then None is returned.
 47 | 
 48 |         `name` is matched case insensitively
 49 |         """
 50 |         try:
 51 |             datname = self.getname(name, "dat")
 52 |             tadname = self.getname(name, "tad")
 53 |             if datname and tadname:
 54 |                 return Datafile(name, open(datname, "rb"), open(tadname, "rb"), self.compact, self.kod)
 55 |         except IOError:
 56 |             return
 57 | 
 58 |     def getname(self, name, ext):
 59 |         """
 60 |         Get a case-insensitive filename match for 'name.ext'.
 61 |         Returns None when no matching file was not found.
 62 |         """
 63 |         basename = "Cro%s.%s" % (name, ext)
 64 |         for fn in os.listdir(self.dbdir):
 65 |             if basename.lower() == fn.lower():
 66 |                 return os.path.join(self.dbdir, fn)
 67 | 
 68 |     def dump(self, args):
 69 |         """
 70 |         Calls the `dump` method on all database components.
 71 |         """
 72 |         if self.stru:
 73 |             self.stru.dump(args)
 74 |         if self.index:
 75 |             self.index.dump(args)
 76 |         if self.bank:
 77 |             self.bank.dump(args)
 78 |         if self.sys:
 79 |             self.sys.dump(args)
 80 | 
 81 |     def strudump(self, args):
 82 |         """
 83 |         prints all info found in the CroStru file.
 84 |         """
 85 |         if not self.stru:
 86 |             print("missing CroStru file")
 87 |             return
 88 |         self.dump_db_table_defs(args)
 89 | 
 90 |     def decode_db_definition(self, data):
 91 |         """
 92 |         decode the 'bank' / database definition
 93 |         """
 94 |         rd = ByteReader(data)
 95 | 
 96 |         d = dict()
 97 |         while not rd.eof():
 98 |             keyname = rd.readname()
 99 |             if keyname in d:
100 |                 print("WARN: duplicate key: %s" % keyname)
101 | 
102 |             index_or_length = rd.readdword()
103 |             if index_or_length >> 31:
104 |                 d[keyname] = rd.readbytes(index_or_length & 0x7FFFFFFF)
105 |             else:
106 |                 refdata = self.stru.readrec(index_or_length)
107 |                 if refdata[:1] != b"\x04":
108 |                     print("WARN: expected refdata to start with 0x04")
109 |                 d[keyname] = refdata[1:]
110 |         return d
111 | 
112 |     def dump_db_definition(self, args, dbdict):
113 |         """
114 |         decode the 'bank' / database definition
115 |         """
116 |         for k, v in dbdict.items():
117 |             if re.search(b"[^\x0d\x0a\x09\x20-\x7e\xc0-\xff]", v):
118 |                 print("%-20s - %s" % (k, toout(args, v)))
119 |             else:
120 |                 print('%-20s - "%s"' % (k, strescape(v)))
121 | 
122 |     def dump_db_table_defs(self, args):
123 |         """
124 |         decode the table defs from recid #1, which always has table-id #3
125 |         Note that I don't know if it is better to refer to this by recid, or by table-id.
126 | 
127 |         other table-id's found in CroStru:
128 |             #4  -> large values referenced from tableid#3
129 |         """
130 |         dbinfo = self.stru.readrec(1)
131 |         if dbinfo[:1] != b"\x03":
132 |             print("WARN: expected dbinfo to start with 0x03")
133 |         dbdef = self.decode_db_definition(dbinfo[1:])
134 |         self.dump_db_definition(args, dbdef)
135 | 
136 |         for k, v in dbdef.items():
137 |             if k.startswith("Base") and k[4:].isnumeric():
138 |                 print("== %s ==" % k)
139 |                 tbdef = TableDefinition(v, dbdef.get("BaseImage" + k[4:], b''))
140 |                 tbdef.dump(args)
141 |             elif k == "NS1":
142 |                 self.dump_ns1(v)
143 | 
144 |     def dump_ns1(self, data):
145 |         if len(data)<2:
146 |             print("NS1 is unexpectedly short")
147 |             return
148 |         unk1, sh, = struct.unpack_from("<BB", data, 0)
149 | 
150 |         # NS1 is encoded with the default KOD table,
151 |         # so we are not using stru.kod here.
152 |         ns1kod = crodump.koddecoder.new()
153 |         decoded_data = ns1kod.decode(sh, data[2:])
154 | 
155 |         if len(decoded_data) < 12:
156 |             print("NS1 is unexpectedly short")
157 |             return
158 |         serial, unk2, pwlen, = struct.unpack_from("<LLL", decoded_data, 0)
159 |         password = decoded_data[12:12+pwlen].decode('cp1251')
160 | 
161 |         print("== NS1: (%02x,%02x) -> %6d, %d, %d:'%s'" % (unk1, sh, serial, unk2, pwlen, password))
162 | 
163 |     def enumerate_tables(self, files=False):
164 |         """
165 |         yields a TableDefinition object for all `BaseNNN` entries found in CroStru
166 |         """
167 |         dbinfo = self.stru.readrec(1)
168 |         if dbinfo[:1] != b"\x03":
169 |             print("WARN: expected dbinfo to start with 0x03")
170 |         try:
171 |             dbdef = self.decode_db_definition(dbinfo[1:])
172 |         except Exception as e:
173 |             print("ERROR decoding db definition: %s" % e)
174 |             print("This could possibly mean that you need to try with the --strucrack option")
175 |             return
176 | 
177 |         for k, v in dbdef.items():
178 |             if k.startswith("Base") and k[4:].isnumeric():
179 |                 if files and k[4:] == "000":
180 |                     yield TableDefinition(v)
181 |                 if not files and k[4:] != "000":
182 |                     yield TableDefinition(v, dbdef.get("BaseImage" + k[4:], b''))
183 | 
184 |     def enumerate_records(self, table):
185 |         """
186 |         Yields a Record object for all records in CroBank matching
187 |         the tableid from `table`
188 | 
189 |         usage:
190 |         for tab in db.enumerate_tables():
191 |             for rec in db.enumerate_records(tab):
192 |                 print(sqlformatter(tab, rec))
193 |         """
194 |         for i in range(self.bank.nrofrecords):
195 |             data = self.bank.readrec(i + 1)
196 |             if data and data[0] == table.tableid:
197 |                 try:
198 |                     yield Record(i + 1, table.fields, data[1:])
199 |                 except EOFError:
200 |                     print("Record %d too short: -- %s" % (i+1, ashex(data)), file=stderr)
201 |                 except Exception as e:
202 |                     print("Record %d broken: ERROR '%s' -- %s" % (i+1, e, ashex(data)), file=stderr)
203 | 
204 |     def enumerate_files(self, table):
205 |         """
206 |         Yield all file contents found in CroBank for `table`.
207 |         This is most likely the table with id 0.
208 |         """
209 |         for i in range(self.bank.nrofrecords):
210 |             data = self.bank.readrec(i + 1)
211 |             if data and data[0] == table.tableid:
212 |                 yield i + 1, data[1:]
213 | 
214 |     def get_record(self, index, asbase64=False):
215 |         """
216 |         Retrieve a single record from CroBank with record number `index`.
217 |         """
218 |         data = self.bank.readrec(int(index))
219 |         if asbase64:
220 |             return base64.b64encode(data[1:]).decode('utf-8')
221 |         else:
222 |             return data[1:]
223 | 
224 |     def recdump(self, args):
225 |         """
226 |         Function for outputing record contents of the various .dat files.
227 | 
228 |         This function is mostly useful for reverse-engineering the database format.
229 |         """
230 |         if args.index:
231 |             dbfile = self.index
232 |         elif args.sys:
233 |             dbfile = self.sys
234 |         elif args.stru:
235 |             dbfile = self.stru
236 |         else:
237 |             dbfile = self.bank
238 | 
239 |         if not dbfile:
240 |             print(".dat not found")
241 |             return
242 |         nerr = 0
243 |         nr_recnone = 0
244 |         nr_recempty = 0
245 |         tabidxref = [0] * 256
246 |         bytexref = [0] * 256
247 |         for i in range(1, args.maxrecs + 1):
248 |             try:
249 |                 data = dbfile.readrec(i)
250 |                 if args.find1d:
251 |                     if data and (data.find(b"\x1d") > 0 or data.find(b"\x1b") > 0):
252 |                         print("record with '1d': %d -> %s" % (i, b2a_hex(data)))
253 |                         break
254 | 
255 |                 elif not args.stats:
256 |                     if data is None:
257 |                         print("%5d: <deleted>" % i)
258 |                     else:
259 |                         print("%5d: %s" % (i, toout(args, data)))
260 |                 else:
261 |                     if data is None:
262 |                         nr_recnone += 1
263 |                     elif not len(data):
264 |                         nr_recempty += 1
265 |                     else:
266 |                         tabidxref[data[0]] += 1
267 |                         for b in data[1:]:
268 |                             bytexref[b] += 1
269 |                 nerr = 0
270 |             except IndexError:
271 |                 break
272 |             except Exception as e:
273 |                 print("%5d: <%s>" % (i, e))
274 |                 if args.debug:
275 |                     raise
276 |                 nerr += 1
277 |                 if nerr > 5:
278 |                     break
279 | 
280 |         if args.stats:
281 |             print("-- table-id stats --, %d * none, %d * empty" % (nr_recnone, nr_recempty))
282 |             for k, v in enumerate(tabidxref):
283 |                 if v:
284 |                     print("%5d * %02x" % (v, k))
285 |             print("-- byte stats --")
286 |             for k, v in enumerate(bytexref):
287 |                 if v:
288 |                     print("%5d * %02x" % (v, k))
289 | 


--------------------------------------------------------------------------------
/crodump/crodump.py:
--------------------------------------------------------------------------------
  1 | from .kodump import kod_hexdump
  2 | from .hexdump import unhex, tohex
  3 | from .readers import ByteReader
  4 | from .Database import Database
  5 | from .Datamodel import TableDefinition
  6 | 
  7 | 
  8 | def destruct_sys3_def(rd):
  9 |     # todo
 10 |     pass
 11 | 
 12 | 
 13 | def destruct_sys4_def(rd):
 14 |     """
 15 |     decode type 4 of the records found in CroSys.
 16 | 
 17 |     This function is only useful for reverse-engineering the CroSys format.
 18 |     """
 19 |     n = rd.readdword()
 20 |     for _ in range(n):
 21 |         marker = rd.readdword()
 22 |         description = rd.readlongstring()
 23 |         path = rd.readlongstring()
 24 |         marker2 = rd.readdword()
 25 | 
 26 |         print("%08x;%08x: %-50s : %s" % (marker, marker2, path, description))
 27 | 
 28 | 
 29 | def destruct_sys_definition(args, data):
 30 |     """
 31 |     Decode the 'sys' / dbindex definition
 32 | 
 33 |     This function is only useful for reverse-engineering the CroSys format.
 34 |     """
 35 |     rd = ByteReader(data)
 36 | 
 37 |     systype = rd.readbyte()
 38 |     if systype == 3:
 39 |         return destruct_sys3_def(rd)
 40 |     elif systype == 4:
 41 |         return destruct_sys4_def(rd)
 42 |     else:
 43 |         raise Exception("unsupported sys record")
 44 | 
 45 | 
 46 | def cro_dump(kod, args):
 47 |     """handle 'crodump' subcommand"""
 48 |     if args.maxrecs:
 49 |         args.maxrecs = int(args.maxrecs, 0)
 50 |     else:
 51 |         # an arbitrarily large number.
 52 |         args.maxrecs = 0xFFFFFFFF
 53 | 
 54 |     db = Database(args.dbdir, args.compact, kod)
 55 |     db.dump(args)
 56 | 
 57 | 
 58 | def stru_dump(kod, args):
 59 |     """handle 'strudump' subcommand"""
 60 |     db = Database(args.dbdir, args.compact, kod)
 61 |     db.strudump(args)
 62 | 
 63 | 
 64 | def sys_dump(kod, args):
 65 |     """hexdump all CroSys records"""
 66 |     # an arbitrarily large number.
 67 |     args.maxrecs = 0xFFFFFFFF
 68 | 
 69 |     db = Database(args.dbdir, args.compact, kod)
 70 |     if db.sys:
 71 |         db.sys.dump(args)
 72 | 
 73 | 
 74 | def rec_dump(kod, args):
 75 |     """hexdump all records of the specified CroXXX.dat file."""
 76 |     if args.maxrecs:
 77 |         args.maxrecs = int(args.maxrecs, 0)
 78 |     else:
 79 |         # an arbitrarily large number.
 80 |         args.maxrecs = 0xFFFFFFFF
 81 | 
 82 |     db = Database(args.dbdir, args.compact, kod)
 83 |     db.recdump(args)
 84 | 
 85 | 
 86 | def destruct(kod, args):
 87 |     """
 88 |     decode the index#1 structure information record
 89 |     Takes hex input from stdin.
 90 |     """
 91 |     import sys
 92 | 
 93 |     data = sys.stdin.buffer.read()
 94 |     data = unhex(data)
 95 | 
 96 |     if args.type == 1:
 97 |         # create a dummy db object
 98 |         db = Database(".", args.compact)
 99 |         db.dump_db_definition(args, data)
100 |     elif args.type == 2:
101 |         tbdef = TableDefinition(data)
102 |         tbdef.dump(args)
103 |     elif args.type == 3:
104 |         destruct_sys_definition(args, data)
105 | 
106 | 
107 | def strucrack(kod, args):
108 |     """
109 |     This function derives the KOD key from the assumption that most bytes in
110 |     the CroStru records will be zero, given a sufficient number of CroStru
111 |     items, statistically the most common bytes will encode to '0x00'
112 |     """
113 | 
114 |     # start without 'KOD' table, so we will get the encrypted records
115 |     db = Database(args.dbdir, args.compact, None)
116 |     if args.sys:
117 |         table = db.sys
118 |         if not db.sys:
119 |             print("no CroSys.dat file found in %s" % args.dbdir)
120 |             return
121 |     else:
122 |         table = db.stru
123 |         if not db.stru:
124 |             print("no CroStru.dat file found in %s" % args.dbdir)
125 |             return
126 | 
127 |     xref = [ [0]*256 for _ in range(256) ]
128 |     for i, data in enumerate(table.enumrecords()):
129 |         if not data: continue
130 |         for ofs, byte in enumerate(data):
131 |             xref[(ofs+i+1)%256][byte] += 1
132 | 
133 |     KOD = [0] * 256
134 |     for i, xx in enumerate(xref):
135 |         k, v = max(enumerate(xx), key=lambda kv: kv[1])
136 |         KOD[k] = i
137 | 
138 |     if not args.silent:
139 |         print(tohex(bytes(KOD)))
140 | 
141 |     return KOD
142 | 
143 | def dbcrack(kod, args):
144 |     """
145 |     This function derives the KOD key from the assumption that most records in CroIndex
146 |     and CroBank will be compressed, and start with:
147 |       uint16 size
148 |       byte  0x08
149 |       byte  0x00
150 | 
151 |     So because the fourth byte in each record will be 0x00 when kod-decoded, I can
152 |     use this as the inverse of the KOD table, adjusting for record-index.
153 | 
154 |     """
155 |     # start without 'KOD' table, so we will get the encrypted records
156 |     db = Database(args.dbdir, args.compact, None)
157 |     xref = [ [0]*256 for _ in range(256) ]
158 | 
159 |     for dbfile in db.bank, db.index:
160 |         if not dbfile:
161 |             print("no data file found in %s" % args.dbdir)
162 |             return
163 |         for i in range(1, min(10000, dbfile.nrofrecords)):
164 |             rec = dbfile.readrec(i)
165 |             if rec and len(rec)>11:
166 |                 xref[(i+3)%256][rec[3]] += 1
167 | 
168 |     KOD = [0] * 256
169 |     for i, xx in enumerate(xref):
170 |         k, v = max(enumerate(xx), key=lambda kv: kv[1])
171 |         KOD[k] = i
172 | 
173 |     if not args.silent:
174 |         print(tohex(bytes(KOD)))
175 | 
176 |     return KOD
177 | 
178 | 
179 | def main():
180 |     import argparse
181 | 
182 |     parser = argparse.ArgumentParser(description="CRO hexdumper")
183 |     subparsers = parser.add_subparsers(title='commands',
184 |                         help='Use the --help option for the individual sub commands for more details')
185 |     parser.set_defaults(handler=lambda *args: parser.print_help())
186 |     parser.add_argument("--debug", action="store_true", help="break on exceptions")
187 |     parser.add_argument("--kod", type=str, help="specify custom KOD table")
188 |     parser.add_argument("--strucrack", action="store_true", help="infer the KOD sbox from CroStru.dat")
189 |     parser.add_argument("--dbcrack", action="store_true", help="infer the KOD sbox from CroBank.dat + CroIndex.dat")
190 |     parser.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
191 |     parser.add_argument("--compact", action="store_true", help="save memory by not caching the index, note: increases convert time by factor 1.15")
192 | 
193 |     p = subparsers.add_parser("kodump", help="KOD/hex dumper")
194 |     p.add_argument("--offset", "-o", type=str, default="0")
195 |     p.add_argument("--length", "-l", type=str)
196 |     p.add_argument("--width", "-w", type=str)
197 |     p.add_argument("--endofs", "-e", type=str)
198 |     p.add_argument("--nokod", "-n", action="store_true", help="don't KOD decode")
199 |     p.add_argument("--unhex", "-x", action="store_true", help="assume the input contains hex data")
200 |     p.add_argument("--shift", "-s", type=str, help="KOD decode with the specified shift")
201 |     p.add_argument("--increment", "-i", action="store_true",
202 |                    help="assume data is already KOD decoded, but with wrong shift -> dump alternatives.")
203 |     p.add_argument("--ascdump", "-a", action="store_true", help="CP1251 asc dump of the data")
204 |     p.add_argument("--invkod", "-I", action="store_true", help="KOD encode")
205 |     p.add_argument("filename", type=str, nargs="?", help="dump either stdin, or the specified file")
206 |     p.set_defaults(handler=kod_hexdump)
207 | 
208 |     p = subparsers.add_parser("crodump", help="CROdumper")
209 |     p.add_argument("--verbose", "-v", action="store_true")
210 |     p.add_argument("--ascdump", "-a", action="store_true")
211 |     p.add_argument("--maxrecs", "-m", type=str, help="max nr or recots to output")
212 |     p.add_argument("--nodecompress", action="store_false", dest="decompress", default="true")
213 |     p.add_argument("dbdir", type=str)
214 |     p.set_defaults(handler=cro_dump)
215 | 
216 |     p = subparsers.add_parser("sysdump", help="SYSdumper")
217 |     p.add_argument("--verbose", "-v", action="store_true")
218 |     p.add_argument("--ascdump", "-a", action="store_true")
219 |     p.add_argument("--nodecompress", action="store_false", dest="decompress", default="true")
220 |     p.add_argument("dbdir", type=str)
221 |     p.set_defaults(handler=sys_dump)
222 | 
223 |     p = subparsers.add_parser("recdump", help="record dumper")
224 |     p.add_argument("--verbose", "-v", action="store_true")
225 |     p.add_argument("--ascdump", "-a", action="store_true")
226 |     p.add_argument("--maxrecs", "-m", type=str, help="max nr or recots to output")
227 |     p.add_argument("--find1d", action="store_true", help="Find records with 0x1d in it")
228 |     p.add_argument("--stats", action="store_true", help="calc table stats from the first byte of each record",)
229 |     p.add_argument("--index", action="store_true", help="dump CroIndex")
230 |     p.add_argument("--stru", action="store_true", help="dump CroIndex")
231 |     p.add_argument("--bank", action="store_true", help="dump CroBank")
232 |     p.add_argument("--sys", action="store_true", help="dump CroSys")
233 |     p.add_argument("dbdir", type=str)
234 |     p.set_defaults(handler=rec_dump)
235 | 
236 |     p = subparsers.add_parser("strudump", help="STRUdumper")
237 |     p.add_argument("--verbose", "-v", action="store_true")
238 |     p.add_argument("--ascdump", "-a", action="store_true")
239 |     p.add_argument("dbdir", type=str)
240 |     p.set_defaults(handler=stru_dump)
241 | 
242 |     p = subparsers.add_parser("destruct", help="Stru dumper")
243 |     p.add_argument("--verbose", "-v", action="store_true")
244 |     p.add_argument("--ascdump", "-a", action="store_true")
245 |     p.add_argument("--type", "-t", type=int, help="what type of record to destruct")
246 |     p.set_defaults(handler=destruct)
247 | 
248 |     p = subparsers.add_parser("strucrack", help="Crack v4 KOD encrypion, bypassing the need for the database password.")
249 |     p.add_argument("--sys", action="store_true", help="Use CroSys for cracking")
250 |     p.add_argument("--silent", action="store_true", help="no output")
251 |     p.add_argument("dbdir", type=str)
252 |     p.set_defaults(handler=strucrack)
253 | 
254 |     p = subparsers.add_parser("dbcrack", help="Crack v4 KOD encrypion, bypassing the need for the database password.")
255 |     p.add_argument("--silent", action="store_true", help="no output")
256 |     p.add_argument("dbdir", type=str)
257 |     p.set_defaults(handler=dbcrack)
258 | 
259 |     args = parser.parse_args()
260 | 
261 |     import crodump.koddecoder
262 |     if args.kod:
263 |         if len(args.kod)!=512:
264 |             raise Exception("--kod should have a 512 hex digit argument")
265 |         kod = crodump.koddecoder.new(list(unhex(args.kod)))
266 |     elif args.nokod:
267 |         kod = None
268 |     elif args.strucrack:
269 |         class Cls: pass
270 |         cargs = Cls()
271 |         cargs.dbdir = args.dbdir
272 |         cargs.sys = False
273 |         cargs.silent = True
274 |         cracked = strucrack(None, cargs)
275 |         if not cracked:
276 |             return
277 |         kod = crodump.koddecoder.new(cracked)
278 |     elif args.dbcrack:
279 |         class Cls: pass
280 |         cargs = Cls()
281 |         cargs.dbdir = args.dbdir
282 |         cargs.sys = False
283 |         cargs.silent = True
284 |         cracked = dbcrack(None, cargs)
285 |         if not cracked:
286 |             return
287 |         kod = crodump.koddecoder.new(cracked)
288 |     else:
289 |         kod = crodump.koddecoder.new()
290 | 
291 |     if args.handler:
292 |         args.handler(kod, args)
293 | 
294 | 
295 | # run the command line interface when this module is executed as a script.
296 | if __name__ == "__main__":
297 |     main()
297 | 


--------------------------------------------------------------------------------
/crodump/Datafile.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | import struct
  3 | import zlib
  4 | from .hexdump import tohex, toout
  5 | import crodump.koddecoder
  6 | 
  7 | class Datafile:
  8 |     """Represent a single .dat with it's .tad index file"""
  9 | 
 10 |     def __init__(self, name, dat, tad, compact, kod):
 11 |         self.name = name
 12 |         self.dat = dat
 13 |         self.tad = tad
 14 |         self.compact = compact
 15 | 
 16 |         self.readdathdr()
 17 |         self.readtad()
 18 | 
 19 |         self.dat.seek(0, io.SEEK_END)
 20 |         self.datsize = self.dat.tell()
 21 | 
 22 |         self.kod = kod if not kod or self.isencrypted() else crodump.koddecoder.new()
 23 | 
 24 |     def isencrypted(self):
 25 |         return self.version in (b'01.04', b'01.05') or self.isv4()
 26 | 
 27 |     def isv3(self):
 28 |         #  01.02: 32 bit file offsets
 29 |         #  01.03: 64 bit file offsets
 30 |         #  01.04:  encrypted?, 32bit
 31 |         #  01.05:  encrypted?, 64bit
 32 |         return self.version in (b'01.02', b'01.03', b'01.04', b'01.05')
 33 | 
 34 |     def isv4(self):
 35 |         #  01.11  v4 ( 64bit )
 36 |         #  01.14  v4 ( 64bit ), encrypted?
 37 |         #  01.13  ?? I have not seen this version anywhere yet.
 38 |         return self.version in (b'01.11', b'01.13', b'01.14')
 39 | 
 40 |     def isv7(self):
 41 |         #  01.19  ?? I have not seen this version anywhere yet.
 42 |         return self.version in (b'01.19',)
 43 | 
 44 |     def readdathdr(self):
 45 |         """
 46 |         Read the .dat file header.
 47 |         Note that the 19 byte header if followed by 0xE9 random bytes, generated by
 48 |         'srand(time())' followed by 0xE9 times obfuscate(rand())
 49 |         """
 50 |         self.dat.seek(0)
 51 |         hdrdata = self.dat.read(19)
 52 | 
 53 |         (
 54 |             magic,            # +00  8 bytes
 55 |             self.hdrunk,      # +08  uint16
 56 |             self.version,     # +0a  5 bytes
 57 |             self.encoding,    # +0f  uint16
 58 |             self.blocksize,   # +11  uint16
 59 |         ) = struct.unpack("<8sH5sHH", hdrdata)
 60 | 
 61 |         if magic != b"CroFile\x00":
 62 |             print("unknown magic: ", magic)
 63 |             raise Exception("not a Crofile")
 64 |         self.use64bit = self.version in (b"01.03", b"01.05", b"01.11")
 65 | 
 66 |         # blocksize
 67 |         #   0040 -> Bank
 68 |         #   0400 -> Index or Sys
 69 |         #   0200 -> Stru  or Sys
 70 | 
 71 |         # encoding
 72 |         #   bit0 = 'KOD encoded'
 73 |         #   bit1 = compressed
 74 | 
 75 |     def readtad(self):
 76 |         """
 77 |         read and decode the .tad file.
 78 |         """
 79 |         self.tad.seek(0)
 80 |         if self.isv3():
 81 |             hdrdata = self.tad.read(2 * 4)
 82 |             self.nrdeleted, self.firstdeleted = struct.unpack("<2L", hdrdata)
 83 |         elif self.isv4():
 84 |             hdrdata = self.tad.read(4 * 4)
 85 |             unk1, self.nrdeleted, self.firstdeleted, unk2 = struct.unpack("<4L", hdrdata)
 86 |         else:
 87 |             raise Exception("unsupported .tad version")
 88 | 
 89 |         self.tadhdrlen = self.tad.tell()
 90 |         self.tadentrysize = 16 if self.use64bit else 12
 91 |         if self.compact:
 92 |             self.tad.seek(0, io.SEEK_END)
 93 |         else:
 94 |             self.idxdata = self.tad.read()
 95 |         self.tadsize = self.tad.tell() - self.tadhdrlen
 96 |         self.nrofrecords = self.tadsize // self.tadentrysize
 97 |         if self.tadsize % self.tadentrysize:
 98 |             print("WARN: leftover data in .tad")
 99 | 
100 |     def tadidx(self, idx):
101 |         """
102 |         If we're not supposed to be more compact but slower, lookup from a cached .tad
103 |         """
104 |         if self.compact:
105 |             return self.tadidx_seek(idx)
106 | 
107 |         if self.use64bit:
108 |             # 01.03 and 01.11 have 64 bit file offsets
109 |             return struct.unpack_from("<QLL", self.idxdata, idx * self.tadentrysize)
110 |         else:
111 |             # 01.02  and 01.04  have 32 bit offsets.
112 |            return struct.unpack_from("<LLL", self.idxdata, idx * self.tadentrysize)
113 | 
114 | 
115 |     def tadidx_seek(self, idx):
116 |         """
117 |             Memory saving version without caching the .tad
118 |         """
119 |         self.tad.seek(self.tadhdrlen + idx * self.tadentrysize)
120 |         idxdata = self.tad.read(self.tadentrysize)
121 | 
122 |         if self.use64bit:
123 |             # 01.03 and 01.11 have 64 bit file offsets
124 |             return struct.unpack("<QLL", idxdata)
125 |         else:
126 |             # 01.02  and 01.04  have 32 bit offsets.
127 |            return struct.unpack("<LLL", idxdata)
128 | 
129 |     def readdata(self, ofs, size):
130 |         """
131 |         Read raw data from the .dat file
132 |         """
133 |         self.dat.seek(ofs)
134 |         return self.dat.read(size)
135 | 
136 |     def readrec(self, idx):
137 |         """
138 |         Extract and decode a single record.
139 |         """
140 |         if idx == 0:
141 |             raise Exception("recnum must be a positive number")
142 |         ofs, ln, chk = self.tadidx(idx - 1)
143 |         if ln == 0xFFFFFFFF:
144 |             # deleted record
145 |             return
146 | 
147 |         if self.isv3():
148 |             flags = ln >> 24
149 |             ln &= 0xFFFFFFF
150 |         elif self.isv4():
151 |             flags = ofs >> 56
152 |             ofs &= (1<<56)-1
153 | 
154 |         dat = self.readdata(ofs, ln)
155 | 
156 |         if not dat:
157 |             # empty record
158 |             encdat = dat
159 |         elif not flags:
160 |             if self.use64bit:
161 |                 extofs, extlen = struct.unpack("<QL", dat[:12])
162 |                 o = 12
163 |             else:
164 |                 extofs, extlen = struct.unpack("<LL", dat[:8])
165 |                 o = 8
166 | 
167 |             encdat = dat[o:]
168 |             while len(encdat) < extlen:
169 |                 dat = self.readdata(extofs, self.blocksize)
170 |                 if self.use64bit:
171 |                     (extofs,) = struct.unpack("<Q", dat[:8])
172 |                     o = 8
173 |                 else:
174 |                     (extofs,) = struct.unpack("<L", dat[:4])
175 |                     o = 4
176 |                 encdat += dat[o:]
177 | 
178 |             encdat = encdat[:extlen]
179 |         else:
180 |             encdat = dat
181 | 
182 |         if self.encoding & 1:
183 |             if self.kod:
184 |                 encdat = self.kod.decode(idx, encdat)
185 | 
186 |         if self.iscompressed(encdat):
187 |             encdat = self.decompress(encdat)
188 | 
189 |         return encdat
190 | 
191 |     def enumrecords(self):
192 |         for i in range(self.nrofrecords):
193 |             yield self.readrec(i+1)
194 | 
195 |     def enumunreferenced(self, ranges, filesize):
196 |         """
197 |         From a list of used byte ranges and the filesize, enumerate the list of unused byte ranges
198 |         """
199 |         o = 0
200 |         for start, end, desc in sorted(ranges):
201 |             if start > o:
202 |                 yield o, start - o
203 |             o = end
204 |         if o < filesize:
205 |             yield o, filesize - o
206 | 
207 |     def dump(self, args):
208 |         """
209 |         Dump decodes all data referenced from the .tad file.
210 |         And optionally print out all unreferenced byte ranges in the .dat file.
211 | 
212 |         This function is mostly useful for reverse-engineering the database format.
213 | 
214 |         the `args` object controls how data is decoded.
215 |         """
216 |         print("hdr: %-6s dat: %04x %s enc:%04x bs:%04x, tad: %08x %08x" % (
217 |                 self.name, self.hdrunk, self.version,
218 |                 self.encoding, self.blocksize,
219 |                 self.nrdeleted, self.firstdeleted))
220 | 
221 |         ranges = []  # keep track of used bytes in the .dat file.
222 | 
223 |         for i in range(self.nrofrecords):
224 |             (ofs, ln, chk) = self.tadidx(i)
225 |             idx = i + 1
226 |             if args.maxrecs and i==args.maxrecs:
227 |                 break
228 |             if ln == 0xFFFFFFFF:
229 |                 print("%5d: %08x %08x %08x" % (idx, ofs, ln, chk))
230 |                 continue
231 | 
232 |             if self.isv3():
233 |                 flags = ln >> 24
234 |                 ln &= 0xFFFFFFF
235 |             elif self.isv4():
236 |                 flags = ofs >> 56
237 |                 # 04 --> data, v3compdata
238 |                 # 02,03 --> deleted
239 |                 # 00 --> extrec
240 |                 ofs &= (1<<56)-1
241 | 
242 |             dat = self.readdata(ofs, ln)
243 |             ranges.append((ofs, ofs + ln, "item #%d" % i))
244 |             decflags = [" ", " "]
245 |             infostr = ""
246 |             tail = b""
247 | 
248 |             if not dat:
249 |                 # empty record
250 |                 encdat = dat
251 |             elif not flags:
252 |                 if self.use64bit:
253 |                     extofs, extlen = struct.unpack("<QL", dat[:12])
254 |                     o = 12
255 |                 else:
256 |                     extofs, extlen = struct.unpack("<LL", dat[:8])
257 |                     o = 8
258 |                 infostr = "%08x;%08x" % (extofs, extlen)
259 |                 encdat = dat[o:]
260 |                 while len(encdat) < extlen:
261 |                     dat = self.readdata(extofs, self.blocksize)
262 |                     ranges.append((extofs, extofs + self.blocksize, "item #%d ext" % i))
263 |                     if self.use64bit:
264 |                         (extofs,) = struct.unpack("<Q", dat[:8])
265 |                         o = 8
266 |                     else:
267 |                         (extofs,) = struct.unpack("<L", dat[:4])
268 |                         o = 4
269 |                     infostr += ";%08x" % (extofs)
270 |                     encdat += dat[o:]
271 |                 tail = encdat[extlen:]
272 |                 encdat = encdat[:extlen]
273 |                 decflags[0] = "+"
274 |             else:
275 |                 encdat = dat
276 |                 decflags[0] = "*"
277 | 
278 |             if self.encoding & 1:
279 |                 if self.kod:
280 |                     encdat = self.kod.decode(idx, encdat)
281 |             else:
282 |                 decflags[0] = " "
283 | 
284 |             if args.decompress:
285 |                 if self.iscompressed(encdat):
286 |                     encdat = self.decompress(encdat)
287 |                     decflags[1] = "@"
288 | 
289 |             # TODO: separate handling for v4
290 |             print("%5d: %08x-%08x: (%02x:%08x) %s %s%s %s" % (
291 |                     i+1, ofs, ofs + ln, flags, chk,
292 |                     infostr, "".join(decflags), toout(args, encdat), tohex(tail)))
293 | 
294 |         if args.verbose:
295 |             # output parts not referenced in the .tad file.
296 |             for o, l in self.enumunreferenced(ranges, self.datsize):
297 |                 dat = self.readdata(o, l)
298 |                 print("%08x-%08x: %s" % (o, o + l, toout(args, dat)))
299 | 
300 |     def iscompressed(self, data):
301 |         """
302 |         Check if this record looks like a compressed record.
303 |         """
304 |         if len(data) < 11:
305 |             return
306 |         if data[-3:] != b"\x00\x00\x02":
307 |             return
308 |         o = 0
309 |         while o < len(data) - 3:
310 |             size, flag = struct.unpack_from(">HH", data, o)
311 |             if flag != 0x800 and flag != 0x008:
312 |                 return
313 |             o += size + 2
314 |         return True
315 | 
316 |     def decompress(self, data):
317 |         """
318 |         Decompress a record.
319 | 
320 |         Compressed records can have several chunks of compressed data.
321 |         Note that the compression header uses a mix of big-endian and little numbers.
322 | 
323 |         each chunk has the following format:
324 |             size  - big endian uint16, size of flag + crc + compdata
325 |             flag  - big endian uint16 - always 0x800
326 |             crc   - little endian uint32, crc32 of the decompressed data
327 |         the final chunk has only 3 bytes: a zero size followed by a 2.
328 | 
329 |         the crc algorithm is the one labeled 'crc-32' on this page:
330 |             http://crcmod.sourceforge.net/crcmod.predefined.html
331 |         """
332 |         result = b""
333 |         o = 0
334 |         while o < len(data) - 3:
335 |             # note the mix of bigendian and little endian numbers here.
336 |             size, flag = struct.unpack_from(">HH", data, o)
337 |             storedcrc, = struct.unpack_from("<L", data, o+4)
338 | 
339 |             C = zlib.decompressobj(-15)
340 |             result += C.decompress(data[o+8:o+8+size-6])
341 |             # note that we are not verifying the crc!
342 | 
343 |             o += size + 2
344 |         return result
345 | 


--------------------------------------------------------------------------------
/docs/cronos-research.md:
--------------------------------------------------------------------------------
  1 | # About Cronos databases.
  2 | 
  3 | A _cronos database_ consists of these files:
  4 | 
  5 |     CroBank.dat
  6 |     CroBank.tad
  7 |     CroIndex.dat
  8 |     CroIndex.tad
  9 |     CroStru.dat
 10 |     CroStru.tad
 11 | 
 12 | and a Vocabulary database with another set of these files in a sub directory Voc/
 13 | 
 14 | `CroIndex.*` can be ignored for most dumping purposes, unless the user suspects there to be residues of deleted data.
 15 | 
 16 | Additionally there are the `CroSys.dat` and `CroSys.tad` files in the cronos application directory, which list the currently
 17 | known databases.
 18 | 
 19 | ## app installation
 20 | 
 21 | On a default non-russian Windows installation, the CronosPro app shows several encoding issues that can be fixed like this:
 22 | 
 23 |     reg set HKLM\System\CurrentControlSet\Control\Nls\Codepage 1250=c_1251.nls 1252=c_1251.nls
 24 | 
 25 | [from](https://ixnfo.com/en/question-marks-instead-of-russian-letters-a-solution-to-the-problem-with-windows-encoding.html)
 26 | 
 27 | Also note that the v3 cronos app will run without problem on a linux machine using [wine](https://winehq.org/)
 28 | 
 29 | ## Files ending in .dat
 30 | 
 31 | All .dat files start with a 19 byte header:
 32 | 
 33 |     char      magic[8]      // always: 'CroFile\x00'
 34 |     uint16    unknown
 35 |     char      version[5]    // 01.XX, see below
 36 |     uint16    encoding      // bitfield: bit0 = KOD, bit1 = ?
 37 |     uint16    blocksize     // 0x0040, 0x0200 or 0x0400
 38 | 
 39 | Most Bank files use blocksize == 0x0040
 40 | most Index files use blocksize == 0x0400
 41 | most Stru files use blocksize == 0x0200
 42 | 
 43 | This is followed by a block of 0x101 or 0x100 minus 19 bytes seemingly random data.
 44 | 
 45 | The unknown word is unclear but seems not to be random, might be a checksum.
 46 | 
 47 | ## File versions
 48 | 
 49 | * Pre cronos pro used version `01.01`.
 50 | * Cronos version 3 introduced version indicators of `01.02`, `01.03`, `01.04` and `01.05`.
 51 |  * `01.02` and `01.04` are called "small model", i.e. 32 bit offsets,
 52 |  * `01.03` and `01.05` are called "big model", i.e. 64 bit offsets.
 53 |  * `01.04` and `01.05` are called "lite".
 54 | * Cronos version 4 introduced version indicators of `01.11`, `01.13` and `01.14`.
 55 |  * `01.11` are called "small model", i.e. 32 bit offsets,
 56 |  * `01.13` are called "pro".
 57 |  * `01.14` are called "lite".
 58 | * Cronos version 7 introduced version indicator of `01.19`.
 59 | 
 60 | ## Files ending in .tad
 61 | 
 62 | The first two `uint32` are the number of deleted records and the tad offset to the first deleted entry.
 63 | The deleted entries form a linked list, with the size always 0xFFFFFFFF.
 64 | 
 65 | Depending on the version in the `.dat` header, `.tad` use either 32 bit or 64 bit file offsets
 66 | 
 67 | version `01.02` and `01.04` use 32 bit offsets:
 68 | 
 69 |     uint32 offset
 70 |     uint32 size       // with flag in upper bit, 0 -> large record
 71 |     uint32 checksum   // but sometimes just 0x00000000, 0x00000001 or 0x00000002
 72 | 
 73 | versions `01.03`, `01.05` and `01.11` use 64 bit offsets:
 74 | 
 75 |     uint64 offset
 76 |     uint32 size       // with flag in upper bit, 0 -> large record
 77 |     uint32 checksum   // but sometimes just 0x00000000, 0x00000001 or 0x00000002
 78 | 
 79 | where size can be 0xffffffff (indicating a free/deleted block).
 80 | Bit 31 of the size indicates that this is an extended record.
 81 | 
 82 | Extended records start with plaintext: { uint32 offset, uint32 size }  or { uint64 offset, uint32 size }
 83 | 
 84 | 
 85 | ## the 'old format'
 86 | 
 87 | The original description made it look like there were different formats for the block references.
 88 | 
 89 | This was found in previously existing documentation, but no sample databases with this format were found so far.
 90 | 
 91 | If the .dat file has a version of 01.03 or later, the corresponding .tad file looks like this:
 92 | 
 93 |     uint32_t offset
 94 |     uint32_t size       // with flag in upper bit, 0 -> large record
 95 |     uint32_t checksum   // but sometimes just 0x00000000, 0x00000001 or 0x00000002
 96 |     uint32_t unknown    // mostly 0
 97 | 
 98 | The old description would also assume 12 byte reference blocks but a packed struct, probably if the CroFile version is 01.01.
 99 | 
100 |     uint32 offset1
101 |     uint16 size1
102 |     uint32 offset2
103 |     uint16 size2
104 | 
105 | with the first chunk read from offset1 with length size1 and potentially more parts with total length of size2 starting at file offset offset2 with the first `uint32` of the 256 byte chunk being the next chunk's offset and a maximum of 252 bytes being actual data.
106 | 
107 | However, I never found files with .tad like that. Also the original description insisted on those chunks needing the decode-magic outlined below, but the python implementation only does that for CroStru files and still seems to produce results.
108 | 
109 | ## CroStru
110 | 
111 | Interesting files are CroStru.dat containing metadata on the database within blocks whose size and length are found in CroStru.tad. These blocks are rotated byte wise using an sbox found in the cro2sql sources and then each byte is incremented by a one byte counter which is initialised by a per block offset. The sbox looks like this:
112 | 
113 |     unsigned char kod[256] = {
114 |       0x08, 0x63, 0x81, 0x38, 0xa3, 0x6b, 0x82, 0xa6,
115 |       0x18, 0x0d, 0xac, 0xd5, 0xfe, 0xbe, 0x15, 0xf6,
116 |       0xa5, 0x36, 0x76, 0xe2, 0x2d, 0x41, 0xb5, 0x12,
117 |       0x4b, 0xd8, 0x3c, 0x56, 0x34, 0x46, 0x4f, 0xa4,
118 |       0xd0, 0x01, 0x8b, 0x60, 0x0f, 0x70, 0x57, 0x3e,
119 |       0x06, 0x67, 0x02, 0x7a, 0xf8, 0x8c, 0x80, 0xe8,
120 |       0xc3, 0xfd, 0x0a, 0x3a, 0xa7, 0x73, 0xb0, 0x4d,
121 |       0x99, 0xa2, 0xf1, 0xfb, 0x5a, 0xc7, 0xc2, 0x17,
122 |       0x96, 0x71, 0xba, 0x2a, 0xa9, 0x9a, 0xf3, 0x87,
123 |       0xea, 0x8e, 0x09, 0x9e, 0xb9, 0x47, 0xd4, 0x97,
124 |       0xe4, 0xb3, 0xbc, 0x58, 0x53, 0x5f, 0x2e, 0x21,
125 |       0xd1, 0x1a, 0xee, 0x2c, 0x64, 0x95, 0xf2, 0xb8,
126 |       0xc6, 0x33, 0x8d, 0x2b, 0x1f, 0xf7, 0x25, 0xad,
127 |       0xff, 0x7f, 0x39, 0xa8, 0xbf, 0x6a, 0x91, 0x79,
128 |       0xed, 0x20, 0x7b, 0xa1, 0xbb, 0x45, 0x69, 0xcd,
129 |       0xdc, 0xe7, 0x31, 0xaa, 0xf0, 0x65, 0xd7, 0xa0,
130 |       0x32, 0x93, 0xb1, 0x24, 0xd6, 0x5b, 0x9f, 0x27,
131 |       0x42, 0x85, 0x07, 0x44, 0x3f, 0xb4, 0x11, 0x68,
132 |       0x5e, 0x49, 0x29, 0x13, 0x94, 0xe6, 0x1b, 0xe1,
133 |       0x7d, 0xc8, 0x2f, 0xfa, 0x78, 0x1d, 0xe3, 0xde,
134 |       0x50, 0x4e, 0x89, 0xb6, 0x30, 0x48, 0x0c, 0x10,
135 |       0x05, 0x43, 0xce, 0xd3, 0x61, 0x51, 0x83, 0xda,
136 |       0x77, 0x6f, 0x92, 0x9d, 0x74, 0x7c, 0x04, 0x88,
137 |       0x86, 0x55, 0xca, 0xf4, 0xc1, 0x62, 0x0e, 0x28,
138 |       0xb7, 0x0b, 0xc0, 0xf5, 0xcf, 0x35, 0xc5, 0x4c,
139 |       0x16, 0xe0, 0x98, 0x00, 0x9b, 0xd9, 0xae, 0x03,
140 |       0xaf, 0xec, 0xc9, 0xdb, 0x6d, 0x3b, 0x26, 0x75,
141 |       0x3d, 0xbd, 0xb2, 0x4a, 0x5d, 0x6c, 0x72, 0x40,
142 |       0x7e, 0xab, 0x59, 0x52, 0x54, 0x9c, 0xd2, 0xe9,
143 |       0xef, 0xdd, 0x37, 0x1e, 0x8f, 0xcb, 0x8a, 0x90,
144 |       0xfc, 0x84, 0xe5, 0xf9, 0x14, 0x19, 0xdf, 0x6e,
145 |       0x23, 0xc4, 0x66, 0xeb, 0xcc, 0x22, 0x1c, 0x5c,
146 |     };
147 | 
148 | 
149 | Given the `shift`, the encoded data `a[0]..a[n-1]` and the decoded data `b[0]..b[n-1]`, the encoding works as follows (`KOD` is the sbox above, `INV` is its inverse table, and all arithmetic is modulo 256):
150 | 
151 |     decode: b[i] = KOD[a[i]] - (i+shift)
152 |     encode: a[i] = INV[b[i] + (i+shift)]
153 | 
154 | 
155 | The original description of an older database format called the per block counter start offset 'sistN' which seems to imply it to be constant for certain entries. They correspond to a "system number" of meta entries visible in the database software. For encoded records this is their primary key.
156 | 
157 | I noticed that the first 256 bytes of CroStru.dat look almost identical (except for the first 16 bytes) to those of CroBank.dat.
158 | 
159 | The toplevel table-id for CroStru and CroSys is #3, while referenced records have tableid #4.
160 | 
161 | ## CroBank
162 | 
163 | CroBank.dat contains the actual database entries for multiple tables as described in the CroStru file. Each chunk is first re-assembled (and potentially decoded, with the per block offset being the record number in the .tad file).
164 | 
165 | Its first byte defines which table it belongs to. It is encoded in cp1251 (or possibly IBM866) with actual column data separated by 0x1e.
166 | 
167 | There is an extra concept of sub fields in those columns, indicated by a 0x1d byte.
168 | 
169 | Fields of field types 6 and 9 start with an 0x1b byte, followed by a uint32 size of the actual fields. It may then contain further 0x1e bytes which indicate sub field separators.
170 | 
171 | If used for field type 6, the field begins with two uint32 (the first one mostly 0x00000001, the second one the size of the next strings) followed by three 0x1e separated strings containing file name, file extension and system number of the actual file record data referred to by this record.
172 | 
173 | ## structure definitions
174 | 
175 | records start numbering at '1'.
176 | Names are stored as: `byte strlen + char value[strlen]`
177 | 
178 | The first entry contains:
179 | 
180 |     uint8
181 |     array {
182 |         Name keyname
183 |         uint32 index_or_size;   // size when bit31 is set.
184 |         uint8 data[size]
185 |     }
186 | 
187 | this results in a dictionary, with keys like: `Bank`, `BankId`, `BankTable`, `Base`nnn, etc.
188 | 
189 | the `Base000` entry contains the record number for the table definition of the first table.
190 | 
191 | ## table definitions
192 | 
193 |     uint16 unk1
194 |     union {
195 |         uint8 shortversion; // 1
196 |         uint16 version;     // >1
197 |     }
198 |     uint8 somelen;     // 5 or 9
199 |     struct {
200 |         uint8 unk3
201 |         uint32 unk4    // not there when 'somelen'==5
202 |         uint32 unk5
203 |     }
204 |     uint32 tableid
205 |     Name   tablename
206 |     Name   abbreviation
207 |     uint32 unk7
208 |     uint32 nrfields
209 | 
210 |     array {
211 |       uint16 entrysize    -- total nr of bytes in this entry.
212 |       uint16 fieldtype    // see below
213 |       uint32 fieldindex1  // presentation index (i.e. where in the UI it shows)
214 |       Name   fieldname
215 |       uint32 flags
216 |       uint8  alwaysone    // maybe the 'minvalue'
217 |       uint32 fieldindex2  // serialization index (i.e. where in the record in the .dat it appears)
218 |       uint32 fieldsize    // max fieldsize
219 |       uint32 unk4
220 |       ...
221 |       followed by remaining unknown bytes
222 |     } fields[nrfields]
223 | 
224 |     uint32 extradatstr    // amount of unknown length indexed data strings between field definition blocks
225 |     array {
226 |       uint16 datalen
227 |       uint8[datalen]
228 |     } datastrings[extradatstr]
229 | 
230 |     uint32 unk8
231 |     uint8  fielddefblock  // always 2, probably the number of this block of field definitions
232 |     uint32 unk9
233 | 
234 |     uint32 nrextrafields
235 |     array {
236 |       ... as above
237 |     } extrafields[nrextrafields]
238 | 
239 |     followed by remaining unknown bytes
240 |     ...
241 | 
242 | 
243 |     In order to have field definitions for all the fields in a record from the .dat for that table,
244 |     fields.append(extrafields) must be sorted by their fieldindex2.
245 | 
246 | ## field types
247 | 
248 | The interface gives a list of field types I can select for table columns:
249 | 
250 | * 0  - Системный номер = Primary Key ID
251 | * 1  - Числовое = Numeric
252 | * 2  - Текстовое = Text
253 | * 3  - Словарное = Dictionary
254 | * 4  - Дата = Date
255 | * 5  - Время = Time
256 | * 6  - Файл = File (internal)
257 | * 29 - Внешний файл = File (external)
258 | * 7  - Прямая ссылка = Direct link
259 | * 8  - Обратная ссылка = Back link
260 | * 9  - Прямая-Обратная ссылка = Direct-Reverse link
261 | * 17 - Связь по полю = Field communication
262 | 
263 | Other unassigned values in the table entry definition are
264 | 
265 | * Dictionary Base (defaults to 0)
266 | * номер в записи = number in the record
267 | * Длина Поля = Field size
268 | * Flags:
269 |   * (0x2000) Множественное = Multiple
270 |   * (0x0800) Информативное = Informative
271 |   * (0x0040) Некорректируемое = Uncorrectable
272 |   * (0x1000) поиск на вводе = input search
273 |   * (?) символьное = symbolic
274 |   * (?) Лемматизировать = Lemmatize
275 |   * (?) поиск по значениям = search by values
276 |   * (0x0200) замена непустого значения = replacement of a non-empty value
277 |   * (0x0100) замена значения = value replacement
278 |   * (0x0004) автозаполнения = autocomplete
279 |   * (?) корневая связь = root connection
280 |   * (?) допускать дубли = allow doubles
281 |   * (0x0002) обязательное = obligatory
282 | 
283 | ## compressed records
284 | 
285 | some records are compressed, the format is like this:
286 | 
287 |     multiple-chunks {
288 |         uint16 size;     // stored in bigendian format.
289 |         uint8   head[2] = { 8, 0 }
290 |         uint32 crc32
291 |         uint8   compdata[size-6]
292 |     }
293 |     uint8   tail[3] = { 0, 0, 2 }
294 | 
295 | 
296 | # v4 format
297 | 
298 | The header version 01.11 indicates a database created with cronos v4.x.
299 | 
300 | ## .tad
301 | 
302 | A 4 dword header:
303 | 
304 |     dword -2
305 |     dword nr deleted
306 |     dword first deleted
307 |     dword 0
308 | 
309 | 16 byte records:
310 |     qword offset,  with flags in upper 8 bits.
311 |     dword size
312 |     dword unk
313 | 
314 | flags:
315 |     02,03  - deleted record.
316 |     04  - compressed { int16be size; int16be flag int32le crc; byte data[size-6]; } 00 00 02
317 |     00  - extended record
318 | 
319 | ## .dat
320 | 
321 | The .dat file of a 01.11 database has 64bit offsets, like the 01.03 file format.
322 | 
323 | 


--------------------------------------------------------------------------------