├── .editorconfig
├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── core
    ├── constants.py
    ├── extractor.py
    ├── models
    │   ├── lookup_table_entry.py
    │   └── stringliteral.py
    └── patcher.py
├── extract.py
└── patch.py


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | indent_size = 4
 6 | indent_style = space
 7 | insert_final_newline = true
 8 | trim_trailing_whitespace = true
 9 | 
10 | [*.md]
11 | trim_trailing_whitespace = false
12 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py text eol=lf
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | global-metadata.dat
163 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 József Sallai
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # IL2CPP StringLiteral Patcher
  2 | 
  3 | This repository contains two Python scripts that you can use to extract string
  4 | literals from global-metadata.dat files and patch them into a new file.
  5 | 
  6 | ## The Problem
  7 | 
  8 | Recently, many Unity games have been using IL2CPP to compile their C# code into
  9 | native binaries. Games that don't support localization and don't store their
 10 | text in easily accessible MonoBehaviour objects will usually have a dictionary
 11 | of hardcoded strings in the game's code. When using IL2CPP, these hardcoded
 12 | string literals are stored in a byte chunk, and a lookup table is used to
 13 | determine the index and length of each string required by the game.
 14 | 
 15 | This is good for developers, as it offers faster and safer access to all string
 16 | literals, but for modders and translators, it can be a bit troublesome. While we
 17 | can see the string literals themselves in the global-metadata.dat file, editing
 18 | them is a bit more complicated. Using a hex editor to change the strings will
 19 | work as long as the new strings are the same length as the old ones, but this is
 20 | far from ideal.
 21 | 
 22 | ## The Solution
 23 | 
 24 | Using the scripts from this repository, you can extract all string literals from
 25 | a global-metadata.dat file and store them in a JSON file that you can then edit.
 26 | Once you're done making changes to your strings, you can then create a new
 27 | global-metadata.dat file with the patches applied.
 28 | 
 29 | ## Usage
 30 | 
 31 | ### Extracting Strings
 32 | 
 33 | ```
 34 | python3 extract.py -i /path/to/global-metadata.dat -o /path/to/output/strings.json
 35 | ```
 36 | 
 37 | This will create a JSON file containing every string literal that can be found
 38 | in the binary. As expected, this file will be rather large, as it contains lots
 39 | of irrelevant strings, such as ones used by Unity, .NET, and other libraries.
 40 | 
 41 | Unfortunately, there is no way to reliably tell which strings are the ones that
 42 | the game actually displays to the player. However, they are usually located in
 43 | the same chunk of the file (usually towards the end). You can use your editor's
 44 | search function to find a string that you know is visible in the game, and then
 45 | look around that string to determine the chunk that contains the game's strings.
 46 | 
 47 | While you can keep the other strings in the JSON file, it is recommended that
 48 | you just remove any string that you don't actually need. The patcher script will
 49 | just copy the original string literal if it can't find a replacement in your
 50 | JSON file.
 51 | 
 52 | ### Patching Strings
 53 | 
 54 | ```
 55 | python3 patch.py -i /path/to/original-global-metadata.dat -p /path/to/strings.json -o /path/to/patched-global-metadata.dat
 56 | ```
 57 | 
 58 | This will create a new global-metadata.dat file with the updated strings. If all
 59 | went well, the game will run without any errors and will also display the new
 60 | strings. The output path cannot be the same as the input path.
 61 | 
 62 | ## Troubleshooting and FAQ
 63 | 
 64 | **Q: Where can I find the global-metadata.dat file?**
 65 | 
 66 | A: The usual location in the game's directory is: `Managed/Metadata/global-metadata.dat`.
 67 | If you're trying to mod a Unity WebGL/WebAssembly game, you can find the
 68 | metadata file by extracting it from the game's data file using a tool such as
 69 | [unityweb][unityweb-url].
 70 | 
 71 | **Q: I'm getting "Invalid global-metadata file" errors when trying to extract strings.**
 72 | 
 73 | A: This error should only be thrown when your global-metadata.dat file doesn't
 74 | start with the correct magic header. This can either mean that the file is
 75 | encrypted, obfuscated, or actually not a global-metadata.dat file in the
 76 | first place.
 77 | 
 78 | **Q: I'm getting "Invalid StringLiteral object" errors when trying to patch strings.**
 79 | 
 80 | A: One of your strings in the JSON file does not follow the correct format. The
 81 | exported JSON file should be an array of objects, each object having two
 82 | keys: `index` (should never be changed, as it's used for identifying the
 83 | string) and `value` (the actual string literal). If either key is missing,
 84 | the script will throw an error.
 85 | 
 86 | **Q: I'm getting a different error when extracting or the resulting strings are
 87 | all garbled.**
 88 | 
 89 | A: The global-metadata file is most likely obfuscated. Some game developers may
 90 | do this to prevent players from reverse engineering their games. If you're
 91 | sure the file is not obfuscated, please [open an issue][issue-tracker-url]
 92 | and I will look into it.
 93 | 
 94 | **Q: What version of global-metadata was this tool tested with?**
 95 | 
 96 | A: The scripts were only tested with version 29. In theory, the version should
 97 | not affect the functionality of the scripts, as the offsets at which the string
 98 | literal information is stored should be the same. If you encounter any issues
 99 | with a particular version, please [open an issue][issue-tracker-url].
100 | 
101 | ## License
102 | 
103 | MIT.
104 | 
105 | [issue-tracker-url]: https://github.com/jozsefsallai/il2cpp-stringliteral-patcher/issues
106 | [unityweb-url]: https://github.com/jozsefsallai/unityweb
107 | 


--------------------------------------------------------------------------------
/core/constants.py:
--------------------------------------------------------------------------------
# Four-byte signature expected at the very start of a global-metadata.dat file;
# the extractor rejects any input whose first four bytes differ.
MAGIC_BYTES = b'\xAF\x1B\xB1\xFA'

# File positions of the header fields that give the lookup table's offset and
# its size in bytes. Each field is read as a 4-byte little-endian integer.
LOOKUP_TABLE_DEFINITION_OFFSET = 8
LOOKUP_TABLE_SIZE_DEFINITION_OFFSET = 12

# File positions of the header fields that give the string literal data's
# offset and size (also read as 4-byte little-endian integers).
STRINGLITERAL_DATA_DEFINITION_OFFSET = 16
STRINGLITERAL_DATA_SIZE_DEFINITION_OFFSET = 20
8 | 


--------------------------------------------------------------------------------
/core/extractor.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from core.constants import *
 4 | 
 5 | from core.models.stringliteral import StringLiteral
 6 | from core.models.lookup_table_entry import LookupTableEntry
 7 | 
 8 | 
 9 | class StringLiteralExtractor:
10 |     def __init__(self, filepath):
11 |         self.filepath = filepath
12 |         self.lookup_table = []
13 |         self.stringliterals = []
14 | 
15 |         self.last_byte = 0
16 | 
17 |     def extract(self):
18 |         with open(self.filepath, "rb") as f:
19 |             self._extract(f)
20 |         return self
21 | 
22 |     def dump(self, output_path):
23 |         with open(output_path, "w", encoding="utf-8") as f:
24 |             f.write(json.dumps(self.to_dict(), indent=2, ensure_ascii=False))
25 | 
26 |     def _extract(self, f):
27 |         self._ensure_magic_is_valid(f)
28 | 
29 |         lookup_table_offset = self._get_lookup_table_offset(f)
30 |         lookup_table_size = self._get_lookup_table_size(f)
31 |         stringliteral_data_offset = self._get_stringliteral_data_offset(f)
32 |         stringliteral_data_size = self._get_stringliteral_data_size(f)
33 | 
34 |         self._extract_lookup_table(f, lookup_table_offset, lookup_table_size)
35 |         self._extract_string_literals(
36 |             f, stringliteral_data_offset, stringliteral_data_size
37 |         )
38 | 
39 |     def _ensure_magic_is_valid(self, f):
40 |         magic = f.read(4)
41 |         if magic != MAGIC_BYTES:
42 |             raise Exception("Invalid global-metadata file")
43 | 
44 |     def _get_lookup_table_offset(self, f):
45 |         f.seek(LOOKUP_TABLE_DEFINITION_OFFSET)
46 |         return int.from_bytes(f.read(4), byteorder="little")
47 | 
48 |     def _get_lookup_table_size(self, f):
49 |         f.seek(LOOKUP_TABLE_SIZE_DEFINITION_OFFSET)
50 |         return int.from_bytes(f.read(4), byteorder="little")
51 | 
52 |     def _get_stringliteral_data_offset(self, f):
53 |         f.seek(STRINGLITERAL_DATA_DEFINITION_OFFSET)
54 |         return int.from_bytes(f.read(4), byteorder="little")
55 | 
56 |     def _get_stringliteral_data_size(self, f):
57 |         f.seek(STRINGLITERAL_DATA_SIZE_DEFINITION_OFFSET)
58 |         return int.from_bytes(f.read(4), byteorder="little")
59 | 
60 |     def _extract_lookup_table(self, f, lookup_table_offset, lookup_table_size):
61 |         f.seek(lookup_table_offset)
62 | 
63 |         bytes_read = 0
64 |         while bytes_read < lookup_table_size:
65 |             length = int.from_bytes(f.read(4), byteorder="little")
66 |             index = int.from_bytes(f.read(4), byteorder="little")
67 |             self._add_lookup_table_entry(length, index)
68 |             bytes_read += 8
69 | 
70 |     def _extract_string_literals(
71 |         self, f, stringliteral_data_offset, stringliteral_data_size
72 |     ):
73 |         f.seek(stringliteral_data_offset)
74 | 
75 |         for idx, entry in enumerate(self.lookup_table):
76 |             f.seek(stringliteral_data_offset + entry.index)
77 |             literal = f.read(entry.length).decode("utf-8", "ignore")
78 |             self._add_string_literal(idx, literal)
79 | 
80 |     def _add_lookup_table_entry(self, length, index):
81 |         lookup_table_entry = LookupTableEntry(length, index)
82 |         self.lookup_table.append(lookup_table_entry)
83 | 
84 |     def _add_string_literal(self, index, literal):
85 |         string_literal = StringLiteral(index, literal)
86 |         self.stringliterals.append(string_literal)
87 | 
88 |     def to_dict(self):
89 |         return [string_literal.to_dict() for string_literal in self.stringliterals]
90 | 


--------------------------------------------------------------------------------
/core/models/lookup_table_entry.py:
--------------------------------------------------------------------------------
class LookupTableEntry:
    """One record of the string literal lookup table.

    ``length`` is the number of bytes the string occupies and ``index`` is its
    position relative to the start of the string literal data.
    """

    length: int
    index: int

    def __init__(self, length: int, index: int):
        self.length, self.index = length, index
8 | 


--------------------------------------------------------------------------------
/core/models/stringliteral.py:
--------------------------------------------------------------------------------
 1 | class StringLiteral:
 2 |     index: int
 3 |     value: str
 4 | 
 5 |     def __init__(self, index: int, value: str):
 6 |         self.index = index
 7 |         self.value = value
 8 | 
 9 |     def to_dict(self):
10 |         return {
11 |             'index': self.index,
12 |             'value': self.value
13 |         }
14 | 
15 |     @staticmethod
16 |     def from_dict(d: dict):
17 |         if 'index' not in d or 'value' not in d:
18 |             raise Exception('Invalid StringLiteral object')
19 | 
20 |         return StringLiteral(d['index'], d['value'])
21 | 


--------------------------------------------------------------------------------
/core/patcher.py:
--------------------------------------------------------------------------------
 1 | import json, shutil, os
 2 | 
 3 | from core.constants import *
 4 | 
 5 | from core.extractor import StringLiteralExtractor
 6 | from core.models.stringliteral import StringLiteral
 7 | 
 8 | 
 9 | class StringLiteralPatcher:
10 |     def __init__(self, metadata_filepath, stringliteral_filepath):
11 |         self.metadata_filepath = metadata_filepath
12 |         self.stringliteral_filepath = stringliteral_filepath
13 |         self.patched_stringliterals = []
14 | 
15 |         self.extractor = StringLiteralExtractor(metadata_filepath)
16 |         self.extractor.extract()
17 | 
18 |     def update(self):
19 |         with open(self.stringliteral_filepath, "r", encoding="utf-8") as f:
20 |             self._populate_patched_stringliterals(f)
21 |         self._update_extractor_data()
22 |         return self
23 | 
24 |     def patch(self, output_filepath):
25 |         shutil.copy2(self.metadata_filepath, output_filepath)
26 | 
27 |         with open(output_filepath, "rb+") as f:
28 |             self._patch(f)
29 | 
30 |     def _populate_patched_stringliterals(self, f):
31 |         data = json.load(f)
32 |         self.patched_stringliterals = [StringLiteral.from_dict(entry) for entry in data]
33 | 
34 |     def _update_extractor_data(self):
35 |         for entry in self.patched_stringliterals:
36 |             value_bytes = bytes(entry.value, "utf-8")
37 |             new_length = len(value_bytes)
38 |             index = entry.index
39 |             self.extractor.lookup_table[index].length = new_length
40 |             self.extractor.stringliterals[index].value = entry.value
41 | 
42 |         index = 0
43 |         for entry in self.extractor.lookup_table:
44 |             entry.index = index
45 |             index += entry.length
46 | 
47 |     def _patch(self, f):
48 |         offset = self._get_last_offset(f)
49 |         self._append_stringliteral_database(f, offset)
50 |         self._patch_lookup_table(f)
51 |         self._patch_stringliteral_data_offset(f, offset)
52 | 
53 |     def _get_last_offset(self, f):
54 |         f.seek(0, os.SEEK_END)
55 |         offset = f.tell()
56 |         f.seek(0)
57 | 
58 |         return offset
59 | 
60 |     def _append_stringliteral_database(self, f, offset):
61 |         f.seek(offset)
62 |         for entry in self.extractor.stringliterals:
63 |             f.write(bytes(entry.value, "utf-8"))
64 |         f.seek(0)
65 | 
66 |     def _patch_lookup_table(self, f):
67 |         lookup_table_offset = self._get_lookup_table_offset(f)
68 |         f.seek(lookup_table_offset)
69 | 
70 |         for entry in self.extractor.lookup_table:
71 |             f.write(entry.length.to_bytes(4, byteorder="little"))
72 |             f.write(entry.index.to_bytes(4, byteorder="little"))
73 | 
74 |     def _patch_stringliteral_data_offset(self, f, offset):
75 |         f.seek(STRINGLITERAL_DATA_DEFINITION_OFFSET)
76 |         f.write(offset.to_bytes(4, byteorder="little"))
77 | 
78 |     def _get_lookup_table_offset(self, f):
79 |         f.seek(LOOKUP_TABLE_DEFINITION_OFFSET)
80 |         return int.from_bytes(f.read(4), byteorder="little")
81 | 


--------------------------------------------------------------------------------
/extract.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import argparse
 4 | 
 5 | from core.extractor import StringLiteralExtractor
 6 | 
 7 | if __name__ == "__main__":
 8 |     parser = argparse.ArgumentParser(
 9 |         description="Extract string literals from global-metadata.dat"
10 |     )
11 | 
12 |     parser.add_argument(
13 |         "-i", "--input", required=True, help="Path to global-metadata.dat"
14 |     )
15 |     parser.add_argument("-o", "--output", required=True, help="Path to output file")
16 | 
17 |     args = parser.parse_args()
18 | 
19 |     extractor = StringLiteralExtractor(args.input)
20 |     extractor.extract().dump(args.output)
21 | 
22 |     print("Done!")
23 | 


--------------------------------------------------------------------------------
/patch.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import argparse
 4 | 
 5 | from core.patcher import StringLiteralPatcher
 6 | 
 7 | if __name__ == "__main__":
 8 |     parser = argparse.ArgumentParser(
 9 |         description="Patch string literals in global-metadata.dat"
10 |     )
11 | 
12 |     parser.add_argument(
13 |         "-i", "--input", required=True, help="Path to original global-metadata.dat"
14 |     )
15 |     parser.add_argument("-p", "--patch", required=True, help="Path to patch file")
16 |     parser.add_argument(
17 |         "-o", "--output", required=True, help="Path to output global-metadata.dat"
18 |     )
19 | 
20 |     args = parser.parse_args()
21 | 
22 |     patcher = StringLiteralPatcher(args.input, args.patch)
23 |     patcher.update().patch(args.output)
24 | 
25 |     print("Done!")
26 | 


--------------------------------------------------------------------------------