├── .editorconfig ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── core ├── constants.py ├── extractor.py ├── models │ ├── lookup_table_entry.py │ └── stringliteral.py └── patcher.py ├── extract.py └── patch.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_size = 4 6 | indent_style = space 7 | insert_final_newline = true 8 | trim_trailing_whitespace = true 9 | 10 | [*.md] 11 | trim_trailing_whitespace = false 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py text eol=lf 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | global-metadata.dat 163 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 József Sallai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IL2CPP StringLiteral Patcher 2 | 3 | This repository contains two Python scripts that you can use to extract string 4 | literals from global-metadata.dat files and patch them into a new file. 5 | 6 | ## The Problem 7 | 8 | Recently, many Unity games have been using IL2CPP to compile their C# code into 9 | native binaries. Games that don't have support for localization and don't store 10 | texts in MonoBehaviour objects that can be easily obtained, will usually have 11 | a dictionary of hardcoded strings in the game's code. When using IL2CPP, these 12 | hardcoded string literals will be stored in a byte chunk and a lookup table will 13 | be used to determine the index and length of each string required by the game. 14 | 15 | This is good for developers, as it offers faster and safer access to all string 16 | literals, but for modders and translators, it can be a bit troublesome. While we 17 | can see the string literals themselves in the global-metadata.dat file, editing 18 | them is a bit more complicated. Using a hex editor to change the strings will 19 | work as long as the new strings are the same length as the old ones, but this is 20 | far from ideal. 21 | 22 | ## The Solution 23 | 24 | Using the scripts from this repository, you can extract all string literals from 25 | a global-metadata.dat file and store them in a JSON file that you can then edit. 26 | Once you're done making changes to your strings, you can then create a new 27 | global-metadata.dat file with the patches applied. 28 | 29 | ## Usage 30 | 31 | ### Extracting Strings 32 | 33 | ``` 34 | python3 extract.py -i /path/to/global-metadata.dat -o /path/to/output/strings.json 35 | ``` 36 | 37 | This will create a JSON file containing every string literal that can be found 38 | in the binary. As expected, this file will be rather large, as it contains lots 39 | of irrelevant strings, such as ones used by Unity, .NET, and other libraries. 40 | 41 | Unfortunately, there is no way to reliably tell which strings are the ones that 42 | the game actually displays to the player, however, they are usually located in 43 | the same chunk of the file (usually towards the end). You can use your editor's 44 | search function to find a string that you know is visible in the game, and then 45 | look around that string to determine the chunk that contains the game's strings. 46 | 47 | While you can keep the other strings in the JSON file, it is recommended that 48 | you just remove any string that you don't actually need. The patcher script will 49 | just copy the original string literal if it can't find a replacement in your 50 | JSON file. 51 | 52 | ### Patching Strings 53 | 54 | ``` 55 | python3 patch.py -i /path/to/original-global-metadata.dat -p /path/to/strings.json -o /path/to/patched-global-metadata.dat 56 | ``` 57 | 58 | This will create a new global-metadata.dat file with the updated strings. If all 59 | went well, the game will run without any errors and will also display the new 60 | strings. The output path can not be the same as the input path. 61 | 62 | ## Troubleshooting and FAQ 63 | 64 | **Q: Where can I find the global-metadata.dat file?** 65 | 66 | A: The usual location in the game's directory is: `Managed/Metadata/global-metadata.dat`. 67 | If you're trying to mod a Unity WebGL/WebAssembly game, you can find the 68 | metadata file by extracting it from the game's data file using a tool such as 69 | [unityweb][unityweb-url]. 70 | 71 | **Q: I'm getting "Invalid global-metadata file" errors when trying to extract strings.** 72 | 73 | A: This error should only be thrown when your global-metadata.dat file doesn't 74 | start with the correct magic header. This can either mean that the file is 75 | encrypted, obfuscated, or actually not a global-metadata.dat file in the 76 | first place. 77 | 78 | **Q: I'm getting "Invalid StringLiteral object" errors when trying to patch strings.** 79 | 80 | A: One of your strings in the JSON file does not follow the correct format. The 81 | exported JSON file should be an array of objects, each object having two 82 | keys: `index` (should never be changed, as it's used for identifying the 83 | string) and `value` (the actual string literal). If any of these is missing, 84 | the script will throw an error. 85 | 86 | **Q: I'm getting a different error when extracting or the resulting strings are 87 | all garbled.** 88 | 89 | A: The global-metadata file is most likely obfuscated. Some game developers may 90 | do this to prevent players from reverse engineering their games. If you're 91 | sure the file is not obfuscated, please [open an issue][issue-tracker-url] 92 | and I will look into it. 93 | 94 | **Q: What version of global-metadata was this tool tested with?** 95 | 96 | A: The scripts were only tested with version 29. In theory, version should not 97 | affect the functionality of the scripts, as the offsets at which the string 98 | literal information is stored should be the same. If you encounter any issues 99 | with a particular version, please [open an issue][issue-tracker-url]. 100 | 101 | ## License 102 | 103 | MIT. 104 | 105 | [issue-tracker-url]: https://github.com/jozsefsallai/il2cpp-stringliteral-patcher/issues 106 | [unityweb-url]: https://github.com/jozsefsallai/unityweb 107 | -------------------------------------------------------------------------------- /core/constants.py: -------------------------------------------------------------------------------- 1 | MAGIC_BYTES = b'\xAF\x1B\xB1\xFA' 2 | 3 | LOOKUP_TABLE_DEFINITION_OFFSET = 8 4 | LOOKUP_TABLE_SIZE_DEFINITION_OFFSET = 12 5 | 6 | STRINGLITERAL_DATA_DEFINITION_OFFSET = 16 7 | STRINGLITERAL_DATA_SIZE_DEFINITION_OFFSET = 20 8 | -------------------------------------------------------------------------------- /core/extractor.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from core.constants import * 4 | 5 | from core.models.stringliteral import StringLiteral 6 | from core.models.lookup_table_entry import LookupTableEntry 7 | 8 | 9 | class StringLiteralExtractor: 10 | def __init__(self, filepath): 11 | self.filepath = filepath 12 | self.lookup_table = [] 13 | self.stringliterals = [] 14 | 15 | self.last_byte = 0 16 | 17 | def extract(self): 18 | with open(self.filepath, "rb") as f: 19 | self._extract(f) 20 | return self 21 | 22 | def dump(self, output_path): 23 | with open(output_path, "w", encoding="utf-8") as f: 24 | f.write(json.dumps(self.to_dict(), indent=2, ensure_ascii=False)) 25 | 26 | def _extract(self, f): 27 | self._ensure_magic_is_valid(f) 28 | 29 | lookup_table_offset = self._get_lookup_table_offset(f) 30 | lookup_table_size = self._get_lookup_table_size(f) 31 | stringliteral_data_offset = self._get_stringliteral_data_offset(f) 32 | stringliteral_data_size = self._get_stringliteral_data_size(f) 33 | 34 | self._extract_lookup_table(f, lookup_table_offset, lookup_table_size) 35 | self._extract_string_literals( 36 | f, stringliteral_data_offset, stringliteral_data_size 37 | ) 38 | 39 | def _ensure_magic_is_valid(self, f): 40 | magic = f.read(4) 41 | if magic != MAGIC_BYTES: 42 | raise Exception("Invalid global-metadata file") 43 | 44 | def _get_lookup_table_offset(self, f): 45 | f.seek(LOOKUP_TABLE_DEFINITION_OFFSET) 46 | return int.from_bytes(f.read(4), byteorder="little") 47 | 48 | def _get_lookup_table_size(self, f): 49 | f.seek(LOOKUP_TABLE_SIZE_DEFINITION_OFFSET) 50 | return int.from_bytes(f.read(4), byteorder="little") 51 | 52 | def _get_stringliteral_data_offset(self, f): 53 | f.seek(STRINGLITERAL_DATA_DEFINITION_OFFSET) 54 | return int.from_bytes(f.read(4), byteorder="little") 55 | 56 | def _get_stringliteral_data_size(self, f): 57 | f.seek(STRINGLITERAL_DATA_SIZE_DEFINITION_OFFSET) 58 | return int.from_bytes(f.read(4), byteorder="little") 59 | 60 | def _extract_lookup_table(self, f, lookup_table_offset, lookup_table_size): 61 | f.seek(lookup_table_offset) 62 | 63 | bytes_read = 0 64 | while bytes_read < lookup_table_size: 65 | length = int.from_bytes(f.read(4), byteorder="little") 66 | index = int.from_bytes(f.read(4), byteorder="little") 67 | self._add_lookup_table_entry(length, index) 68 | bytes_read += 8 69 | 70 | def _extract_string_literals( 71 | self, f, stringliteral_data_offset, stringliteral_data_size 72 | ): 73 | f.seek(stringliteral_data_offset) 74 | 75 | for idx, entry in enumerate(self.lookup_table): 76 | f.seek(stringliteral_data_offset + entry.index) 77 | literal = f.read(entry.length).decode("utf-8", "ignore") 78 | self._add_string_literal(idx, literal) 79 | 80 | def _add_lookup_table_entry(self, length, index): 81 | lookup_table_entry = LookupTableEntry(length, index) 82 | self.lookup_table.append(lookup_table_entry) 83 | 84 | def _add_string_literal(self, index, literal): 85 | string_literal = StringLiteral(index, literal) 86 | self.stringliterals.append(string_literal) 87 | 88 | def to_dict(self): 89 | return [string_literal.to_dict() for string_literal in self.stringliterals] 90 | -------------------------------------------------------------------------------- /core/models/lookup_table_entry.py: -------------------------------------------------------------------------------- 1 | class LookupTableEntry: 2 | length: int 3 | index: int 4 | 5 | def __init__(self, length: int, index: int): 6 | self.length = length 7 | self.index = index 8 | -------------------------------------------------------------------------------- /core/models/stringliteral.py: -------------------------------------------------------------------------------- 1 | class StringLiteral: 2 | index: int 3 | value: str 4 | 5 | def __init__(self, index: int, value: str): 6 | self.index = index 7 | self.value = value 8 | 9 | def to_dict(self): 10 | return { 11 | 'index': self.index, 12 | 'value': self.value 13 | } 14 | 15 | @staticmethod 16 | def from_dict(d: dict): 17 | if 'index' not in d or 'value' not in d: 18 | raise Exception('Invalid StringLiteral object') 19 | 20 | return StringLiteral(d['index'], d['value']) 21 | -------------------------------------------------------------------------------- /core/patcher.py: -------------------------------------------------------------------------------- 1 | import json, shutil, os 2 | 3 | from core.constants import * 4 | 5 | from core.extractor import StringLiteralExtractor 6 | from core.models.stringliteral import StringLiteral 7 | 8 | 9 | class StringLiteralPatcher: 10 | def __init__(self, metadata_filepath, stringliteral_filepath): 11 | self.metadata_filepath = metadata_filepath 12 | self.stringliteral_filepath = stringliteral_filepath 13 | self.patched_stringliterals = [] 14 | 15 | self.extractor = StringLiteralExtractor(metadata_filepath) 16 | self.extractor.extract() 17 | 18 | def update(self): 19 | with open(self.stringliteral_filepath, "r", encoding="utf-8") as f: 20 | self._populate_patched_stringliterals(f) 21 | self._update_extractor_data() 22 | return self 23 | 24 | def patch(self, output_filepath): 25 | shutil.copy2(self.metadata_filepath, output_filepath) 26 | 27 | with open(output_filepath, "rb+") as f: 28 | self._patch(f) 29 | 30 | def _populate_patched_stringliterals(self, f): 31 | data = json.load(f) 32 | self.patched_stringliterals = [StringLiteral.from_dict(entry) for entry in data] 33 | 34 | def _update_extractor_data(self): 35 | for entry in self.patched_stringliterals: 36 | value_bytes = bytes(entry.value, "utf-8") 37 | new_length = len(value_bytes) 38 | index = entry.index 39 | self.extractor.lookup_table[index].length = new_length 40 | self.extractor.stringliterals[index].value = entry.value 41 | 42 | index = 0 43 | for entry in self.extractor.lookup_table: 44 | entry.index = index 45 | index += entry.length 46 | 47 | def _patch(self, f): 48 | offset = self._get_last_offset(f) 49 | self._append_stringliteral_database(f, offset) 50 | self._patch_lookup_table(f) 51 | self._patch_stringliteral_data_offset(f, offset) 52 | 53 | def _get_last_offset(self, f): 54 | f.seek(0, os.SEEK_END) 55 | offset = f.tell() 56 | f.seek(0) 57 | 58 | return offset 59 | 60 | def _append_stringliteral_database(self, f, offset): 61 | f.seek(offset) 62 | for entry in self.extractor.stringliterals: 63 | f.write(bytes(entry.value, "utf-8")) 64 | f.seek(0) 65 | 66 | def _patch_lookup_table(self, f): 67 | lookup_table_offset = self._get_lookup_table_offset(f) 68 | f.seek(lookup_table_offset) 69 | 70 | for entry in self.extractor.lookup_table: 71 | f.write(entry.length.to_bytes(4, byteorder="little")) 72 | f.write(entry.index.to_bytes(4, byteorder="little")) 73 | 74 | def _patch_stringliteral_data_offset(self, f, offset): 75 | f.seek(STRINGLITERAL_DATA_DEFINITION_OFFSET) 76 | f.write(offset.to_bytes(4, byteorder="little")) 77 | 78 | def _get_lookup_table_offset(self, f): 79 | f.seek(LOOKUP_TABLE_DEFINITION_OFFSET) 80 | return int.from_bytes(f.read(4), byteorder="little") 81 | -------------------------------------------------------------------------------- /extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | 5 | from core.extractor import StringLiteralExtractor 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser( 9 | description="Extract string literals from global-metadata.dat" 10 | ) 11 | 12 | parser.add_argument( 13 | "-i", "--input", required=True, help="Path to global-metadata.dat" 14 | ) 15 | parser.add_argument("-o", "--output", required=True, help="Path to output file") 16 | 17 | args = parser.parse_args() 18 | 19 | extractor = StringLiteralExtractor(args.input) 20 | extractor.extract().dump(args.output) 21 | 22 | print("Done!") 23 | -------------------------------------------------------------------------------- /patch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | 5 | from core.patcher import StringLiteralPatcher 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser( 9 | description="Patch string literals in global-metadata.dat" 10 | ) 11 | 12 | parser.add_argument( 13 | "-i", "--input", required=True, help="Path to original global-metadata.dat" 14 | ) 15 | parser.add_argument("-p", "--patch", required=True, help="Path to patch file") 16 | parser.add_argument( 17 | "-o", "--output", required=True, help="Path to output global-metadata.dat" 18 | ) 19 | 20 | args = parser.parse_args() 21 | 22 | patcher = StringLiteralPatcher(args.input, args.patch) 23 | patcher.update().patch(args.output) 24 | 25 | print("Done!") 26 | --------------------------------------------------------------------------------