├── Todo ├── Todo.txt └── tasks.txt ├── Ideas ├── Ideas.txt ├── Plugins.txt └── Outline.txt ├── Tests └── test1.cpp ├── Docs ├── documentation.md └── terminology.txt ├── CPP └── PortedDeobfuscation.cpp ├── .idea ├── .name ├── .gitignore ├── codeStyles │ └── codeStyleConfig.xml ├── vcs.xml ├── toolchains.xml ├── saveactions_settings.xml ├── modules.xml ├── misc.xml ├── UnityDeobfuscator.iml └── inspectionProfiles │ └── Project_Default.xml ├── Python ├── DeobfuscationReformatted │ ├── main.py │ ├── Dumpcsfunctions.py │ ├── Deobfuscationfunctions.py │ ├── .idea │ │ └── DeobfuscationReformatted.iml │ └── Utils │ │ └── exceptions.py ├── test.py ├── fileutils.py ├── unitydeobfuscatorexceptions.py ├── deobfuscationtest.py ├── offsetstest.py ├── old.py ├── OldDeobfuscationFunctions.py └── DeobfuscationRefactored.py ├── README.md ├── .vscode └── c_cpp_properties.json └── README.txt /Todo/Todo.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Ideas/Ideas.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Tests/test1.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Docs/documentation.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CPP/PortedDeobfuscation.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | DeobfuscationRefactored.py 
-------------------------------------------------------------------------------- /Python/DeobfuscationReformatted/main.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Python/DeobfuscationReformatted/Dumpcsfunctions.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Python/DeobfuscationReformatted/Deobfuscationfunctions.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | /.vscode 5 | /.idea 6 | -------------------------------------------------------------------------------- /Ideas/Plugins.txt: -------------------------------------------------------------------------------- 1 | If you have another deobfuscation method, feel free to implement it via the plugin system. A plugin is just 2 | another deobfuscation function that can be disabled or enabled. -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # THIS IS UNFINISHED 2 | **DO NOT TRY TO USE** 3 | 4 | A python tool for deobfuscating Unity games. 
5 | 6 | Disclaimer: THIS REPOSITORY IS FOR EDUCATIONAL PURPOSES. I AM NOT LIABLE FOR ANY MISUSE. 7 | -------------------------------------------------------------------------------- /.idea/toolchains.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/saveactions_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /Python/DeobfuscationReformatted/.idea/DeobfuscationReformatted.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /Todo/tasks.txt: -------------------------------------------------------------------------------- 1 | 1 (Done). Copy python code 2 | 2 (Done). Remove unnecessary python code 3 | 3. Refactor python code, add type hints, and add comments 4 | 4. Test refactored code 5 | 5. Add documentation 6 | 6. Refactor refactored code 7 | 7. Test refactored refactored code 8 | 8. Group refactored code into hierarchy of classes and files, change data structures, and choose better names 9 | 9. Test reformatted code 10 | 10. Make python code statically typed 11 | 11. Test statically typed code 12 | 12. Port python code to c++ 13 | 13. Test c++ code 14 | 14. 
Diagnose and optimize c++ code -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Win32", 5 | "includePath": [ 6 | "${workspaceFolder}/**" 7 | ], 8 | "defines": [ 9 | "_DEBUG", 10 | "UNICODE", 11 | "_UNICODE" 12 | ], 13 | "compilerPath": "D:\\MinGW\\bin\\gcc.exe", 14 | "cStandard": "gnu17", 15 | "cppStandard": "gnu++17", 16 | "intelliSenseMode": "windows-gcc-x64" 17 | } 18 | ], 19 | "version": 4 20 | } -------------------------------------------------------------------------------- /.idea/UnityDeobfuscator.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 16 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | # THIS IS UNFINISHED 2 | **DO NOT TRY TO USE** 3 | 4 | A python tool for deobfuscating Unity games. 5 | 6 | Put in apk (il2cpp or mono), dll or dummydll (all or just one), metadata and libil2cpp.so (for il2cpp) or dump.cs (for il2cpp) files of obfuscated and unobfuscated, and it will deobfuscate 7 | Can generate json output, txt output, deobfuscated dll(s), deobfuscated apk, deobfuscated dump.cs, and more! 8 | 9 | Features: 10 | -Flexible inputs and outputs 11 | -Automatic Dumping for Il2cpp Games 12 | -Lightning-fast 13 | -Employs 6 deobfuscation methods 14 | -Defeats Beebyte Deobfuscator 15 | -Supports deobfuscation of plugins 16 | -Flexible options 17 | -Force unity version, or it can be auto-detected 18 | -Deobfuscate everything, only one dll / method / class / namespace etc., or specify certain dll > namespace > class > method paths, subpaths (like all classes in blank namespace), regex, name, charset, etc. 
class UnexpectedDumpcsFormatError(Exception):
    # Thanks to https://www.programiz.com/python-programming/user-defined-exception
    """Exception raised when a section of dumpcs does not follow a pattern.

    The final text is available both as ``str(error)`` and as ``error.message``.
    """

    def __init__(self, message, sample=None, line=None):
        """
        :param message: Description of what was unexpected
        :param sample: Optional excerpt of the offending text; appended after
                       the message on its own line
        :param line: Optional line number the problem was detected at;
                     ``None`` means "unknown"
        """
        self.message = message
        if sample:
            self.message += f":\n{sample}"
        # Compare against None (not truthiness) so that line number 0 is still reported
        if line is not None:
            self.message = f"Detected at line {line}: " + self.message
        super().__init__(self.message)


class InvalidDumpcsError(Exception):
    # Thanks to https://www.programiz.com/python-programming/user-defined-exception
    """Exception raised when the dumpcs file at a given path is invalid."""

    def __init__(self, path):
        """
        :param path: Path of the dumpcs file that was rejected
        """
        self.message = f"Dumpcs file at path {path} is invalid"
        super().__init__(self.message)


def UnexpectedDumpcsFormatWarning(message):
    """
    Report a non-fatal dumpcs format problem by printing it to standard output.

    Despite the class-like name this is a plain function: it does not go through
    the ``warnings`` module and it returns ``None``.

    :param message: Description of what was unexpected
    """
    print(f"UnexpectedDumpcsFormatWarning: {message}")
built-in name in Python 7 | 8 | "public" in "public class classname: baseclass" or "private" in "private enum enumname" = object data type (CURRENT NAME IS "object type" MUST CHANGE) 9 | 10 | "classname" in "public class classname: baseclass" or "enumname" in "private enum enumname" = object name 11 | 12 | how many methods / fields / properties are in object = object size 13 | 14 | "readonly" / "public" / "abstract" / "const" / "bool" / "object int[]" / "char*" etc. 15 | = primitive type (including complex types) (CURRENT NAME IS "type" MUST CHANGE) 16 | 17 | "Dictionary" / "Vector3" / "Quaternion" etc. = unity type 18 | 19 | "PhotonNetworkingMessage" / "Color" (any user-defined type not defined by Unity) = user-defined type 20 | 21 | user-defined type / unity type / primitive type (including complex types) = data type (CURRENT NAME IS "type" MUST CHANGE) 22 | 23 | Content of // Methods = methodssection 24 | Each method = method 25 | Content of "/* GenericInstMethod :" = generics 26 | Each generic = generic 27 | Content of // Fields = fieldssection 28 | Each field = field 29 | Content of // Properties = propertiessection 30 | Each property = property 31 | Called "Property" instead of "property" because "property" is a built-in name in Python 32 | 33 | "int i = 1" in "function(int i = 1)" or "int a" in "function(int a,bool b=true)" = parameter / param 34 | "1" in "function(int i = 1)" = default argument (if none, it is set to None) 35 | "int " in "function(int a,bool b=true)" = parameter data type / param data type 36 | "a" in "function(int a,bool b=true)" = parameter name / param name 37 | 38 | "1" in "TypeDefIndex: 1" = typedefindex 39 | 40 | "UnityEngine" in "// Namespace: UnityEngine" = namespace 41 | 42 | "[ComVisibleAttribute] // RVA: 0x1 Offset: 0x1 VA: 0x1" 43 | or 44 | "[DebuggerBrowsableAttribute] // RVA: 0x1 Offset: 0x1 VA: 0x1" 45 | etc.
import warnings

try:
    import regex as re
except ImportError:
    # The third-party "regex" package is only strictly required for backward
    # regex searches (re.REVERSE). Fall back to the standard library so that
    # every other mode keeps working without it.
    import re


class ValueWarning(Warning):
    """Warning category for argument values that look suspicious but are not fatal."""


def readafter(sub: str, s: str, backward=False, regex=False, mustcontain=True, lengthwarning=True) -> str:
    """
    Returns the substring after the delimiter
    If the substring is not found in the string, returns the whole string
    Also supports backward (right to left) and regex

    @param sub: The substring (or regex) to read after
    @param s: The initial string
    @param backward: Whether to read from right to left (read after the LAST occurrence)
    @param regex: Whether to treat the substring as regex
    @param mustcontain: Whether to throw an AssertionError if the substring (or regex) is not present
    in the initial string
    @param lengthwarning: Whether to raise a warning if the substring is longer than the initial string,
    which should never happen (only applies if regex is False)
    @return: The partition of the string after the substring (or regex)

    :raises ImportError: Wrong regex module: Expected regex module, got re module.
    To fix this, replace "import re" with "import regex as re".
    Only raised when both regex and backward are True, because that is the only
    mode that needs the REVERSE flag
    :raises AssertionError: param mustcontain was True and substring (or regex) was not present
    in the initial string
    :raises ValueError: param regex was False and the substring was empty (raised by str.partition)
    :warns ValueWarning: param lengthwarning was True, param regex was False, and substring was longer
    than initial string

    Example:
        sub: "string"
        s: "Split this string by delimiter"
        return: " by delimiter"
    """
    if regex:
        if backward and not hasattr(re, "REVERSE"):
            raise ImportError("Wrong regex module: Expected regex module, got re module. To fix this, replace \"import "
                              "re\" with \"import regex as re\"")
        # The pattern is used exactly as given. Wrapping it in an extra group would
        # renumber the caller's own groups and break backreferences such as (a)\1.
        # If an invalid regex is passed, we let re.error throw - it can be handled by the caller
        if backward:
            match = re.search(sub, s, flags=re.REVERSE)
        else:
            match = re.search(sub, s)
        if match is None:
            if mustcontain:
                # Raised explicitly: an assert statement would vanish under "python -O"
                raise AssertionError(f"Regex \"{sub}\" was not found in \"{s}\"")
            return s
        return s[match.end():]

    if lengthwarning and len(sub) > len(s):
        warnings.warn(f"Call to readafter(sub=\"{sub}\", str=\"{s}\"): substring is longer than string",
                      ValueWarning, stacklevel=2)
    if backward:
        _, found, suffix = s.rpartition(sub)
    else:
        _, found, suffix = s.partition(sub)
    if not found:
        if mustcontain:
            raise AssertionError(f"Substring \"{sub}\" was not found in \"{s}\"")
        return s
    return suffix


if __name__ == "__main__":
    # Manual smoke test: emits a ValueWarning (substring longer than string) and
    # then raises AssertionError because "123" does not occur in "1".
    readafter("123", "1")
Any, Optional, Callable 5 | 6 | 7 | class FileEncodingException(Exception): 8 | pass 9 | 10 | 11 | class FileHandler: 12 | def __init__(self, encodings: tuple = ('utf-8', 'utf-16', 'utf-32')): 13 | self.encodings = encodings 14 | self.file_handles = {} 15 | 16 | def detect_file_encoding(self, path: str | os.PathLike) -> str: 17 | for encoding in self.encodings: 18 | f = open(path, 'r', encoding=encoding) 19 | try: 20 | f.read() 21 | except UnicodeDecodeError: 22 | pass 23 | else: 24 | return encoding 25 | finally: 26 | f.close() 27 | raise FileEncodingException(f"Failed to detect encoding of file {path}") 28 | 29 | def open_file(self, path: str | os.PathLike, mode: str, encoding=None) -> \ 30 | TextIO | io.TextIOWrapper | io.BufferedReader: 31 | if not (encoding or 'b' in mode): 32 | # Auto-detect encoding 33 | encoding = self.detect_file_encoding(path) 34 | return open(path, mode, encoding=encoding) 35 | 36 | def create_file_handle(self, path: str | os.PathLike, mode: str, encoding=None, name: str = None) -> \ 37 | TextIO | io.TextIOWrapper | io.BufferedReader: 38 | if not name: 39 | name = path 40 | handle = self.open_file(path, mode, encoding=encoding) 41 | self.file_handles[name] = handle 42 | return handle 43 | 44 | def get_file_handle(self, name) -> TextIO | io.TextIOWrapper | io.BufferedReader: 45 | return self.file_handles[name] 46 | 47 | def close_file_handle(self, name) -> FileHandler: 48 | self.file_handles[name].close() 49 | del self.file_handles[name] 50 | return self 51 | 52 | def close_all_file_handles(self) -> FileHandler: 53 | self.file_handles.clear() 54 | return self 55 | 56 | def read_file(self, path: str | os.PathLike, encoding=None, binary=False) -> str | bytes: 57 | if binary: 58 | with self.open_file(path, 'rb') as f: 59 | return f.read() 60 | else: 61 | with self.open_file(path, 'r', encoding) as f: 62 | return f.read() 63 | 64 | def read_bytes(self, path: str | os.PathLike, tohex=True, hexformat: Optional[Callable] = None) -> \ 65 | bytes 
| hex | Any: 66 | filecontent = self.read_file(path, binary=True) 67 | if tohex: 68 | if hexformat: 69 | return hexformat(filecontent.hex()) 70 | else: 71 | return filecontent.hex() 72 | else: 73 | return filecontent 74 | 75 | def write_file(self, path: str | os.PathLike, data: str | bytes, encoding, binary=False) -> \ 76 | TextIO | io.TextIOWrapper | io.BufferedReader | str: 77 | with self.open_file(path, encoding, binary) as f: 78 | f.write(data) 79 | return f 80 | -------------------------------------------------------------------------------- /Python/unitydeobfuscatorexceptions.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import os 3 | 4 | 5 | def _truncatestring(s: str, maxlength: Optional[int] = 1000, maxlines: Optional[int] = 20) -> str: 6 | """ 7 | Helper function to truncate a string to a given length 8 | 9 | :param s: The initial string 10 | :param maxlength: The amount of characters to truncate at. Takes precedence over maxlines. 11 | :param maxlines: The amount of lines to truncate at. Lower precedence than maxlength. 
12 | :return: If the string did not exceed either of the limits - the unmodified initial string 13 | If the string exceeded the length limit - the string truncated to the length limit with 14 | "...[Truncated]" added to the end 15 | If the string exceeded the line limit but not the length limit - the string truncated to the 16 | line limit with "...[Truncated]" added to the end 17 | """ 18 | if maxlength and len(s) > maxlength: 19 | return s[0:maxlength - 1] + "...[Truncated]" 20 | lines = s.splitlines() 21 | if maxlines and len(lines) > maxlines: 22 | return "\n".join(lines[0:maxlines - 1]) + "...[Truncated]" 23 | return s 24 | 25 | 26 | class ValueWarning(Warning): 27 | pass 28 | 29 | 30 | class IllegalArgumentException(ValueError): 31 | pass 32 | 33 | 34 | class IllegalArgumentWarning(ValueError): 35 | pass 36 | 37 | 38 | class UnexpectedDumpcsFormatError(Exception): 39 | """Exception raised when something unexpected is encountered in a section of dumpcs""" 40 | 41 | def __init__(self, message: str = None, sample: str = None, line: str = None): 42 | self.message = message if message else "" 43 | if sample: 44 | self.message += f":\n{sample}" 45 | if line: 46 | self.message = f"Detected at line {line}: " + self.message 47 | if self.message: 48 | super().__init__(self.message) 49 | 50 | 51 | class UnexpectedDumpcsFormatWarning(Warning): 52 | """Warning raised when something unexpected is encountered in a section of dumpcs""" 53 | 54 | def __init__(self, message: str = None, sample: str = None, line: str = None): 55 | self.message = message if message else "" 56 | if sample: 57 | self.message += f":\n{sample}" 58 | if line: 59 | self.message = f"Detected at line {line}: " + self.message 60 | if self.message: 61 | super().__init__(self.message) 62 | 63 | 64 | class InvalidDumpcsError(Exception): 65 | """Exception raised when a dumpcs in not a valid dumpcs""" 66 | 67 | def __init__(self, path: str | os.PathLike = None, content: str = None): 68 | """ 69 | Path and 
content are mutually exclusive, though this is not enforced. 70 | If both are provided, will default to path 71 | """ 72 | self.message = None 73 | if path: 74 | self.message = f"Dumpcs file at path {path} does not appear to be a valid dumpcs" 75 | elif content: 76 | # noinspection IncorrectFormatting 77 | self.message = "Dumpcs does not appear to be a valid dumpcs:\n" \ 78 | f"{_truncatestring(content, maxlength=1000, maxlines=20)}" 79 | if self.message: 80 | super().__init__(self.message) 81 | 82 | 83 | class InvalidDumpcsWarning(Warning): 84 | """Warning raised when a dumpcs in not a valid dumpcs""" 85 | 86 | def __init__(self, path: str | os.PathLike = None, content: str = None): 87 | """ 88 | Path and content are mutually exclusive, though this is not enforced. 89 | If both are provided, will default to path 90 | """ 91 | self.message = None 92 | if path: 93 | self.message = f"Dumpcs file at path {path} does not appear to be a valid dumpcs" 94 | elif content: 95 | # noinspection IncorrectFormatting 96 | self.message = "Dumpcs does not appear to be a valid dumpcs:\n" \ 97 | f"{_truncatestring(content, maxlength=1000, maxlines=20)}" 98 | if self.message: 99 | super().__init__(self.message) 100 | -------------------------------------------------------------------------------- /Python/deobfuscationtest.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from DeobfuscationRefactored import * 4 | 5 | 6 | def compareobjects(object1, object2, dosize = True, dofields = True): 7 | maxscore = (len(object1["methods"]) * _methodweighttrue) + ( 8 | len(object1["propertiess"]) * _propertyweighttrue) # calculate maximum score 9 | score = 0 10 | # Size 11 | if dosize: 12 | maxscore = maxscore + 8 # start off at 8, and subtract nothing for a perfect score 13 | size1 = (len(object1["fields"]) + len(object1["methods"]) + len( 14 | object1["propertiess"])) # how many methods, fields, and properties are there? 
import copy

from DeobfuscationRefactored import *


def compareobjects(object1, object2, dosize = True, dofields = True):
    """
    Decide whether object2 is an acceptable match for object1.

    A score is accumulated from size similarity and from shared fields, methods
    and properties, then compared (as a percentage of the maximum reachable
    score) against the tolerance.

    :param object1: Mapping describing the reference object; read via the keys
                    "methods", "propertiess", "fields" and the must-match keys below
    :param object2: Mapping describing the candidate object, same keys
    :param dosize: Whether to include the member-count comparison in the score
    :param dofields: Whether to include shared fields in the score
    :return: 0 as soon as a must-match attribute differs; otherwise a bool that
             is True when the percentage score is not below usetolerance

    NOTE(review): _methodweighttrue, _propertyweighttrue, _fieldweighttrue,
    _sizebenchmark, _sizeweightfalse, domethodparams, dopropertyattributes,
    usetolerance, endspeedtest, List, Dict and Optional are not defined in this
    file - presumably they come from the star import above; confirm.
    NOTE(review): the nesting below was reconstructed from a copy of this file
    whose indentation was lost; verify it against the real source.
    NOTE(review): score / maxscore divides by zero when maxscore ends up 0.
    """
    maxscore = (len(object1["methods"]) * _methodweighttrue) + (
        len(object1["propertiess"]) * _propertyweighttrue)  # calculate maximum score
    score = 0
    # Size
    if dosize:
        maxscore = maxscore + 8  # start off at 8, and subtract nothing for a perfect score
        size1 = (len(object1["fields"]) + len(object1["methods"]) + len(
            object1["propertiess"]))  # how many methods, fields, and properties are there?
        size2 = (len(object2["fields"]) + len(object2["methods"]) + len(
            object2["propertiess"]))  # how many methods, fields, and properties are there?
        score = 8 - (((
            abs(size2 - size1) / _sizebenchmark) * _sizeweightfalse))  # depending on the difference in size, this could have a small impact, or be very bad
    # NOTE(review): objectcomparisons, methodscomparisons and methodweights are
    # assigned but never read in this function
    objectcomparisons: List[Dict[str, bool, Optional[float]]] = [
        {"name": "base", "mustmatch": True, "weight": None},
        {"name": "genericdatatype", "mustmatch": True, "weight": None},
        {"name": "namespace", "mustmatch": True, "weight": None},
        {"name": "hasconstructor", "mustmatch": True, "weight": None},
        {"name": "hasstaticconstructor", "mustmatch": True, "weight": None},
        {"name": "hasfields", "mustmatch": True, "weight": None},
        {"name": "hasproperties", "mustmatch": True, "weight": None},
        {"name": "hasmethods", "mustmatch": True, "weight": None},
    ]
    methodscomparisons: List[Dict[str, bool, Optional[float]]] = [
    ]
    methodweights: List[Dict[str, float]] = [
        {"isconstructor": 7.0},
        {"isstaticconstructor": 7.0},
        {"isoperator": 5.0},
        {"isvirtual": 2.5},
    ]  # Anything else is 1
    # Attributes that must be equal in both objects for them to be a match at all
    mustmatch = ["base", "genericdatatype", "namespace", "hasconstructor",
                 "hasstaticconstructor", "hasfields", "hasproperties", "hasmethods"]
    for must in mustmatch:
        if not (object1[must] == object2[must]):
            return 0
    # Fields
    if dofields:
        maxscore = maxscore + (len(object1["fields"]) * _fieldweighttrue)
        # We are using the fields type objects, not the fields themselves
        fields1 = copy.deepcopy(object1["fields"])
        fields2 = list(object2["fields"])
        templist = list(
            fields2)  # it's very normal to add on things, but not as common to delete them. So, most of the fields in the unobfuscated (earlier) one
        # should also exist in the obfuscated one (newer)
        templist2 = list(fields1)
        # Each field of object1 may be paired with at most one equal field of
        # object2: a matched entry is removed from templist so it cannot match again
        for item in templist2:
            if len(templist) > 0:
                if (item in templist):
                    score = score + _fieldweighttrue
                    templist.remove(item)
    # Methods
    if domethodparams:
        # We are using the methods type objects, not the methods themselves
        methods1 = list(object1["methods"])
        methods2 = list(object2["methods"])
    else:
        # NOTE(review): .get returns None when "MethodTypes" is absent, and
        # list(None) raises TypeError
        methods1 = list(object1.get("MethodTypes"))
        methods2 = list(object2.get("MethodTypes"))
    templist = list(
        methods2)  # it's very normal to add on things, but not as common to delete them. So, most of the methods in the unobfuscated (earlier) one
    # should also exist in the obfuscated one (newer)
    templist2 = list(methods1)
    # Same one-to-one pairing as for fields
    for item in templist2:
        if len(templist) > 0:
            if (item in templist):
                score = score + _methodweighttrue
                templist.remove(item)
    # Properties
    # NOTE(review): maxscore always includes the properties weight, but the score
    # only gains property points when dopropertyattributes is true
    if dopropertyattributes:
        # We are using the properties type objects, not the properties themselves
        properties1 = list(object1["propertiess"])
        properties2 = list(object2["propertiess"])
        templist = list(
            properties2)  # it's very normal to add on things, but not as common to delete them. So, most of the properties in the unobfuscated (earlier) one
        # should also exist in the obfuscated one (newer)
        templist2 = list(properties1)
        # Same one-to-one pairing as for fields
        for item in templist2:
            if len(templist) > 0:
                if (item in templist):
                    score = score + _propertyweighttrue
                    templist.remove(item)
    # To do: method params, number of shared classes for class
    matchscore = ((score / maxscore) * 100)  # NOTE(review): computed but not used; the return recomputes it
    endspeedtest()
    return (not (((score / maxscore) * 100) < usetolerance))  # is percentage score not less than tolerated percent?
SPECIALCONSTRUCTORNAMES = True
FIELDPREFIXES = True
# Where the generated C++ is written when this file is run as a script
OUTPUTPATH = r"C:\Users\zachy\OneDrive\Documents\Work\Temp\Python Temps\offsetstest.cpp"


def _isplainname(name):
    """Return True if name has none of the markers ("__", ".", "<") that exclude a name from export."""
    return not ("__" in name or "." in name or "<" in name)


def _exportablemembers(Object):
    """Return (fields, methods) of Object that carry an offset and have a plain name."""
    fields = [field for field in Object["fields"]
              if field["hasoffset"] and _isplainname(field["name"])]
    methods = [method for method in Object["methods"]
               if method["hasoffsetdata"] and _isplainname(method["name"])]
    return fields, methods


def _fieldconstantname(field):
    """Return the constant name for a field, adding an "m_" style prefix when FIELDPREFIXES is on."""
    name = field["name"]
    if not FIELDPREFIXES or name.startswith("m_"):
        return name
    if name.startswith("_"):
        return "m" + name
    return "m_" + name


def _methodconstantname(method):
    """Return the constant name for a method ("ctor" / "cctor" for constructors when enabled)."""
    if method["isconstructor"] and SPECIALCONSTRUCTORNAMES:
        return "ctor"
    if method["isstaticconstructor"] and SPECIALCONSTRUCTORNAMES:
        return "cctor"
    return method["name"]


def _uniquename(name, usednames):
    """Return name unchanged the first time it is seen, otherwise with its occurrence count appended."""
    count = usednames.get(name, 0) + 1
    usednames[name] = count
    return name if count == 1 else name + str(count)


def _objectlines(Object, fields, methods, indent):
    """Return the lines of one "namespace <object>{ ... }" block, each prefixed with indent."""
    lines = [f"{indent}namespace {Object['name']}{{"]
    usednames = {}  # shared by fields and methods so the constants never clash
    if fields:
        lines.append(f"{indent}\t//Fields")
        for field in fields:
            name = _uniquename(_fieldconstantname(field), usednames)
            lines.append(f"{indent}\t\tconst uint64_t {name} = {field['offset']};")
    if methods:
        lines.append(f"{indent}\t//Methods")
        for method in methods:
            name = _uniquename(_methodconstantname(method), usednames)
            lines.append(f"{indent}\t\tconst uint64_t {name} = {method['offset']};")
    lines.append(f"{indent}}}")
    return lines


def generateoffsetscode(objects):
    """
    Generate C++ source that exposes field and method offsets as constants.

    Every exported object becomes a C++ namespace named after the object;
    objects that have a namespace are nested inside a namespace of that name.
    Objects and members whose name contains "__", "." or "<", members without
    offset data, and objects left with no members are skipped.

    :param objects: Iterable of mappings with the keys "name", "namespace"
                    (None for the global namespace), "fields" (each with
                    "name", "hasoffset", "offset") and "methods" (each with
                    "name", "hasoffsetdata", "offset", "isconstructor",
                    "isstaticconstructor")
    :return: The generated code without trailing newlines ("" if nothing is exported)
    """
    globalnamespace = []
    namespaces = {}
    for Object in objects:
        fields, methods = _exportablemembers(Object)
        if not _isplainname(Object["name"]) or not (fields or methods):
            continue
        # Keep the filtered members with the object so that each object is
        # emitted with its own members
        entry = (Object, fields, methods)
        namespace = Object["namespace"]
        if namespace is None:
            globalnamespace.append(entry)
        else:
            namespaces.setdefault(namespace, []).append(entry)

    blocks = []
    for Object, fields, methods in globalnamespace:
        blocks.append("\n".join(_objectlines(Object, fields, methods, "")))
    for namespace, entries in namespaces.items():
        lines = [f"namespace {namespace}{{"]
        for Object, fields, methods in entries:
            lines.extend(_objectlines(Object, fields, methods, "\t"))
        lines.append("}")
        blocks.append("\n".join(lines))
    return "\n\n".join(blocks)


if __name__ == "__main__":
    # dumpcspath, encoding, dumpcs_getobjects and dumpcs_removeattributes are
    # provided by this star import
    from DeobfuscationRefactored import *

    with open(dumpcspath, encoding=encoding) as f:
        objects = dumpcs_getobjects(dumpcs_removeattributes(f.read()), getmethodhex=False)
    code = generateoffsetscode(objects)
    with open(OUTPUTPATH, 'w', encoding=encoding) as f:
        f.write(code)
    print(code)
UnityDeobfuscator deobfuscates the versions in order, using the 5 | deobfuscated previous version as the unobfuscated reference for the next obfuscated version. This minimizes differences between game versions, allowing for more accurate deobfuscation. 6 | 7 | Output: 8 | Can generate json output, txt output, deobfuscated dll(s), deobfuscated apk, deobfuscated dump.cs, and more! 9 | 10 | Options: 11 | You can create multiple packages of options and give them names. This is useful if you want to have different 12 | settings (such as the trust unity types option) for different games. You can also import and export options 13 | files (they are stored as .json files) 14 | -Version auto-detection (scans game apk, binary, dump.cs, or whatever else is input to find game version) 15 | -Multi-select deobfuscation methods 16 | -Enable and disable certain things to balance speed and accuracy 17 | -Options for what can and cannot change - data types, whether class has a certain method, data type of class, 18 | etc. 19 | -Option to add your own data types and unity types (in case the project missed something, or unity implements new types) 20 | -Multi-select plugins to auto-detect and use in deobfuscation 21 | -Multi-select plugins to auto-detect and deobfuscate 22 | -Comes with a few known plugins like Photon and CodeStage, but you can add more by adding apk (il2cpp or mono), dll or dummydll (all or just one), 23 | metadata and libil2cpp.so (for il2cpp) or dump.cs (for il2cpp) files and specifying the namespace(s) used by 24 | the plugin; the plugin will then be processed and the deobfuscation data will be added to the local database. 
25 | You can also manually add custom types to trust 26 | -Specify certain namespaces, classes, or methods to exclude from deobfuscation (in unobfuscated, obfuscated, or both) 27 | -Trust names (if two items have the same name, they are a match in deobfuscation) 28 | -Only deobfuscate names with certain naming regex or chars 29 | -Ignore names with certain naming regex or chars when deobfuscating 30 | -Configure output 31 | -Trust Unity types (Vector3, Quaternion, string, etc.) 32 | -Force or auto-detect unity version 33 | -Trust user-defined types 34 | -Trust plugin types 35 | -Trust order (even if the game does not scramble methods / fields / properties, this is still not recommended 36 | because games may add or remove fields / methods / properties). 37 | -Trust operators (if method name starts with "op_", then it is an operator) 38 | -Configure deobfuscation tolerance 39 | -Configure how many candidates to accept (If there are more candidates than this number, only the best matches 40 | will be kept.) 41 | -Configure confidence to remove (if a match exceeds this confidence, it will be excluded 42 | when processing other items) 43 | -Choose between taking only the best match or taking all matches 44 | -Change internal settings like size weight and field weight 45 | -Configure how to order classes / fields / methods etc.: Keep order of obfuscated, reorder to match unobfuscated, 46 | sort alphabetically, etc. 47 | -Enable / disable type weight 48 | -Configure type weight (eg: whether method is virtual may be very heavy) 49 | 50 | Deobfuscation Methods: 51 | # - Brute Force Deobfuscation (Comparative Deobfuscation) : A deobfuscation method that works by comparing unobfuscated and obfuscated dump.cs. It finds the class or member etc. by name. Then, it takes the class, and replaces the names and dynamic values with a certain string ('offset','methodname','classname','comment',etc.). This way, things such as data types, params, # of methods and fields, etc. 
can be compared. It then converts this into lists of methods, and each method has its method type, and the method params. The same is done for fields and the class itself. There is a strikes system with a customizable strictness. It can automatically adapt by narrowing down the perfect strictness by moving it up and down and seeing how few results it can get while still getting results (the toggleable smart mode, changeable in settings or function parameters). This method takes a long time. 52 | # - Regex search deobfuscation (String search deobfuscation): This method is faster, simpler, and better. Both are useful though. This method finds an unchanging string (such as "private readonly Dictionary<") by searching strings until it finds one with low occurrences (like 300 or fewer), and it picks the one with the lowest. It can also remove names / dynamic values and use regex search. It can also use the renamer to remove changing things. Then it sees if this comes up in obfuscated. It uses brute force deobfuscation on the resulting classes, methods, etc. This is done until the right one is found. 53 | # - Mutual Name Deobfuscation (Cross Reference Deobfuscation): This deobfuscation method is kind of like string search deobfuscation. It searches for the name you want to deobfuscate and finds other instances of the name, either as parameters in methods, methods with the same name in other classes, or fields with the same name in other classes. It tries to find one of these where the method or class is unobfuscated, or known through previous deobfuscation. Then, it goes to this class and uses brute force deobfuscation to find the right method or field. 54 | # - String Count Deobfuscation: This deobfuscation method is kind of like regex search and mutual name deobfuscation. It compares the number of occurrences of a name, string, or regex between game versions. 
55 | # - Same Name Deobfuscation: In some games, including ones obfuscated with Beebyte Obfuscator, all occurrences of the deobfuscated name are replaced with the obfuscated name (like find and replace). For example, if you have a field called health in both your player and vehicle classes, both fields will be changed to the same name. This deobfuscation mode can be forced by the user, or detected by the program when it finds this out via another form of deobfuscation. When activated, this mode simply finds and replaces text. 56 | # - Pattern Search Deobfuscation (AOB Deobfuscation): This deobfuscation method generates an aob for an unobfuscated class, method, field, etc., then searches for the aob in the new game version. 57 | # - Order Deobfuscation: This deobfuscation method is not recommended. Inserting methods / fields will break it. 58 | It relies on fields / methods / classes etc. not being scrambled. For example: 59 | Method b comes two after method a 60 | You know which method method a is 61 | Therefore, you can assume method b is the method two methods after method a. 62 | # - Relative Offset Deobfuscation: This deobfuscation method is not recommended. It relies on 63 | fields / methods not being scrambled. Inserting methods / fields will break it. 64 | This deobfuscation method uses relative field / method offsets. For example: 65 | Width's field offset is 0x1 66 | Length's field offset is 0x9 67 | You know the field offset of width in the obfuscated version is 0x33 68 | Therefore, you can assume the length field is the field with the field offset of 0x3B (0x33 + 0x8). 69 | # - Override Matching Deobfuscation: If a class is derived, but part or all of the base class's methods are unknown, then overridden methods can be deobfuscated using process of elimination. For example, if class B inherits from class A, which has one virtual method of type int, and class B has one overridden method of type int, it can be concluded that both methods are the same. 
This method works both ways - for deobfuscating derived classes using base classes, and deobfuscating base classes using derived classes. 70 | # - Cross Reference Deobfuscation: This method searches for uses of a class, method, etc. For example, it determines if any classes are derived from a class. It also searches the code (libil2cpp.so binary) to find references to classes, namespaces, fields, methods, etc. 71 | # - Same Namespace Deobfuscation: This method determines what namespaces a class, method, etc. uses by searching 72 | the code (libil2cpp.so binary), dll, or dummy dll. It then uses this to compare. 73 | # - Code Deobfuscation: This method compares code. If the game is mono, the code is already provided. If the game is il2cpp, it searches the code (libil2cpp.so binary). 74 | # - CPP2IL Deobfuscation: This method dumps the game with CPP2IL (or uses an existing dump) and compares data from the CPP2IL dump. 75 | # - Nested Object Deobfuscation: If a class, struct, enum, etc. is nested in another one, the outer one can be deobfuscated if the inner one is known. If the outer is deobfuscated, it makes it much easier to deobfuscate the inner one. This nesting hierarchy can span multiple layers of nesting, and each layer can help deobfuscate the other layers. 76 | # - Relative offset deobfuscation: This method will only work if objects are not shuffled during obfuscation. If two objects are close together, then the relative offset between objects (or their fields, methods, etc.) should be the same between updates. This will break if an object is inserted in between objects, or if an object between them is moved or removed. It will also break if methods, fields, etc. are added, removed, or changed. 
-------------------------------------------------------------------------------- /Python/old.py: -------------------------------------------------------------------------------- 1 | # noinspection PyUnresolvedReferences 2 | import copy 3 | # noinspection PyUnresolvedReferences 4 | from typing import * 5 | import string 6 | from functools import * 7 | 8 | 9 | def removeattributes(thisobject, toremovenewlines=False): 10 | """ 11 | Function has been removed 12 | """ 13 | raise NotImplementedError("removeattributes function has been removed.\ 14 | Attributes are removed from the whole dumpcs, so the call to removeattributes can be deleted.") 15 | 16 | 17 | def removewhitespace(fullstr, beginning=True, end=True, allwhitespace=False): 18 | """ 19 | Function has been removed 20 | """ 21 | raise NotImplementedError("removewhitespace function has been removed.\ 22 | To trim whitespace, use the trim function. To remove all whitespace, use the removeallwhitespace funnction.") 23 | 24 | def getobjects(*args,**kwargs): 25 | """ 26 | Function has been removed 27 | """ 28 | raise NotImplementedError("getobjects function has been removed.\ 29 | To get objects, use dumpcs_getobjects directly on dumpcs.") 30 | 31 | def getfullobjects(*args,**kwargs): 32 | """ 33 | Function has been removed 34 | """ 35 | raise NotImplementedError("getfullobjects function has been removed.\ 36 | To get objects, use dumpcs_getobjects directly on dumpcs.") 37 | 38 | 39 | def readaftersubstring(s: str,sub: str) -> str: 40 | #Done 41 | """ 42 | Docs Not Done! 43 | 44 | This function is based off of 45 | https://stackoverflow.com/questions/12572362/how-to-get-a-string-after-a-specific-substring/57064170#57064170 46 | Possible Improvements: 47 | 1. 
Directly returning instead of using suffix variable may be faster, but sacrifices 48 | readability and simplicity 49 | """ 50 | prefix, success, suffix = s.partition(sub) 51 | if not success: 52 | suffix = prefix 53 | return(suffix) 54 | 55 | 56 | def readbeforesubstring(s: str,sub: str) -> str: 57 | #Done 58 | """ 59 | Docs Not Done! 60 | 61 | This function is based off of 62 | https://stackoverflow.com/questions/12572362/how-to-get-a-string-after-a-specific-substring/57064170#57064170 63 | 64 | Possible Improvements: 65 | 1. Directly returning instead of using prefix variable may be faster, but sacrifices 66 | readability and simplicity 67 | """ 68 | prefix, success, suffix = s.partition(sub) 69 | if not success: 70 | prefix = suffix 71 | return (prefix) 72 | 73 | 74 | def removesubstring(s: str, sub: str) -> str: 75 | #Done 76 | """ 77 | Possible Improvements: 78 | 79 | Remove one substring from a string 80 | 81 | Example: 82 | String: "Removing Substrings" 83 | Sub: "ing" 84 | Return: "Remov Substrs" 85 | 86 | Arguments: 87 | s: string to remove substring from 88 | sub: substring to remove from string 89 | 90 | Return: 91 | string with substring removed 92 | """ 93 | return s.replace(sub, "") 94 | 95 | 96 | def removesubstrings(s: str, subs: list[str]) -> str: 97 | #Done 98 | """ 99 | Possible Improvements: 100 | 101 | Remove multiple substring from a string, in order of list 102 | 103 | Example: 104 | String: "Removing Substrings" 105 | Subs: ["e","in","ing"] 106 | Return: "Rmovg Substrgs" 107 | 108 | Arguments: 109 | s: string to remove substrings from 110 | subs: list of substrings to remove from string 111 | 112 | Return: 113 | string with substrings removed 114 | """ 115 | for sub in subs: 116 | s = removesubstring(s, sub) 117 | return s 118 | 119 | 120 | def replacesubstring(s: str, sub: str, replace: str) -> str: 121 | #Done 122 | """ 123 | Doc Not Done 124 | """ 125 | return s.replace(sub, replace) 126 | 127 | 128 | def replacesubstrings(s: str, subs: 
list[str], replace: str) -> str: 129 | #Done 130 | """ 131 | Doc Not Done 132 | """ 133 | for sub in subs: 134 | s = replacesubstring(s, sub, replace) 135 | return s 136 | 137 | 138 | def removeallwhitespace(s: str) -> str: 139 | #Done, but maybe could be optimized 140 | #NOTE: Function is named removeallwhitespace because old function removewhitespace 141 | #was for trimming. Once all functions use trim instead, this function can be renamed 142 | #back to removewhitespace. 143 | """ 144 | Possible Improvements: 145 | Make whitespace a constant instead of unpacking each time function is called 146 | 147 | Removes all whitespace from a string 148 | Does not just trim leading and trailing. For that, use the trim function. 149 | 150 | Example: 151 | String: " Whitespace will be removed from 152 | this string" 153 | Return: "Whitespacewillberemovedfromthisstring" 154 | 155 | Arguments: 156 | s: string to remove whitespace from 157 | 158 | Return: 159 | string with whitespace removed 160 | """ 161 | # Should have a constant instead of unpacking 162 | # string.whitespace each time 163 | _whitespace = [*string.whitespace] 164 | return removesubstrings(s, _whitespace) 165 | 166 | 167 | def removeblanklines(s: str, toremovewhitespacelines=True) -> str: 168 | #Not Done 169 | """ 170 | Possible Improvements: 171 | 172 | Removes all blank lines from a string 173 | 174 | Example: 175 | String: " 176 | blank 177 | lines will be 178 | 179 | removed from 180 | 181 | thisstr 182 | ing" 183 | toremovewhitespacelines: true 184 | Return: "blank 185 | lines will be 186 | removed from 187 | thisstr 188 | ing" 189 | 190 | Arguments: 191 | s: string to remove blank lines from 192 | toremovewhitespacelines: whether to remove lines with only whitespace (eg: " ") 193 | 194 | Return: 195 | string with blank lines removed 196 | """ 197 | if toremovewhitespacelines: 198 | raise NotImplementedError("removeblanklines with toremovewhitespacelines is not done") 199 | else: 200 | return 
replacesubstring(s, "\n\n", "\n") 201 | 202 | 203 | def iswhitespace(s: str,totreatblankaswhitespace=True) -> bool: 204 | #Done 205 | """ 206 | Possible Improvements: 207 | 208 | Detects if a string is all whitespace 209 | Works on strings with any length, including 0 210 | 211 | Example: 212 | String: " 213 | " 214 | Return: true 215 | 216 | String: " hello world! 217 | hi" 218 | Return: false 219 | 220 | Arguments: 221 | s: string to check for whitespace 222 | totreatblankaswhitespace: whether to treat "" as whitespace 223 | 224 | Return: 225 | bool whether string is all whitespace 226 | """ 227 | if s == "": 228 | if totreatblankaswhitespace: 229 | return(True) 230 | else: 231 | return(False) 232 | else: 233 | return(s.isspace()) 234 | 235 | 236 | def trim(s: str, leading=True, trailing=True) -> str: 237 | """ 238 | Possible Improvements: 239 | 240 | Trims whitespace from a string 241 | 242 | Example: 243 | String: " hello world! 244 | " 245 | Leading: true 246 | Trailing: true 247 | Return: "hello world!" 248 | 249 | Arguments: 250 | s: string to trim whitespace from 251 | leading: whether to trim leading whitespace 252 | trailing: whether to trim trailing whitespace 253 | 254 | Return: 255 | string with trimmed whitespace 256 | """ 257 | if leading and trailing: 258 | return s.strip() 259 | elif leading: 260 | return s.lstrip() 261 | elif trailing: 262 | return s.rstrip() 263 | else: 264 | return s 265 | 266 | def getwords(s: str) -> list[str]: 267 | # Done 268 | """ 269 | Possible Improvements: 270 | 1. Creating a new list is inefficient, modifying existing list would be ideal 271 | 2. 
Directly using s.split() instead of using words variable may be faster, but 272 | sacrifices readability and simplicity and simplicity 273 | 274 | 275 | Splits a string into a list of words 276 | Treats any type of whitespace as a word delimiter, including new lines and tabs 277 | Treats chunks of whitespace as delimiters (ex: 2 spaces has the same effect as 1 space) 278 | 279 | Example: 280 | String: "the quick 281 | brown fox 282 | 283 | abcdefg " 284 | Return: ["the","quick","brown","fox","abcdefg"] 285 | 286 | Arguments: 287 | s: string to split into words 288 | 289 | Return: 290 | list of the string's words 291 | """ 292 | return s.split() 293 | 294 | 295 | def wordstostring(words: list[str],totrimwords=False,toignoreblankwords=True,toignorewhitespacewords=False,concatenator=" ") -> str: 296 | #Done 297 | """ 298 | Possible Improvements: 299 | 1. Creating a new list is inefficient, modifying existing list would be ideal 300 | 301 | joins a list of words into a string 302 | 303 | Example: 304 | words: ["hello","a","b"," ","cd\n","","hey",""] 305 | concatenator: " " 306 | toignoreblankwords: false 307 | totrimwords: false 308 | toignorewhitespacewords: false 309 | Return: "hello a b cd 310 | hey " 311 | 312 | Arguments: 313 | words: list of words to join into a string 314 | toignoreblankwords: whether to concatenate or ignore blank words 315 | totrimwords: whether to trim leading and trailing whitespace from each word 316 | (only leading / only trailing whitespace is not supported) 317 | toignorewhitespacewords: whether to concatenate or ignore words with only whitespace 318 | concatenator: the string to put in between words (default space) 319 | 320 | Return: 321 | string containing all the words concatenated by concatenator (default space) 322 | """ 323 | if not(toignoreblankwords or toignorewhitespacewords or totrimwords): 324 | return concatenator.join(words) 325 | else: 326 | wordstoconcatenate = [] 327 | for word in words: 328 | if iswhitespace(word) and 
toignorewhitespacewords: 329 | continue 330 | if totrimwords: 331 | word = trim(word, True, True) 332 | if not (word == "" and toignoreblankwords): 333 | wordstoconcatenate.append(word) 334 | return(concatenator.join(wordstoconcatenate)) 335 | 336 | 337 | def getlines(s: str, toremoveblanklines=False, totrimlines=False) -> list[str]: 338 | # Done, but maybe could be optimized 339 | """ 340 | Possible Improvements: 341 | 1. Creating a new list is inefficient, modifying existing list would be ideal 342 | 2. Directly using s.splitlines() instead of using lines variable may be faster, 343 | but sacrifices readability and simplicity and simplicity 344 | 345 | Splits a string into a list of lines 346 | 347 | Example: 348 | String: "a 349 | 350 | b 351 | 352 | c " 353 | toremoveblanklines: true 354 | totrimlines: true 355 | Return: ["a","b","c"] 356 | 357 | Arguments: 358 | s: string to split into lines 359 | toremoveblanklines: whether to ignore lines that are blank or only whitespace 360 | totrimlines: whether to trim leading and trailing whitespace from each line 361 | (only leading / only trailing whitespace is not supported) 362 | 363 | Return: 364 | list of the string's lines 365 | """ 366 | lines = s.splitlines() 367 | if toremoveblanklines or totrimlines: 368 | newlines = [] 369 | for line in lines: 370 | if totrimlines: 371 | line = trim(line, True, True) 372 | if not (iswhitespace(line) and toremoveblanklines): 373 | newlines.append(line) 374 | return newlines 375 | else: 376 | return lines 377 | 378 | 379 | def linestostring(lines: list[str],totrimlines=True,toignoreblanklines=False) -> str: 380 | # Done 381 | """ 382 | Possible Improvements: 383 | 384 | joins a list of lines into a string 385 | 386 | Example: 387 | lines: ["a","","b"," ","cd",""] 388 | toignoreblanklines: False 389 | totrimlines: False 390 | Return: "a 391 | 392 | b 393 | 394 | cd 395 | " 396 | 397 | Arguments: 398 | lines: list of lines to join into a string 399 | toignoreblanklines: whether 
to concatenate or ignore lines that are blank or only whitespace 400 | totrimlines: whether to trim leading and trailing whitespace from each line 401 | (only leading / only trailing whitespace is not supported) 402 | 403 | Return: 404 | string containing all the lines concatenated by new line 405 | """ 406 | return wordstostring(lines,totrimlines,toignoreblanklines,toignoreblanklines,"\n") 407 | 408 | 409 | def dumpcs_isvalid(dumpcs: str) -> bool: 410 | #Not done 411 | """ 412 | Bad detection, needs proper algorithm 413 | 414 | Determines whether a dumpcs file is valid 415 | All dumpcs files entered should be valid, but of course they must be checked. 416 | Note: This function only performs a short check on the file as a whole. 417 | On the other hand, the dumpcs_checkformat function analyzes the whole thing and is very picky . 418 | 419 | Arguments: 420 | dumpcs: the string of the dumpcs file 421 | 422 | Return: 423 | bool whether the dumpcs is valid 424 | """ 425 | # return "// Image" in dumpcs and "// RVA: 0x" in dumpcs and "// Namespace:" in dumpcs\ 426 | # and " TypeDefIndex: " in dumpcs 427 | raise NotImplementedError("Dumpcs_isvalid function needs improvement") 428 | if len(dumpcs) == 0: 429 | return False 430 | return True 431 | 432 | 433 | def dumpcs_checkformat(dumpcs: str) -> list[str]: 434 | #Not done 435 | """ 436 | Scan dump.cs for unexpected formatting 437 | Returns list of unexpected formatting errors 438 | 439 | Arguments: 440 | dumpcs: the string of the dumpcs file 441 | 442 | Return: 443 | List of errors with the line number and error 444 | """ 445 | raise NotImplementedError("Dumpcs_checkformat function not completed") 446 | 447 | 448 | def dumpcs_hasattributes(dumpcs: str) -> bool: 449 | #Not done 450 | """ 451 | Bad detection, needs proper algorithm 452 | 453 | Determines whether a dumpcs file has attributes 454 | 455 | Arguments: 456 | dumpcs: the string of the dumpcs file 457 | 458 | Return: 459 | bool whether the dumpcs has attributes 460 
| """ 461 | raise NotImplementedError("Dumpcs_hasattributes function not completed") 462 | #return "[CompilerGeneratedAttribute]" in dumpcs 463 | 464 | 465 | def dumpcs_constructor(path: str, attributeswarning=False) -> str: 466 | #Done, but needs improvement 467 | """ 468 | Possible Improvements: 469 | 1. No need to warn about attributes as they should be removed automatically. 470 | However, I want to keep this code commented out and not delete it in case I 471 | change my mind later. 472 | 2. Setting dumpcs variable after removing attributes makes code more readable and concise, 473 | but is less inefficient than directing passing result of dumpcs_removeattributes. 474 | In addition, attributes must be removed *before* dumpcs is checked for format errors 475 | 3. Does try except clause make a difference? IDK whether to keep it. 476 | 477 | Loads and initializes a dumpcs file 478 | 479 | Arguments: 480 | path: the file path of the dumpcs file 481 | 482 | Returns: 483 | string containing the contents of the dump.cs file 484 | """ 485 | #Does this try except clause make a difference? 
IDK whether to keep it 486 | #try: 487 | #dumpcs = filehandler.read_file(path) 488 | #raise NotImplementedError("filehandler.read_file function does not exist") 489 | #except Exception as exception: 490 | #raise exception 491 | # dumpcs = filehandler.read_file(path) 492 | raise NotImplementedError("filehandler.read_file function does not exist") 493 | if not(dumpcs_isvalid(dumpcs)): 494 | #raise exceptions.errors.invaliddumpcs(path) 495 | raise NotImplementedError("exceptions.errors.invaliddumpcs function does not exist") 496 | #No need to warn about attributes as they should be removed automatically 497 | #if attributeswarning and dumpcs_hasattributes(dumpcs): 498 | #exceptions.warnings.dumpcsattributeswarning(path) 499 | if dumpcs_hasattributes(dumpcs): 500 | dumpcs = dumpcs_removeattributes(dumpcs) 501 | formaterrors = dumpcs_checkformat(dumpcs) 502 | if formaterrors != []: 503 | #exceptions.warnings.unexpecteddumpcsformatearly(path,formaterrors) 504 | raise NotImplementedError("exceptions.warnings.unexpecteddumpcsformatearly function does not exist") 505 | return dumpcs 506 | 507 | 508 | def dumpcs_removeattributes(dumpcs: str) -> str: 509 | #Not done 510 | """ 511 | Possible Improvements: 512 | 1. Creating a new list of lines is inefficient, modifying existing list would be ideal 513 | 2. 
Directly using getlines() instead of using lines variable may be faster, but sacrifices readability and simplicity 514 | 515 | Removes attributes from a dumpcs file 516 | Does not process attributes, only removes them 517 | Does not remove blank lines yet 518 | 519 | Arguments: 520 | dumpcs: the string of the dumpcs file 521 | 522 | Returns: 523 | string containing dumpcs content with attributes removed 524 | """ 525 | lines = getlines(dumpcs, False, False) 526 | newlines = [] 527 | for line in lines: 528 | #Trim leading whitespace from line 529 | trimmedline = trim(line, True, False) 530 | # If the first non-whitespace character on the line is a square bracket, 531 | # this means the line is an attribute 532 | if trimmedline[0] != "[": 533 | #The line is not an attribute line, so keep it 534 | newlines.append(line) 535 | return linestostring(newlines) 536 | 537 | 538 | def dumpcsobject_getnamespace(content): 539 | # Not Done 540 | """ 541 | Docs Not Done! 542 | Possible Improvements: 543 | 1. Using string.find "\n" and taking a substring is faster than splitting the object into lines, 544 | but sacrifices readability and simplicity 545 | 2. Directly using getlines() instead of using lines variable may be faster, but sacrifices 546 | readability and simplicity 547 | 3. Directly returning instead of using variable may be faster, but sacrifices 548 | readability and simplicity 549 | 4. Directly using lines[0] instead of using namespaceline variable may be faster, but sacrifices 550 | readability and simplicity 551 | 552 | Gets the namespace of a dumpcs object 553 | """ 554 | lines = lru_cache(getlines(content),maxsize=10, typed=False) 555 | namespaceline = lines[0] 556 | if namespaceline == "// Namespace: ": 557 | namespace = "" 558 | else: 559 | namespace = lru_cache(readaftersubstring(namespaceline,"// Namespace: "),maxsize=2048, typed=False) 560 | return(namespace) 561 | 562 | def dumpcsobject_gettype(content): 563 | # Not Done 564 | """ 565 | Docs Not Done! 
566 | Possible Improvements: 567 | 1. Using string.find "\n" and taking a substring is faster than splitting the object into lines, 568 | but sacrifices readability and simplicity 569 | 2. Directly using getlines() instead of using lines variable may be faster, but sacrifices 570 | readability and simplicity 571 | 3. Directly returning instead of using type variable and breaking loop out of loop may be faster, 572 | but sacrifices readability and simplicity 573 | 4. Directly using lines[1] instead of using objectdeclarationline variable may be faster, but sacrifices 574 | readability and simplicity 575 | 5. Object types should be a constant 576 | 577 | Gets the type (struct, class, enum, or interface) of a dumpcs object 578 | """ 579 | objecttypes = set("class,struct,interface,enum") # should be a constant! 580 | lines = lru_cache(getlines(content),maxsize=10, typed=False) 581 | objectdeclarationline = lines[1] 582 | words = lru_cache(getwords(objectdeclarationline),maxsize=3, typed=False) 583 | for word in words: 584 | if word in objecttypes: 585 | return(word) 586 | # Object type (class, struct, enum, interface) not found 587 | #exceptions.errors.unexpecteddumpcsformat(f"Could not find type of object:\n{content}") 588 | raise NotImplementedError("exceptions.errors.unexpecteddumpcsformat function does not exist") 589 | return(None) 590 | 591 | def dumpcsobject_getdatatype(content): 592 | # Not Done 593 | """ 594 | Docs Not Done! 595 | Possible Improvements: 596 | 1. Using string.find "\n" and taking a substring is faster than splitting the object into lines, 597 | but sacrifices readability and simplicity 598 | 2. Directly using getlines() instead of using lines variable may be faster, but sacrifices 599 | readability and simplicity 600 | 3. Directly returning instead of using data type variable and breaking loop out of loop may be faster, 601 | but sacrifices readability and simplicity 602 | 4. 
Directly using lines[1] instead of using objectdeclarationline variable may be faster, but sacrifices 603 | readability and simplicity 604 | 5. Object types should be a constant 605 | 6. Using a string for data type instead of using a list and concatenating it into a string may be faster, 606 | but sacrifices readability and simplicity 607 | 608 | Gets the data type of a dumpcs object 609 | """ 610 | objecttypes = set("class,struct,interface,enum") # should be a constant! 611 | lines = lru_cache(getlines(content),maxsize=10, typed=False) 612 | objectdeclarationline = lines[1] 613 | words = lru_cache(getwords(objectdeclarationline),maxsize=3, typed=False) 614 | datatypewords = [] 615 | for word in words: 616 | if word in objecttypes: 617 | return(wordstostring(datatypewords)) 618 | else: 619 | datatypewords.append(word) 620 | # Object type (class, struct, enum, interface) not found 621 | #exceptions.errors.unexpecteddumpcsformat(f"Could not find type of object:\n{content}") 622 | raise NotImplementedError("exceptions.errors.unexpecteddumpcsformat function does not exist") 623 | return(None) 624 | 625 | def dumpcsobject_getname(content): 626 | # Not Done 627 | """ 628 | Docs Not Done! 629 | Possible Improvements: 630 | 1. Using string.find "\n" and taking a substring is faster than splitting the object into lines, 631 | but sacrifices readability and simplicity 632 | 2. Directly using getlines() instead of using lines variable may be faster, but sacrifices 633 | readability and simplicity 634 | 3. Directly returning instead of using data type variable and breaking loop out of loop may be faster, 635 | but sacrifices readability and simplicity 636 | 4. Directly using lines[1] instead of using objectdeclarationline variable may be faster, but sacrifices 637 | readability and simplicity 638 | 5. Object types should be a constant 639 | 6. 
def dumpcsobject_getbase(content):
    """
    Gets the base (the text after " : ") of a dumpcs object.

    The object declaration is taken to be the second line of the object
    (lines[1]); the first line is the "// Namespace: " line.

    Arguments:
        content: full text of one dumpcs object

    Returns:
        The text between " : " and the trailing " //" comment on the
        declaration line, or None if the object is not inherited
        (see dumpcsobject_isinherited).
    """
    # Not inherited -> there is no " : " on the declaration line, so no base
    if not dumpcsobject_isinherited(content):
        return None
    objectdeclarationline = getlines(content)[1]
    # Everything after " : " is the base followed by the " // TypeDefIndex"
    # comment. The base must be cut out of this suffix, not out of the whole
    # declaration line (which would return the object's own declaration).
    suffix = readaftersubstring(" : ", objectdeclarationline)
    return readbeforesubstring(" //", suffix)
def dumpcs_getobjects(dumpcs: str,
                      createtypemodels=True,
                      objecttypefilter: Union[set[str],None]=None,
                      namespacefilter: Union[set[str],None]=None,
                      customfilter: Union[Callable,None]=None) -> list[dict]:
    """
    Parses dumpcs file into a list of objects.
    Does not remove blank lines.

    Arguments:
        dumpcs: full text of the dumpcs file
        createtypemodels: if True, each object gets a "typemodel" built by
            buildtypemodel; otherwise "typemodel" is set to None
        objecttypefilter: if not None, only objects whose type is in this
            collection are kept
        namespacefilter: if not None, only objects whose namespace is in
            this collection are kept
        customfilter: if not None, called with the object dictionary (before
            the type model is built); objects for which it returns a falsy
            value are dropped

    Returns:
        A list of dictionaries with the keys "content", "name",
        "typedefindex", "type", "namespace", "datatype", "isinherited",
        "methods", "fields", "properties", "base" and "typemodel".
    """
    objectdelimiter = "// Namespace: "  # Should be a constant
    # Sets give constant-time membership tests, so convert other collections.
    # None means "no filter" and must be left alone (set(None) is an error).
    if objecttypefilter is not None and not isinstance(objecttypefilter, set):
        objecttypefilter = set(objecttypefilter)
    if namespacefilter is not None and not isinstance(namespacefilter, set):
        namespacefilter = set(namespacefilter)
    # Split dumpcs by "// Namespace: ", which marks the start of each object.
    # str.split always returns at least one element: whatever comes before
    # the first delimiter. That is not an object, so delete it.
    fullobjects = dumpcs.split(objectdelimiter)
    del fullobjects[0]
    objects = []
    for fullobject in fullobjects:
        # Add "// Namespace: " back on, as string.split excludes the delimiter
        content = objectdelimiter + fullobject
        # Exit early on namespacefilter or objecttypefilter to save some work
        namespace = dumpcsobject_getnamespace(content)
        if namespacefilter is not None and namespace not in namespacefilter:
            continue
        # Deliberately not named "type": a local with that name would shadow
        # the builtin for the whole function.
        objecttype = dumpcsobject_gettype(content)
        if objecttypefilter is not None and objecttype not in objecttypefilter:
            continue
        name = dumpcsobject_getname(content)
        datatype = dumpcsobject_getdatatype(content)
        isinherited = dumpcsobject_isinherited(content)
        base = dumpcsobject_getbase(content) if isinherited else None
        typedefindex = dumpcsobject_gettypedefindex(content)
        methods = dumpcsobject_getmethods(content)
        fields = dumpcsobject_getfields(content)
        properties = dumpcsobject_getproperties(content)
        Object = {
            "content": content,
            "name": name,
            "typedefindex": typedefindex,
            "type": objecttype,
            "namespace": namespace,
            "datatype": datatype,
            "isinherited": isinherited,
            "methods": methods,
            "fields": fields,
            "properties": properties,
            "base": base,
        }
        # Now that we have all the object's data, we can check against the
        # custom filter. Doing it here avoids building the type model for
        # objects that are going to be dropped.
        if customfilter is not None and not customfilter(Object):
            continue
        # Always set the key so consumers do not have to check for it
        Object["typemodel"] = buildtypemodel(Object) if createtypemodels else None
        objects.append(Object)
    return objects
def getmethodof(index):
    # Returns the text of the method entry located around character position
    # `index` of the module-global `dumpcs` string.
    #
    # Return values visible in this function:
    #   None - `dumpcs` is not defined (objectnotdeclarederror is called first)
    #   ""   - index is out of range, no "\n\n" is found before it, or the text
    #          found does not pass the offset/method checks below
    #   str  - the entry, rebuilt with linestostring after stripping its
    #          first two lines
    # NOTE(review): contains(), getlines(), removeblanklines(), substring()
    # and linestostring() are defined elsewhere; their exact semantics are
    # assumed from their names - confirm before relying on this description.
    index = int(index)
    if not(variableexists("dumpcs")):
        objectnotdeclarederror("dumpcs")
        return(None)
    if index > (len(dumpcs)- len("\n\n")): #Impossible scenario, but ocd makes me put this here!
        return("")
    rangebehind = 0
    startpos = 0
    # Search backwards for a blank line ("\n\n"): look in a 2-character window
    # ending at (index - rangebehind) and slide it back one character per
    # iteration. str.find returns -1 on a miss, so the "+ 1" leaves startpos
    # at 0 and the loop continues.
    while startpos == 0:
        startpos = dumpcs.find("\n\n",((index - rangebehind) - len("\n\n")),(index - rangebehind)) + 1
        if (((index - rangebehind) - len("\n\n")) < 1): #Not found - must be the beginning (shouldn't happen)
            startpos = 0
            return("") # no method
        rangebehind = rangebehind + 1
    endpos = dumpcs.find("\n\n",startpos + len("\n\n"),len(dumpcs)) #find the next \n after startpos
    if endpos == -1: #Not found - shouldn't be possible but we assume it is end of dump.cs
        endpos = len(dumpcs) #set to the end
    methodline = removeblanklines(substring(dumpcs,startpos,endpos)).strip()
    if ((contains(_offsetsuffix,methodline)) and (len(getlines(methodline))) == 1): #just method offset line
        # Only one line was captured, so extend the end forwards. rangebehind
        # now counts down from -1, which makes the search window
        # (index - rangebehind) grow by one character per iteration. A miss
        # gives -1 + 2 == 1, which keeps the loop running.
        rangebehind = -1
        endpos = 1
        while endpos == 1:
            endpos = dumpcs.find("\n\n",startpos + 3,(startpos + ((index - rangebehind) + len("\n\n")))) + 2
            if (((index - rangebehind) + len("\n\n")) > len(dumpcs)): #Not found - shouldn't be possible but we assume it is end of dump.cs
                endpos = len(dumpcs) #set to the end
            rangebehind = rangebehind - 1
        methodline = removeblanklines(substring(dumpcs,startpos,endpos)).strip()
        lines = getlines(methodline)
        lines[0] = lines[0].strip() #remove whitespace from the two lines
        lines[1] = lines[1].strip()
        methodline = linestostring(lines)
        if not((contains(_isoffsetstring,methodline)) and contains(_ismethodstring,methodline)): #It isn't a method
            return("")
        else:
            return(methodline)
    else: #method offset line and method type line, or not method
        lines = getlines(methodline)
        if len(getlines(methodline)) < 2: #error - must not be a method
            return("")
        lines[0] = lines[0].strip() #remove whitespace from the two lines
        lines[1] = lines[1].strip()
        methodline = linestostring(lines)
        if not(contains(_offsetsuffix,methodline)): #error - must not be a method
            return("")
        if not((contains(_isoffsetstring,methodline)) and contains(_ismethodstring,methodline)): #It isn't a method
            return("")
        return(methodline)
def getlineof(index,text,removewhitespace = False):
    """
    Returns the line of `text` that contains character position `index`.

    Arguments:
        index: character position inside text (anything int() accepts).
            Negative positions are treated as 0; positions past the end
            select the last line. A position that lands on a "\\n" belongs
            to the line that the "\\n" terminates.
        text: the text to search
        removewhitespace: if True, the line is returned stripped

    Returns:
        The line without its trailing newline ("" for empty text).
    """
    # A negative bound would be interpreted relative to the end of the string
    index = max(int(index), 0)
    # The line starts right after the last newline *before* index
    # (rfind gives -1 on a miss, so the first line starts at 0) ...
    startpos = text.rfind("\n", 0, index) + 1
    # ... and ends at the first newline at or after index.
    endpos = text.find("\n", index)
    if endpos == -1: #Not found - must be the last line
        endpos = len(text)
    line = text[startpos:endpos]
    if removewhitespace:
        return line.strip()
    return line
def getnamespaces(objects):
    """
    Groups objects by namespace.

    Arguments:
        objects: sequence of object dictionaries, each with a "Namespace" key

    Returns:
        A dictionary mapping each namespace name to the list of objects in
        that namespace. Namespaces and objects keep their input order.

    Prints a progress line ("done/total") every 1000 objects.
    """
    namespaces = {}
    total = len(objects)
    for i, thisobject in enumerate(objects):
        if i % 1000 == 0:
            print(str(i) + "/" + str(total))
        # setdefault creates the list the first time a namespace is seen
        namespaces.setdefault(thisobject["Namespace"], []).append(thisobject)
    return namespaces
This means the first one will always go 194 | if toremoveblanklines and not(toremoveattributes): #remove blank lines 195 | new = [] 196 | i = -1 197 | for thisitem in fullobjects: 198 | i = i + 1 199 | if multipleof(i,1000): 200 | print(str(i) + "/" + str(len(fullobjects))) 201 | if toremoveallblanklines: 202 | newitem = removeblanklines(thisitem,True,True,True) 203 | else: 204 | newitem = removeblanklines(thisitem) 205 | new.append(newitem) 206 | flagremovedblanklines = True 207 | fullobjects = new 208 | new = [] 209 | for thisitem in fullobjects: #Add seperator back on, as string.split excludes the seperator 210 | newitem = _objectseparator + thisitem 211 | new.append(newitem) 212 | fullobjects = new 213 | #fullobjects = tuple(map(lambda x: _objectseparator + x,fullobjects)) 214 | if toremoveattributes: #remove attributes 215 | new = [] 216 | i = -1 217 | for item in fullobjects: 218 | i = i + 1 219 | if multipleof(i,1000): 220 | print(str(i) + "/" + str(len(fullobjects))) 221 | newitem = removeattributes(item,toremoveblanklines) 222 | new.append(newitem) 223 | fullobjects = new 224 | #fullobjects = tuple(map(removeattributes,fullobjects)) 225 | global flagremovedattributes 226 | flagremovedattributes = True 227 | if toremoveblanklines: 228 | flagremovedblanklines = True 229 | if not(getshared): 230 | new = [] 231 | i = -1 232 | for thisitem in fullobjects: #Remove shared objects 233 | i = i + 1 234 | if multipleof(i,1000): 235 | print(str(i) + "/" + str(len(fullobjects))) 236 | if not(getisshared(thisitem)): 237 | new.append(thisitem) 238 | fullobjects = new 239 | #fullobjects = [thisitem for thisitem in fullobjects if not(getisshared(thisitem))] 240 | global flagremovedshared 241 | flagremovedshared = True 242 | if returntuple: 243 | return(tuple(fullobjects)) 244 | else: 245 | return(fullobjects) 246 | 247 | def removeattributes(thisobject,toremovenewlines = False): 248 | global flagremovedattributes 249 | if flagremovedattributes: 250 | return(thisobject) 
#attributes have already been removed! 251 | ## lines = getlines(thisobject,False,False) 252 | ## newlines = [] 253 | ## for thisline in lines: 254 | ## newline = removewhitespace(thisline,True,False,False) 255 | ## if ((letter(1,newline) == _attributestart) and (contains(_attributeend,newline))): 256 | ## if contains(_attributeend + " ",newline): 257 | ## newline = readafter(newline,_attributeend + " ") 258 | ## else: 259 | ## newline = readafter(newline,_attributeend) 260 | ## if not(newline == ""): #and not((checkforstringat(" " + _isoffsetstring,newline,1)) or (checkforstringat(_isoffsetstring,newline,1)))): # rva is only after we remove compiler generated etc., so it is useless 261 | ## if((checkforstringat(" " + _isoffsetstring,newline,1)) or (checkforstringat(_isoffsetstring,newline,1))): 262 | ## newlines.append("\n") 263 | ## newlines.append(newline) 264 | ## else: 265 | ## newline = thisline 266 | ## if not(toremovenewlines and (newline == "")): 267 | ## if (contains("// RVA: -1 Offset: -1",newline)): 268 | ## if (len(newlines) == 0): 269 | ## newlines.append(newline) 270 | ## else: 271 | ## if not((checkforstringat(" " + _isoffsetstring,newlines[len(newlines) - 1],1)) or (checkforstringat(_isoffsetstring,newlines[len(newlines) - 1],1))): 272 | ## newlines.append(newline) 273 | ## #else: 274 | ## #newlines[len(newlines) - 1] = newline 275 | ## else: 276 | ## if (len(newlines) == 0): 277 | ## newlines.append(newline) 278 | ## else: 279 | ## if not((checkforstringat(" " + _isoffsetstring,newlines[len(newlines) - 1],1)) or (checkforstringat(_isoffsetstring,newlines[len(newlines) - 1],1))): 280 | ## newlines.append(newline) 281 | ## else: 282 | ## newlines[len(newlines) - 1] = newline 283 | lines = getlines(thisobject,False,False) 284 | newlines = [] 285 | for thisline in lines: 286 | newline = removewhitespace(thisline,True,False,False) 287 | if ((letter(1,newline) == _attributestart) and (contains(_attributeend,newline))): #yes, purposely 1, not 0 - begins 
def getuserdefinedtype(thisobject):
    """
    Returns the user defined type keyword of an object.

    The first word of the object type line (after attributes are removed)
    that appears in _userdefinedtypes is returned, stripped. If no word
    matches, "Other" is returned.

    Side effect: the module global `isshared` is set to False, and to True
    when the matching object type line contains any of _issharedstrings.
    NOTE(review): this file also binds `isshared` as an alias of
    getisshared; writing the global here replaces that alias - confirm
    which of the two uses is intended.
    """
    global isshared
    thisobject = removeattributes(thisobject)
    isshared = False
    lines = getlines(thisobject,False,False)
    # 1st line is namespace, 2nd line describes object (abstract class, public enum, etc.)
    typeline = item(_objecttypeline,lines)
    found = "Other" # stays "Other" when there are no words or no known type word
    for candidate in getwords(typeline):
        if candidate not in _userdefinedtypes:
            continue
        found = candidate
        # Shared objects are recognised by marker strings on the type line
        isshared = any(contains(marker,typeline) for marker in _issharedstrings)
        break
    return found.strip()
def getobjecttype(thisobject):
    """
    Returns the modifiers of an object, i.e. the words of the object type
    line that come before the user defined type keyword ("public" from
    "public enum", "internal static" from "internal static class").

    Arguments:
        thisobject: full text of one object

    Returns:
        The modifiers joined by single spaces. "" when the type line has no
        words or starts directly with a user defined type keyword. If no
        word is a user defined type keyword, all words are returned.
    """
    thisobject = removeattributes(thisobject)
    lines = getlines(thisobject,False,False)
    # 1st line is namespace, 2nd line describes object (abstract class, public enum, etc.)
    words = getwords(item(_objecttypeline,lines))
    modifiers = []
    for thisword in words:
        if thisword in _userdefinedtypes:
            # Reached the type keyword - everything before it is the modifiers
            break
        modifiers.append(thisword)
    # Joining (instead of appending "word " and trimming afterwards) is also
    # safe when there are no modifiers at all.
    return " ".join(modifiers).strip()
def getmethodparams(thismethod):
    """
    Splits the parameter list of a method into individual parameters.

    The text returned by getfullmethodparams is split on commas, except for
    commas that are inside a data type group (delimited by
    _datatypegroupstart / _datatypegroupend). Groups may be nested.

    Arguments:
        thismethod: text of one method

    Returns:
        A list of parameter strings, each stripped, in source order.
    """
    fullmethodparams = getfullmethodparams(thismethod)
    methodparams = []
    letters = [] # characters of the parameter being collected
    depth = 0 # how many data type groups are currently open
    for thisletter in str(fullmethodparams):
        if thisletter == _datatypegroupstart:
            depth += 1
        elif thisletter == _datatypegroupend and depth > 0:
            depth -= 1
        if depth == 0 and thisletter == ",":
            # Top-level comma: the current parameter is complete
            if letters:
                methodparams.append("".join(letters).strip())
            letters = []
        else:
            letters.append(thisletter)
    # Whatever is left after the last comma is the final parameter
    if letters:
        methodparams.append("".join(letters).strip())
    return methodparams
def replacetypenames(thistype):
    """
    Replaces every word that is not in _types with _typenamereplace.

    Arguments:
        thistype: a string of words or a list of words

    Returns:
        A string (built with wordstostring) if `thistype` is a string at
        the end of processing, otherwise the list of replaced words.
        NOTE(review): when _processdatatypegroups is set, a list input is
        converted to a string first, so the result is a string as well -
        confirm whether callers passing a list rely on this.
    """
    if _processdatatypegroups:
        # Work on a string so that group characters can be blanked out
        if type(thistype) is list:
            thistype = wordstostring(thistype)
        groupletters = (_datatypegroupstart, _datatypegroupend, _datatypegroupseparator)
        spaced = "".join(" " if letter in groupletters else letter for letter in thistype)
        words = getwords(spaced)
    elif type(thistype) is str:
        words = getwords(thistype)
    else:
        words = thistype
    # Keep known types, replace everything else
    replaced = [word if word in _types else _typenamereplace for word in words]
    if type(thistype) is str:
        return wordstostring(replaced)
    return replaced
def removegenericinstmethods(fullmethods):
    """
    Drops generic instance method sections from the methods text.

    A section starts at a line equal to _genericinstmethodstart and ends at
    the next line equal to _genericinstmethodend; both marker lines and
    everything between them are removed. An end marker found outside a
    section is kept like any other line.

    Arguments:
        fullmethods: methods text, split with getlines(fullmethods,True,True)

    Returns:
        The remaining lines, as a list.
    """
    kept = []
    skipping = False
    for line in getlines(fullmethods,True,True):
        if line == _genericinstmethodstart:
            skipping = True
        elif skipping and line == _genericinstmethodend:
            skipping = False
        elif not skipping:
            kept.append(line)
    return kept
"ParamTypes" : getmethodparamtypes(thismethod), 592 | } 593 | methods.append(thismethoddata) 594 | return(methods) 595 | 596 | def getmethodsdict(methods): 597 | methodsdict = {} 598 | for thismethod in methods: 599 | methodsdict[thismethod["Name"]] = thismethod 600 | return(methodsdict) 601 | 602 | 603 | def getfullmethods(thisobject): 604 | global fullmethods 605 | thisobject = removeattributes(thisobject) 606 | lines = getlines(thisobject,True,True) 607 | if len(lines) > 0: 608 | if (_methodsstart in lines): 609 | fullmethods = "" 610 | i = lines.index(_methodsstart) + 1 611 | start = i 612 | thisitem = removewhitespace(lines[i]) 613 | fullmethods = concat([fullmethods,thisitem],"\n") 614 | i = i + 1 615 | thisitem = removewhitespace(lines[i]) 616 | i = i + 1 617 | while not((thisitem in _contentends) or i > (len(lines) - 1)): 618 | i = i + 1 619 | if not(iswhitespace(thisitem)): 620 | fullmethods = concat([fullmethods,thisitem],"\n") 621 | thisitem = removewhitespace(lines[i - 1]) 622 | else: 623 | fullmethods = "" 624 | return(fullmethods) 625 | 626 | def methodsmatch(method1,method2,checkparams = True): 627 | type1 = method1["Type"] 628 | type2 = method2["Type"] 629 | typesmatch = (type1 == type2) 630 | if checkparams: 631 | params1 = method1["ParamTypes"] 632 | params2 = method2["ParamTypes"] 633 | paramsmatch = (param1 == param2) 634 | else: 635 | paramsmatch = True 636 | return(typesmatch and paramsmatch) #is percentage score not less than tolerated percent? 
def getobject(objectnames, fullobjects, casesensitive=False):
    """
    Find objects by name and return a description dict for each match.

    @param objectnames: One name (str) or a list of names to look for
    @param fullobjects: Full text of every object to search
    @param casesensitive: Passed through to listcontains() for the name comparison
    @return: List of dicts with the keys Name, Namespace, UserDefinedType,
    Shared, Type, Content, Fields, Properties, Methods and TypeModel

    If fewer objects are found than names were requested, the failure is
    reported through dumpcsnotfounderror() and the program exits.
    """
    if isinstance(objectnames, str):  # convert to list
        objectnames = [objectnames]
    objectsfound = []
    for i, thisfullobject in enumerate(fullobjects):
        if multipleof(i, 1000):
            print(str(i) + "/" + str(len(fullobjects)))  # progress report
        thisname = getobjectname(thisfullobject)
        if not listcontains(thisname, objectnames, casesensitive):
            continue
        objectsfound.append({
            "Name": thisname,
            "Namespace": getobjectnamespace(thisfullobject),
            "UserDefinedType": getuserdefinedtype(thisfullobject),
            "Shared": getisshared(thisfullobject),
            "Type": getobjecttype(thisfullobject),
            "Content": thisfullobject,
            "Fields": getfullfields(thisfullobject),
            "Properties": getfullproperties(thisfullobject),
            # Was getfullproperties() (copy-paste), which put the properties
            # section under "Methods".
            "Methods": getfullmethods(thisfullobject),
            "TypeModel": buildtypemodel(thisfullobject),
        })
    if len(objectsfound) < len(objectnames):
        # Was dumpcsnotfounderror(objectname): that name does not exist here.
        # NOTE(review): assumes dumpcsnotfounderror() takes the name as text -
        # confirm against its definition.
        dumpcsnotfounderror(", ".join(str(thisname) for thisname in objectnames))
        sys.exit()
    return objectsfound


def getfieldoffset(thisfield):
    """Return what readafter() yields for the field line and _fieldoffsetstart."""
    return readafter(thisfield, _fieldoffsetstart)


def getfieldtype(thisfield, replacenames=True):
    """
    Extract the type part of a field declaration.

    The declaration is cut at _fieldoffsetstart; its last word (the field
    name) is dropped and the remaining words are joined with wordstostring().

    @param thisfield: One field line
    @param replacenames: Whether to pass the result through replacetypenames()
    @return: The field's type text
    """
    declaration = substring(thisfield, 0, findstr(_fieldoffsetstart, thisfield))
    declaration = readbefore(declaration, _fieldoffsetstart).strip()
    words = getwords(declaration)
    if words:
        words.pop()  # the last word is the field name, not part of the type
    fieldtype = wordstostring(words)
    if replacenames:
        fieldtype = replacetypenames(fieldtype)
    return fieldtype
def getfieldslist(fullfields):
    """
    Return the lines of a fields section that contain _fieldoffsetstart.

    The list is also stored in the module-level ``fields``.
    """
    global fields
    fields = [thisline for thisline in getlines(fullfields, True, True)
              if contains(_fieldoffsetstart, thisline)]
    return fields


def getfields(fieldslist):
    """
    Build one description dict per field.

    @param fieldslist: A list of field lines, or the full fields section as a
    single string (it is then split with getfieldslist)
    @return: List of dicts with the keys Name, Type, Content and Offset. The
    list is also stored in the module-level ``fields``.
    """
    global fields
    if isinstance(fieldslist, str):  # got full fields, not fields list - so convert
        fieldslist = getfieldslist(fieldslist)
    fields = []
    for thisfield in fieldslist:
        fields.append({
            "Name": getfieldname(thisfield),
            "Type": getfieldtype(thisfield),
            "Content": thisfield,
            "Offset": getfieldoffset(thisfield),
        })
    return fields


def getfieldsdict(fields):
    """
    Index field dicts by their "Name" entry.

    When several fields share a name, the last one wins.
    """
    return {thisfield["Name"]: thisfield for thisfield in fields}


def buildtypemodel(thisobject):
    """
    Build the name-independent "type model" of an object.

    The model holds only type information (no member names), which is what
    typemodelsmatch() compares.

    @param thisobject: Full text of one object
    @return: Dict with the keys UserDefinedType, Type, Shared, Fields,
    Properties, PropertyTypes, Methods and MethodTypes
    """
    # To do: method params, number of shared classes for class
    objecttype = getobjecttype(thisobject)
    userdefinedtype = getuserdefinedtype(thisobject)
    isshared = getisshared(thisobject)
    fieldlines = getfieldslist(getfullfields(thisobject))
    propertylines = getpropertieslist(getfullproperties(thisobject))
    methodlines = getmethodslist(getfullmethods(thisobject))

    fieldtypes = [getfieldtype(thisfield, True) for thisfield in fieldlines]

    propertymodels = []
    for thisproperty in propertylines:
        propertymodels.append({
            "Type": getpropertytype(thisproperty, True),
            "Attributes": getpropertyattributes(thisproperty),
        })
    # Reuse the types parsed above instead of parsing every property twice.
    justpropertytypes = [thismodel["Type"] for thismodel in propertymodels]

    methodmodels = []
    for thismethod in methodlines:
        methodmodels.append({
            "Type": getmethodtype(thismethod, True),
            "ParamTypes": getmethodparamtypes(thismethod, True),
        })
    # Same as for properties: one parse per method.
    justmethodtypes = [thismodel["Type"] for thismodel in methodmodels]

    return {
        "UserDefinedType": userdefinedtype,
        "Type": objecttype,
        "Shared": isshared,
        "Fields": fieldtypes,
        "Properties": propertymodels,
        "PropertyTypes": justpropertytypes,
        "Methods": methodmodels,
        "MethodTypes": justmethodtypes,
    }


gettypemodel = buildtypemodel  # same thing, but different name
maketypemodel = buildtypemodel  # same thing, but different name


def getfullfields(thisobject):
    """
    Extract the text of an object's fields section.

    @param thisobject: Full text of one object
    @return: The section's lines joined with concat(..., "\\n"), or "" when the
    object has no _fieldsstart line. The result is also stored in the
    module-level ``fullfields``.
    """
    global fullfields
    thisobject = removeattributes(thisobject)
    lines = getlines(thisobject, True, True)
    # Always start from an empty result. Previously one of the "nothing found"
    # paths assigned nothing, so the value left over from the previous object
    # (or a NameError on the very first call) was returned.
    fullfields = ""
    if _fieldsstart in lines:
        i = lines.index(_fieldsstart) + 1
        # The first line after the section header is always taken.
        thisitem = removewhitespace(lines[i])
        fullfields = concat([fullfields, thisitem], "\n")
        i = i + 1
        thisitem = removewhitespace(lines[i])
        i = i + 1
        # Keep taking lines until a content-end marker is reached; the final
        # line of the object is never taken.
        while not ((thisitem in _contentends) or i > (len(lines) - 1)):
            i = i + 1
            if not iswhitespace(thisitem):
                fullfields = concat([fullfields, thisitem], "\n")
            thisitem = removewhitespace(lines[i - 1])
    return fullfields
def getpropertytype(thisproperty, replacenames=True):
    """
    Extract the type part of a property declaration.

    The declaration is cut at _propertyattributesstart; its last word (the
    property name) is dropped and the remaining words are joined with
    wordstostring().

    @param thisproperty: One property line
    @param replacenames: Whether to pass the result through replacetypenames()
    @return: The property's type text
    """
    declaration = substring(thisproperty, 0, findstr(_propertyattributesstart, thisproperty))
    declaration = readbefore(declaration, _propertyattributesstart).strip()
    typewords = getwords(declaration)
    if typewords:
        typewords.pop()  # the last word is the property name, not part of the type
    typetext = wordstostring(typewords)
    return replacetypenames(typetext) if replacenames else typetext


def getfullpropertyattributes(thisproperty):
    """
    Return the text between _propertyattributesstart and _propertyattributesend,
    stripped and re-joined word by word with wordstostring().
    """
    attributestext = readbetween(thisproperty, _propertyattributesstart, _propertyattributesend)
    return wordstostring(getwords(attributestext.strip()))


def getpropertyattributes(thisproperty):
    """Return the property's attribute text split on _propertyattributeseparator."""
    return getfullpropertyattributes(thisproperty).split(_propertyattributeseparator)


def getpropertyname(thisproperty):
    """
    Return the property's name: the last word before _propertyattributesstart.

    Raises IndexError when there is no word before the marker.
    """
    declaration = substring(thisproperty, 0, findstr(_propertyattributesstart, thisproperty))
    namewords = getwords(readbefore(declaration, _propertyattributesstart).strip())
    return namewords[-1]
def getproperties(propertieslist):
    """
    Build one description dict per property.

    @param propertieslist: A list of property lines, or the full properties
    section as a single string (it is then split with getpropertieslist)
    @return: List of dicts with the keys Name, Type, Content, Attributes and
    FullAttributes. The list is also stored in the module-level ``properties``.
    """
    global properties
    if isinstance(propertieslist, str):  # got full properties, not properties list - so convert
        propertieslist = getpropertieslist(propertieslist)
    properties = []
    for thisproperty in propertieslist:
        properties.append({
            "Name": getpropertyname(thisproperty),
            "Type": getpropertytype(thisproperty),
            "Content": thisproperty,
            "Attributes": getpropertyattributes(thisproperty),
            "FullAttributes": getfullpropertyattributes(thisproperty),
        })
    return properties


def getpropertiesdict(properties):
    """
    Index property dicts by their "Name" entry.

    When several properties share a name, the last one wins.
    """
    return {thisproperty["Name"]: thisproperty for thisproperty in properties}


def getfullclasses(fullobjects):
    """Return the objects whose user-defined type is "class", printing progress every 1000 objects."""
    fullclasses = []
    for i, thisobject in enumerate(fullobjects):
        if multipleof(i, 1000):
            print(str(i) + "/" + str(len(fullobjects)))  # progress report
        if getuserdefinedtype(thisobject) == "class":
            fullclasses.append(thisobject)
    return fullclasses


def getfullstructs(fullobjects):
    """Return the objects whose user-defined type is "struct"."""
    return [thisobject for thisobject in fullobjects
            if getuserdefinedtype(thisobject) == "struct"]


def getfullenums(fullobjects):
    """Return the objects whose user-defined type is "enum"."""
    return [thisobject for thisobject in fullobjects
            if getuserdefinedtype(thisobject) == "enum"]


def getfullinterfaces(fullobjects):
    """Return the objects whose user-defined type is "interface"."""
    # Previously appended to the misspelled name "fullinterfacse", which raised
    # NameError as soon as the first interface was encountered.
    return [thisobject for thisobject in fullobjects
            if getuserdefinedtype(thisobject) == "interface"]
getobjects(fullobjects,onlyclasses = False,getshared = True,namespacefilter = None,justnameandtypemodel = False,doalphabeticalsort = True,returntuple = True): 911 | if type(namespacefilter) == str: 912 | namespacefilter = [namespacefilter] #convert to list 913 | if namespacefilter == [] or namespacefilter is False: 914 | namespacefilter = None 915 | global flagremovedshared 916 | if onlyclasses: 917 | fullobjects = getfullclasses(fullobjects) 918 | objects = [] 919 | i = -1 920 | for thisfullobject in fullobjects: 921 | i = i + 1 922 | if multipleof(i,1000): 923 | print(str(i) + "/" + str(len(fullobjects))) 924 | valid = True 925 | if not(flagremovedshared) and valid: 926 | if not(getshared): 927 | if getisshared(thisfullobject): 928 | valid = False 929 | if (namespacefilter != None) and valid: 930 | if not(getobjectnamespace(thisfullobject) in namespacefilter): 931 | valid = False 932 | if valid: 933 | if justnameandtypemodel: 934 | thisobject = { 935 | "Name" : getobjectname(thisfullobject), 936 | "TypeModel" : buildtypemodel(thisfullobject), 937 | } 938 | else: 939 | objectname = getobjectname(thisfullobject) 940 | if onlyclasses: 941 | userdefinedtype = "class" 942 | else: 943 | userdefinedtype = getuserdefinedtypeofobject(thisfullobject) 944 | objecttype = getobjecttype(thisfullobject) 945 | objectnamespace = getobjectnamespace(thisfullobject) 946 | shared = getisshared(thisfullobject) 947 | fullmethods = getfullmethods(thisfullobject) 948 | methods = getmethods(fullmethods) 949 | methodsdict = getmethodsdict(methods) 950 | fullfields = getfullfields(thisfullobject) 951 | fields = getfields(fullfields) 952 | fieldsdict = getfieldsdict(fields) 953 | fullproperties = getfullproperties(thisfullobject) 954 | properties = getproperties(fullproperties) 955 | propertiesdict = getpropertiesdict(properties) 956 | typemodel = buildtypemodel(thisfullobject) 957 | if doalphabeticalsort: 958 | methods = alphabeticalsort(methods) 959 | fields = alphabeticalsort(fields) 960 
def builddumpcshierarchy(dumpcspath,doalphabeticalsort = True,onlyclasses = False,getshared = True,namespacefilter = None,toremoveattributes = True,toremoveblanklines = True,toremoveallblanklines = False,rettype = "dict"):
    """
    Load a dump.cs file and arrange its objects by namespace and object name.

    @param dumpcspath: Path handed to loaddumpcs()
    @param doalphabeticalsort: Whether to sort with alphabeticalsort()
    @param onlyclasses: Passed to getobjects()
    @param getshared: Passed to getfullobjects() and getobjects()
    @param namespacefilter: Passed to getobjects()
    @param toremoveattributes: Passed to getfullobjects()
    @param toremoveblanklines: Passed to getfullobjects()
    @param toremoveallblanklines: Passed to getfullobjects()
    @param rettype: "dict", "list" or "tuple" (case-insensitive), or the
    corresponding builtin type
    @return: {namespace: {object name: object}} for "dict" (and for unknown
    strings); the same data as a list / tuple of (namespace, objects) pairs
    for "list" / "tuple". None if rettype is neither a string nor one of the
    three types, or if loaddumpcs() returns None.
    """
    if rettype is dict:
        rettype = "dict"  # was "rettype == ..." - a comparison with no effect
    elif rettype is list:
        rettype = "list"
    elif rettype is tuple:
        rettype = "tuple"
    elif isinstance(rettype, str):
        rettype = rettype.lower()
    else:
        #Error
        return None
    dumpcs = loaddumpcs(dumpcspath)
    if dumpcs is None:
        return None
    # NOTE(review): the loaded text is not passed on; getfullobjects() presumably
    # reads it from module state set by loaddumpcs() - confirm.
    fullobjects = getfullobjects(getshared,toremoveattributes,toremoveblanklines,toremoveallblanklines,returntuple = True)
    if doalphabeticalsort:
        fullobjects = alphabeticalsort(fullobjects)
    #Sort full objects into list of objects
    # The sort flag is forwarded here; previously the alphabeticalsort function
    # itself was passed, which is always truthy.
    objects = getobjects(fullobjects,onlyclasses,getshared,namespacefilter,False,doalphabeticalsort,returntuple = True)
    #Sort list of objects into dictionary of namespaces
    namespaces = getnamespaces(objects)
    #Change lists of objects to dictionaries of objects
    # Was zip(namespaces.keys(), namespaces.items()), which bound each
    # (namespace, objects) pair - not the object list - to the loop variable.
    newnamespaces = {}
    for thisnamespace, thisobjectlist in namespaces.items():
        newnamespaces[thisnamespace] = {thisobject["Name"]: thisobject for thisobject in thisobjectlist}
    if rettype == "list":
        return list(newnamespaces.items())
    if rettype == "tuple":
        return tuple(newnamespaces.items())
    #"dict", or unknown ret type - assume dict
    return newnamespaces


getdumpcshierarchy = builddumpcshierarchy #same thing, but different name
builddumpcshierarchy = builddumpcshierarchy #no-op self-assignment kept from the original (presumably meant to be another alias name)


def _addmatchscore(score, members1, members2, weight):
    """
    Return score plus weight for every member of members1 that can be paired
    with a not-yet-used equal member of members2.
    """
    unmatched = list(members2)
    for member in members1:
        if member in unmatched:
            score = score + weight
            unmatched.remove(member)  # each member of members2 can be matched only once
    return score


def typemodelsmatch(model1,model2,usetolerance = None,dosize = True,douserdefinedtype = True,doshared = True,dotype = True,donamespace = True,dofields = True,domethodparams = True,dopropertyattributes = True): #make sure model1 is the unobfuscated one!
    """
    Score how well two type models match and compare the score to a tolerance.

    @param model1: Type model of the unobfuscated object
    @param model2: Type model of the candidate object
    @param usetolerance: Minimum percentage to count as a match (default: _tolerance)
    @param dosize: Whether to score the difference in member count
    @param douserdefinedtype: Whether "UserDefinedType" is checked (must be equal)
    @param doshared: Whether "Shared" is checked (must be equal)
    @param dotype: Whether "Type" is checked (must be equal)
    @param donamespace: Whether an equal "Namespace" adds to the score
    @param dofields: Whether field types are scored
    @param domethodparams: Compare "Methods" (with parameter types) instead of "MethodTypes"
    @param dopropertyattributes: Compare "Properties" (with attributes) instead of "PropertyTypes"
    @return: False as soon as a must-be-equal check fails; otherwise whether
    score / maximum score, as a percentage, is not below the tolerance
    """
    if usetolerance is None:
        usetolerance = _tolerance
    #To-do: Number of shared classes for class
    maxscore = (len(model1.get("Methods")) * _methodweighttrue) + (len(model1.get("Properties")) * _propertyweighttrue) #calculate maximum score
    score = float(0)
    #Size
    if dosize:
        maxscore = maxscore + 8 #start off at 8, and subtract nothing for a perfect score
        size1 = (len(model1.get("Fields")) + len(model1.get("Methods")) + len(model1.get("Properties"))) #how many methods, fields, and properties are there?
        size2 = (len(model2.get("Fields")) + len(model2.get("Methods")) + len(model2.get("Properties")))
        score = 8 - (((abs(size2 - size1) / _sizebenchmark) * _sizeweightfalse)) #depending on the difference in size, this could have a small impact, or be very bad
    #Userdefined Type
    if douserdefinedtype:
        if model1.get("UserDefinedType") != model2.get("UserDefinedType"):
            return False #userdefined type MUST match
        maxscore = maxscore + _userdefinedtypeweighttrue
        score = score + _userdefinedtypeweighttrue
    #Shared
    if doshared:
        if model1.get("Shared") != model2.get("Shared"):
            return False #Is shared MUST match
        maxscore = maxscore + _sharedweighttrue
        score = score + _sharedweighttrue
    #Type
    if dotype:
        if model1.get("Type") != model2.get("Type"):
            return False #Object type MUST match
        # Was "_objecttypetrue", a name used nowhere else; the score side of
        # this same check uses _objecttypeweighttrue.
        maxscore = maxscore + _objecttypeweighttrue
        score = score + _objecttypeweighttrue
    #Namespace
    if donamespace:
        maxscore = maxscore + _namespaceweighttrue
        if model1.get("Namespace") == model2.get("Namespace"):
            # Was adding _objecttypeweighttrue, so a namespace match could never
            # line up with the maximum counted just above.
            score = score + _namespaceweighttrue
    #It's very normal to add on things, but not as common to delete them. So, most of
    #the members of the unobfuscated (earlier) model should also exist in the
    #obfuscated (newer) one.
    #Fields - we are using the fields type models, not the fields themselves
    if dofields:
        maxscore = maxscore + (len(model1.get("Fields")) * _fieldweighttrue)
        score = _addmatchscore(score, model1.get("Fields"), model2.get("Fields"), _fieldweighttrue)
    #Methods
    methodskey = "Methods" if domethodparams else "MethodTypes"
    score = _addmatchscore(score, model1.get(methodskey), model2.get(methodskey), _methodweighttrue)
    #Properties
    propertieskey = "Properties" if dopropertyattributes else "PropertyTypes"
    score = _addmatchscore(score, model1.get(propertieskey), model2.get(propertieskey), _propertyweighttrue)
    #To do: method params, number of shared classes for class
    matchscore = ((score / maxscore) * 100)
    endspeedtest() # NOTE(review): kept from the original; it is skipped on the early "return False" paths above
    return not (matchscore < usetolerance) #is percentage score not less than tolerated percent?


comparetypemodels = typemodelsmatch #same thing, but different name
checktypemodels = typemodelsmatch #same thing, but different name


def objectscheckformatch(object1,object2,usetolerance = None,dosize = True,douserdefinedtype = True,doshared = True,dotype = True,donamespace = True,dofields = True,domethodparams = True,dopropertyattributes = True):
    """
    Decide whether two object dicts describe the same object.

    If _trustnames is set and both objects have the same "Name", they match
    outright. Otherwise their "TypeModel" entries are compared with
    typemodelsmatch(), to which every flag is forwarded unchanged.

    Make sure object1 is the unobfuscated one!
    """
    if _trustnames and str(object1.get("Name")) == str(object2.get("Name")):
        return True
    # Forward by keyword. The old positional call passed domethodparams,
    # dopropertyattributes and donamespace into the douserdefinedtype, doshared
    # and dotype slots and dropped the remaining flags.
    return typemodelsmatch(
        object1.get("TypeModel"),
        object2.get("TypeModel"),
        usetolerance=usetolerance,
        dosize=dosize,
        douserdefinedtype=douserdefinedtype,
        doshared=doshared,
        dotype=dotype,
        donamespace=donamespace,
        dofields=dofields,
        domethodparams=domethodparams,
        dopropertyattributes=dopropertyattributes,
    )


checkobjects = objectscheckformatch #same thing, but different name
compareobjects = objectscheckformatch #same thing, but different name
objectsmatch = objectscheckformatch #same thing, but different name
def readafter(sub: str, s: str, backward=False, regex=False, mustcontain=True, lengthwarning=True) -> str:
    """
    Returns the part of a string after a delimiter
    If the delimiter is not found in the string, returns the whole string
    Also supports backward (right to left) and regex

    @param sub: The substring (or regex) to read after
    @param s: The initial string
    @param backward: Whether to search from right to left, i.e. read after the last
        occurrence instead of the first
    @param regex: Whether to treat the substring as regex
    @param mustcontain: Whether to throw an AssertionError if the substring (or regex) is not present
        in the initial string
    @param lengthwarning: Whether to raise a warning if the substring is longer than the initial string,
        which should never happen (only applies if regex is False)
    @return: The partition of the string after the substring (or regex)

    :raises ImportError: Wrong regex module: Expected regex module, got re module.
        To fix this, replace "import re" with "import regex as re"
    :raises AssertionError: param mustcontain was True and substring (or regex) was not present
        in the initial string
    :warns ValueWarning: param lengthwarning was True, param regex was False, and substring was longer
        than initial string

    Example:
        sub: "string"
        s: "Split this string by delimiter"
        return: " by delimiter"
    """
    class ValueWarning(Warning):
        pass
    # If re isn't imported at all, that will show later.
    # Since we only catch AttributeError, we don't have to worry about it here.
    try:
        re.REVERSE
    except AttributeError:
        raise ImportError("Wrong regex module: Expected regex module, got re module. To fix this, replace \"import "
                          "re\" with \"import regex as re\"")
    if regex:
        sub = f"({sub})"  # Enclose regex with parentheses to make it a group capture
        if mustcontain:
            assert bool(re.search(sub, s)) is True
        # If an invalid regex is passed, we let re.error throw - it can be handled by the caller
        if backward:
            # A reverse split scans from the right and lists its pieces right to
            # left, so element 0 is the text after the last match. The REVERSE
            # flag was missing here, which made this branch return the text
            # before the first match instead.
            split = re.split(sub, s, 1, flags=re.REVERSE)
            return split[0]
        else:
            split = re.split(sub, s, 1)
            return split[-1]
    else:
        if lengthwarning and len(sub) > len(s):
            warnings.warn(f"Call to readafter(sub=\"{sub}\", str=\"{s}\"): substring is longer than string",
                          ValueWarning)
        if mustcontain:
            assert sub in s
        if backward:
            prefix, found, suffix = s.rpartition(sub)
        else:
            prefix, found, suffix = s.partition(sub)
        if found:
            return suffix
        else:
            return s
before 103 | @param s: The initial string 104 | @param backward: Whether to read from right to left 105 | @param regex: Whether to treat the substring as regex 106 | @param mustcontain: Whether to throw an AssertionError if the substring (or regex) is not present in the initial 107 | string 108 | @param lengthwarning: Whether to raise a warning if the substring is longer than the initial string, 109 | which should never happen (only applies if regex is False) 110 | @return: The partition of the string before the substring (or regex) 111 | 112 | :raises ImportError: Wrong regex module: Expected regex module, got re module. 113 | To fix this, replace "import re" with "import regex as re" 114 | :raises AssertionError: param mustcontain was True and substring (or regex) was not present 115 | in the initial string 116 | :warns ValueWarning: param lengthwarning was True, param regex was False, and substring was longer 117 | than initial string 118 | 119 | Example: 120 | sub: "string" 121 | s: "Split this string by delimiter" 122 | return: "Split this " 123 | """ 124 | class ValueWarning(Warning): 125 | pass 126 | # If re isn't imported at all, that will show later. 127 | # Since we only catch AttributeError, we don't have to worry about it here. 128 | try: 129 | re.REVERSE 130 | except AttributeError: 131 | raise ImportError("Wrong regex module: Expected regex module, got re module. 
def iswhitespace(s: str, totreatblankaswhitespace=True) -> bool:
    """
    Tells whether a string consists only of whitespace

    @param s: The string to check
    @param totreatblankaswhitespace: The answer to give for an empty string ("")
    @return: Whether the string is whitespace
    """
    # str.isspace() is False for "", so the empty string is decided by the flag.
    return totreatblankaswhitespace if s == "" else s.isspace()
def getwords(s: str, customregex=None) -> list[str]:
    """
    Breaks a string up into its words
    Any run of whitespace (spaces, tabs, newlines) counts as a single delimiter,
    and leading / trailing whitespace produces no empty words

    @param s: The string to split into words
    @param customregex: If given, split with re.split on this regex instead of
        using the built-in str.split method
    @return: List of words in the initial string (in order)

    Example:
        s: "the quick
            brown fox

            abcdefg  "
        return: ["the","quick","brown","fox","abcdefg"]
    """
    if not customregex:
        return s.split()
    return re.split(customregex, s)
def getlines(s: str,
             toremoveblanklines=False,
             totrimlines=False) -> list[str]:
    """
    Breaks a string up into its lines

    @param s: The string to split into lines
    @param toremoveblanklines: Whether to leave out lines that are blank or only whitespace
    @param totrimlines: Whether to trim whitespace from each line (leading and trailing)
    @return: List of lines in the string (in order)

    Example:
        s: "a

           b

           c "
        toremoveblanklines: True
        totrimlines: True
        return: ["a","b","c"]
    """
    kept = []
    for rawline in s.splitlines():
        line = rawline.strip() if totrimlines else rawline
        # Same test as iswhitespace(line): empty or whitespace-only.
        isblank = line == "" or line.isspace()
        if toremoveblanklines and isblank:
            continue
        kept.append(line)
    return kept
def dumpcs_isvalid(dumpcs: str) -> bool:
    # Not done
    """
    Determines whether a dumpcs file looks valid
    Works by checking for a handful of substrings that are expected to exist in
    every valid dump.cs file
    Note: This is only a quick check on the file as a whole. The dumpcs_checkformat
    function is the one meant to analyze the whole thing in detail.

    Arguments:
        dumpcs: the raw content of the dumpcs file

    Return:
        whether every expected marker substring is present
    """
    checks = ("// Image 0:", "// Namespace: ", "class", "\n\t// Methods", "// RVA: 0x")
    return all(check in dumpcs for check in checks)


def dumpcs_checkformat(dumpcs: str) -> list[dict]:
    # Not Done
    """
    Scan dump.cs for unexpected formatting
    Intended to return a list of unexpected formatting errors

    Arguments:
        dumpcs: the raw content of the dumpcs file

    Return:
        list of errors with the line number and error message

    :raises NotImplementedError: always, the function is not written yet
    """
    raise NotImplementedError("Dumpcs_checkformat function not completed")


def dumpcs_hasattributes(dumpcs: str, fastcheck=False) -> bool:
    # Done
    """
    Determines whether a dumpcs file has attributes

    Arguments:
        dumpcs: the raw content of the dumpcs file
        fastcheck: if True, only look for the literal "[CompilerGeneratedAttribute]";
                   if False, look for any line whose first non-whitespace character
                   is a square bracket

    Return:
        whether the dumpcs file has attributes
    """
    if fastcheck:
        return "[CompilerGeneratedAttribute]" in dumpcs
    # A line whose first non-whitespace character is "[" is an attribute line
    return any(line.lstrip().startswith("[") for line in dumpcs.splitlines())


def dumpcs_constructor(dumpcs: str, terminateifinvalid: bool = True) -> list[list[dict]]:
    # Done
    """
    Loads and initializes a dumpcs
    Checks validity of the dumpcs, strips attributes, searches for format errors,
    then parses the images and objects in the dumpcs

    NOTE(review): dumpcs_checkformat currently always raises NotImplementedError,
    so this function cannot run to completion until that is implemented.

    Arguments:
        dumpcs: the raw content of the dumpcs file
        terminateifinvalid: whether to terminate with an error (True, the default) or
                            just emit a warning (False) if the dumpcs appears invalid.
                            The original signature wrote "terminateifinvalid: True",
                            which is an annotation and not a default value.

    Return:
        List of objects in the dumpcs file

    :raises InvalidDumpcsError: The dumpcs appears to be invalid, and terminateifinvalid was True
    :warns InvalidDumpcsWarning: The dumpcs appears to be invalid, and terminateifinvalid was False
    :raises UnexpectedDumpcsFormatError: dumpcs_checkformat reported at least one error
    """
    if not dumpcs_isvalid(dumpcs):
        if terminateifinvalid:
            raise InvalidDumpcsError(content=dumpcs)
        warnings.warn(InvalidDumpcsWarning(content=dumpcs))
    # Attributes must be removed before the dumpcs is checked for format errors
    if dumpcs_hasattributes(dumpcs):
        dumpcs = dumpcs_removeattributes(dumpcs)
    formaterrors = dumpcs_checkformat(dumpcs)
    if formaterrors:
        # Only the first error can ever be raised, so do not pretend to loop over them
        # NOTE(review): attribute access is kept from the original although
        # dumpcs_checkformat is annotated as returning dicts -- confirm the real type
        formaterror = formaterrors[0]
        raise UnexpectedDumpcsFormatError(formaterror.message, formaterror.sample, line=formaterror.line)
    # NOTE(review): the parsed images are never used; the call is kept in case
    # dumpcs_getimages has side effects -- confirm and remove if it does not
    images = dumpcs_getimages(dumpcs)
    objects = dumpcs_getobjects(dumpcs)
    return objects


def dumpcs_removeattributes(dumpcs: str) -> str:
    # Done
    """
    Removes attributes from a dumpcs file
    Does not process attributes, only removes them
    Does not remove blank lines
    Lines are split with str.splitlines and re-joined with a single newline,
    so a trailing newline in the input is not preserved

    Arguments:
        dumpcs: the raw content of the dumpcs file

    Return:
        dumpcs raw content with attribute lines removed
    """
    # A line whose first non-whitespace character is "[" is an attribute line
    keptlines = [line for line in dumpcs.splitlines()
                 if not line.lstrip().startswith("[")]
    return "\n".join(keptlines)


@cache
def dumpcsobject_hasnamespace(rawobject: str) -> bool:
    # Not Done
    """
    Determines whether a dumpcs object has a namespace
    The first line of the object is the namespace line; it is compared as-is
    against the bare "// Namespace: " marker

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        whether the namespace line carries anything besides the bare marker
    """
    namespaceline = rawobject.splitlines()[0]
    return namespaceline != "// Namespace: "


@cache
def dumpcsobject_getnamespace(rawobject: str) -> Optional[str]:
    # Done
    """
    Gets the namespace of a dumpcs object

    Example:
        namespaceline: // Namespace: Microsoft.Win32
        Return: Microsoft.Win32

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        namespace of the dumpcs object, or None if it has no namespace
    """
    if not dumpcsobject_hasnamespace(rawobject):
        # No namespace
        return None
    namespaceline = rawobject.splitlines()[0]
    # Everything after "// Namespace: " in the namespaceline is the object's namespace
    return readafter("// Namespace: ", namespaceline)
dumpcsobject__getmodifiers(rawobject: str) -> list[str]: 505 | # Done 506 | """ 507 | 508 | Possible Improvements: 509 | 1. Directly returning instead of using type variable and breaking loop out of loop may be faster, 510 | but sacrifices readability and simplicity 511 | 2. Object types should be a constant 512 | 3. IDK if using a dictionary cache is faster or ironically slower 513 | 514 | Gets the type (struct, class, enum, or interface) of a dumpcs object 515 | 516 | Example: 517 | objectsignatureline: public static class Registry // TypeDefIndex: 4 518 | Return: class 519 | 520 | 521 | Arguments: 522 | rawobject: the raw content of the dumpcs object 523 | 524 | Return: 525 | type of the dumpcs object 526 | """ 527 | lines = getlines(rawobject) 528 | objectsignatureline = lines[1].strip() 529 | if dumpcsobject_isinherited(rawobject): 530 | # If the object is inherited, read before the base class (to get the derived class) 531 | prefix = readbefore(" : ", objectsignatureline) 532 | else: 533 | # If the object is not inherited, read before the typedefindex 534 | prefix = readbefore(" //", objectsignatureline) 535 | words = getwords(prefix, customregex="(? 1 537 | # Last word is object name, second to last word is object type 538 | del words[-1:-2] 539 | while words[-1] in "ref, in, out": 540 | datatype += f"{words[-3]} " 541 | del words[-3] 542 | return words 543 | 544 | 545 | @cache 546 | def dumpcsobject_getobjecttype(rawobject: str) -> str: 547 | # Done 548 | """ 549 | 550 | Possible Improvements: 551 | 1. Directly returning instead of using type variable and breaking loop out of loop may be faster, 552 | but sacrifices readability and simplicity 553 | 2. Object types should be a constant 554 | 3. 
IDK if using a dictionary cache is faster or ironically slower 555 | 556 | Gets the type (struct, class, enum, or interface) of a dumpcs object 557 | 558 | Example: 559 | objectsignatureline: public static class Registry // TypeDefIndex: 4 560 | Return: class 561 | 562 | 563 | Arguments: 564 | rawobject: the raw content of the dumpcs object 565 | 566 | Return: 567 | type of the dumpcs object 568 | """ 569 | lines = getlines(rawobject) 570 | objectsignatureline = lines[1].strip() 571 | if dumpcsobject_isinherited(rawobject): 572 | # If the object is inherited, read before the base class (to get the derived class) 573 | prefix = readbefore(" : ", objectsignatureline) 574 | else: 575 | # If the object is not inherited, read before the typedefindex 576 | prefix = readbefore(" //", objectsignatureline) 577 | words = getwords(prefix, customregex="(? 1 579 | # Last word is object name, second to last word is object type 580 | objecttype = words[-2] 581 | return objecttype 582 | 583 | 584 | @cache 585 | def dumpcsobject_getdatatype(rawobject: str) -> str: 586 | # Done 587 | """ 588 | 589 | Possible Improvements: 590 | 1. Directly returning instead of using type variable and breaking loop out of loop may be faster, 591 | but sacrifices readability and simplicity 592 | 2. Using a string for data type instead of using a list and concatenating it into a string may be faster, 593 | but sacrifices readability and simplicity 594 | 3. Object types should be a constant 595 | 4. 
IDK if using a dictionary cache is faster or ironically slower 596 | 597 | Gets the data type of a dumpcs object 598 | 599 | Example: 600 | objectsignatureline: public static class Registry // TypeDefIndex: 4 601 | Return: public static 602 | 603 | 604 | Arguments: 605 | rawobject: the raw content of the dumpcs object 606 | 607 | Return: 608 | data type of the dumpcs object 609 | """ 610 | lines = getlines(rawobject) 611 | objectsignatureline = lines[1].strip() 612 | if dumpcsobject_isinherited(rawobject): 613 | # If the object is inherited, read before the base class (to get the derived class) 614 | prefix = readbefore(" : ", objectsignatureline) 615 | else: 616 | # If the object is not inherited, read before the typedefindex 617 | prefix = readbefore(" //", objectsignatureline) 618 | words = getwords(prefix, customregex="(? 1 620 | # Delete object name and object type 621 | del words[-1:-2] 622 | datatype = wordstostring(words) 623 | return datatype 624 | 625 | 626 | @cache 627 | def dumpcsobject_getname(rawobject: str, includenesting=False) -> str: 628 | # Done 629 | """ 630 | 631 | Possible Improvements: 632 | 1. Directly returning instead of using variable may be faster, 633 | but sacrifices readability and simplicity 634 | 2. 
IDK if using a dictionary cache is faster or ironically slower 635 | 636 | Gets the name of a dumpcs object 637 | 638 | Examples: 639 | objectsignatureline: private enum SimpleCollator.ExtenderType // TypeDefIndex: 41 640 | includenesting: True 641 | Return: SimpleCollator.ExtenderType 642 | 643 | objectsignatureline: private enum SimpleCollator.ExtenderType // TypeDefIndex: 41 644 | includenesting: False 645 | Return: ExtenderType 646 | 647 | 648 | Arguments: 649 | rawobject: the raw content of the dumpcs object 650 | includenesting: whether to include the outer objects in the name 651 | (only applies if the object is nested) 652 | 653 | Return: 654 | the name of the dumpcs object 655 | """ 656 | lines = getlines(rawobject) 657 | objectsignatureline = lines[1].strip() 658 | if dumpcsobject_isinherited(rawobject): 659 | # If the object is inherited, read before the base class (to get the derived class) 660 | prefix = readbefore(" : ", objectsignatureline) 661 | else: 662 | # If the object is not inherited, read before the typedefindex 663 | prefix = readbefore(" //", objectsignatureline) 664 | if dumpcsobject_isgeneric(rawobject): 665 | # Match generics, but not compiler generated symbols 666 | # EX: IEnumerator, but not TweenRunner. 667 | prefix = readbefore("(?!^)(?)", prefix, regex=True) 668 | # The name of the object is the last word before the delimiter in the objectsignatureline 669 | words = getwords(prefix, customregex="(? 1 671 | fullname = words[-1] #including nesting (if any) 672 | if includenesting: 673 | name = fullname 674 | else: 675 | nesting = fullname.split(".") 676 | # Last element is the name of the object (without nesting, if any) 677 | name = nesting[-1] 678 | return name 679 | 680 | 681 | @cache 682 | def dumpcsobject_getbases(rawobject: str) -> list[str]: 683 | # Not Done 684 | """ 685 | Docs Not Done! 686 | 687 | Possible Improvements: 688 | 1. 
Directly returning instead of using variable may be faster, 689 | but sacrifices readability and simplicity 690 | 2. IDK if using a dictionary cache is faster or ironically slower 691 | 692 | Gets the base class of a dumpcs object 693 | If the object is not inherited, returns None 694 | 695 | Examples: 696 | objectsignatureline: public static class Registry // TypeDefIndex: 4 697 | Return: None 698 | 699 | objectsignatureline: public class DecalsMeshRenderer : MonoBehaviour // TypeDefIndex: 4727 700 | Return: MonoBehavior 701 | 702 | 703 | Arguments: 704 | rawobject: the raw content of the dumpcs object 705 | 706 | Returns: 707 | base class of the dumpcs object, or None if the dumpcs object is not inherited 708 | """ 709 | lines = getlines(rawobject) 710 | objectsignatureline = lines[1].strip() 711 | if not dumpcsobject_isinherited(rawobject): 712 | return None 713 | suffix = readafter(" : ", objectsignatureline) 714 | basessection = readbefore(" //", suffix) 715 | # Split basessection by ", ", which can be used to mark the start of each base 716 | # Make sure not to split by ", " in data types (such as Dict) 717 | rawbases = [] 718 | thisbase = "" 719 | previousletter = None 720 | datatypelayer = 0 721 | iterable = iter(basessection) 722 | for letter in iterable: 723 | # This code will treat compiler generated symbols (such as .) 
as generics 724 | # This is unintentional, but it doesn't matter in this case 725 | if datatypelayer >= 1 and letter == ">": 726 | datatypelayer -= 1 727 | elif letter == "<": 728 | datatypelayer += 1 729 | elif datatypelayer == 0 and letter == ",": 730 | # Call next() on iterator to skip the character after this one (space) 731 | rawbases.append(thisbase) 732 | thisbase = "" 733 | next(iterable) 734 | # Continue to skip this letter (","), so it does not get added to this param 735 | continue 736 | thisbase += letter 737 | previousletter = letter 738 | if thisbase != "": 739 | # Add last base (it did not get added because there is no comma after it) 740 | rawbases.append(thisbase) 741 | if not rawbases: 742 | return [] 743 | # Build dictionary of bases from raw bases 744 | bases = [] 745 | for rawbase in rawbases: 746 | name = dumpcsobject_base_getname(rawbase) 747 | isgeneric = dumpcsobject_base_isgeneric(rawbase) 748 | genericdatatype = dumpcsobject_base_getgenericdatatype(rawbase) if isgeneric else None 749 | base = { 750 | "raw": rawbase, 751 | "name": name, 752 | "isgeneric": isgeneric, 753 | "genericdatatype": genericdatatype, 754 | } 755 | bases.append(base) 756 | return bases 757 | 758 | @cache 759 | def dumpcsobject_base_getname(rawbase: str) -> str: 760 | if dumpcsobject_base_isgeneric(rawbase): 761 | # Match generics, but not compiler generated symbols 762 | # EX: IEnumerator, but not TweenRunner. 763 | return readbefore("(?!^)(?)", rawbase, regex=True) 764 | else: 765 | return rawbase 766 | 767 | @cache 768 | def dumpcsobject_base_isgeneric(rawbase: str) -> bool: 769 | # Not Done 770 | # Match generics, but not compiler generated symbols 771 | # EX: IEnumerator, but not TweenRunner. 
@cache
def dumpcsobject_isabstract(rawobject: str) -> bool:
    # Not Done
    """
    Determines whether a dumpcs object is abstract

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        True for interfaces (implicitly abstract) and for objects carrying
        the "abstract" modifier
    """
    return (dumpcsobject_getobjecttype(rawobject) == "interface"
            or "abstract" in dumpcsobject__getmodifiers(rawobject))


@cache
def dumpcsobject_isstatic(rawobject: str) -> bool:
    # Not Done
    """
    Determines whether a dumpcs object carries the "static" modifier

    Arguments:
        rawobject: the raw content of the dumpcs object
    """
    return "static" in dumpcsobject__getmodifiers(rawobject)


@cache
def dumpcsobject_issealed(rawobject: str) -> bool:
    # Not Done
    """
    Determines whether a dumpcs object carries the "sealed" modifier

    Arguments:
        rawobject: the raw content of the dumpcs object
    """
    return "sealed" in dumpcsobject__getmodifiers(rawobject)


@cache
def dumpcsobject_getnesting(rawobject: str, includename=False, innertoouter=True) -> tuple:
    # Not Done
    """
    Gets the chain of outer objects a dumpcs object is nested in

    Example:
        full name: a.b.c
        includename: True
        innertoouter: True
        Return: (c, b, a)

    Arguments:
        rawobject: the raw content of the dumpcs object
        includename: whether the object's own name is part of the result
        innertoouter: order of the result (inner to outer if True, outer to inner if False)

    Return:
        tuple of the dot-separated parts of the object's full name
    """
    parts = dumpcsobject_getname(rawobject, includenesting=True).split(".")
    if not includename:
        # The last part is the object's own name
        parts = parts[:-1]
    if innertoouter:
        parts = parts[::-1]
    return tuple(parts)


@cache
def dumpcsobject_isnested(rawobject: str) -> bool:
    """
    Determines whether a dumpcs object is nested inside another object

    Arguments:
        rawobject: the raw content of the dumpcs object
    """
    return bool(dumpcsobject_getnesting(rawobject))


@cache
def dumpcsobject_getpath(rawobject: str) -> str:
    """
    Builds the path of a dumpcs object: "namespace::" (only if the object has a
    namespace), then each outer object followed by ".", then the object's name

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        the path of the dumpcs object
    """
    hasnamespace = dumpcsobject_hasnamespace(rawobject)
    namespace = dumpcsobject_getnamespace(rawobject)
    name = dumpcsobject_getname(rawobject)
    outerobjects = dumpcsobject_getnesting(rawobject, includename=False, innertoouter=False)
    pieces = [f"{namespace}::"] if hasnamespace else []
    pieces.extend(f"{outerobject}." for outerobject in outerobjects)
    pieces.append(name)
    return "".join(pieces)
@cache
def dumpcsobject_gettypedefindex(rawobject: str) -> str:
    # Done
    """
    Gets the typedefindex of a dumpcs object

    Example:
        objectsignatureline: public static class Registry // TypeDefIndex: 4
        Return: 4

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        typedefindex of the dumpcs object (everything after "// TypeDefIndex: "
        on the signature line)
    """
    # The signature line is the second line of the object
    return readafter("// TypeDefIndex: ", rawobject.splitlines()[1].strip())


@cache
def dumpcsobject_isinherited(rawobject: str) -> bool:
    # Done
    """
    Determines whether a dumpcs object is inherited

    Examples:
        objectsignatureline: public static class Registry // TypeDefIndex: 4
        Return: False

        objectsignatureline: public class DecalsMeshRenderer : MonoBehaviour // TypeDefIndex: 4727
        Return: True

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        whether the signature line (second line of the object) contains " : "
    """
    return " : " in rawobject.splitlines()[1].strip()


@cache
def dumpcsobject__hasmethods(rawobject: str) -> bool:
    """Whether the dumpcs object contains a "// Methods" section marker."""
    marker = "\n\t// Methods"
    return marker in rawobject


@cache
def dumpcsobject__hasfields(rawobject: str) -> bool:
    """Whether the dumpcs object contains a "// Fields" section marker."""
    marker = "\n\t// Fields"
    return marker in rawobject


@cache
def dumpcsobject__hasproperties(rawobject: str) -> bool:
    """Whether the dumpcs object contains a "// Properties" section marker."""
    marker = "\n\t// Properties"
    return marker in rawobject
suffix, backward=True) 996 | methoddelimiter = "\n\n" 997 | # Split methodssection by "\n\n", which can be used to mark the start of each method 998 | rawmethods = methodssection.split(methoddelimiter) 999 | if not rawmethods: 1000 | return [] 1001 | # The split function will capture everything before the first method 1002 | # since we split by the delimiter that starts methods, so delete that 1003 | del rawmethods[0] 1004 | # Build dictionary of methods from raw methods 1005 | methods = [] 1006 | for rawmethod in rawmethods: 1007 | isconstructor = dumpcsobject_method_isconstructor(rawmethod) 1008 | isstaticconstructor = dumpcsobject_method_isstaticconstructor(rawmethod) 1009 | isdestructor = dumpcsobject_method_isdestructor(rawmethod) 1010 | isoperator = dumpcsobject_method_isoperator(rawmethod) 1011 | isupdate = dumpcsobject_method_isupdate(rawmethod) 1012 | updatetype = dumpcsobject_method_getupdatetype(rawmethod) if isupdate else None 1013 | if isconstructor or isstaticconstructor: 1014 | name = dumpcsobject_getname(rawobject, includenesting=False) 1015 | elif isdestructor: 1016 | name = f"~{dumpcsobject_getname(rawobject, includenesting=False)}" 1017 | else: 1018 | name = dumpcsobject_method_getname(rawmethod) 1019 | datatype = dumpcsobject_method_getdatatype(rawmethod) 1020 | visibility = dumpcsobject_method_getvisibility(rawmethod) 1021 | isvoid = dumpcsobject_method_isvoid(rawmethod) 1022 | isoverride = dumpcsobject_method_isoverride(rawmethod) 1023 | isvirtual = dumpcsobject_method_isvirtual(rawmethod) 1024 | # We don't have to check if the object is static, but this is a shortcut 1025 | isstatic = True if dumpcsobject_isstatic(rawobject) else dumpcsobject_method_isstatic(rawmethod) 1026 | # This is an example of where methods being able to access other objects and methods would be useful 1027 | # - so we could put this in the dumcsobject_method_isabstract function instead of having to check 1028 | # for it here 1029 | isabstract = True if 
dumpcsobject_isabstract(rawobject) else dumpcsobject_method_isabstract(rawmethod) 1030 | isextern = dumpcsobject_method_isextern(rawmethod) 1031 | hasslot = dumpcsobject_method_hasslot(rawmethod) 1032 | slot = dumpcsobject_method_getslot(rawmethod) if hasslot else None 1033 | isgenericinstmethod = dumpcsobject_method_isgenericinstmethod(rawmethod) 1034 | generics = dumpcsobject_method_getgenerics(rawmethod) if isgenericinstmethod else [] 1035 | isgeneric = dumpcsobject_method_isgeneric(rawmethod) 1036 | generictypename = dumpcsobject_method_getgenerictypename(rawmethod) if isgeneric else None 1037 | hasoffsetdata = dumpcsobject_method_hasoffsetdata(rawmethod) # Offset -1 if not 1038 | if hasoffsetdata: 1039 | relativevirtualaddress = dumpcsobject_method_getrelativevirtualaddress(rawmethod) #RVA 1040 | virtualaddress = dumpcsobject_method_getvirtualaddress(rawmethod) #VA 1041 | offset = dumpcsobject_method_getoffset(rawmethod) #Offset 1042 | else: 1043 | relativevirtualaddress = None # RVA 1044 | virtualaddress = None # VA 1045 | offset = None # Offset 1046 | params = dumpcsobject_method_getparams(rawmethod) 1047 | method = { 1048 | "raw": rawmethod, 1049 | "name": name, 1050 | "isconstructor": isconstructor, 1051 | "isstaticconstructor": isstaticconstructor, 1052 | "isoperator": isoperator, 1053 | "isupdate": isupdate, 1054 | "updatetype": updatetype, 1055 | "datatype": datatype, 1056 | "isvoid": isvoid, 1057 | "isvirtual": isvirtual, 1058 | "isoverride": isoverride, 1059 | "isabstract": isabstract, 1060 | "isstatic": isstatic, 1061 | "isextern": isextern, 1062 | "hasslot": hasslot, 1063 | "slot": slot, 1064 | "hasoffsetdata": hasoffsetdata, 1065 | "relativevirtualaddress": relativevirtualaddress, 1066 | "offset": offset, 1067 | "virtualaddress": virtualaddress, 1068 | "params": params, 1069 | "isgenericinstmethod": isgenericinstmethod, 1070 | "generics": generics, 1071 | "isgeneric": isgeneric, 1072 | "generictypename": generictypename, 1073 | } 1074 | 
@cache
def dumpcsobject_method_isconstructor(rawmethod: str) -> bool:
    # Not Done
    """
    Determines whether a method is an instance constructor (named ".ctor")
    Static-ness does not need to be checked, because the name tells us
    """
    return dumpcsobject_method_getname(rawmethod) == ".ctor"


@cache
def dumpcsobject_method_isstaticconstructor(rawmethod: str) -> bool:
    # Not Done
    """
    Determines whether a method is a static constructor (named ".cctor")
    Static-ness does not need to be checked, because the name tells us
    """
    return dumpcsobject_method_getname(rawmethod) == ".cctor"


@cache
def dumpcsobject_method_isdestructor(rawmethod: str) -> bool:
    # Not Done
    """Determines whether a method is a destructor (named "Finalize")."""
    return dumpcsobject_method_getname(rawmethod) == "Finalize"


@cache
def dumpcsobject_method_isoperator(rawmethod: str) -> bool:
    # Not Done
    """
    Determines whether a method looks like an overloaded operator

    WARNING:
        Overloaded operators are not actually represented as operators in compiled
        unity games. The only hint is the naming convention: the name is prefixed
        with "op_" followed by the operator name (ex: op_Addition).
        A programmer or obfuscator can use that prefix on ordinary methods, which
        makes this function report false positives. The opposite also holds:
        an operator's name can be changed after compilation so that it no longer
        starts with "op_", which makes this function miss it.
    """
    return dumpcsobject_method_getname(rawmethod).startswith("op_")


@cache
def dumpcsobject_method_isupdate(rawmethod: str) -> bool:
    # Not Done
    """Determines whether a method is named FixedUpdate, LateUpdate or Update."""
    # Should be a constant!
    return dumpcsobject_method_getname(rawmethod) in {"FixedUpdate", "LateUpdate", "Update"}


@cache
def dumpcsobject_method_getupdatetype(rawmethod: str) -> str:
    # Not Done
    """
    Gets the update type of a method, which is simply its name

    Return:
        "FixedUpdate", "LateUpdate" or "Update", or None if the method is not an update
    """
    if not dumpcsobject_method_isupdate(rawmethod):
        # No update type
        return None
    updatetype = dumpcsobject_method_getname(rawmethod)
    # Should be a constant!
    assert updatetype in {"FixedUpdate", "LateUpdate", "Update"}
    return updatetype
1173 | if re.search("(?!^)(?)", lastword): 1174 | return True 1175 | else: 1176 | return False 1177 | 1178 | @cache 1179 | def dumpcsobject_method_getgenerictypename(rawmethod: str) -> Optional[str]: 1180 | # Not Done 1181 | if not dumpcsobject_method_isgeneric(rawmethod): 1182 | # No generic type 1183 | return None 1184 | lines = getlines(rawmethod) 1185 | methodsignatureline = lines[1].strip() 1186 | prefix = readbefore("(", methodsignatureline) 1187 | words = getwords(prefix, customregex="(? 1 1189 | # The name of the method is the last word before the delimiter in the methodsignatureline 1190 | name = words[-1] 1191 | suffix = readafter("<", name) 1192 | generictype = readbefore(">", suffix, backward=True) 1193 | return generictype 1194 | 1195 | 1196 | @cache 1197 | def dumpcsobject_method_getvisibility(rawmethod: str) -> str: 1198 | # Not Done 1199 | lines = getlines(rawmethod) 1200 | methodsignatureline = lines[1].strip() 1201 | prefix = readbefore("(", methodsignatureline) 1202 | words = getwords(prefix, customregex="(? 1 1204 | # Delete method name 1205 | del words[-1] 1206 | ACCESSMODIFIERS = {"public", "private", "protected", "internal"} # should be a constant! 1207 | # visibilitywords = [] 1208 | # for word in words: 1209 | # if word not in ACCESSMODIFIERS: 1210 | # break 1211 | # # Add this word onto the visibility 1212 | # visibilitywords.append(word) 1213 | # # Concatenate the words back into a string 1214 | # visibility = wordstostring(visibilitywords) 1215 | visibility = wordstostring([word for word in words if word in ACCESSMODIFIERS]) 1216 | return visibility 1217 | 1218 | 1219 | @cache 1220 | def dumpcsobject_method__getmodifiers(rawmethod: str) -> list[str]: 1221 | # Not Done 1222 | lines = getlines(rawmethod) 1223 | methodsignatureline = lines[1].strip() 1224 | prefix = readbefore("(", methodsignatureline) 1225 | words = getwords(prefix, customregex="(? 
1 1227 | # Delete method name 1228 | del words[-1] 1229 | ACCESSMODIFIERS = {"public", "private", "protected", "internal"} # should be a constant! 1230 | modifiers = [word for word in words if word not in ACCESSMODIFIERS] 1231 | return modifiers 1232 | 1233 | 1234 | @cache 1235 | def dumpcsobject_method_getdatatype(rawmethod: str) -> str: 1236 | # Not Done 1237 | lines = getlines(rawmethod) 1238 | methodsignatureline = lines[1].strip() 1239 | prefix = readbefore("(", methodsignatureline) 1240 | words = getwords(prefix, customregex="(? 1 1242 | # Last word is method name 1243 | del words[-1] 1244 | # The data type is always one word, except for references 1245 | # (such as ref int) 1246 | if words[-2] == "ref": 1247 | datatype = words[-2:-1] 1248 | else: 1249 | datatype = words[-1] 1250 | return datatype 1251 | 1252 | 1253 | @cache 1254 | def dumpcsobject_method_isvoid(rawmethod: str) -> bool: 1255 | # Not Done 1256 | datatype = dumpcsobject_method_getdatatype(rawmethod) 1257 | return datatype == "void" 1258 | 1259 | 1260 | @cache 1261 | def dumpcsobject_method_isvirtual(rawmethod: str) -> bool: 1262 | # Not Done 1263 | modifiers = dumpcsobject_method__getmodifiers(rawmethod) 1264 | return "virtual" in modifiers 1265 | 1266 | @cache 1267 | def dumpcsobject_method_isabstract(rawmethod: str) -> bool: 1268 | # Not Done 1269 | modifiers = dumpcsobject_method__getmodifiers(rawmethod) 1270 | return "abstract" in modifiers 1271 | 1272 | @cache 1273 | def dumpcsobject_method_isstatic(rawmethod: str) -> bool: 1274 | # Not Done 1275 | modifiers = dumpcsobject_method__getmodifiers(rawmethod) 1276 | return "static" in modifiers 1277 | 1278 | 1279 | @cache 1280 | def dumpcsobject_method_isoverride(rawmethod: str) -> bool: 1281 | # Not Done 1282 | # Note: Override keyword is *required* to override, even for abstract methods and classes, 1283 | # making our life much easier. 
1284 | # TODO: Also check if base method is virtual 1285 | modifiers = dumpcsobject_method__getmodifiers(rawmethod) 1286 | return "override" in modifiers 1287 | 1288 | 1289 | @cache 1290 | def dumpcsobject_method_isextern(rawmethod: str) -> bool: 1291 | # Not Done 1292 | modifiers = dumpcsobject_method__getmodifiers(rawmethod) 1293 | return "extern" in modifiers 1294 | 1295 | 1296 | @cache 1297 | def dumpcsobject_method_hasslot(rawmethod: str) -> bool: 1298 | # Not Done 1299 | lines = getlines(rawmethod) 1300 | offsetdataline = lines[0] 1301 | offsetdataline = offsetdataline.strip() 1302 | return "Slot: " in offsetdataline 1303 | 1304 | 1305 | @cache 1306 | def dumpcsobject_method_getslot(rawmethod: str) -> Optional[str]: 1307 | # Not Done 1308 | if not dumpcsobject_method_hasslot(rawmethod): 1309 | # No slot 1310 | return None 1311 | lines = getlines(rawmethod) 1312 | offsetdataline = lines[0].strip() 1313 | suffix = readafter("Slot: ", offsetdataline) 1314 | # if " " in suffix: 1315 | # slot = readbefore(" ", suffix) 1316 | # else: 1317 | # slot = suffix 1318 | slot = readbefore(" ", suffix, mustcontain=False) 1319 | return slot 1320 | 1321 | @cache 1322 | def dumpcsobject_method_isgenericinstmethod(rawmethod: str) -> bool: 1323 | # Not Done 1324 | return "\t/* GenericInstMethod :" in rawmethod 1325 | 1326 | 1327 | @cache 1328 | def dumpcsobject_method_hasoffsetdata(rawmethod: str) -> bool: 1329 | # Not Done 1330 | lines = getlines(rawmethod) 1331 | offsetdataline = lines[0].strip() 1332 | if "Offset: " not in offsetdataline: 1333 | return False 1334 | if "Offset: -1" in offsetdataline: 1335 | hasoffsetdata = False 1336 | else: 1337 | hasoffsetdata = True 1338 | if hasoffsetdata: 1339 | assert "RVA: " in offsetdataline 1340 | assert "VA: " in offsetdataline 1341 | return hasoffsetdata 1342 | 1343 | 1344 | @cache 1345 | def dumpcsobject_method_getrelativevirtualaddress(rawmethod: str) -> Optional[str]: 1346 | # Not Done 1347 | if not 
dumpcsobject_method_hasoffsetdata(rawmethod): 1348 | return None 1349 | lines = getlines(rawmethod) 1350 | offsetdataline = lines[0].strip() 1351 | suffix = readafter("RVA: ", offsetdataline) 1352 | # if " " in suffix: 1353 | # relativevirtualaddress = readbefore(" ", suffix) 1354 | # else: 1355 | # relativevirtualaddress = suffix 1356 | relativevirtualaddress = readbefore(" ", suffix, mustcontain=False) 1357 | return relativevirtualaddress 1358 | 1359 | 1360 | @cache 1361 | def dumpcsobject_method_getoffset(rawmethod: str) -> Optional[str]: 1362 | # Not Done 1363 | if not dumpcsobject_method_hasoffsetdata(rawmethod): 1364 | return None 1365 | lines = getlines(rawmethod) 1366 | offsetdataline = lines[0].strip() 1367 | suffix = readafter("Offset: ", offsetdataline) 1368 | # if " " in suffix: 1369 | # offset = readbefore(" ", suffix) 1370 | # else: 1371 | # offset = suffix 1372 | offset = readbefore(" ", suffix, mustcontain=False) 1373 | return offset 1374 | 1375 | 1376 | @cache 1377 | def dumpcsobject_method_getvirtualaddress(rawmethod: str) -> Optional[str]: 1378 | # Not Done 1379 | if not dumpcsobject_method_hasoffsetdata(rawmethod): 1380 | return None 1381 | lines = getlines(rawmethod) 1382 | offsetdataline = lines[0].strip() 1383 | suffix = readafter("VA: ", offsetdataline) 1384 | # if " " in suffix: 1385 | # virtualaddress = readbefore(" ", suffix) 1386 | # else: 1387 | # virtualaddress = suffix 1388 | virtualaddress = readbefore(" ", suffix, mustcontain=False) 1389 | return virtualaddress 1390 | 1391 | 1392 | @cache 1393 | def dumpcsobject_method_getgenerics(rawmethod: str) -> list[dict]: 1394 | # Not Done 1395 | """ 1396 | 1397 | Possible Improvements: 1398 | 1399 | Gets a list of generics from a dumpcs method 1400 | 1401 | rawmethod: the raw content of the dumpcs method 1402 | 1403 | Return: 1404 | list of generics in the dumpcs method 1405 | """ 1406 | # Get the generics section of the dumpcs method 1407 | if not 
dumpcsobject_method_isgenericinstmethod(rawmethod): 1408 | # No generics 1409 | return [] 1410 | suffix = readafter("\t/* GenericInstMethod :", rawmethod) 1411 | genericssection = readbefore("\n\t*/", suffix) 1412 | genericdelimiter = "\n\t|\n" 1413 | # Split genericssection by ", ", which can be used to mark the start of each generic 1414 | rawgenerics = genericssection.split(genericdelimiter) 1415 | if not rawgenerics: 1416 | # No generics 1417 | return [] 1418 | # The split function will capture everything before the first generic - 1419 | # which happens to be a newline - since we split by the delimiter that starts methods, 1420 | # so delete that 1421 | del rawgenerics[0] 1422 | if not rawgenerics: 1423 | # No generics 1424 | return [] 1425 | # Build dictionary of generics from raw generics 1426 | generics = [] 1427 | for rawgeneric in rawgenerics: 1428 | types = dumpcsobject_method_generic_gettypes(rawgeneric) 1429 | hasoffsetdata = dumpcsobject_method_generic_hasoffsetdata(rawgeneric) # Offset -1 if not 1430 | if hasoffsetdata: 1431 | relativevirtualaddress = dumpcsobject_method_generic_getrelativevirtualaddress(rawgeneric) # RVA 1432 | virtualaddress = dumpcsobject_method_generic_getvirtualaddress(rawgeneric) # VA 1433 | offset = dumpcsobject_method_generic_getoffset(rawgeneric) # Offset 1434 | else: 1435 | relativevirtualaddress = None # RVA 1436 | virtualaddress = None # VA 1437 | offset = None # Offset 1438 | generic = { 1439 | "raw": rawgeneric, 1440 | "types": types, 1441 | "hasoffsetdata": hasoffsetdata, 1442 | "relativevirtualaddress": relativevirtualaddress, 1443 | "virtualaddress": virtualaddress, 1444 | "offset": offset, 1445 | } 1446 | generics.append(generic) 1447 | return generics 1448 | 1449 | 1450 | @cache 1451 | def dumpcsobject_method_generic_hasoffsetdata(rawgeneric: str) -> bool: 1452 | # Not Done 1453 | lines = getlines(rawgeneric) 1454 | offsetdataline = readafter("\t|-", lines[0]).strip() 1455 | if "Offset: " not in offsetdataline: 
1456 | return False 1457 | if "Offset: -1" in offsetdataline: 1458 | hasoffsetdata = False 1459 | else: 1460 | hasoffsetdata = True 1461 | if hasoffsetdata: 1462 | assert "RVA: " in offsetdataline 1463 | assert "VA: " in offsetdataline 1464 | return hasoffsetdata 1465 | 1466 | 1467 | @cache 1468 | def dumpcsobject_method_generic_getrelativevirtualaddress(rawgeneric: str) -> Optional[str]: 1469 | # Not Done 1470 | if not dumpcsobject_method_generic_hasoffsetdata(rawgeneric): 1471 | return None 1472 | lines = getlines(rawgeneric) 1473 | offsetdataline = readafter("\t|-", lines[0]).strip() 1474 | suffix = readafter("RVA: ", offsetdataline) 1475 | # if " " in suffix: 1476 | # relativevirtualaddress = readbefore(" ", suffix) 1477 | # else: 1478 | # relativevirtualaddress = suffix 1479 | relativevirtualaddress = readbefore(" ", suffix, mustcontain=False) 1480 | return relativevirtualaddress 1481 | 1482 | 1483 | @cache 1484 | def dumpcsobject_method_generic_getoffset(rawgeneric: str) -> Optional[str]: 1485 | # Not Done 1486 | if not dumpcsobject_method_generic_hasoffsetdata(rawgeneric): 1487 | return None 1488 | lines = getlines(rawgeneric) 1489 | offsetdataline = readafter("\t|-", lines[0]).strip() 1490 | suffix = readafter("Offset: ", offsetdataline) 1491 | # if " " in suffix: 1492 | # offset = readbefore(" ", suffix) 1493 | # else: 1494 | # offset = suffix 1495 | offset = readbefore(" ", suffix, mustcontain=False) 1496 | return offset 1497 | 1498 | 1499 | @cache 1500 | def dumpcsobject_method_generic_getvirtualaddress(rawgeneric: str) -> Optional[str]: 1501 | # Not Done 1502 | if not dumpcsobject_method_generic_hasoffsetdata(rawgeneric): 1503 | return None 1504 | lines = getlines(rawgeneric) 1505 | offsetdataline = readafter("\t|-", lines[0]).strip() 1506 | suffix = readafter("VA: ", offsetdataline) 1507 | # if " " in suffix: 1508 | # virtualaddress = readbefore(" ", suffix) 1509 | # else: 1510 | # virtualaddress = suffix 1511 | virtualaddress = readbefore(" ", suffix, 
mustcontain=False) 1512 | return virtualaddress 1513 | 1514 | 1515 | @cache 1516 | def dumpcsobject_method_generic_gettypes(rawgeneric: str) -> list[dict]: 1517 | # Not Done 1518 | """ 1519 | 1520 | Possible Improvements: 1521 | 1522 | Gets a list of types from a dumpcs method generic 1523 | 1524 | rawgeneric: the raw content of the dumpcs method generic 1525 | 1526 | Return: 1527 | list of types in the dumpcs method generic 1528 | """ 1529 | lines = getlines(rawgeneric) 1530 | typedatalines = lines[1:-1] 1531 | types = [] 1532 | for fulltype in typedatalines: 1533 | hasname = dumpcsobject_method_generic_type_hasname(fulltype) 1534 | name = dumpcsobject_method_generic_type_getname(fulltype) if hasname else None 1535 | datatype = dumpcsobject_method_generic_type_getdatatype(fulltype) 1536 | # Type is capitalized because "type" is a keyword 1537 | Type = { 1538 | "raw": fulltype, 1539 | "hasname": hasname, 1540 | "name": name, 1541 | "datatype": datatype, 1542 | } 1543 | types.append(Type) 1544 | return types 1545 | 1546 | 1547 | @cache 1548 | def dumpcsobject_method_generic_type_getdatatype(fulltype: str) -> str: 1549 | # Not Done 1550 | typedata = readafter("\t|-", fulltype) 1551 | # Match generics, but not compiler generated symbols 1552 | # EX: IEnumerator, but not TweenRunner. 1553 | suffix = readafter("(?!^)(?)", typedata, regex=True) 1554 | datatype = readbefore("(?", suffix, regex=True, backward=True) 1555 | return datatype 1556 | 1557 | 1558 | @cache 1559 | def dumpcsobject_method_generic_type_hasname(fulltype: str) -> bool: 1560 | # Not Done 1561 | typedata = readafter("\t|-", fulltype) 1562 | # Match generics, but not compiler generated symbols 1563 | # EX: # EX: IEnumerator, but not TweenRunner. 
1564 | prefix = readbefore("(?!^)(?)", typedata, regex=True) 1565 | suffix = readafter("(?", typedata, regex=True, backward=True) 1566 | return prefix + suffix != "" 1567 | 1568 | 1569 | @cache 1570 | def dumpcsobject_method_generic_type_getname(fulltype: str) -> Optional[str]: 1571 | # Not Done 1572 | if not dumpcsobject_method_generic_type_hasname(fulltype): 1573 | # No name 1574 | return None 1575 | typedata = readafter("\t|-", fulltype) 1576 | # Match generics, but not compiler generated symbols 1577 | # EX: IEnumerator, but not TweenRunner. 1578 | prefix = readbefore("(?!^)(?)", typedata, regex=True) 1579 | suffix = readafter("(?", typedata, regex=True, backward=True) 1580 | typename = prefix + suffix 1581 | return typename 1582 | 1583 | @cache 1584 | def dumpcsobject_method_getparams(rawmethod: str) -> list[dict]: 1585 | # Not Done 1586 | """ 1587 | 1588 | Possible Improvements: 1589 | 1590 | Gets a list of params from a dumpcs method 1591 | 1592 | rawmethod: the raw content of the dumpcs method 1593 | 1594 | Return: 1595 | list of params in the dumpcs object 1596 | """ 1597 | # Get the params section of the dumpcs method 1598 | lines = getlines(rawmethod) 1599 | methodsignatureline = lines[1].strip() 1600 | suffix = readafter("(", methodsignatureline) 1601 | paramssection = readbefore(")", suffix, backward=True) 1602 | # Split paramssection by ", ", which can be used to mark the start of each param 1603 | # Make sure not to split by ", " in data types (such as Dict) 1604 | # or in strings (such as "string separator = ","") 1605 | rawparams = [] 1606 | thisparam = "" 1607 | previousletter = None 1608 | instring = False 1609 | datatypelayer = 0 1610 | iterable = iter(paramssection) 1611 | for letter in iterable: 1612 | if instring: 1613 | if (previousletter is None or previousletter != "\\") and \ 1614 | (letter == "\"" or letter == "'"): 1615 | instring = False 1616 | else: 1617 | if (previousletter is None or previousletter != "\\") and \ 1618 | (letter == 
"\"" or letter == "'"): 1619 | instring = True 1620 | elif datatypelayer >= 1 and letter == ">": 1621 | datatypelayer -= 1 1622 | elif letter == "<": 1623 | datatypelayer += 1 1624 | elif datatypelayer == 0 and letter == ",": 1625 | # Call next() on iterator to skip the character after this one (space) 1626 | rawparams.append(thisparam) 1627 | thisparam = "" 1628 | next(iterable) 1629 | # Continue to prevent this letter (",") from being added to this param 1630 | continue 1631 | thisparam += letter 1632 | previousletter = letter 1633 | if thisparam != "": 1634 | # Add last parameter (it did not get added because there is no comma after it) 1635 | rawparams.append(thisparam) 1636 | if not rawparams: 1637 | # No params 1638 | return [] 1639 | # Build dictionary of params from raw params 1640 | params = [] 1641 | for rawparam in rawparams: 1642 | hasname = dumpcsobject_method_param_hasname(rawparam) 1643 | name = dumpcsobject_method_param_getname(rawparam) if hasname else None 1644 | datatype = dumpcsobject_method_param_getdatatype(rawparam) 1645 | hasdefault = dumpcsobject_method_param_hasdefault(rawparam) 1646 | default = dumpcsobject_method_param_getdefault(rawparam) if hasdefault else None 1647 | param = { 1648 | "raw": rawparam, 1649 | "hasname": hasname, 1650 | "name": name, 1651 | "datatype": datatype, 1652 | "hasdefault": hasdefault, 1653 | "default": default, 1654 | } 1655 | params.append(param) 1656 | return params 1657 | 1658 | 1659 | @cache 1660 | def dumpcsobject_method_param_hasname(rawparam: str) -> bool: 1661 | # Not Done 1662 | if dumpcsobject_method_param_hasdefault(rawparam): 1663 | rawparam = readbefore(" = ", rawparam) 1664 | if rawparam[-1] == " " or rawparam[-1] == ">": 1665 | # EX: "(AggregateException )" or "(object )" 1666 | hasname = False 1667 | else: 1668 | hasname = True 1669 | return hasname 1670 | 1671 | 1672 | @cache 1673 | def dumpcsobject_method_param_getname(rawparam: str) -> Optional[str]: 1674 | # Not Done 1675 | if not 
dumpcsobject_method_param_hasname(rawparam): 1676 | # No Name 1677 | return None 1678 | if dumpcsobject_method_param_hasdefault(rawparam): 1679 | # the " = " always comes before the default value, 1680 | # so we do not have to worry if the default value is a string containing " = " 1681 | rawparam = readbefore(" = ", rawparam) 1682 | words = getwords(rawparam, customregex="(? 1 1684 | # The name of the param is the last word 1685 | return words[-1] 1686 | 1687 | 1688 | @cache 1689 | def dumpcsobject_method_param_getdatatype(rawparam: str) -> str: 1690 | # Not Done 1691 | if dumpcsobject_method_param_hasdefault(rawparam): 1692 | # the " = " always comes before the default value, 1693 | # so we do not have to worry if the default value is a string containing " = " 1694 | rawparam = readbefore(" = ", rawparam) 1695 | words = getwords(rawparam, customregex="(? 1 1699 | del words[-1] 1700 | # Concatenate the words back into a string 1701 | datatype = wordstostring(words) 1702 | return datatype 1703 | 1704 | 1705 | @cache 1706 | def dumpcsobject_method_param_hasdefault(rawparam: str) -> bool: 1707 | # Not Done 1708 | # the " = " always comes before the default value, 1709 | # so we do not have to worry if the default value is a string containing " = " 1710 | return " = " in rawparam 1711 | 1712 | 1713 | @cache 1714 | def dumpcsobject_method_param_getdefault(rawparam: str) -> str: 1715 | # Not Done 1716 | if not dumpcsobject_method_param_hasdefault(rawparam): 1717 | # No default 1718 | return None 1719 | # the " = " always comes before the default value, 1720 | # so we do not have to worry if the default value is a string containing " = " 1721 | default = readafter(" = ", rawparam) 1722 | return default 1723 | 1724 | 1725 | @cache 1726 | def dumpcsobject_hasconstructor(rawobject: str) -> bool: 1727 | # Not Done 1728 | methods = dumpcsobject_getmethods(rawobject) 1729 | for method in methods: 1730 | if method["isconstructor"]: 1731 | return True 1732 | return False 1733 | 
1734 | 1735 | @cache 1736 | def dumpcsobject_hasstaticconstructor(rawobject: str) -> bool: 1737 | # Not Done 1738 | methods = dumpcsobject_getmethods(rawobject) 1739 | for method in methods: 1740 | if method["isstaticconstructor"]: 1741 | return True 1742 | return False 1743 | 1744 | 1745 | @cache 1746 | def dumpcsobject_getupdatetypes(rawobject: str) -> list[str]: 1747 | # Not Done 1748 | updatetypes = [] 1749 | methods = dumpcsobject_getmethods(rawobject) 1750 | for method in methods: 1751 | if method["isupdate"]: 1752 | if not method["updatetype"] in updatetypes: 1753 | updatetypes.append(method["updatetype"]) 1754 | return updatetypes 1755 | 1756 | 1757 | @cache 1758 | def dumpcsobject_getfields(rawobject: str) -> list[dict]: 1759 | # Not Done 1760 | """ 1761 | 1762 | Possible Improvements: 1763 | 1764 | Gets a list of fields from a dumpcs object 1765 | 1766 | rawobject: the raw content of the dumpcs object 1767 | 1768 | Return: 1769 | list of fields in the dumpcs object 1770 | """ 1771 | # Get the fields section of the dumpcs object 1772 | if not dumpcsobject__hasfields(rawobject): 1773 | # No Fields 1774 | return [] 1775 | suffix = readafter("\n\t// Fields", rawobject) 1776 | checks = ("\n\t// Properties", "\n\t// Methods") 1777 | found = False 1778 | for check in checks: 1779 | if check in suffix: 1780 | fieldssection = readbefore(f"\n{check}", suffix) 1781 | found = True 1782 | break 1783 | if not found: 1784 | fieldssection = readbefore("\n}", suffix, backward=True) 1785 | fielddelimiter = "\n" 1786 | # Split fields by "\n", which can be used to mark the start of each field 1787 | rawfields = fieldssection.split(fielddelimiter) 1788 | if not rawfields: 1789 | return [] 1790 | # The split function will capture everything before the first field 1791 | # since we split by the delimiter that starts fields, so delete that 1792 | del rawfields[0] 1793 | # Build dictionary of fields from raw fields 1794 | fields = [] 1795 | for rawfield in rawfields: 1796 | name 
= dumpcsobject_field_getname(rawfield) 1797 | datatype = dumpcsobject_field_getdatatype(rawfield) 1798 | #isstatic = dumpcsobject_field_isstatic(rawfield) 1799 | isdynamic = dumpcsobject_field_isdynamic(rawfield) 1800 | isgeneric = dumpcsobject_field_isgeneric(rawfield) 1801 | generictypename = dumpcsobject_field_getgenerictypename(rawfield) if isgeneric else None 1802 | hasoffset = dumpcsobject_field_hasoffset(rawfield) 1803 | offset = dumpcsobject_field_getoffset(rawfield) if hasoffset else None 1804 | hasdefault = dumpcsobject_field_hasdefault(rawfield) 1805 | default = dumpcsobject_field_getdefault(rawfield) if hasdefault else None 1806 | field = { 1807 | "raw": rawfield, 1808 | "name": name, 1809 | "datatype": datatype, 1810 | "isdynamic": isdynamic, 1811 | "hasoffset": hasoffset, 1812 | "offset": offset, 1813 | "isgeneric": isgeneric, 1814 | "generictypename": generictypename, 1815 | "hasdefault": hasdefault, 1816 | "default": default, 1817 | } 1818 | fields.append(field) 1819 | return fields 1820 | 1821 | 1822 | @cache 1823 | def dumpcsobject_field_getname(rawfield: str) -> str: 1824 | # Not Done 1825 | fieldsignatureline = rawfield.strip() 1826 | prefix = readbefore(";", fieldsignatureline) 1827 | if dumpcsobject_field_hasdefault(rawfield): 1828 | prefix = readbefore(" = ", prefix) 1829 | words = getwords(prefix, customregex="(? 1 1831 | # The name of the field is the last word before the delimiter in the rawfield 1832 | lastword = words[-1] 1833 | if dumpcsobject_field_isgeneric(rawfield): 1834 | # Match generics, but not compiler generated symbols 1835 | # EX: IEnumerator, but not TweenRunner. 
1836 | prefix = readbefore("(?!^)(?)", lastword, regex=True) 1837 | suffix = readafter("(?", lastword, regex=True, backward=True) 1838 | name = prefix + suffix 1839 | else: 1840 | name = lastword 1841 | return name 1842 | 1843 | 1844 | @cache 1845 | def dumpcsobject_field_isgeneric(rawfield: str) -> bool: 1846 | # Not Done 1847 | fieldsignatureline = rawfield.strip() 1848 | prefix = readbefore(";", fieldsignatureline) 1849 | if dumpcsobject_field_hasdefault(rawfield): 1850 | prefix = readbefore(" = ", prefix) 1851 | words = getwords(prefix, customregex="(? 1 1853 | lastword = words[-1] 1854 | # Match generics, but not compiler generated symbols 1855 | # EX: IEnumerator, but not TweenRunner. 1856 | if re.search("(?!^)(?)", lastword): 1857 | return True 1858 | else: 1859 | return False 1860 | 1861 | 1862 | @cache 1863 | def dumpcsobject_field_getgenerictypename(rawfield: str) -> str: 1864 | # Not Done 1865 | if not dumpcsobject_field_isgeneric(rawfield): 1866 | # No generic type 1867 | return None 1868 | fieldsignatureline = rawfield.strip() 1869 | prefix = readbefore(";", fieldsignatureline) 1870 | if dumpcsobject_field_hasdefault(rawfield): 1871 | prefix = readbefore(" = ", prefix) 1872 | words = getwords(prefix, customregex="(? 1 1874 | # The name of the field is the last word before the delimiter in the rawfield 1875 | name = words[-1] 1876 | suffix = readafter("<", name) 1877 | generictype = readbefore(">", suffix, backward=True) 1878 | return generictype 1879 | 1880 | 1881 | @cache 1882 | def dumpcsobject_field_getvisibility(rawfield: str) -> str: 1883 | # Not Done 1884 | fieldsignatureline = rawfield.strip() 1885 | prefix = readbefore(";", fieldsignatureline) 1886 | if dumpcsobject_field_hasdefault(rawfield): 1887 | prefix = readbefore(" = ", prefix) 1888 | # The data type is everything but the last word (which is the field name) 1889 | words = getwords(prefix, customregex="(? 
1 1891 | # Delete field name 1892 | del words[-1] 1893 | # Concatenate the words back into a string 1894 | datatype = wordstostring(words) 1895 | return datatype 1896 | 1897 | 1898 | @cache 1899 | def dumpcsobject_field_getdatatype(rawfield: str) -> str: 1900 | # Not Done 1901 | fieldsignatureline = rawfield.strip() 1902 | prefix = readbefore(";", fieldsignatureline) 1903 | if dumpcsobject_field_hasdefault(rawfield): 1904 | prefix = readbefore(" = ", prefix) 1905 | # The data type is everything but the last word (which is the field name) 1906 | words = getwords(prefix, customregex="(? 1 1908 | # Delete field name 1909 | del words[-1] 1910 | # Concatenate the words back into a string 1911 | datatype = wordstostring(words) 1912 | return datatype 1913 | 1914 | 1915 | @cache 1916 | def dumpcsobject_field_isdynamic(rawfield: str) -> bool: 1917 | # Not Done 1918 | datatype = dumpcsobject_field_getdatatype(rawfield) 1919 | # Split the data type by " " 1920 | # Make sure not to split by " " in compound types (such as Dict) 1921 | words = getwords(datatype, customregex="(? 
1 1923 | return "dynamic" in words 1924 | 1925 | 1926 | @cache 1927 | def dumpcsobject_field_hasoffset(rawfield: str) -> bool: 1928 | # Not Done 1929 | fieldsignatureline = rawfield.strip() 1930 | suffix = readafter(";", fieldsignatureline, backward=True) 1931 | return "// " in suffix 1932 | 1933 | 1934 | @cache 1935 | def dumpcsobject_field_getoffset(rawfield: str) -> str: 1936 | # Not Done 1937 | if not dumpcsobject_field_hasoffset(rawfield): 1938 | # No offset 1939 | return None 1940 | fieldsignatureline = rawfield.strip() 1941 | suffix = readafter(";", fieldsignatureline, backward=True) 1942 | offset = readafter("// ", suffix) 1943 | return offset 1944 | 1945 | 1946 | @cache 1947 | def dumpcsobject_field_hasdefault(rawfield: str) -> bool: 1948 | # Not Done 1949 | # the " = " always comes before the default value, 1950 | # so we do not have to worry if the default value is a string containing " = " 1951 | return " = " in rawfield 1952 | 1953 | 1954 | @cache 1955 | def dumpcsobject_field_getdefault(rawfield: str) -> str: 1956 | # Not Done 1957 | if not dumpcsobject_field_hasdefault(rawfield): 1958 | # No default 1959 | return None 1960 | fieldsignatureline = rawfield.strip() 1961 | prefix = readbefore(";", fieldsignatureline, backward=True) 1962 | # the " = " always comes before the default value, 1963 | # so we do not have to worry if the default value is a string containing " = " 1964 | default = readafter(" = ", prefix) 1965 | return default 1966 | 1967 | 1968 | @cache 1969 | def dumpcsobject_getproperties(rawobject: str) -> list[dict]: 1970 | # Not Done 1971 | """ 1972 | 1973 | Possible Improvements: 1974 | 1975 | Gets a list of properties from a dumpcs object 1976 | 1977 | rawobject: the raw content of the dumpcs object 1978 | 1979 | Return: 1980 | list of properties in the dumpcs object 1981 | """ 1982 | # Get the properties section of the dumpcs object 1983 | if not dumpcsobject__hasproperties(rawobject): 1984 | # No Properties 1985 | return [] 1986 | 
suffix = readafter("\n\t// Properties", rawobject) 1987 | checks = ("\n\t// Methods", "\n\t// Fields") 1988 | found = False 1989 | for check in checks: 1990 | if check in suffix: 1991 | propertiessection = readbefore(f"\n{check}", suffix) 1992 | found = True 1993 | break 1994 | if not found: 1995 | propertiessection = readbefore("\n}", suffix, backward=True) 1996 | propertydelimiter = "\n" 1997 | # Split properties by "\n", which can be used to mark the start of each property 1998 | rawproperties = propertiessection.split(propertydelimiter) 1999 | if not rawproperties: 2000 | return [] 2001 | # The split function will capture everything before the first property 2002 | # since we split by the delimiter that starts properties, so delete that 2003 | del rawproperties[0] 2004 | # Build dictionary of properties from raw properties 2005 | properties = [] 2006 | for rawproperty in rawproperties: 2007 | name = dumpcsobject_property_getname(rawproperty) 2008 | datatype = dumpcsobject_property_getdatatype(rawproperty) 2009 | # Fixme: Can you override properties? 
2010 | isoverride = dumpcsobject_property_isoverride(rawproperty) 2011 | isvirtual = dumpcsobject_property_isvirtual(rawproperty) 2012 | isabstract = True if dumpcsobject_isabstract(rawobject) else dumpcsobject_property_isabstract(rawproperty) 2013 | #isstatic = dumpcsobject_property_isstatic(rawproperty) 2014 | isgeneric = dumpcsobject_property_isgeneric(rawproperty) 2015 | generictypename = dumpcsobject_property_getgenerictypename(rawproperty) if isgeneric else None 2016 | hasgetter = dumpcsobject_property_hasgetter(rawproperty) 2017 | hassetter = dumpcsobject_property_hassetter(rawproperty) 2018 | # The name Property is capitalized because property" is a keyword in python 2019 | Property = { 2020 | "raw": rawproperty, 2021 | "name": name, 2022 | "datatype": datatype, 2023 | "isvirtual": isvirtual, 2024 | "isabstract": isabstract, 2025 | "isoverride": isoverride, 2026 | "isgeneric": isgeneric, 2027 | "generictypename": generictypename, 2028 | "hasgetter": hasgetter, 2029 | "hassetter": hassetter, 2030 | } 2031 | properties.append(Property) 2032 | return properties 2033 | 2034 | 2035 | @cache 2036 | def dumpcsobject_property_getname(rawproperty: str) -> str: 2037 | # Not Done 2038 | propertysignatureline = rawproperty.strip() 2039 | prefix = readbefore(" {", propertysignatureline) 2040 | words = getwords(prefix, customregex="(? 1 2042 | # The name of the property is the last word before the delimiter in the rawproperty 2043 | lastword = words[-1] 2044 | if dumpcsobject_property_isgeneric(rawproperty): 2045 | # Match generics, but not compiler generated symbols 2046 | # EX: IEnumerator, but not TweenRunner. 
2047 | prefix = readbefore("(?!^)(?)", lastword, regex=True) 2048 | suffix = readafter("(?", lastword, regex=True, backward=True) 2049 | name = prefix + suffix 2050 | else: 2051 | name = lastword 2052 | return name 2053 | 2054 | 2055 | @cache 2056 | def dumpcsobject_property_isgeneric(rawproperty: str) -> str: 2057 | # Not Done 2058 | propertysignatureline = rawproperty.strip() 2059 | prefix = readbefore(" {", propertysignatureline) 2060 | words = getwords(prefix, customregex="(? 1 2062 | lastword = words[-1] 2063 | # Match generics, but not compiler generated symbols 2064 | # EX: IEnumerator, but not TweenRunner. 2065 | if re.search("(?!^)(?)", lastword): 2066 | return True 2067 | else: 2068 | return False 2069 | 2070 | @cache 2071 | def dumpcsobject_property_getgenerictypename(rawproperty: str) -> str: 2072 | # Not Done 2073 | if not dumpcsobject_property_isgeneric(rawproperty): 2074 | # No generic type 2075 | return None 2076 | propertysignatureline = rawproperty.strip() 2077 | prefix = readbefore(" {", propertysignatureline) 2078 | words = getwords(prefix, customregex="(? 1 2080 | # The name of the property is the last word before the delimiter in the rawproperty 2081 | lastword = words[-1] 2082 | suffix = readafter("<", lastword) 2083 | generictype = readbefore(">", suffix, backward=True) 2084 | return generictype 2085 | 2086 | 2087 | @cache 2088 | def dumpcsobject_property_getdatatype(rawproperty: str) -> str: 2089 | # Not Done 2090 | propertysignatureline = rawproperty.strip() 2091 | prefix = readbefore(" {", propertysignatureline) 2092 | # The data type is everything but the last word (which is the property name) 2093 | words = getwords(prefix, customregex="(? 
1 2095 | del words[-1] 2096 | # Concatenate the words back into a string 2097 | datatype = wordstostring(words) 2098 | return datatype 2099 | 2100 | 2101 | @cache 2102 | def dumpcsobject_property_isvirtual(rawproperty: str) -> bool: 2103 | # Not Done 2104 | datatype = dumpcsobject_property_getdatatype(rawproperty) 2105 | return datatype_isvirtual(datatype) 2106 | 2107 | @cache 2108 | def dumpcsobject_property_isabstract(rawproperty: str) -> bool: 2109 | # Not Done 2110 | datatype = dumpcsobject_property_getdatatype(rawproperty) 2111 | return datatype_isabstract(datatype) 2112 | 2113 | 2114 | @cache 2115 | def dumpcsobject_property_isoverride(rawproperty: str) -> bool: 2116 | # Not Done 2117 | datatype = dumpcsobject_property_getdatatype(rawproperty) 2118 | return datatype_isoverride(datatype) 2119 | 2120 | 2121 | @cache 2122 | def dumpcsobject_property_getpropertymethods(rawproperty: str) -> str: 2123 | propertysignatureline = rawproperty.strip() 2124 | suffix = readafter(" { ", propertysignatureline) 2125 | propertymethodssection = readbefore(" }", suffix, backward=True) 2126 | propertymethods = [propertymethod.strip() for propertymethod in propertymethodssection.split(";")] 2127 | # The split function will capture everything after the last property 2128 | # since we split by the delimiter that ends properties, 2129 | # so delete that 2130 | del propertymethods[-1] 2131 | return propertymethods 2132 | 2133 | 2134 | @cache 2135 | def dumpcsobject_property_hasgetter(rawproperty: str) -> bool: 2136 | # Not Done 2137 | propertymethods = dumpcsobject_property_getpropertymethods(rawproperty) 2138 | return "get" in propertymethods 2139 | 2140 | @cache 2141 | def dumpcsobject_property_hassetter(rawproperty: str) -> bool: 2142 | # Not Done 2143 | propertymethods = dumpcsobject_property_getpropertymethods(rawproperty) 2144 | return "set" in propertymethods 2145 | 2146 | 2147 | def dumpcs_getobjects(dumpcs: str, 2148 | objecttypefilter: Optional[set[str]]=None, 2149 | 
namespacefilter: Optional[set[str]]=None, 2150 | customfilter: Optional[Callable]=None, 2151 | makeobjectpaths=False, 2152 | getmethodhex=True, 2153 | libfilepath=None) -> list[dict]: 2154 | # Not Done 2155 | """ 2156 | Docs Not Done! 2157 | Possible Improvements: 2158 | 1. Creating a new list is inefficient, modifying existing list would be ideal 2159 | 2. Directly using dumpcs.split() instead of using rawobjects variable may be faster, 2160 | but sacrifices readability and simplicity 2161 | 3. Having different loops for namespacefilter, objecttypefilter, customfilter, and combinations 2162 | is faster, but longer 2163 | 4. Directly creating a dictionary may be faster than using variables for namespacefilter 2164 | and objecttypefilter, but sacrifices readability and simplicity 2165 | 5. To save memory and speed, maybe only add object base if it exists rather than adding None. 2166 | However, this sacrifices readability and simplicity 2167 | 6. To save memory and speed, maybe only add object type model if it exists rather than adding None. 2168 | However, this sacrifices readability and simplicity 2169 | 7. Returning a dictionary of objects by path (namespace -> object) may be faster and simpler than 2170 | returning a list of dictionaries (as to grab an object out of the list by its path, the list must be 2171 | iterated through until a match is found), but a list is simpler, easier, and faster to create, 2172 | process, and iterate over 2173 | 8. Object delimiter should be a constant 2174 | 9. 
Supplying raw content and objectcache parameters to all functions, whether they use them or not, 2175 | is good for consistency, readability, and refactoring, but sacrifices speed and memory 2176 | 2177 | Parses dumpcs file into a list of objects 2178 | Does not remove blank lines 2179 | 2180 | Arguments: 2181 | dumpcs: the raw content of the dumpcs file 2182 | createtypemodels: whether to create type models for objects (NOTE: not a parameter of the current signature) 2183 | objecttypefilter: filter to only process objects of certain types (set to None to disable filter) 2184 | namespacefilter: filter to only process objects that are in certain namespaces (inclusive) 2185 | (set to None to disable filter, set to [None] to only process objects in global namespace) 2186 | customfilter: filter that is supplied the finished object dictionary and tells the function to only 2187 | process objects that satisfy custom conditions (set to None to disable filter) 2188 | getmethodhex: whether to get the hex code of methods from the binary 2189 | libfilepath: the file path of the libil2cpp.so binary 2190 | 2191 | Return: 2192 | list of parsed objects from the dumpcs file 2193 | :param makeobjectpaths: whether to add a "path" entry (from dumpcsobject_getpath) to each object dictionary 2194 | """ 2195 | if getmethodhex: 2196 | raise NotImplementedError("getmethodhex is not done!") 2197 | if getmethodhex and libfilepath is None: 2198 | raise IllegalArgumentException("Call to dumpcs_getobjects with" \ 2199 | "getmethodhex enabled but no libfilepath.") 2200 | objectdelimiter = "// Namespace: " 2201 | if dumpcs_hasattributes(dumpcs): 2202 | dumpcs = dumpcs_removeattributes(dumpcs) 2203 | # Split dumpcs by "// Namespace: ", which can be used to mark the start of each object 2204 | # There are blank lines between objects, so add on a blank line 2205 | rawobjects = dumpcs.split(f"\n{objectdelimiter}") 2206 | #if not rawobjects: 2207 | # # If there aren't any objects in dumpcs (this is impossible, but just theoretically), 2208 | # # we can terminate the function now 2209 | # return [] 2210 | if not rawobjects: 2211 | 
# exceptions.errors.unexpecteddumpcsformat(f"Error go brrrr") 2212 | raise UnexpectedDumpcsFormatError("No objects in dumpcs", dumpcs) 2213 | # The split function will capture everything before the first object 2214 | # since we split by the delimiter that starts objects, so delete that 2215 | del rawobjects[0] 2216 | # Build list of object dictionaries from raw objects 2217 | objects = [] 2218 | for count, rawobject in enumerate(rawobjects): 2219 | # Add "// Namespace: " back on, as string.split excludes the delimiter 2220 | rawobject = objectdelimiter + rawobject 2221 | 2222 | hasnamespace = dumpcsobject_hasnamespace(rawobject) 2223 | namespace = dumpcsobject_getnamespace(rawobject) if hasnamespace else None 2224 | if namespacefilter and namespace not in namespacefilter: 2225 | continue 2226 | 2227 | objecttype = dumpcsobject_getobjecttype(rawobject) 2228 | if objecttypefilter and objecttype not in objecttypefilter: 2229 | continue 2230 | 2231 | isabstract = dumpcsobject_isabstract(rawobject) 2232 | isstatic = dumpcsobject_isstatic(rawobject) 2233 | issealed = dumpcsobject_issealed(rawobject) 2234 | isinherited = dumpcsobject_isinherited(rawobject) 2235 | name = dumpcsobject_getname(rawobject, includenesting=False) 2236 | isnested = dumpcsobject_isnested(rawobject) 2237 | nesting = dumpcsobject_getnesting(rawobject, includename=False, innertoouter=True) 2238 | datatype = dumpcsobject_getdatatype(rawobject) 2239 | bases = dumpcsobject_getbases(rawobject) if isinherited else None 2240 | isgeneric = dumpcsobject_isgeneric(rawobject) 2241 | generictypename = dumpcsobject_getgenerictypename(rawobject) if isgeneric else None 2242 | typedefindex = dumpcsobject_gettypedefindex(rawobject) 2243 | 2244 | hasmethods = dumpcsobject__hasmethods(rawobject) 2245 | methods = dumpcsobject_getmethods(rawobject) if hasmethods else [] 2246 | hasconstructor = dumpcsobject_hasconstructor(rawobject) if hasmethods else False 2247 | hasstaticconstructor = 
dumpcsobject_hasstaticconstructor(rawobject) if hasmethods else False 2248 | updatetypes = dumpcsobject_getupdatetypes(rawobject) if hasmethods else [] 2249 | 2250 | hasfields = dumpcsobject__hasfields(rawobject) 2251 | fields = dumpcsobject_getfields(rawobject) if hasfields else [] 2252 | hasproperties = dumpcsobject__hasproperties(rawobject) 2253 | properties = dumpcsobject_getproperties(rawobject) if hasproperties else [] 2254 | 2255 | # The name Object is capitalized because "object" is a built-in name 2256 | Object = { 2257 | "raw": rawobject, 2258 | "name": name, 2259 | "typedefindex": typedefindex, 2260 | "objecttype": objecttype, 2261 | "hasnamespace": hasnamespace, 2262 | "namespace": namespace, 2263 | "datatype": datatype, 2264 | "isinherited": isinherited, 2265 | "bases": bases, 2266 | "isnested": isnested, 2267 | "nesting": nesting, 2268 | "isgeneric": isgeneric, 2269 | "generictypename": generictypename, 2270 | "hasmethods": hasmethods, 2271 | "methods": methods, 2272 | "hasconstructor": hasconstructor, 2273 | "hasstaticconstructor": hasstaticconstructor, 2274 | "updatetypes": updatetypes, 2275 | "hasfields": hasfields, 2276 | "fields": fields, 2277 | "hasproperties": hasproperties, 2278 | "properties": properties, 2279 | } 2280 | if makeobjectpaths: 2281 | path = dumpcsobject_getpath(rawobject) 2282 | Object["path"] = path 2283 | # Now that we have all the object's data, we can check against custom filter. 2284 | if customfilter and not(customfilter(Object)): 2285 | continue 2286 | objects.append(Object) 2287 | percentdone = (count * 10000 // len(rawobjects)) / 100 2288 | if percentdone != 0 and percentdone % 1 == 0: 2289 | print(f"{percentdone}%") 2290 | return objects 2291 | 2292 | 2293 | def dumpcs_getimages(dumpcs: str) -> list[dict]: 2294 | # Not Done 2295 | """ 2296 | Docs Not Done! 
2297 | Possible Improvements: 2298 | """ 2299 | raise NotImplementedError("dumpcs_getimages function is unfinished") 2300 | 2301 | 2302 | dumpcspath, encoding = r"C:\Users\zachy\OneDrive\Documents\Work\Projects\Polywar\64bit\dump.cs", 'utf8' 2303 | #dumpcspath, encoding = r"C:\Users\zachy\OneDrive\Documents\Work\Projects\Pixel Gun 2304 | # 3D\Pixel Gun 3D 2305 | # 16.6.1\Pixel Gun 2306 | # 3D 16.6.1 dump.cs", 'utf8' 2307 | outputpath = r"C:\Users\zachy\PycharmProjects\UnityDeobfuscator\tests\parseddumpcs.json" 2308 | #outputpath = r"C:\Users\zachy\OneDrive\Documents\Work\Temp\Python Temps\parseddumpcs.json" 2309 | if __name__ == "__main__": 2310 | with open(dumpcspath,encoding = encoding) as f: 2311 | #import cProfile 2312 | #cProfile.run("dumpcs_getobjects(dumpcs_removeattributes(f.read()), makeobjectpaths=True, getmethodhex=False)") 2313 | #sys.exit() 2314 | objects = dumpcs_getobjects(dumpcs_removeattributes(f.read()), makeobjectpaths=True, 2315 | getmethodhex=False) 2316 | with open(outputpath,'w',encoding = encoding) as f: 2317 | # We can't write all the objects to the file because it's too big and eats up all my memory 2318 | # when I try to view it 2319 | # For testing purposes, only a few hundred is necessary 2320 | #f.write(str(objects)) 2321 | #for i in objects: 2322 | #if i["name"] == "FriendsMenu": 2323 | #if "ResourceFallbackManage" in i["name"]: 2324 | #f.write(str(i)) 2325 | #sys.exit() 2326 | f.write(str(objects[1000:1300])) 2327 | --------------------------------------------------------------------------------