15 |
16 |
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
1 | # THIS IS UNFINISHED
2 | **DO NOT TRY TO USE**
3 |
4 | A python tool for deobfuscating Unity games.
5 |
6 | Provide apk (il2cpp or mono), dll or dummydll (all or just one), metadata and libil2cpp.so (for il2cpp), or dump.cs (for il2cpp) files for both the obfuscated and the unobfuscated version of the game, and it will deobfuscate the obfuscated one.
7 | Can generate json output, txt output, deobfuscated dll(s), deobfuscated apk, deobfuscated dump.cs, and more!
8 |
9 | Features:
10 | -Flexible inputs and outputs
11 | -Automatic Dumping for Il2cpp Games
12 | -Lightning-fast
13 | -Employs 6 deobfuscation methods
14 | -Defeats Beebyte Obfuscator
15 | -Supports deobfuscation of plugins
16 | -Flexible options
17 | -Force unity version, or it can be auto-detected
18 | -Deobfuscate everything, only one dll / method / class / namespace etc., or specify certain dll > namespace > class > method paths, subpaths (like all classes in blank namespace), regex, name, charset, etc.
19 | -Dnspy-style viewer for selecting paths etc.
--------------------------------------------------------------------------------
/Python/DeobfuscationReformatted/Utils/exceptions.py:
--------------------------------------------------------------------------------
class UnexpectedDumpcsFormatError(Exception):
    """Exception raised when a section of dumpcs does not follow a pattern"""
    # Thanks to https://www.programiz.com/python-programming/user-defined-exception

    def __init__(self, message, sample=None, line=None):
        """
        Build the final error text from its parts.

        :param message: Description of what was unexpected
        :param sample: Optional offending text, appended after the message on a new line
        :param line: Optional line number, prepended as "Detected at line N: "
        """
        text = message + f":\n{sample}" if sample else message
        if line:
            text = f"Detected at line {line}: " + text
        # Keep the composed text available as an attribute as well as in args
        self.message = text
        super().__init__(text)
12 |
13 |
class InvalidDumpcsError(Exception):
    """Exception raised when the dumpcs file at a given path is invalid"""
    # Thanks to https://www.programiz.com/python-programming/user-defined-exception

    def __init__(self, path):
        """
        :param path: Path of the dumpcs file that was found to be invalid
        """
        description = f"Dumpcs file at path {path} is invalid"
        self.message = description
        super().__init__(description)
21 |
22 |
def UnexpectedDumpcsFormatWarning(message):
    """Print message to standard output, prefixed with "UnexpectedDumpcsFormatWarning: "."""
    text = f"UnexpectedDumpcsFormatWarning: {message}"
    print(text)
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/Docs/terminology.txt:
--------------------------------------------------------------------------------
1 | whether object is inherited ("class a: b" vs "class a") = is inherited (CURRENT NAME IS "is shared" MUST CHANGE)
2 |
3 | base object in inherited object = base object (if none, it is set to None)
4 |
5 | type of object ("struct", "class", "interface", "enum") = object type (CURRENT NAME IS "user-defined type" MUST CHANGE)
6 | Called "objecttype" instead of "type" because "type" is a built-in name in python
7 |
8 | "public" in "public class classname: baseclass" or "private" in "private enum enumname" = object data type (CURRENT NAME IS "object type" MUST CHANGE)
9 |
10 | "classname" in "public class classname: baseclass" or "enumname" in "private enum enumname" = object name
11 |
12 | how many methods / fields / properties are in object = object size
13 |
14 | "readonly" / "public" / "abstract" / "const" / "bool" / "object int[]" / "char*" etc.
15 | = primitive type (including complex types) (CURRENT NAME IS "type" MUST CHANGE)
16 |
17 | "Dictionary" / "Vector3" / "Quaternion" etc. = unity type
18 |
19 | "PhotonNetworkingMessage" / "Color" (any user-defined type not defined by Unity) = user-defined type
20 |
21 | user-defined type / unity type / primitive type (including complex types) = data type (CURRENT NAME IS "type" MUST CHANGE)
22 |
23 | Content of // Methods = methodssection
24 | Each method = method
25 | Content of "/* GenericInstMethod :" = generics
26 | Each generic = generic
27 | Content of // Fields = fieldssection
28 | Each field = field
29 | Content of // Properties = propertiessection
30 | Each property = property
31 | Called "Property" instead of "property" because "property" is a built-in name in python
32 |
33 | "int i = 1" in "function(int i = 1)" or "int a" in "function(int a,bool b=true)" = parameter / param
34 | "1" in "function(int i = 1)" = default argument (if none, it is set to None)
35 | "int " in "function(int a,bool b=true)" = parameter data type / param data type
36 | "a" in "function(int a,bool b=true)" = parameter name / param name
37 |
38 | "1" in "TypeDefIndex: 1" = typedefindex
39 |
40 | "UnityEngine" in "// Namespace: UnityEngine" = namespace
41 |
42 | "[ComVisibleAttribute] // RVA: 0x1 Offset: 0x1 VA: 0x1"
43 | or
44 | "[DebuggerBrowsableAttribute] // RVA: 0x1 Offset: 0x1 VA: 0x1"
45 | etc. = attribute line
46 |
47 | "[ComVisibleAttribute]" or "[DebuggerBrowsableAttribute]" = attribute
48 |
49 | "// Namespace:
50 | enum enumname // TypeDefIndex: 1
51 | {
52 | // Fields
53 | public int field;
54 | }"
55 | or
56 | "// Namespace:
57 | interface interfacename // TypeDefIndex: 1
58 | {
59 | // Fields
60 | public int field;
61 | }"
62 | or
63 | "// Namespace:
64 | struct structname // TypeDefIndex: 1
65 | {
66 | // Fields
67 | public int field;
68 | }"
69 | or
70 | "// Namespace:
71 | class classname // TypeDefIndex: 1
72 | {
73 | // Fields
74 | public int field;
75 | }"
76 | etc. = object
77 | Called "Object" instead of "object" because "object" is a built-in name in python
78 |
--------------------------------------------------------------------------------
/Python/test.py:
--------------------------------------------------------------------------------
def readafter(sub: str, s: str, backward=False, regex=False, mustcontain=True, lengthwarning=True) -> str:
    """
    Returns the substring after the delimiter
    If mustcontain is False and the delimiter is not found in the string, returns the whole string
    Also supports backward (right to left) and regex

    @param sub: The substring (or regex) to read after
    @param s: The initial string
    @param backward: Whether to read from right to left
    @param regex: Whether to treat the substring as regex
    @param mustcontain: Whether to throw an AssertionError if the substring (or regex) is not present
    in the initial string
    @param lengthwarning: Whether to raise a warning if the substring is longer than the initial string,
    which should never happen (only applies if regex is False)
    @return: The partition of the string after the substring (or regex)

    :raises ImportError: Wrong regex module: Expected regex module, got re module.
    To fix this, replace "import re" with "import regex as re"
    :raises AssertionError: param mustcontain was True and substring (or regex) was not present
    in the initial string
    :warns ValueWarning: param lengthwarning was True, param regex was False, and substring was longer
    than initial string

    Example:
        sub: "string"
        s: "Split this string by delimiter"
        return: " by delimiter"
    """
    class ValueWarning(Warning):
        pass
    # If re isn't imported at all, that will show up as a NameError here.
    # Only a missing REVERSE attribute means the wrong module was bound to the name "re".
    if not hasattr(re, "REVERSE"):
        raise ImportError("Wrong regex module: Expected regex module, got re module. To fix this, replace \"import "
                          "re\" with \"import regex as re\"")
    if regex:
        pattern = f"({sub})"  # Enclose regex with parenthesis to make it a group capture
        # If an invalid regex is passed, we let re.error throw - it can be handled by the caller
        # Raised explicitly (not via assert) so the check still runs under "python -O"
        if mustcontain and not re.search(pattern, s):
            raise AssertionError(f"regex {sub!r} was not found in {s!r}")
        if backward:
            return re.split(pattern, s, 1, flags=re.REVERSE)[0]
        return re.split(pattern, s, 1)[-1]
    if lengthwarning and len(sub) > len(s):
        warnings.warn(f"Call to readafter(sub=\"{sub}\", str=\"{s}\"): substring is longer than string",
                      ValueWarning)
    # Raised explicitly (not via assert) so the check still runs under "python -O"
    if mustcontain and sub not in s:
        raise AssertionError(f"substring {sub!r} was not found in {s!r}")
    if backward:
        prefix, found, suffix = s.rpartition(sub)
    else:
        prefix, found, suffix = s.partition(sub)
    # An empty separator slot means the delimiter was absent: hand back the whole string
    return suffix if found else s
63 |
64 | import regex as re
65 | import warnings
66 | readafter("123", "1")
--------------------------------------------------------------------------------
/Python/fileutils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | import io
3 | import os
4 | from typing import TextIO, Any, Optional, Callable
5 |
6 |
class FileEncodingException(Exception):
    """Raised when none of the candidate encodings can decode a file."""
9 |
10 |
class FileHandler:
    """
    Small helper around open() that can auto-detect text encodings and keeps a
    registry of named open file handles.
    """

    def __init__(self, encodings: tuple = ('utf-8', 'utf-16', 'utf-32')):
        """
        :param encodings: Candidate encodings, tried in order by detect_file_encoding
        """
        self.encodings = encodings
        # Maps handle name -> open file object (see create_file_handle)
        self.file_handles = {}

    def detect_file_encoding(self, path: str | os.PathLike) -> str:
        """
        Return the first configured encoding that decodes the whole file without error.

        :raises FileEncodingException: None of the configured encodings could decode the file
        """
        for encoding in self.encodings:
            try:
                with open(path, 'r', encoding=encoding) as f:
                    f.read()
            except UnicodeError:
                # UnicodeError (not just UnicodeDecodeError): the utf-16 / utf-32 stream
                # decoders raise a plain UnicodeError when the data has no BOM
                continue
            return encoding
        raise FileEncodingException(f"Failed to detect encoding of file {path}")

    def open_file(self, path: str | os.PathLike, mode: str, encoding=None) -> \
            TextIO | io.TextIOWrapper | io.BufferedReader:
        """
        Open a file. For text modes with no encoding given, the encoding is auto-detected
        by reading the existing file first (so the file must already exist in that case).
        """
        if not (encoding or 'b' in mode):
            # Auto-detect encoding
            encoding = self.detect_file_encoding(path)
        return open(path, mode, encoding=encoding)

    def create_file_handle(self, path: str | os.PathLike, mode: str, encoding=None, name: str = None) -> \
            TextIO | io.TextIOWrapper | io.BufferedReader:
        """Open a file and register the handle under name (defaults to the path)."""
        if not name:
            name = path
        handle = self.open_file(path, mode, encoding=encoding)
        self.file_handles[name] = handle
        return handle

    def get_file_handle(self, name) -> TextIO | io.TextIOWrapper | io.BufferedReader:
        """Return the registered handle; raises KeyError if the name is unknown."""
        return self.file_handles[name]

    def close_file_handle(self, name) -> FileHandler:
        """Close and unregister one handle; returns self to allow chaining."""
        self.file_handles.pop(name).close()
        return self

    def close_all_file_handles(self) -> FileHandler:
        """Close and unregister every handle; returns self to allow chaining."""
        try:
            for handle in self.file_handles.values():
                handle.close()
        finally:
            # Forget the handles even if one of the close() calls failed
            self.file_handles.clear()
        return self

    def read_file(self, path: str | os.PathLike, encoding=None, binary=False) -> str | bytes:
        """Return the whole file content, as bytes if binary else as decoded text."""
        if binary:
            with self.open_file(path, 'rb') as f:
                return f.read()
        with self.open_file(path, 'r', encoding) as f:
            return f.read()

    def read_bytes(self, path: str | os.PathLike, tohex=True, hexformat: Optional[Callable] = None) -> \
            bytes | str | Any:
        """
        Read the file in binary mode.

        :param tohex: If True, return the content as a hex string instead of bytes
        :param hexformat: Optional callable applied to the hex string; its result is returned
        """
        filecontent = self.read_file(path, binary=True)
        if not tohex:
            return filecontent
        hexstring = filecontent.hex()
        return hexformat(hexstring) if hexformat else hexstring

    def write_file(self, path: str | os.PathLike, data: str | bytes, encoding, binary=False) -> \
            TextIO | io.TextIOWrapper | io.BufferedReader | str:
        """
        Write data to the file, replacing any previous content.

        :param encoding: Text encoding to write with; ignored when binary is True.
                         If falsy in text mode, open_file auto-detects it from the existing file
        :param binary: Whether data is bytes and the file should be opened in binary mode
        :return: The file object used for writing (already closed when returned)
        """
        mode = 'wb' if binary else 'w'
        # open() rejects an encoding argument in binary mode, so drop it there
        with self.open_file(path, mode, encoding=None if binary else encoding) as f:
            f.write(data)
        return f
80 |
--------------------------------------------------------------------------------
/Python/unitydeobfuscatorexceptions.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | import os
3 |
4 |
def _truncatestring(s: str, maxlength: Optional[int] = 1000, maxlines: Optional[int] = 20) -> str:
    """
    Helper function to truncate a string to a given length

    :param s: The initial string
    :param maxlength: The amount of characters to truncate at. Takes precedence over maxlines.
                      None (or 0) disables the length limit.
    :param maxlines: The amount of lines to truncate at. Lower precedence than maxlength.
                     None (or 0) disables the line limit.
    :return: If the string did not exceed either of the limits - the unmodified initial string
             If the string exceeded the length limit - the string truncated to the length limit with
             "...[Truncated]" added to the end
             If the string exceeded the line limit but not the length limit - the string truncated to the
             line limit with "...[Truncated]" added to the end
    """
    if maxlength and len(s) > maxlength:
        # Keep exactly maxlength characters
        return s[:maxlength] + "...[Truncated]"
    lines = s.splitlines()
    if maxlines and len(lines) > maxlines:
        # Keep exactly maxlines lines
        return "\n".join(lines[:maxlines]) + "...[Truncated]"
    return s
24 |
25 |
class ValueWarning(Warning):
    """Warning category for a suspicious value."""
28 |
29 |
class IllegalArgumentException(ValueError):
    """ValueError subclass for an argument that is not allowed."""
32 |
33 |
class IllegalArgumentWarning(Warning, ValueError):
    """
    Warning category for an argument that is not allowed.

    Derives from Warning so it can be passed to warnings.warn (which rejects
    categories that are not Warning subclasses), and still from ValueError so
    existing "except ValueError" handlers keep catching it when it is raised.
    """
36 |
37 |
class UnexpectedDumpcsFormatError(Exception):
    """Exception raised when something unexpected is encountered in a section of dumpcs"""

    def __init__(self, message: Optional[str] = None, sample: Optional[str] = None, line: Optional[str] = None):
        """
        :param message: Description of what was unexpected
        :param sample: Offending text, appended after the message on a new line
        :param line: Line number, prepended as "Detected at line N: "
        """
        text = message if message else ""
        if sample:
            # Only use the ":" separator when there is a message in front of it
            text = f"{text}:\n{sample}" if text else f"{sample}"
        if line:
            text = f"Detected at line {line}: " + text
        self.message = text
        # Always initialise the base class, otherwise str(exc) would show the raw
        # constructor arguments (e.g. "None") when the composed message is empty
        super().__init__(text)
49 |
50 |
class UnexpectedDumpcsFormatWarning(Warning):
    """Warning raised when something unexpected is encountered in a section of dumpcs"""

    def __init__(self, message: Optional[str] = None, sample: Optional[str] = None, line: Optional[str] = None):
        """
        :param message: Description of what was unexpected
        :param sample: Offending text, appended after the message on a new line
        :param line: Line number, prepended as "Detected at line N: "
        """
        text = message if message else ""
        if sample:
            # Only use the ":" separator when there is a message in front of it
            text = f"{text}:\n{sample}" if text else f"{sample}"
        if line:
            text = f"Detected at line {line}: " + text
        self.message = text
        # Always initialise the base class, otherwise str(warning) would show the raw
        # constructor arguments (e.g. "None") when the composed message is empty
        super().__init__(text)
62 |
63 |
64 | class InvalidDumpcsError(Exception):
65 | """Exception raised when a dumpcs in not a valid dumpcs"""
66 |
67 | def __init__(self, path: str | os.PathLike = None, content: str = None):
68 | """
69 | Path and content are mutually exclusive, though this is not enforced.
70 | If both are provided, will default to path
71 | """
72 | self.message = None
73 | if path:
74 | self.message = f"Dumpcs file at path {path} does not appear to be a valid dumpcs"
75 | elif content:
76 | # noinspection IncorrectFormatting
77 | self.message = "Dumpcs does not appear to be a valid dumpcs:\n" \
78 | f"{_truncatestring(content, maxlength=1000, maxlines=20)}"
79 | if self.message:
80 | super().__init__(self.message)
81 |
82 |
83 | class InvalidDumpcsWarning(Warning):
84 | """Warning raised when a dumpcs in not a valid dumpcs"""
85 |
86 | def __init__(self, path: str | os.PathLike = None, content: str = None):
87 | """
88 | Path and content are mutually exclusive, though this is not enforced.
89 | If both are provided, will default to path
90 | """
91 | self.message = None
92 | if path:
93 | self.message = f"Dumpcs file at path {path} does not appear to be a valid dumpcs"
94 | elif content:
95 | # noinspection IncorrectFormatting
96 | self.message = "Dumpcs does not appear to be a valid dumpcs:\n" \
97 | f"{_truncatestring(content, maxlength=1000, maxlines=20)}"
98 | if self.message:
99 | super().__init__(self.message)
100 |
--------------------------------------------------------------------------------
/Python/deobfuscationtest.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | from DeobfuscationRefactored import *
4 |
5 |
def compareobjects(object1, object2, dosize = True, dofields = True):
    """
    Score how closely object2 matches object1 and compare the score to a tolerance.

    Both arguments are indexed like dicts with the keys "methods", "propertiess",
    "fields" and the must-match keys listed below.

    NOTE(review): this function reads several names that are not defined in this
    block (_methodweighttrue, _propertyweighttrue, _fieldweighttrue, _sizebenchmark,
    _sizeweightfalse, domethodparams, dopropertyattributes, usetolerance,
    endspeedtest) - presumably provided by the star import; confirm.
    NOTE(review): the nesting of this function was reconstructed from a dump that
    had lost its indentation - confirm against the real file.

    :param object1: The first object
    :param object2: The second object
    :param dosize: Whether the difference in member count contributes to the score
    :param dofields: Whether matching fields contribute to the score
    :return: 0 if any must-match key differs; otherwise True if the percentage
             score is not less than usetolerance, else False
    """
    maxscore = (len(object1["methods"]) * _methodweighttrue) + (
            len(object1["propertiess"]) * _propertyweighttrue)  # calculate maximum score
    score = 0
    # Size
    if dosize:
        maxscore = maxscore + 8  # start off at 8, and subtract nothing for a perfect score
        size1 = (len(object1["fields"]) + len(object1["methods"]) + len(
            object1["propertiess"]))  # how many methods, fields, and properties are there?
        size2 = (len(object2["fields"]) + len(object2["methods"]) + len(
            object2["propertiess"]))  # how many methods, fields, and properties are there?
        score = 8 - (((
                abs(size2 - size1) / _sizebenchmark) * _sizeweightfalse))  # depending on the difference in size, this could have a small impact, or be very bad
    # NOTE(review): objectcomparisons, methodscomparisons and methodweights are
    # assigned but never read in this function
    objectcomparisons: List[Dict[str, bool, Optional[float]]] = [
        {"name": "base", "mustmatch": True, "weight": None},
        {"name": "genericdatatype", "mustmatch": True, "weight": None},
        {"name": "namespace", "mustmatch": True, "weight": None},
        {"name": "hasconstructor", "mustmatch": True, "weight": None},
        {"name": "hasstaticconstructor", "mustmatch": True, "weight": None},
        {"name": "hasfields", "mustmatch": True, "weight": None},
        {"name": "hasproperties", "mustmatch": True, "weight": None},
        {"name": "hasmethods", "mustmatch": True, "weight": None},
    ]
    methodscomparisons: List[Dict[str, bool, Optional[float]]] = [
    ]
    methodweights: List[Dict[str, float]] = [
        {"isconstructor": 7.0},
        {"isstaticconstructor": 7.0},
        {"isoperator": 5.0},
        {"isvirtual": 2.5},
    ]  # Anything else is 1
    # Keys whose values must be equal in both objects, otherwise the result is 0
    mustmatch = ["base", "genericdatatype", "namespace", "hasconstructor",
                 "hasstaticconstructor", "hasfields", "hasproperties", "hasmethods"]
    for must in mustmatch:
        if not (object1[must] == object2[must]):
            return 0
    # Fields
    if dofields:
        maxscore = maxscore + (len(object1["fields"]) * _fieldweighttrue)
        # We are using the fields type objects, not the fields themselves
        fields1 = copy.deepcopy(object1["fields"])
        fields2 = list(object2["fields"])
        templist = list(
            fields2)  # it's very normal to add on things, but not as common to delete them. So, most of the fields in the unobfuscated (earlier) one
        # should also exist in the obfuscated one (newer)
        templist2 = list(fields1)
        # Each field of object1 found in object2 adds to the score; a matched item is
        # removed from templist so it cannot be matched twice
        for item in templist2:
            if len(templist) > 0:
                if (item in templist):
                    score = score + _fieldweighttrue
                    templist.remove(item)
    # Methods
    if domethodparams:
        # We are using the methods type objects, not the methods themselves
        methods1 = list(object1["methods"])
        methods2 = list(object2["methods"])
    else:
        methods1 = list(object1.get("MethodTypes"))
        methods2 = list(object2.get("MethodTypes"))
    templist = list(
        methods2)  # it's very normal to add on things, but not as common to delete them. So, most of the methods in the unobfuscated (earlier) one
    # should also exist in the obfuscated one (newer)
    templist2 = list(methods1)
    # Same matching scheme as for fields, weighted by _methodweighttrue
    for item in templist2:
        if len(templist) > 0:
            if (item in templist):
                score = score + _methodweighttrue
                templist.remove(item)
    # Properties
    if dopropertyattributes:
        # We are using the properties type objects, not the properties themselves
        properties1 = list(object1["propertiess"])
        properties2 = list(object2["propertiess"])
        templist = list(
            properties2)  # it's very normal to add on things, but not as common to delete them. So, most of the properties in the unobfuscated (earlier) one
        # should also exist in the obfuscated one (newer)
        templist2 = list(properties1)
        # Same matching scheme as for fields, weighted by _propertyweighttrue
        for item in templist2:
            if len(templist) > 0:
                if (item in templist):
                    score = score + _propertyweighttrue
                    templist.remove(item)
    # To do: method params, number of shared classes for class
    # NOTE(review): matchscore is computed but never used; maxscore can be 0 here
    # (e.g. dosize False and no methods / properties / fields), which would raise ZeroDivisionError
    matchscore = ((score / maxscore) * 100)
    endspeedtest()
    return (not (((score / maxscore) * 100) < usetolerance))  # is percentage score not less than tolerated percent?
92 |
--------------------------------------------------------------------------------
/Python/offsetstest.py:
--------------------------------------------------------------------------------
1 | from DeobfuscationRefactored import *
2 |
SPECIALCONSTRUCTORNAMES = True
FIELDPREFIXES = True


def _isplainname(name):
    """Return True if name contains none of the markers ("__", ".", "<") this script skips."""
    return "__" not in name and "." not in name and "<" not in name


def _exportedfields(Object):
    """Return the fields of Object that have an offset and a plain name."""
    return [field for field in Object["fields"] if field["hasoffset"] and _isplainname(field["name"])]


def _exportedmethods(Object):
    """Return the methods of Object that have offset data and a plain name."""
    return [method for method in Object["methods"] if method["hasoffsetdata"] and _isplainname(method["name"])]


def _fieldname(field):
    """Return the constant name for a field, normalised to an "m_" prefix if FIELDPREFIXES is set."""
    name = field["name"]
    if not FIELDPREFIXES or name.startswith("m_"):
        return name
    if name.startswith("_"):
        return "m" + name
    return "m_" + name


def _methodname(method):
    """Return the constant name for a method ("ctor" / "cctor" for constructors if enabled)."""
    if method["isconstructor"] and SPECIALCONSTRUCTORNAMES:
        return "ctor"
    if method["isstaticconstructor"] and SPECIALCONSTRUCTORNAMES:
        return "cctor"
    return method["name"]


def _uniquename(name, usednames):
    """Return name, with a running number appended if it was already used in this object."""
    if name in usednames:
        usednames[name] += 1
        return name + str(usednames[name])
    usednames[name] = 1
    return name


def _objectcode(Object, indent):
    """Render one object as a closed C++ namespace of offset constants, indented by indent."""
    parts = [f"{indent}namespace {Object['name']}{{"]
    usednames = {}  # shared by fields and methods so constant names stay unique per object
    # Sections are decided per object (the original reused stale flags from an earlier loop)
    fields = _exportedfields(Object)
    if fields:
        parts.append(f"\n{indent}\t//Fields")
        for field in fields:
            name = _uniquename(_fieldname(field), usednames)
            parts.append(f"\n{indent}\t\tconst uint64_t {name} = {field['offset']}")
    methods = _exportedmethods(Object)
    if methods:
        parts.append(f"\n{indent}\t//Methods")
        for method in methods:
            name = _uniquename(_methodname(method), usednames)
            parts.append(f"\n{indent}\t\tconst uint64_t {name} = {method['offset']}")
    parts.append(f"\n{indent}}}")
    return "".join(parts)


# dumpcspath, encoding, dumpcs_getobjects and dumpcs_removeattributes are not defined
# in this file - presumably they come from the star import above
with open(dumpcspath, encoding=encoding) as f:
    objects = dumpcs_getobjects(dumpcs_removeattributes(f.read()), getmethodhex=False)

# Group the objects worth exporting (plain name, at least one exportable member) by namespace
globalnamespace = []
namespaces = {}
for Object in objects:
    if not _isplainname(Object["name"]):
        continue
    if not (_exportedfields(Object) or _exportedmethods(Object)):
        continue
    namespace = Object["namespace"]
    if namespace is None:
        globalnamespace.append(Object)
    else:
        namespaces.setdefault(namespace, []).append(Object)

# Objects without a namespace become top-level C++ namespaces
chunks = [_objectcode(Object, "") + "\n\n" for Object in globalnamespace]
# Objects with a namespace are nested one level inside a namespace of that name
for namespace, members in namespaces.items():
    chunks.append(f"namespace {namespace}{{")
    for Object in members:
        chunks.append("\n" + _objectcode(Object, "\t"))
    chunks.append("\n}\n\n")
# Strip only the trailing newlines (slicing off 4 characters also removed the last closing brace)
code = "".join(chunks).rstrip("\n")
with open(r"C:\Users\zachy\OneDrive\Documents\Work\Temp\Python Temps\offsetstest.cpp", 'w', encoding=encoding) as f:
    f.write(code)
print(code)
141 |
--------------------------------------------------------------------------------
/Ideas/Outline.txt:
--------------------------------------------------------------------------------
1 | Input:
2 | Put in apk (il2cpp or mono), dll or dummydll (all or just one), metadata and libil2cpp.so (for il2cpp) or dump.cs (for il2cpp) files of obfuscated and unobfuscated, and it will deobfuscate.
3 | Version chaining is also supported. This is where you add more than 2 game versions, and specify the order
4 | (can be auto-detected via version detection). UnityDeobfuscator deobfuscates the versions in order, using the
5 | deobfuscated last version as the deobfuscated version for the next obfuscated version. This minimizes differences between game versions, allowing for more accurate deobfuscation.
6 |
7 | Output:
8 | Can generate json output, txt output, deobfuscated dll(s), deobfuscated apk, deobfuscated dump.cs, and more!
9 |
10 | Options:
11 | You can create multiple packages of options and give them names. This is useful if you want to have different
12 | settings (such as the trust unity types option) for different games. You can also import and export options
13 | file (they are stored as .json files)
14 | -Version auto-detection (scans game apk, binary, dump.cs, or whatever else is input to find game version)
15 | -Multi-select deobfuscation methods
16 | -Enable and disable certain things to balance speed and accuracy
17 | -Options for what can and cannot change - data types, whether class has a certain method, data type of class,
18 | etc.
19 | -Option to add your own data types and unity types (in case the project missed something, or unity implements new types)
20 | -Multi-select plugins to auto-detect and use in deobfuscate
21 | -Multi-select plugins to auto-detect and deobfuscate
22 | -Comes with a few known plugins like Photon and CodeStage, but you can add more by adding apk (il2cpp or mono), dll or dummydll (all or just one),
23 | metadata and libil2cpp.so (for il2cpp) or dump.cs (for il2cpp) files and specifying the namespace(s) used by
24 | the plugin; the plugin will then be processed and the deobfuscation data will be added to the local database.
25 | You can also manually add custom types to trust
26 | -Specify certain namespaces, classes, or methods to exclude from deobfuscation (in unobfuscated, obfuscated, or both)
27 | -Trust names (if two items have same name, they are a match in deobfuscation)
28 | -Only deobfuscate names with certain naming regex or chars
29 | -Ignore names with certain naming regex or chars when deobfuscating
30 | -Configure output
31 | -Trust Unity types (Vector3, Quaternion, string, etc.)
32 | -Force or auto-detect unity version
33 | -Trust user-defined types
34 | -Trust plugin types
35 | -Trust order (even if the game does not scramble methods / fields / properties, this is still not recommended
36 | because games may add, remove, and delete fields / methods / properties).
37 | -Trust operators (if method name starts with "op_", then it is an operator)
38 | -Configure deobfuscation tolerance
39 | -Configure how many candidates to accept (If there are more candidates than this number, only the best matches
40 | will be kept.)
41 | -Configure confidence to remove (if a match exceeds this confidence, it will be excluded
42 | when processing other items)
43 | -Choose between only take best match or take all matches
44 | -Change internal settings like size weight and field weight
45 | -Configure how to order classes / fields / methods etc.: Keep order of obfuscated, reorder to match unobfuscated,
46 | sort alphabetically, etc.
47 | -Enable / disable type weight
48 | -Configure type weight (eg: whether method is virtual may be very heavy)
49 |
50 | Deobfuscation Methods:
51 | # - Brute Force Deobfuscation (Comparitive Deobfuscation) : A deobfuscation method that works by comparing unobfuscated and obfuscated dump.cs. It finds the class or member etc. by name. Then, it takes the class, and replaces the names and dynamic values with a certain string ('offset','methodname','classname','comment',etc.). This way, things such as data types, params, # of methods and fields, etc. can be compared. It then converts this into lists of methods, and each method has its method type, and the method params. Same is done on fields and class itself. There is a strikes system with a customizable strictness. It can automatically adapt by narrowing down the perfect strictness by moving it up and down and seeing how little results it can get while still getting results (the toggleable smart mode, changeable in settings or function parameters). This method takes a long time.
52 | # - Regex search deobfuscation (String search deobfuscation): This method is faster, simpler, and better. Both are useful though. This method finds unchanging string (such as and private readonly Dictionary<) by searching strings until it finds one with low occurences (like 300 or less), and it finds the one with the lowest. It can also remove names / dynamic values and uses regex search. It can also use the renamer to remove changing things. Then it sees if this comes up in obfuscated. It uses brute force deobfuscation on the resulting classes methods etc. This is done until the right one is found.
53 | # - Mutual Name Deobfuscation (Cross Reference Deobfuscation): This deobfuscation method is kind of like string search deobfuscation. It searches for the name you want to deobfuscate and finds other instances of the name, either as parameters in methods, methods with the same name in other classes, or fields with the same name in other classes. It tries to find one of these where the method or class is unobfuscated, or known through previous deobfuscation. Then, it goes to this class and uses brute force deobfuscation to find the right method or field.
54 | # - String Count Deobfuscation: This deobfuscation method is kind of like regex search and mutual name deobfuscation. It compares the number of occurrences of a name, string, or regex between game versions.
55 | # - Same Name Deobfuscation: In some games, including ones obfuscated with Beebyte Obfuscator, all occurrences of the deobfuscated name are replaced with the obfuscated name (like find and replace). For example, if you have a field called health in both your player and vehicle classes, both fields will be changed to the same name. This deobfuscation mode can be forced by the user, or detected by the program when it finds this out via another form of deobfuscation. When activated, this mode simply finds and replaces text.
56 | # - Pattern Search Deobfuscation (AOB Deobfuscation): This deobfuscation method generates an aob (array of bytes) for an unobfuscated class, method, field, etc., then searches for the aob in the new game version.
57 | # - Order Deobfuscation: This deobfuscation method is not recommended. Inserting methods / fields will break it.
58 | It relies on fields / methods / classes etc. not being scrambled. For example:
59 | Method b comes two after method a
60 | You know which method method a is
61 | Therefore, you can assume method b is the method two methods after method a.
62 | # - Relative Offset Deobfuscation: This deobfuscation method is not recommended. It relies on
63 | fields / methods / not being scrambled. Inserting methods / fields will break it.
64 | This deobfuscation method uses relative field / method offsets. For example:
65 | Width's field offset is 0x1
66 | Length's field offset is 0x9
67 | You know the field offset of width in the obfuscated version is 0x33
68 | Therefore, you can assume the length field is the field with the field offset of 0x41.
69 | # - Override Matching Deobfuscation: If a class is derived, but parts or all of the base class's methods are unknown, then overridden methods can be deobfuscated using process of elimination. For example, if class B inherits from class A, which has one virtual method of type int, and class B has one overridden method of type int, it can be concluded that both methods are the same. This method works both ways - for deobfuscating derived classes using base classes, and deobfuscating base classes using derived classes.
70 | # - Cross Reference Deobfuscation: This method searches for uses of a class, method, etc. For example, it determines if any classes are derived from a class. It also searches the code (libil2cpp.so binary) to find references to classes, namespaces, fields, methods, etc.
71 | # - Same Namespace Deobfuscation: This method determines what namespaces a class, method, etc. uses by searching
72 | the code (libil2cpp.so binary), dll, or dummy dll. It then uses this to compare.
73 | # - Code Deobfuscation: This method compares code. If the game is mono, the code is already provided. If the game is libil2cpp.so, it searches the code (libil2cpp.so binary).
74 | # - CPP2IL Deobfuscation: This method dumps the game with CPP2IL (or uses an existing dump) and compares data from the CPP2IL dump.
75 | # - Nested Object Deobfuscation: If a class, struct, enum, etc. is nested in another one, the outer one can be deobfuscated if the inner one is known. If the outer is deobfuscated, it makes it much easier to deobfuscate the inner one. This nesting hierarchy can span multiple layers of nesting, and each layer can help deobfuscate the other layers.
76 | # - Relative offset deobfuscation: This method will only work if objects are not shuffled during obfuscation. If two objects are close together, then the relative offset between objects (or their fields, methods, etc.) should be the same between updates. This will break if an object is inserted in between objects, or if an object between them is moved or removed. It will also break if methods, fields, etc. are added, removed, or changed.
--------------------------------------------------------------------------------
/Python/old.py:
--------------------------------------------------------------------------------
1 | # noinspection PyUnresolvedReferences
2 | import copy
3 | # noinspection PyUnresolvedReferences
4 | from typing import *
5 | import string
6 | from functools import *
7 |
8 |
def removeattributes(thisobject, toremovenewlines=False):
    """
    Function has been removed

    Kept only so that stale callers fail loudly instead of hitting a NameError.

    Arguments:
        thisobject: ignored
        toremovenewlines: ignored

    Raises:
        NotImplementedError: always
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal: the continuation glued the two sentences together
    # and pulled any indentation of the second line into the message.
    raise NotImplementedError(
        "removeattributes function has been removed. "
        "Attributes are removed from the whole dumpcs, "
        "so the call to removeattributes can be deleted."
    )
15 |
16 |
def removewhitespace(fullstr, beginning=True, end=True, allwhitespace=False):
    """
    Function has been removed

    Kept only so that stale callers fail loudly and are pointed at the
    replacement functions.

    Arguments:
        fullstr: ignored
        beginning: ignored
        end: ignored
        allwhitespace: ignored

    Raises:
        NotImplementedError: always
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which glued the sentences together.
    raise NotImplementedError(
        "removewhitespace function has been removed. "
        "To trim whitespace, use the trim function. "
        "To remove all whitespace, use the removeallwhitespace function."
    )
23 |
def getobjects(*args, **kwargs):
    """
    Function has been removed

    Kept only so that stale callers fail loudly and are pointed at
    dumpcs_getobjects.

    Arguments:
        *args: ignored
        **kwargs: ignored

    Raises:
        NotImplementedError: always
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which glued the sentences together.
    raise NotImplementedError(
        "getobjects function has been removed. "
        "To get objects, use dumpcs_getobjects directly on dumpcs."
    )
30 |
def getfullobjects(*args, **kwargs):
    """
    Function has been removed

    Kept only so that stale callers fail loudly and are pointed at
    dumpcs_getobjects.

    Arguments:
        *args: ignored
        **kwargs: ignored

    Raises:
        NotImplementedError: always
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which glued the sentences together.
    raise NotImplementedError(
        "getfullobjects function has been removed. "
        "To get objects, use dumpcs_getobjects directly on dumpcs."
    )
37 |
38 |
def readaftersubstring(s: str, sub: str) -> str:
    """
    Returns the part of a string that follows the first occurrence of a substring
    If the substring does not occur, the whole string is returned unchanged

    This function is based off of
    https://stackoverflow.com/questions/12572362/how-to-get-a-string-after-a-specific-substring/57064170#57064170

    Example:
        String: "// Namespace: Foo"
        Sub: "// Namespace: "
        Return: "Foo"

    Arguments:
        s: string to read from
        sub: substring marking where to start reading

    Return:
        everything after the first occurrence of sub, or s if sub is absent
    """
    head, separator, tail = s.partition(sub)
    # partition leaves the separator slot empty when sub was not found
    return tail if separator else head
54 |
55 |
def readbeforesubstring(s: str, sub: str) -> str:
    """
    Returns the part of a string that precedes the first occurrence of a substring
    If the substring does not occur, an empty string is returned

    This function is based off of
    https://stackoverflow.com/questions/12572362/how-to-get-a-string-after-a-specific-substring/57064170#57064170

    Example:
        String: "public class Foo // TypeDefIndex: 1"
        Sub: " //"
        Return: "public class Foo"

    Arguments:
        s: string to read from
        sub: substring marking where to stop reading

    Return:
        everything before the first occurrence of sub, or "" if sub is absent
    """
    head, separator, _ = s.partition(sub)
    if separator:
        return head
    # NOTE(review): readaftersubstring returns the whole string when sub is
    # missing, while this function returns "" - confirm the asymmetry is wanted.
    return ""
72 |
73 |
def removesubstring(s: str, sub: str) -> str:
    """
    Removes every occurrence of one substring from a string

    Example:
        String: "Removing Substrings"
        Sub: "ing"
        Return: "Remov Substrs"

    Arguments:
        s: string to remove substring from
        sub: substring to remove from string

    Return:
        string with substring removed
    """
    stripped = s.replace(sub, "")
    return stripped
94 |
95 |
def removesubstrings(s: str, subs: list[str]) -> str:
    """
    Removes several substrings from a string, one after another in list order
    Each removal works on the result of the previous one

    Example:
        String: "Removing Substrings"
        Subs: ["e","in","ing"]
        Return: "Rmovg Substrgs"

    Arguments:
        s: string to remove substrings from
        subs: list of substrings to remove from string

    Return:
        string with substrings removed
    """
    # Fold the removals over the list, starting from the untouched string
    return reduce(lambda remaining, sub: remaining.replace(sub, ""), subs, s)
118 |
119 |
def replacesubstring(s: str, sub: str, replace: str) -> str:
    """
    Replaces every occurrence of one substring in a string

    Example:
        String: "a-b-c"
        Sub: "-"
        Replace: "+"
        Return: "a+b+c"

    Arguments:
        s: string to replace substring in
        sub: substring to look for
        replace: text to put in place of each occurrence of sub

    Return:
        string with substring replaced
    """
    replaced = s.replace(sub, replace)
    return replaced
126 |
127 |
def replacesubstrings(s: str, subs: list[str], replace: str) -> str:
    """
    Replaces several substrings in a string with the same replacement text,
    one after another in list order
    Each replacement works on the result of the previous one

    Example:
        String: "a-b_c"
        Subs: ["-","_"]
        Replace: " "
        Return: "a b c"

    Arguments:
        s: string to replace substrings in
        subs: list of substrings to look for
        replace: text to put in place of each occurrence

    Return:
        string with substrings replaced
    """
    result = s
    for target in subs:
        result = result.replace(target, replace)
    return result
136 |
137 |
# Translation table that deletes every character in string.whitespace.
# Built once at import time instead of on every call.
_WHITESPACE_REMOVAL_TABLE = str.maketrans("", "", string.whitespace)


def removeallwhitespace(s: str) -> str:
    # NOTE: Function is named removeallwhitespace because old function removewhitespace
    # was for trimming. Once all functions use trim instead, this function can be renamed
    # back to removewhitespace.
    """
    Removes all whitespace from a string
    Does not just trim leading and trailing. For that, use the trim function.
    Only the characters in string.whitespace (space, tab, new line, carriage
    return, vertical tab, form feed) are removed.

    Example:
        String: " Whitespace will be removed from
                 this string"
        Return: "Whitespacewillberemovedfromthisstring"

    Arguments:
        s: string to remove whitespace from

    Return:
        string with whitespace removed
    """
    # A single translate pass replaces one str.replace call per whitespace character
    return s.translate(_WHITESPACE_REMOVAL_TABLE)
165 |
166 |
def removeblanklines(s: str, toremovewhitespacelines=True) -> str:
    """
    Removes all blank lines from a string
    The remaining lines are joined back together with "\\n", so the result
    never starts or ends with a blank line.

    Example:
        String: "
                 blank
                 lines will be

                 removed from

                 thisstr
                 ing"
        toremovewhitespacelines: true
        Return: "blank
                 lines will be
                 removed from
                 thisstr
                 ing"

    Arguments:
        s: string to remove blank lines from
        toremovewhitespacelines: whether to remove lines with only whitespace (eg: "   ")

    Return:
        string with blank lines removed
    """
    if toremovewhitespacelines:
        # A line that strips down to nothing is either empty or whitespace only
        keptlines = [line for line in s.splitlines() if line.strip()]
    else:
        # Only drop truly empty lines. Filtering line by line also handles runs
        # of several blank lines, which a single "\n\n" -> "\n" replace left behind.
        keptlines = [line for line in s.splitlines() if line]
    return "\n".join(keptlines)
201 |
202 |
def iswhitespace(s: str,totreatblankaswhitespace=True) -> bool:
    """
    Detects if a string is all whitespace
    Works on strings with any length, including 0

    Example:
        String: "
                 "
        Return: true

        String: " hello world!
                 hi"
        Return: false

    Arguments:
        s: string to check for whitespace
        totreatblankaswhitespace: whether to treat "" as whitespace

    Return:
        bool whether string is all whitespace
    """
    if s != "":
        return s.isspace()
    # str.isspace() is False for "", so the blank case is decided by the flag
    return bool(totreatblankaswhitespace)
234 |
235 |
def trim(s: str, leading=True, trailing=True) -> str:
    """
    Trims whitespace from a string

    Example:
        String: "    hello world!
                 "
        Leading: true
        Trailing: true
        Return: "hello world!"

    Arguments:
        s: string to trim whitespace from
        leading: whether to trim leading whitespace
        trailing: whether to trim trailing whitespace

    Return:
        string with trimmed whitespace
    """
    if not leading and not trailing:
        # Nothing requested, hand the string back untouched
        return s
    if not trailing:
        return s.lstrip()
    if not leading:
        return s.rstrip()
    return s.strip()
265 |
def getwords(s: str) -> list[str]:
    """
    Splits a string into a list of words
    Treats any type of whitespace as a word delimiter, including new lines and tabs
    Treats chunks of whitespace as delimiters (ex: 2 spaces has the same effect as 1 space)

    Example:
        String: "the quick
                 brown     fox

                 abcdefg  "
        Return: ["the","quick","brown","fox","abcdefg"]

    Arguments:
        s: string to split into words

    Return:
        list of the string's words
    """
    # With no separator given, str.split collapses runs of whitespace and
    # ignores leading / trailing whitespace
    words = s.split(None)
    return words
293 |
294 |
def wordstostring(words: list[str],totrimwords=False,toignoreblankwords=True,toignorewhitespacewords=False,concatenator=" ") -> str:
    """
    Joins a list of words into a string

    Example:
        words: ["hello","a","b"," ","cd\\n","","hey",""]
        concatenator: " "
        toignoreblankwords: false
        totrimwords: false
        toignorewhitespacewords: false
        Return: "hello a b   cd
                   hey "

    Arguments:
        words: list of words to join into a string
        totrimwords: whether to trim leading and trailing whitespace from each word
        (only leading / only trailing whitespace is not supported)
        toignoreblankwords: whether to skip words that are "" (checked after trimming)
        toignorewhitespacewords: whether to skip words that are "" or only whitespace
        (checked before trimming)
        concatenator: the string to put in between words (default space)

    Return:
        string containing all the words concatenated by concatenator (default space)
    """
    if not (toignoreblankwords or toignorewhitespacewords or totrimwords):
        # No per-word processing requested, so join the words as they are
        return concatenator.join(words)
    keptwords = []
    for word in words:
        if (word == "" or word.isspace()) and toignorewhitespacewords:
            continue
        cleaned = word.strip() if totrimwords else word
        if cleaned == "" and toignoreblankwords:
            continue
        keptwords.append(cleaned)
    return concatenator.join(keptwords)
335 |
336 |
def getlines(s: str, toremoveblanklines=False, totrimlines=False) -> list[str]:
    """
    Splits a string into a list of lines

    Example:
        String: "a

                  b

                 c "
        toremoveblanklines: true
        totrimlines: true
        Return: ["a","b","c"]

    Arguments:
        s: string to split into lines
        toremoveblanklines: whether to ignore lines that are blank or only whitespace
        totrimlines: whether to trim leading and trailing whitespace from each line
        (only leading / only trailing whitespace is not supported)

    Return:
        list of the string's lines
    """
    rawlines = s.splitlines()
    if not (toremoveblanklines or totrimlines):
        return rawlines
    result = []
    for rawline in rawlines:
        candidate = rawline.strip() if totrimlines else rawline
        # Blank means empty or made up of whitespace only
        if (candidate == "" or candidate.isspace()) and toremoveblanklines:
            continue
        result.append(candidate)
    return result
377 |
378 |
def linestostring(lines: list[str],totrimlines=True,toignoreblanklines=False) -> str:
    """
    Joins a list of lines into a string, separated by new lines
    Thin wrapper around wordstostring that uses "\\n" as the concatenator

    Example:
        lines: ["a","","b"," ","cd",""]
        toignoreblanklines: False
        totrimlines: False
        Return: "a

                 b

                 cd
                 "

    Arguments:
        lines: list of lines to join into a string
        totrimlines: whether to trim leading and trailing whitespace from each line
        (only leading / only trailing whitespace is not supported)
        Note that this defaults to True.
        toignoreblanklines: whether to concatenate or ignore lines that are blank or only whitespace

    Return:
        string containing all the lines concatenated by new line
    """
    # toignoreblanklines drives both the "blank" and the "whitespace only" filters
    return wordstostring(
        lines,
        totrimwords=totrimlines,
        toignoreblankwords=toignoreblanklines,
        toignorewhitespacewords=toignoreblanklines,
        concatenator="\n",
    )
407 |
408 |
def dumpcs_isvalid(dumpcs: str) -> bool:
    """
    Placeholder for determining whether a dumpcs file is valid
    Not usable yet: the function always raises until a proper detection
    algorithm is written.

    All dumpcs files entered should be valid, but of course they must be checked.
    Note: This function is only meant to perform a short check on the file as a whole.
    On the other hand, the dumpcs_checkformat function is meant to analyze the
    whole thing and be very picky.

    An earlier heuristic, judged too weak, required all of the markers
    "// Image", "// RVA: 0x", "// Namespace:" and " TypeDefIndex: " to be present.

    Arguments:
        dumpcs: the string of the dumpcs file

    Return:
        bool whether the dumpcs is valid (once implemented)

    Raises:
        NotImplementedError: always
    """
    # The length check that used to follow this raise could never run, so it was removed
    raise NotImplementedError("Dumpcs_isvalid function needs improvement")
431 |
432 |
def dumpcs_checkformat(dumpcs: str) -> list[str]:
    """
    Placeholder for scanning dump.cs for unexpected formatting
    Not usable yet: the function always raises.

    Arguments:
        dumpcs: the string of the dumpcs file

    Return:
        List of errors with the line number and error (once implemented)

    Raises:
        NotImplementedError: always
    """
    message = "Dumpcs_checkformat function not completed"
    raise NotImplementedError(message)
446 |
447 |
def dumpcs_hasattributes(dumpcs: str) -> bool:
    """
    Placeholder for determining whether a dumpcs file has attributes
    Not usable yet: the function always raises until a proper detection
    algorithm is written.

    An earlier heuristic, judged too weak, only looked for the text
    "[CompilerGeneratedAttribute]" in the file.

    Arguments:
        dumpcs: the string of the dumpcs file

    Return:
        bool whether the dumpcs has attributes (once implemented)

    Raises:
        NotImplementedError: always
    """
    message = "Dumpcs_hasattributes function not completed"
    raise NotImplementedError(message)
463 |
464 |
def dumpcs_constructor(path: str, attributeswarning=False) -> str:
    """
    Placeholder for loading and initializing a dumpcs file
    Not usable yet: the function always raises, because the file reading
    helper it depends on (filehandler.read_file) does not exist.

    Planned steps once the missing helpers exist:
        1. Read the file at path into a string
        2. Reject the file if dumpcs_isvalid reports it as invalid
        3. If dumpcs_hasattributes reports attributes, strip them with
           dumpcs_removeattributes. Attributes must be removed *before*
           the dumpcs is checked for format errors.
        4. Run dumpcs_checkformat and report any format errors it returns
        5. Return the resulting string

    Arguments:
        path: the file path of the dumpcs file
        attributeswarning: currently unused. Attributes are meant to be removed
        automatically, so no warning about them is planned.

    Returns:
        string containing the contents of the dump.cs file (once implemented)

    Raises:
        NotImplementedError: always
    """
    # The statements that used to follow this raise could never run and read
    # a local variable that was never assigned, so they were removed. The
    # intended pipeline is kept in the docstring above.
    raise NotImplementedError("filehandler.read_file function does not exist")
506 |
507 |
def dumpcs_removeattributes(dumpcs: str) -> str:
    """
    Removes attributes from a dumpcs file
    Does not process attributes, only removes them
    Does not remove blank lines yet

    A line counts as an attribute line when its first non-whitespace
    character is a square bracket.
    The kept lines are joined with linestostring using its default
    arguments, which trims each line.

    Arguments:
        dumpcs: the string of the dumpcs file

    Returns:
        string containing dumpcs content with attributes removed
    """
    # startswith is used instead of indexing the first character so that
    # blank and whitespace-only lines are kept rather than raising IndexError
    keptlines = [
        line
        for line in getlines(dumpcs, False, False)
        if not trim(line, True, False).startswith("[")
    ]
    return linestostring(keptlines)
536 |
537 |
def dumpcsobject_getnamespace(content):
    """
    Gets the namespace of a dumpcs object

    The namespace is read from the first line of the object, which is
    expected to look like "// Namespace: Some.Namespace". An object in the
    global namespace has nothing after the colon and yields "".

    Example:
        content: "// Namespace: Foo.Bar
                  public class Baz // TypeDefIndex: 1"
        Return: "Foo.Bar"

    Arguments:
        content: the full text of one dumpcs object

    Return:
        the namespace name, or "" for the global namespace.
        If the first line has no "// Namespace:" marker, the line itself is returned.

    Raises:
        IndexError: if content has no lines
    """
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    namespaceline = content.splitlines()[0]
    # Match without the trailing space so a trimmed "// Namespace:" line
    # is still recognised as the global namespace
    _, marker, namespace = namespaceline.partition("// Namespace:")
    if not marker:
        return namespaceline
    return namespace.strip()
561 |
def dumpcsobject_gettype(content):
    """
    Gets the type (struct, class, enum, or interface) of a dumpcs object

    The type is the first word on the object's declaration line (the second
    line of the object) that is one of the object type keywords.

    Example:
        content: "// Namespace: Foo
                  public sealed class Bar : Baz // TypeDefIndex: 12"
        Return: "class"

    Arguments:
        content: the full text of one dumpcs object

    Return:
        "class", "struct", "interface" or "enum"

    Raises:
        IndexError: if content has fewer than two lines
        NotImplementedError: if the declaration line has no object type keyword
        (stands in for a proper unexpected-format error that does not exist yet)
    """
    # A set literal of whole keywords. set("class,struct,interface,enum") would
    # build a set of single characters instead. Should be a constant!
    objecttypes = {"class", "struct", "interface", "enum"}
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    objectdeclarationline = content.splitlines()[1]
    for word in objectdeclarationline.split():
        if word in objecttypes:
            return word
    # Object type (class, struct, enum, interface) not found
    raise NotImplementedError("exceptions.errors.unexpecteddumpcsformat function does not exist")
590 |
def dumpcsobject_getdatatype(content):
    """
    Gets the data type of a dumpcs object

    The data type is made up of the words that come before the object type
    keyword (class, struct, interface, enum) on the object's declaration line
    (the second line of the object), joined by single spaces.

    Example:
        content: "// Namespace: Foo
                  public sealed class Bar : Baz // TypeDefIndex: 12"
        Return: "public sealed"

    Arguments:
        content: the full text of one dumpcs object

    Return:
        the words preceding the object type keyword, or "" if there are none

    Raises:
        IndexError: if content has fewer than two lines
        NotImplementedError: if the declaration line has no object type keyword
        (stands in for a proper unexpected-format error that does not exist yet)
    """
    # A set literal of whole keywords. set("class,struct,interface,enum") would
    # build a set of single characters instead. Should be a constant!
    objecttypes = {"class", "struct", "interface", "enum"}
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    objectdeclarationline = content.splitlines()[1]
    datatypewords = []
    for word in objectdeclarationline.split():
        if word in objecttypes:
            return " ".join(datatypewords)
        datatypewords.append(word)
    # Object type (class, struct, enum, interface) not found
    raise NotImplementedError("exceptions.errors.unexpecteddumpcsformat function does not exist")
624 |
def dumpcsobject_getname(content):
    """
    Gets the name of a dumpcs object

    The name is the last word on the object's declaration line (the second
    line of the object) before the " : " that introduces the base list, or
    before the trailing " //" comment when there is no base list.

    Example:
        content: "// Namespace: Foo
                  public sealed class Bar : Baz // TypeDefIndex: 12"
        Return: "Bar"

    Arguments:
        content: the full text of one dumpcs object

    Return:
        the name of the object

    Raises:
        IndexError: if content has fewer than two lines, or nothing precedes
        the separator on the declaration line
    """
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    objectdeclarationline = content.splitlines()[1]
    # Inherited objects are cut at the base list, others at the trailing comment
    separator = " : " if " : " in objectdeclarationline else " //"
    # The line is the text being searched and the separator is the needle;
    # these two were previously passed the wrong way round.
    prefix = objectdeclarationline.partition(separator)[0]
    return prefix.split()[-1]
654 |
def dumpcsobject_getbase(content):
    """
    Gets the base of a dumpcs object

    The base is the text on the object's declaration line (the second line
    of the object) between " : " and the trailing " //" comment.

    Example:
        content: "// Namespace: Foo
                  public sealed class Bar : Baz // TypeDefIndex: 12"
        Return: "Baz"

    Arguments:
        content: the full text of one dumpcs object

    Return:
        the text of the base list, or None if the declaration line has no " : "

    Raises:
        IndexError: if content has fewer than two lines
    """
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    objectdeclarationline = content.splitlines()[1]
    _, marker, suffix = objectdeclarationline.partition(" : ")
    if not marker:
        # Not inherited
        return None
    # Cut the trailing comment off the part after " : ". Previously the cut
    # was applied to the whole line, so the part after " : " was never used.
    return suffix.partition(" //")[0]
682 |
def dumpcsobject_gettypedefindex(content):
    """
    Gets the TypeDefIndex of a dumpcs object

    The index is the text following "// TypeDefIndex: " on the object's
    declaration line (the second line of the object).

    Example:
        content: "// Namespace: Foo
                  public sealed class Bar : Baz // TypeDefIndex: 12"
        Return: "12"

    Arguments:
        content: the full text of one dumpcs object

    Return:
        the TypeDefIndex as a string (it is not converted to a number).
        If the declaration line has no "// TypeDefIndex: " marker, the whole
        declaration line is returned.

    Raises:
        IndexError: if content has fewer than two lines
    """
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    objectdeclarationline = content.splitlines()[1]
    _, marker, typedefindex = objectdeclarationline.partition("// TypeDefIndex: ")
    return typedefindex if marker else objectdeclarationline
707 |
def dumpcsobject_isinherited(content):
    """
    Determines whether a dumpcs object is inherited

    An object counts as inherited when its declaration line (the second
    line of the object) contains " : ".

    Example:
        content: "// Namespace: Foo
                  public sealed class Bar : Baz // TypeDefIndex: 12"
        Return: true

    Arguments:
        content: the full text of one dumpcs object

    Return:
        bool whether the declaration line contains " : "

    Raises:
        IndexError: if content has fewer than two lines
    """
    # functools.lru_cache is a decorator factory; it was previously called here
    # with the computed value as an argument, which raises TypeError.
    objectdeclarationline = content.splitlines()[1]
    return " : " in objectdeclarationline
731 |
def dumpcs_getobjects(dumpcs: str,
                      createtypemodels=True,
                      objecttypefilter: Union[set[str], None] = None,
                      namespacefilter: Union[set[str], None] = None,
                      customfilter: Union[Callable, None] = None) -> list[dict]:
    """
    Parses dumpcs file into a list of objects
    Does not remove blank lines

    The dumpcs is split on "// Namespace: ", which marks the start of each
    object. Everything before the first marker is discarded.

    Possible Improvements:
        1. Returning a dictionary of objects by path (namespace -> object) may be faster
        and simpler to look objects up in than a list of dictionaries, but a list is
        simpler to create, process, and iterate over
        2. Object delimiter should be a constant

    NOTE(review): dumpcsobject_getmethods, dumpcsobject_getfields,
    dumpcsobject_getproperties and buildtypemodel are not defined in the
    visible part of this module - confirm they exist before relying on this function.

    Arguments:
        dumpcs: the string of the dumpcs file
        createtypemodels: whether to build a type model for each object.
        When False, each object's "typemodel" is None.
        objecttypefilter: if given, only objects whose type is in this collection are kept
        namespacefilter: if given, only objects whose namespace is in this collection are kept
        customfilter: if given, called with each object's dictionary (before the
        type model is built); objects for which it returns a falsy value are dropped

    Return:
        list of dictionaries with the keys "content", "name", "typedefindex",
        "type", "namespace", "datatype", "isinherited", "methods", "fields",
        "properties", "base" and "typemodel"
    """
    objectdelimiter = "// Namespace: "  # Should be a constant
    # Sets are much faster than lists or tuples for membership tests, so convert
    # to them. None means "no filter" and must be left alone.
    if objecttypefilter is not None and not isinstance(objecttypefilter, set):
        objecttypefilter = set(objecttypefilter)
    if namespacefilter is not None and not isinstance(namespacefilter, set):
        namespacefilter = set(namespacefilter)
    # The split captures everything before the first object as its first
    # element (str.split always returns at least one element), so skip it
    fullobjects = dumpcs.split(objectdelimiter)[1:]
    objects = []
    for fullobject in fullobjects:
        # Add "// Namespace: " back on, as string.split excludes the delimiter
        content = objectdelimiter + fullobject
        # Exit early on namespacefilter or objecttypefilter to save some work
        namespace = dumpcsobject_getnamespace(content)
        if namespacefilter is not None and namespace not in namespacefilter:
            continue
        # Not named "type": a local with that name shadows the builtin for the
        # whole function body
        objecttype = dumpcsobject_gettype(content)
        if objecttypefilter is not None and objecttype not in objecttypefilter:
            continue
        isinherited = dumpcsobject_isinherited(content)
        Object = {
            "content": content,
            "name": dumpcsobject_getname(content),
            "typedefindex": dumpcsobject_gettypedefindex(content),
            "type": objecttype,
            "namespace": namespace,
            "datatype": dumpcsobject_getdatatype(content),
            "isinherited": isinherited,
            "methods": dumpcsobject_getmethods(content),
            "fields": dumpcsobject_getfields(content),
            "properties": dumpcsobject_getproperties(content),
            "base": dumpcsobject_getbase(content) if isinherited else None,
        }
        # Now that we have all the object's data, we can check against custom filter.
        # This allows us to avoid creating the object's type model
        if customfilter is not None and not customfilter(Object):
            continue
        # Create type model from the object's data if requested, then add it to the object
        Object["typemodel"] = buildtypemodel(Object) if createtypemodels else None
        objects.append(Object)
    return objects
--------------------------------------------------------------------------------
/Python/OldDeobfuscationFunctions.py:
--------------------------------------------------------------------------------
def loaddumpcs(path, attributeswarning=True):
    """Read the dump.cs file at ``path`` into the global ``dumpcs`` and return it.

    ``invaliddumpcswarning(path)`` is called when any of the marker strings
    expected in a dump.cs file is missing from the content.
    ``attributeswarning`` is currently unused: the attribute check it
    controlled is disabled.
    """
    global dumpcs
    dumpcs = read_file(path)
    requiredmarkers = ("// Image", "// RVA: 0x", "// Namespace:", " TypeDefIndex: ")
    if any(marker not in dumpcs for marker in requiredmarkers):
        invaliddumpcswarning(path)
    return dumpcs
9 |
def getobjectof(index):
    """Return the object text of the global ``dumpcs`` around position ``index``.

    Searches backwards from ``index`` for the "// Namespace: " marker that
    opens the object and forwards for the next marker (or the end of
    ``dumpcs``) that closes it. The text in between is passed through
    ``removeblanklines``.

    Returns None (after calling ``objectnotdeclarederror``) when ``dumpcs``
    does not exist, and "" when ``index`` is too close to the end of
    ``dumpcs`` or the backward search reaches the start of the text.
    """
    marker = "// Namespace: "
    index = int(index)
    if not variableexists("dumpcs"):
        objectnotdeclarederror("dumpcs")
        return None
    if index > len(dumpcs) - len(marker):
        # Impossible scenario, but handled anyway
        return ""
    windowend = index
    while True:
        windowstart = windowend - len(marker)
        # find() gives -1 on a miss, so a miss leaves startpos at 0
        startpos = dumpcs.find(marker, windowstart, windowend) + 1
        if windowstart < 1:
            # Reached the beginning without a usable marker (shouldn't happen)
            return ""
        if startpos != 0:
            break
        windowend -= 1
    # Find the next marker after startpos; without one this is the last object
    endpos = dumpcs.find(marker, startpos + len(marker), len(dumpcs))
    if endpos == -1:
        endpos = len(dumpcs)
    # The object is between the two markers
    return removeblanklines(substring(dumpcs, startpos, endpos), True, True)
29 |
def getmethodof(index):
    """Return the method entry of the global ``dumpcs`` around position ``index``.

    The entry is the blank-line-delimited ("\\n\\n") text found by scanning
    backwards from ``index``. Its first two lines are stripped and the result
    is returned only if it contains both ``_isoffsetstring`` and
    ``_ismethodstring``.

    Returns None (after calling ``objectnotdeclarederror``) when ``dumpcs``
    does not exist, and "" when no method entry is recognised.
    """
    index = int(index)
    if not(variableexists("dumpcs")):
        objectnotdeclarederror("dumpcs")
        return(None)
    if index > (len(dumpcs)- len("\n\n")): #Impossible scenario, but ocd makes me put this here!
        return("")
    rangebehind = 0
    startpos = 0
    # Slide a window of len("\n\n") characters backwards from index, one position per pass.
    # find() returns -1 on a miss, so "+ 1" leaves startpos at 0 and the loop keeps going.
    while startpos == 0:
        startpos = dumpcs.find("\n\n",((index - rangebehind) - len("\n\n")),(index - rangebehind)) + 1
        if (((index - rangebehind) - len("\n\n")) < 1): #Not found - must be the beginning (shouldn't happen)
            startpos = 0
            return("") # no method
        rangebehind = rangebehind + 1
    endpos = dumpcs.find("\n\n",startpos + len("\n\n"),len(dumpcs)) #find the next blank line after startpos
    if endpos == -1: #Not found - shouldn't be possible but we assume it is end of dump.cs
        endpos = len(dumpcs) #set to the end
    methodline = removeblanklines(substring(dumpcs,startpos,endpos)).strip()
    if ((contains(_offsetsuffix,methodline)) and (len(getlines(methodline))) == 1): #just method offset line
        # Only the offset line was captured, so look for a later end position.
        # rangebehind counts downwards here, which widens the search window on every pass.
        # A miss gives -1 + 2 == 1, which keeps the loop running.
        rangebehind = -1
        endpos = 1
        while endpos == 1:
            endpos = dumpcs.find("\n\n",startpos + 3,(startpos + ((index - rangebehind) + len("\n\n")))) + 2
            if (((index - rangebehind) + len("\n\n")) > len(dumpcs)): #Not found - shouldn't be possible but we assume it is end of dump.cs
                endpos = len(dumpcs) #set to the end
            rangebehind = rangebehind - 1
        methodline = removeblanklines(substring(dumpcs,startpos,endpos)).strip()
        lines = getlines(methodline)
        lines[0] = lines[0].strip() #remove whitespace from the two lines
        lines[1] = lines[1].strip()
        methodline = linestostring(lines)
        if not((contains(_isoffsetstring,methodline)) and contains(_ismethodstring,methodline)): #It isn't a method
            return("")
        else:
            return(methodline)
    else: #method offset line and method type line, or not method
        lines = getlines(methodline)
        if len(getlines(methodline)) < 2: #error - must not be a method
            return("")
        lines[0] = lines[0].strip() #remove whitespace from the two lines
        lines[1] = lines[1].strip()
        methodline = linestostring(lines)
        if not(contains(_offsetsuffix,methodline)): #error - must not be a method
            return("")
        if not((contains(_isoffsetstring,methodline)) and contains(_ismethodstring,methodline)): #It isn't a method
            return("")
        return(methodline)
78 |
def getfieldof(index):
    """Return the field line of the global ``dumpcs`` located from ``index``.

    The line starts just after the first "\\n" found at or after
    ``index - 1`` and runs to the next "\\n" (or the end of ``dumpcs``).
    It is stripped and returned only if it contains ``_isfieldstring``.

    Returns None (after calling ``objectnotdeclarederror``) when ``dumpcs``
    does not exist, and "" when ``index`` is out of range or the line is not
    a field.
    """
    index = int(index)
    if not variableexists("dumpcs"):
        objectnotdeclarederror("dumpcs")
        return None
    if index > len(dumpcs) - len("\n"):
        # Impossible scenario, but handled anyway
        return ""
    searchfrom = index - len("\n")
    # NOTE(review): this searches forwards from index - 1, so the result starts
    # after the next newline rather than at the line containing index - confirm intended.
    startpos = dumpcs.find("\n", searchfrom, len(dumpcs)) + 1
    if searchfrom < 1:
        # Too close to the beginning (shouldn't happen) - no field
        return ""
    # Find the next "\n" after startpos; without one, assume the end of dump.cs
    endpos = dumpcs.find("\n", startpos + 1, len(dumpcs))
    if endpos == -1:
        endpos = len(dumpcs)
    # The field is between the two new lines
    thisfield = substring(dumpcs, startpos, endpos).strip()
    if not contains(_isfieldstring, thisfield):
        return ""
    return thisfield
100 |
def getpropertyof(index):
    """Return the property line of the global ``dumpcs`` located from ``index``.

    The line starts just after the first "\\n" found at or after
    ``index - 1`` and runs to the next "\\n" (or the end of ``dumpcs``).
    It is stripped and returned only if it contains ``_ispropertystring``.

    Returns None (after calling ``objectnotdeclarederror``) when ``dumpcs``
    does not exist, and "" when ``index`` is out of range or the line is not
    a property.
    """
    index = int(index)
    if not variableexists("dumpcs"):
        objectnotdeclarederror("dumpcs")
        return None
    if index > len(dumpcs) - len("\n"):
        # Impossible scenario, but handled anyway
        return ""
    searchfrom = index - len("\n")
    # NOTE(review): this searches forwards from index - 1, so the result starts
    # after the next newline rather than at the line containing index - confirm intended.
    startpos = dumpcs.find("\n", searchfrom, len(dumpcs)) + 1
    if searchfrom < 1:
        # Too close to the beginning (shouldn't happen) - no property
        return ""
    # Find the next "\n" after startpos; without one, assume the end of dump.cs
    endpos = dumpcs.find("\n", startpos + 1, len(dumpcs))
    if endpos == -1:
        endpos = len(dumpcs)
    # The property is between the two new lines
    thisproperty = substring(dumpcs, startpos, endpos).strip()
    if not contains(_ispropertystring, thisproperty):
        return ""
    return thisproperty
122 |
123 |
def getlineof(index, text, removewhitespace=False):
    """Return a line of ``text`` located from character position ``index``.

    The line starts just after the first "\\n" found at or after
    ``index - 1`` and runs to the next "\\n" (or the end of ``text``).
    Returns "" when ``index`` is smaller than 2. The line is stripped when
    ``removewhitespace`` is true.
    """
    index = int(index)
    searchfrom = index - len("\n")
    startpos = text.find("\n", searchfrom, len(text)) + 1
    if searchfrom < 1:
        # Too close to the beginning - no line
        return ""
    # Find the next "\n" after startpos; without one we must be at the end
    endpos = text.find("\n", startpos + 1, len(text))
    if endpos == -1:
        endpos = len(text)
    # This line is between the two new lines
    thisline = substring(text, startpos, endpos)
    return thisline.strip() if removewhitespace else thisline
140 |
def offsettomethod(offset):
    """Return the method entry of the global ``dumpcs`` that has ``offset``.

    A leading "0x" is removed from ``offset`` before it is searched for as
    ``_offsetprefix + offset + _offsetsuffix``.

    Returns None (after calling ``objectnotdeclarederror``) when ``dumpcs``
    does not exist, "" when the offset is not found, and otherwise whatever
    ``getmethodof`` returns for the position of the match.
    """
    if offset.startswith("0x"):
        offset = readafter(offset, "0x")
    if not variableexists("dumpcs"):
        objectnotdeclarederror("dumpcs")
        return None
    position = dumpcs.find(_offsetprefix + offset + _offsetsuffix)
    if position == -1:
        # Not found
        return ""
    return getmethodof(position)


getmethodofoffset = offsettomethod  # same thing, but different name
154 |
def offsettofield(classname, offset):
    """Placeholder: not implemented yet, always returns "Not Done!"."""
    result = "Not Done!"
    return result


getfieldofoffset = offsettofield  # same thing, but different name
159 |
def getobjectofoffset(offset):
    """Return the object of the global ``dumpcs`` that contains ``offset``.

    A leading "0x" is removed from ``offset`` before it is searched for as
    ``_offsetprefix + offset + _offsetsuffix``. The position of the match
    (-1 when there is none) is handed to ``getobjectof`` and its result is
    returned.

    Returns None (after calling ``objectnotdeclarederror``) when ``dumpcs``
    does not exist.
    """
    if offset.startswith("0x"):
        offset = readafter(offset, "0x")
    if not variableexists("dumpcs"):
        objectnotdeclarederror("dumpcs")
        return None
    return getobjectof(dumpcs.find(_offsetprefix + offset + _offsetsuffix))


offsettoobject = getobjectofoffset  # same thing, but different name
170 |
171 |
def getnamespaces(objects):
    """Group object dictionaries by their namespace.

    objects: sized iterable of dictionaries that each have a "Namespace" key.

    Returns a dictionary mapping each namespace name to the list of objects
    in it, in input order. Progress is printed as "position/total" whenever
    ``multipleof(position, 1000)`` is true.
    """
    namespaces = {}
    for i, thisobject in enumerate(objects):
        if multipleof(i, 1000):
            # Fixed: this used to read len(bjects), a NameError on the first progress print
            print(str(i) + "/" + str(len(objects)))
        namespaces.setdefault(thisobject["Namespace"], []).append(thisobject)
    return namespaces
187 |
def getfullobjects(getshared=True, toremoveattributes=True, toremoveblanklines=True, toremoveallblanklines=False, returntuple=True):
    """Split the global ``dumpcs`` into the full text of each object.

    getshared: when false, objects for which ``getisshared`` is true are dropped.
    toremoveattributes: run every object through ``removeattributes``.
    toremoveblanklines: remove blank lines (through ``removeblanklines``, or
        through ``removeattributes`` when attributes are removed too).
    toremoveallblanklines: selects the four-argument ``removeblanklines`` call.
    returntuple: return a tuple instead of a list.

    Sets the globals ``flagremovedblanklines``, ``flagremovedattributes`` and
    ``flagremovedshared`` to True for the steps that were performed, and
    prints progress as "position/total" whenever ``multipleof(position, 1000)``
    is true.
    """
    # All globals are declared up front; they are only assigned in the branches below
    global flagremovedblanklines, flagremovedattributes, flagremovedshared

    def reportprogress(position, total):
        if multipleof(position, 1000):
            print(str(position) + "/" + str(total))

    # Objects start with the separator, so the first piece is everything before
    # the first object and is always discarded
    fullobjects = dumpcs.split(_objectseparator)[1:]
    if toremoveblanklines and not toremoveattributes:
        withoutblanks = []
        for position, thisitem in enumerate(fullobjects):
            reportprogress(position, len(fullobjects))
            if toremoveallblanklines:
                withoutblanks.append(removeblanklines(thisitem, True, True, True))
            else:
                withoutblanks.append(removeblanklines(thisitem))
        flagremovedblanklines = True
        fullobjects = withoutblanks
    # Add the separator back on, as string.split excludes it
    fullobjects = [_objectseparator + thisitem for thisitem in fullobjects]
    if toremoveattributes:
        withoutattributes = []
        for position, thisitem in enumerate(fullobjects):
            reportprogress(position, len(fullobjects))
            withoutattributes.append(removeattributes(thisitem, toremoveblanklines))
        fullobjects = withoutattributes
        flagremovedattributes = True
        if toremoveblanklines:
            flagremovedblanklines = True
    if not getshared:
        # Remove shared objects
        notshared = []
        for position, thisitem in enumerate(fullobjects):
            reportprogress(position, len(fullobjects))
            if not getisshared(thisitem):
                notshared.append(thisitem)
        fullobjects = notshared
        flagremovedshared = True
    return tuple(fullobjects) if returntuple else fullobjects
246 |
def removeattributes(thisobject, toremovenewlines=False):
    """Return ``thisobject`` with attribute prefixes removed from its lines.

    Nothing is done (``thisobject`` is returned unchanged) when the global
    ``flagremovedattributes`` is true. Otherwise each line is examined in its
    ``removewhitespace(line, True, False, False)`` form:

    * a line whose character at ``letter(1, ...)`` is ``_attributestart`` and
      that contains ``_attributeend`` is replaced by the text after the
      attribute end, and dropped when that text is empty or matches
      ``_isoffsetstring`` according to ``checkforstringat``;
    * any other line is kept as it was, except that empty lines are dropped
      when ``toremovenewlines`` is true.
    """
    global flagremovedattributes
    if flagremovedattributes:
        # Attributes have already been removed!
        return thisobject
    keptlines = []
    for originalline in getlines(thisobject, False, False):
        candidate = removewhitespace(originalline, True, False, False)
        # Yes, purposely position 1, not 0 - the line begins with a tab (" [")
        isattributeline = (letter(1, candidate) == _attributestart) and contains(_attributeend, candidate)
        if not isattributeline:
            if not (toremovenewlines and originalline == ""):
                keptlines = listadd(originalline, keptlines)
            continue
        if contains(_attributeend + " ", candidate):
            remainder = readafter(candidate, _attributeend + " ")
        else:
            remainder = readafter(candidate, _attributeend)
        if remainder == "":
            continue
        # An rva only shows up here after compiler generated etc. was removed, so it is useless
        if checkforstringat(" " + _isoffsetstring, remainder, 1) or checkforstringat(_isoffsetstring, remainder, 1):
            continue
        keptlines = listadd(remainder, keptlines)
    return linestostring(keptlines)
299 |
def getuserdefinedtype(thisobject):
    """Return the user defined type keyword of ``thisobject``.

    The words of the object's declaration line (line ``_objecttypeline``;
    the 1st line is the namespace, this one describes the object, e.g.
    abstract class or public enum) are scanned for the first one that is in
    ``_userdefinedtypes``. "Other" is returned when none is found.

    Side effect: the global ``isshared`` is set to whether the declaration
    line contains any of ``_issharedstrings`` (False when no type keyword
    was found).
    """
    global isshared
    thisobject = removeattributes(thisobject)
    isshared = False
    lines = getlines(thisobject, False, False)
    declaration = item(_objecttypeline, lines)
    # Unknown structure means unknown object, so start from "Other"
    foundtype = "Other"
    for thisword in getwords(declaration):
        if thisword in _userdefinedtypes:
            foundtype = thisword
            isshared = any(contains(marker, declaration) for marker in _issharedstrings)
            break
    return foundtype.strip()


getuserdefinedtypeobject = getuserdefinedtype  # same thing, but different name
getuserdefinedtypeofobject = getuserdefinedtype  # same thing, but different name
objectgetuserdefinedtype = getuserdefinedtype  # same thing, but different name
329 |
def getisshared(thisobject):
    """Return whether ``thisobject`` is a shared object.

    An object counts as shared when its declaration line (line
    ``_objecttypeline``) has a word from ``_userdefinedtypes`` and contains
    any of ``_issharedstrings``. There may be false positives.

    Side effect: the result is also stored in the global ``isshared``.
    """
    global isshared
    thisobject = removeattributes(thisobject)
    isshared = False
    lines = getlines(thisobject, False, False)
    declaration = item(_objecttypeline, lines)
    for thisword in getwords(declaration):
        if thisword in _userdefinedtypes:
            # any() stops at the first match - no need to go through the whole list
            isshared = any(contains(marker, declaration) for marker in _issharedstrings)
            break
    return isshared


getissharedobject = getisshared  # same thing, but different name
objectgetisshared = getisshared  # same thing, but different name
# NOTE(review): getisshared() rebinds the global name isshared to a bool, so
# this alias stops being callable after the first call - confirm intended.
isshared = getisshared  # same thing, but different name
objectisshared = getisshared  # same thing, but different name
issharedobject = getisshared  # same thing, but different name
getshared = getisshared  # same thing, but different name
objectgetshared = getisshared  # same thing, but different name
getsharedobject = getisshared  # same thing, but different name
362 |
def getobjecttype(thisobject):
    """Return the modifiers that precede the type keyword of ``thisobject``.

    The words of the object's declaration line (line ``_objecttypeline``)
    are collected until the first word that is in ``_userdefinedtypes``,
    e.g. "public" from "public enum" or "internal static" from
    "internal static class". The words are joined with single spaces.

    Returns "" when the declaration line has no words before the type
    keyword. The previous implementation indexed the last character of the
    collected text and raised IndexError in that case.
    """
    thisobject = removeattributes(thisobject)
    lines = getlines(thisobject, False, False)
    modifiers = []
    for thisword in getwords(item(_objecttypeline, lines)):
        if thisword in _userdefinedtypes:
            break
        modifiers.append(thisword)
    return " ".join(modifiers).strip()


gettypeofobject = getobjecttype  # same thing, but different name
382 |
def getobjectnamespace(thisobject):
    """Return the namespace name of ``thisobject``.

    The name is the stripped text after ``_namespacenamestart`` on line
    ``_namespaceline`` of the object. ``_globalnamespacename`` is returned
    when that text is empty.
    """
    namespaceline = item(_namespaceline, getlines(thisobject))
    namespacename = readafter(namespaceline, _namespacenamestart).strip()
    return namespacename if namespacename != "" else _globalnamespacename
391 |
def getobjectname(thisobject):
    """Return the name of ``thisobject``.

    The name is read from the declaration line (line ``_objecttypeline``,
    e.g. "public static class ...") between the object's type prefix
    (``getobjecttype`` + " " + ``getuserdefinedtype``) and
    ``_objecttypeend``. Unless ``_objectkeepaftercolon`` is set, everything
    from ``_objectcolon`` on is cut off. The result is stripped.
    """
    cleanobject = removeattributes(thisobject)
    declaration = item(_objecttypeline, getlines(cleanobject))
    typeprefix = getobjecttype(cleanobject) + " " + getuserdefinedtype(cleanobject)
    objectname = readbetween(declaration, typeprefix, _objecttypeend)
    if not _objectkeepaftercolon and contains(_objectcolon, objectname):
        beforecolon = readbefore(objectname, _objectcolon)
        # readbefore still keeps up to the end of the colon, so remove that
        objectname = substring(beforecolon, 1, len(beforecolon) - len(_objectcolon))
    return objectname.strip()
402 |
def getmethod(methodname, methodslist, casesensitive=False):
    """Return the data dictionary of the first method named ``methodname``.

    methodname: name to look for; it is converted with ``str`` and compared
        with ``match`` against ``getmethodname`` of each entry.
    methodslist: iterable of method entries (as accepted by ``getmethodname``).
    casesensitive: passed through to ``match``.

    The dictionary has the keys "Name", "Type", "Content", "Offset",
    "Params", "Param Types" and "ParamTypes". The last two hold the same
    list: "ParamTypes" is the key used by ``getmethods`` and read by
    ``methodsmatch``, and "Param Types" is kept for existing callers.

    When no method matches, ``dumpcsnotfounderror(methodname)`` is called
    and the program exits through ``sys.exit()``.
    """
    wantedname = str(methodname)
    for thismethod in methodslist:
        thisname = getmethodname(thismethod)
        if match(thisname, wantedname, casesensitive):
            methodtype = getmethodtype(thismethod)
            methodoffset = getmethodoffset(thismethod)
            methodparams = getmethodparams(thismethod)
            paramtypes = getmethodparamtypes(thismethod)
            return {
                "Name": thisname,
                "Type": methodtype,
                "Content": thismethod,
                "Offset": methodoffset,
                "Params": methodparams,
                "Param Types": paramtypes,
                "ParamTypes": paramtypes,
            }
    dumpcsnotfounderror(methodname)
    sys.exit()
420 |
def getfield(fieldname, fieldslist, casesensitive=False):
    """Return the data dictionary of the first field named ``fieldname``.

    The name is converted with ``str`` and compared with ``match`` against
    ``getfieldname`` of each entry of ``fieldslist``. The dictionary has the
    keys "Name", "Type", "Content" and "Offset".

    When no field matches, ``dumpcsnotfounderror(fieldname)`` is called and
    the program exits through ``sys.exit()``.
    """
    for candidate in fieldslist:
        if not match(getfieldname(candidate), str(fieldname), casesensitive):
            continue
        return {
            "Name": getfieldname(candidate),
            "Type": getfieldtype(candidate),
            "Content": candidate,
            "Offset": getfieldoffset(candidate),
        }
    dumpcsnotfounderror(fieldname)
    sys.exit()
436 |
def getproperty(propertyname, propertieslist, casesensitive=False):
    """Return the data dictionary of the first property named ``propertyname``.

    The name is converted with ``str`` and compared with ``match`` against
    ``getpropertyname`` of each entry of ``propertieslist``. The dictionary
    has the keys "Name", "Type", "Content" and "properties" (the result of
    ``getpropertyattributes``).

    When no property matches, ``dumpcsnotfounderror(propertyname)`` is
    called and the program exits through ``sys.exit()``.
    """
    for candidate in propertieslist:
        if not match(getpropertyname(candidate), str(propertyname), casesensitive):
            continue
        return {
            "Name": getpropertyname(candidate),
            "Type": getpropertytype(candidate),
            "Content": candidate,
            "properties": getpropertyattributes(candidate),
        }
    dumpcsnotfounderror(propertyname)
    sys.exit()
452 |
def getfullmethodparams(thismethod):
    """Return the raw parameter text of ``thismethod``.

    This is the text between ``_methodparamsstart`` and ``_methodparamsend``
    on line ``_methodtypeline`` (1-based) of the method entry.
    """
    signatureline = getlines(thismethod)[_methodtypeline - 1]
    return readbetween(signatureline, _methodparamsstart, _methodparamsend)
458 |
def getmethodparams(thismethod):
    """Return the parameters of ``thismethod`` as a list of stripped strings.

    The raw parameter text from ``getfullmethodparams`` is split on every ","
    that is not inside a data type group (``_datatypegroupstart`` ...
    ``_datatypegroupend``). Empty pieces are skipped.

    Group nesting is tracked with a depth counter, so a separator that
    follows a nested group but is still inside the outer one (for example
    the "," in Dictionary<List<int>, string>) no longer splits the
    parameter. A single open/closed flag used to treat the first group end
    as the end of all groups.
    """
    methodparams = []
    currentparam = []
    groupdepth = 0
    for thisletter in str(getfullmethodparams(thismethod)):
        if thisletter == _datatypegroupstart:
            groupdepth += 1
        # Not elif: when start and end are the same character the group
        # closes immediately, as it did before
        if thisletter == _datatypegroupend and groupdepth > 0:
            groupdepth -= 1
        if groupdepth == 0 and thisletter == ",":
            if currentparam:
                methodparams.append("".join(currentparam).strip())
                currentparam = []
        else:
            currentparam.append(thisletter)
    if currentparam:
        methodparams.append("".join(currentparam).strip())
    return methodparams
479 |
def getmethodparamtypes(thismethod, replacenames=True):
    """Return the words of all parameters of ``thismethod`` as one flat list.

    Each parameter from ``getmethodparams`` is split with ``getwords``.
    When ``replacenames`` is true the list is passed through
    ``replacetypenames`` and that result is returned.
    """
    paramwords = [
        thisword
        for thisparam in getmethodparams(thismethod)
        for thisword in getwords(thisparam)
    ]
    if replacenames:
        return replacetypenames(paramwords)
    return paramwords
491 |
def replacetypenames(thistype):
    """Replace every word of ``thistype`` that is not in ``_types``.

    thistype: a string of words or a list of words.

    Words that are not in ``_types`` become ``_typenamereplace``. When
    ``_processdatatypegroups`` is set, a list is first joined with
    ``wordstostring`` and the group characters (``_datatypegroupstart``,
    ``_datatypegroupend``, ``_datatypegroupseparator``) are turned into
    spaces, so grouped types are split into separate words.

    Returns a string (via ``wordstostring``) when the value being processed
    is a string, otherwise the list of words.
    """
    if _processdatatypegroups:
        # NOTE(review): a list is converted to a string here, so with group
        # processing enabled a list argument comes back as a string - confirm intended.
        if type(thistype) is list:
            thistype = wordstostring(thistype)
        groupcharacters = (_datatypegroupstart, _datatypegroupend, _datatypegroupseparator)
        # Replace data type group characters with spaces
        spaced = "".join(" " if thisletter in groupcharacters else thisletter for thisletter in thistype)
        words = getwords(spaced)
    elif type(thistype) is str:
        words = getwords(thistype)
    else:
        words = thistype
    # Replace names
    newwords = [thisword if thisword in _types else _typenamereplace for thisword in words]
    if type(thistype) is str:
        return wordstostring(newwords)
    return newwords
524 |
def getmethodtype(thismethod, replacenames=True):
    """Return the type part of the signature of ``thismethod``.

    The text before ``_methodparamsstart`` on line ``_methodtypeline``
    (1-based) is split into words and the last word is dropped; the rest is
    joined with ``wordstostring``. When ``replacenames`` is true the result
    is passed through ``replacetypenames``.
    """
    signatureline = getlines(thismethod)[_methodtypeline - 1]
    signatureline = substring(signatureline, 0, findstr(_methodparamsstart, signatureline))
    beforeparams = readbefore(signatureline, _methodparamsstart).strip()
    words = getwords(beforeparams)
    if words:
        # The last word is what getmethodname returns, so it is not part of the type
        words.pop()
    methodtype = wordstostring(words)
    return replacetypenames(methodtype) if replacenames else methodtype
538 |
def getmethodname(thismethod):
    """Return the name of ``thismethod``.

    The name is the last word of the text before ``_methodparamsstart`` on
    line ``_methodtypeline`` (1-based) of the method entry.
    """
    signatureline = getlines(thismethod)[_methodtypeline - 1]
    signatureline = substring(signatureline, 0, findstr(_methodparamsstart, signatureline))
    beforeparams = readbefore(signatureline, _methodparamsstart).strip()
    return getwords(beforeparams)[-1]
548 |
def getmethodoffset(thismethod):
    """Return the offset of ``thismethod``.

    This is the text between ``_offsetprefix`` and ``_offsetsuffix`` on line
    ``_methodoffsetline`` (1-based) of the method entry.
    """
    offsetline = getlines(thismethod)[_methodoffsetline - 1]
    return readbetween(offsetline, _offsetprefix, _offsetsuffix)
554 |
def removegenericinstmethods(fullmethods):
    """Return the lines of ``fullmethods`` without generic instance blocks.

    A block starts at a line equal to ``_genericinstmethodstart`` and ends at
    the next line equal to ``_genericinstmethodend``. Both marker lines and
    everything between them are left out. The lines come from
    ``getlines(fullmethods, True, True)`` and the result is a list of lines.
    """
    keptlines = []
    insideblock = False
    for thisline in getlines(fullmethods, True, True):
        if thisline == _genericinstmethodstart:
            insideblock = True
        elif insideblock and thisline == _genericinstmethodend:
            insideblock = False
        elif not insideblock:
            keptlines.append(thisline)
    return keptlines
569 |
def getmethodslist(fullmethods):
    """Split a methods section into a list of two-line method entries.

    The lines left by ``removegenericinstmethods`` are paired up in order
    (offset line, then signature line) and each pair is joined with "\\n"
    through ``concat``. ``unexpecteddumpcsformaterror`` is called when the
    number of lines is odd; a trailing unpaired line is not included.
    """
    lines = removegenericinstmethods(fullmethods)
    if isodd(len(lines)):
        unexpecteddumpcsformaterror("Methods section missing line or has extra line (only expected sets of 2 lines per method ie:\n // RVA: 0x1321F3C Offset: 0x1321F3C VA: 0x1321F3C\npublic static float get_deltaTime() { }",fullmethods)
    return [
        concat([offsetline, signatureline], "\n")
        for offsetline, signatureline in zip(lines[0::2], lines[1::2])
    ]
578 |
def getmethods(methodslist):
    """Return a data dictionary for every method in ``methodslist``.

    methodslist: a list of method entries, or the full methods text (a
        string), which is first converted with ``getmethodslist``.

    Each dictionary has the keys "Name", "Type", "Content", "Offset",
    "Params" and "ParamTypes". The list is also stored in the global
    ``methods``.
    """
    global methods
    if type(methodslist) == str:
        # Got full methods, not a methods list - so convert it
        methodslist = getmethodslist(methodslist)
    methods = []
    methods.extend(
        {
            "Name": getmethodname(thismethod),
            "Type": getmethodtype(thismethod),
            "Content": thismethod,
            "Offset": getmethodoffset(thismethod),
            "Params": getmethodparams(thismethod),
            "ParamTypes": getmethodparamtypes(thismethod),
        }
        for thismethod in methodslist
    )
    return methods
595 |
def getmethodsdict(methods):
    """Return a dictionary mapping each method's "Name" to its data dictionary.

    When several methods share a name, the last one wins.
    """
    return {thismethod["Name"]: thismethod for thismethod in methods}
601 |
602 |
def getfullmethods(thisobject):
    """Return the methods section of ``thisobject`` as text.

    The section starts on the line after ``_methodsstart`` and ends before
    the first line that is in ``_contentends`` (or at the last line). Lines
    are passed through ``removewhitespace``, whitespace-only lines are
    skipped, and the rest are joined with "\\n" through ``concat``.

    Returns "" when the object has no lines, no methods section, or nothing
    after the section marker. The result is also stored in the global
    ``fullmethods``. That global is now reset on every call, so a call that
    finds no section can no longer return the result of an earlier call.
    """
    global fullmethods
    thisobject = removeattributes(thisobject)
    lines = getlines(thisobject, True, True)
    fullmethods = ""
    if _methodsstart not in lines:
        return fullmethods
    i = lines.index(_methodsstart) + 1
    if i > len(lines) - 1:
        # The marker is the last line, so there is nothing to read
        return fullmethods
    thisitem = removewhitespace(lines[i])
    fullmethods = concat([fullmethods, thisitem], "\n")
    i = i + 1
    if i > len(lines) - 1:
        # Only one line follows the marker
        return fullmethods
    thisitem = removewhitespace(lines[i])
    i = i + 1
    # thisitem always holds the line before index i - 1 is read, so the loop
    # stops on an end marker or on the last line without adding either
    while not ((thisitem in _contentends) or i > (len(lines) - 1)):
        i = i + 1
        if not iswhitespace(thisitem):
            fullmethods = concat([fullmethods, thisitem], "\n")
        thisitem = removewhitespace(lines[i - 1])
    return fullmethods
625 |
def methodsmatch(method1, method2, checkparams=True):
    """Return whether two method data dictionaries have the same signature types.

    method1, method2: dictionaries with a "Type" key and, when
        ``checkparams`` is true, a "ParamTypes" key.
    checkparams: also require the "ParamTypes" values to be equal.

    Fixed: the parameter comparison used the undefined names ``param1`` and
    ``param2`` and raised NameError whenever ``checkparams`` was true.
    """
    typesmatch = method1["Type"] == method2["Type"]
    if checkparams:
        paramsmatch = method1["ParamTypes"] == method2["ParamTypes"]
    else:
        paramsmatch = True
    return typesmatch and paramsmatch


checkmethods = methodsmatch  # same thing, but different name
comparemethods = methodsmatch  # same thing, but different name
640 |
def getobject(objectnames, fullobjects, casesensitive=False):
    """Return the data dictionaries of the objects named in ``objectnames``.

    objectnames: one name (a string) or a list of names.
    fullobjects: sized iterable of full object texts.
    casesensitive: passed through to ``listcontains``.

    Each dictionary has the keys "Name", "Namespace", "UserDefinedType",
    "Shared", "Type", "Content", "Fields", "Properties", "Methods" and
    "TypeModel". Progress is printed as "position/total" whenever
    ``multipleof(position, 1000)`` is true.

    When fewer objects than names were found,
    ``dumpcsnotfounderror(objectnames)`` is called and the program exits
    through ``sys.exit()``.

    Fixed: "Methods" was filled from ``getfullproperties`` instead of
    ``getfullmethods``, and the not-found path referenced the undefined name
    ``objectname``.
    """
    if isinstance(objectnames, str):
        # Convert a single name to a list
        objectnames = [objectnames]
    objectsfound = []
    for i, thisfullobject in enumerate(fullobjects):
        if multipleof(i, 1000):
            print(str(i) + "/" + str(len(fullobjects)))
        thisname = getobjectname(thisfullobject)
        if not listcontains(thisname, objectnames, casesensitive):
            continue
        objectsfound.append({
            "Name": thisname,
            "Namespace": getobjectnamespace(thisfullobject),
            "UserDefinedType": getuserdefinedtype(thisfullobject),
            "Shared": getisshared(thisfullobject),
            "Type": getobjecttype(thisfullobject),
            "Content": thisfullobject,
            "Fields": getfullfields(thisfullobject),
            "Properties": getfullproperties(thisfullobject),
            "Methods": getfullmethods(thisfullobject),
            "TypeModel": buildtypemodel(thisfullobject),
        })
    if len(objectsfound) < len(objectnames):
        dumpcsnotfounderror(objectnames)
        sys.exit()
    return objectsfound
668 |
def getfieldoffset(thisfield):
    """Return the text of ``thisfield`` after ``_fieldoffsetstart``."""
    return readafter(thisfield, _fieldoffsetstart)
672 |
def getfieldtype(thisfield, replacenames=True):
    """Return the type part of the field line ``thisfield``.

    The text before ``_fieldoffsetstart`` is split into words and the last
    word is dropped; the rest is joined with ``wordstostring``. When
    ``replacenames`` is true the result is passed through
    ``replacetypenames``.
    """
    declaration = substring(thisfield, 0, findstr(_fieldoffsetstart, thisfield))
    declaration = readbefore(declaration, _fieldoffsetstart).strip()
    words = getwords(declaration)
    if words:
        # The last word is what getfieldname returns, so it is not part of the type
        words.pop()
    fieldtype = wordstostring(words)
    return replacetypenames(fieldtype) if replacenames else fieldtype
684 |
def getfieldname(thisfield):
    """Return the name of the field line ``thisfield``.

    The name is the last word of the text before ``_fieldoffsetstart``.
    """
    declaration = substring(thisfield, 0, findstr(_fieldoffsetstart, thisfield))
    declaration = readbefore(declaration, _fieldoffsetstart).strip()
    return getwords(declaration)[-1]
692 |
def getfieldslist(fullfields):
    """Return the lines of ``fullfields`` that contain ``_fieldoffsetstart``.

    The lines come from ``getlines(fullfields, True, True)``. The list is
    also stored in the global ``fields``.
    """
    global fields
    fields = [
        thisline
        for thisline in getlines(fullfields, True, True)
        if contains(_fieldoffsetstart, thisline)
    ]
    return fields
701 |
def getfields(fieldslist):
    """Return a data dictionary for every field in ``fieldslist``.

    fieldslist: a list of field lines, or the full fields text (a string),
        which is first converted with ``getfieldslist``.

    Each dictionary has the keys "Name", "Type", "Content" and "Offset".
    The list is also stored in the global ``fields``.
    """
    global fields
    if type(fieldslist) == str:
        # Got full fields, not a fields list - so convert it
        fieldslist = getfieldslist(fieldslist)
    fields = []
    fields.extend(
        {
            "Name": getfieldname(thisfield),
            "Type": getfieldtype(thisfield),
            "Content": thisfield,
            "Offset": getfieldoffset(thisfield),
        }
        for thisfield in fieldslist
    )
    return fields
716 |
def getfieldsdict(fields):
    """Return a dictionary mapping each field's "Name" to its data dictionary.

    When several fields share a name, the last one wins.
    """
    return {thisfield["Name"]: thisfield for thisfield in fields}
722 |
def buildtypemodel(thisobject):
    """Build the type model of ``thisobject``.

    The model is a dictionary with the keys:

    * "UserDefinedType", "Type", "Shared": results of ``getuserdefinedtype``,
      ``getobjecttype`` and ``getisshared``;
    * "Fields": the type of every field;
    * "Properties": a {"Type", "Attributes"} dictionary per property;
    * "PropertyTypes": the type of every property;
    * "Methods": a {"Type", "ParamTypes"} dictionary per method;
    * "MethodTypes": the type of every method.

    All member types are computed with name replacement enabled.
    """
    # To do: method params, number of shared classes for class
    objecttype = getobjecttype(thisobject)
    userdefinedtype = getuserdefinedtype(thisobject)
    shared = getisshared(thisobject)
    fieldlines = getfieldslist(getfullfields(thisobject))
    propertylines = getpropertieslist(getfullproperties(thisobject))
    methodentries = getmethodslist(getfullmethods(thisobject))
    return {
        "UserDefinedType": userdefinedtype,
        "Type": objecttype,
        "Shared": shared,
        "Fields": [getfieldtype(thisfield, True) for thisfield in fieldlines],
        "Properties": [
            {
                "Type": getpropertytype(thisproperty, True),
                "Attributes": getpropertyattributes(thisproperty),
            }
            for thisproperty in propertylines
        ],
        "PropertyTypes": [getpropertytype(thisproperty, True) for thisproperty in propertylines],
        "Methods": [
            {
                "Type": getmethodtype(thismethod, True),
                "ParamTypes": getmethodparamtypes(thismethod, True),
            }
            for thismethod in methodentries
        ],
        "MethodTypes": [getmethodtype(thismethod, True) for thismethod in methodentries],
    }


gettypemodel = buildtypemodel  # same thing, but different name
maketypemodel = buildtypemodel  # same thing, but different name
768 |
def getfullfields(thisobject):
    """
    Extracts the fields section of a raw object as newline-joined text

    Attributes are removed first. The section starts on the line after
    `_fieldsstart` and runs until a line found in `_contentends`; the final line
    of the object is never collected. Returns "" when the object has no
    `_fieldsstart` line or nothing follows it.
    The result is also stored in the module-level `fullfields` variable.
    """
    global fullfields
    # Always reset first so a value left over from a previous object can never be returned
    fullfields = ""
    lines = getlines(removeattributes(thisobject), True, True)
    if _fieldsstart not in lines:
        return fullfields
    first = lines.index(_fieldsstart) + 1
    if first >= len(lines):
        # The section header is the last line, so there is nothing to collect
        return fullfields
    # The first line after the header is always taken, without any checks
    fullfields = concat([fullfields, removewhitespace(lines[first])], "\n")
    # Remaining candidates: everything after that line except the object's final line
    for rawline in lines[first + 1:-1]:
        thisitem = removewhitespace(rawline)
        if thisitem in _contentends:
            break
        if not iswhitespace(thisitem):
            fullfields = concat([fullfields, thisitem], "\n")
    return fullfields
791 |
def getfullproperties(thisobject):
    """
    Extracts the properties section of a raw object as newline-joined text

    Attributes are removed first. The section starts on the line after
    `_propertiesstart` and runs until a line found in `_contentends`; the final
    line of the object is never collected. Returns "" when the object has no
    `_propertiesstart` line or nothing follows it.
    The result is also stored in the module-level `fullproperties` variable.
    """
    global fullproperties
    # Always reset first so a value left over from a previous object can never be returned
    fullproperties = ""
    lines = getlines(removeattributes(thisobject), True, True)
    if _propertiesstart not in lines:
        return fullproperties
    first = lines.index(_propertiesstart) + 1
    if first >= len(lines):
        # The section header is the last line, so there is nothing to collect
        return fullproperties
    # The first line after the header is always taken, without any checks
    fullproperties = concat([fullproperties, removewhitespace(lines[first])], "\n")
    # Remaining candidates: everything after that line except the object's final line
    for rawline in lines[first + 1:-1]:
        thisitem = removewhitespace(rawline)
        if thisitem in _contentends:
            break
        if not iswhitespace(thisitem):
            fullproperties = concat([fullproperties, thisitem], "\n")
    return fullproperties
814 |
def getpropertytype(thisproperty, replacenames=True):
    """
    Gets the type of a property from its dump.cs line

    Takes the part of the line before `_propertyattributesstart`, drops its last
    word (the property name) and joins the remaining words.
    If replacenames is true, the result is passed through replacetypenames.
    """
    head = substring(thisproperty, 0, findstr(_propertyattributesstart, thisproperty))
    words = getwords(readbefore(head, _propertyattributesstart).strip())
    if words:
        words.pop()
    propertytype = wordstostring(words)
    return replacetypenames(propertytype) if replacenames else propertytype
826 |
def getfullpropertyattributes(thisproperty):
    """
    Gets the text between `_propertyattributesstart` and `_propertyattributesend`
    of a property line, stripped and rebuilt from its words with wordstostring
    """
    between = readbetween(thisproperty, _propertyattributesstart, _propertyattributesend)
    return wordstostring(getwords(between.strip()))
833 |
def getpropertyattributes(thisproperty):
    """
    Gets the attributes of a property line as a list, by splitting the full
    attributes text on `_propertyattributeseparator`
    """
    return getfullpropertyattributes(thisproperty).split(_propertyattributeseparator)
838 |
def getpropertyname(thisproperty):
    """
    Gets the name of a property from its dump.cs line

    Only the part of the line before `_propertyattributesstart` is looked at;
    the name is the last whitespace-delimited word of that part.
    Raises IndexError if that part contains no words.
    """
    declaration = substring(thisproperty, 0, findstr(_propertyattributesstart, thisproperty))
    declaration = readbefore(declaration, _propertyattributesstart).strip()
    return getwords(declaration)[-1]
846 |
def getpropertieslist(fullproperties):
    """
    Collects the lines of a properties section that contain `_propertyattributesstart`

    Lines are trimmed and blank lines are dropped before filtering.
    The result is also stored in the module-level `properties` variable.
    """
    global properties
    candidates = getlines(fullproperties, True, True)
    properties = []
    properties.extend(line for line in candidates if contains(_propertyattributesstart, line))
    return properties
855 |
def getproperties(propertieslist):
    """
    Builds one dictionary per property line

    Accepts either a list of property lines or the raw properties section as a
    string (which is first converted with getpropertieslist).
    Each dictionary has the keys "Name", "Type", "Content", "Attributes" and
    "FullAttributes".
    The result is also stored in the module-level `properties` variable.
    """
    global properties
    if type(propertieslist) == str:
        # Got the full properties text rather than a list of lines
        propertieslist = getpropertieslist(propertieslist)
    properties = []
    for propertyline in propertieslist:
        name = getpropertyname(propertyline)
        propertytype = getpropertytype(propertyline)
        attributes = getpropertyattributes(propertyline)
        fullattributes = getfullpropertyattributes(propertyline)
        properties.append({
            "Name": name,
            "Type": propertytype,
            "Content": propertyline,
            "Attributes": attributes,
            "FullAttributes": fullattributes,
        })
    return properties
871 |
def getpropertiesdict(properties):
    """
    Indexes property dictionaries by their "Name" entry

    If several properties share a name, the last one wins.
    """
    return {entry["Name"]: entry for entry in properties}
877 |
def getfullclasses(fullobjects):
    """
    Filters raw objects down to those whose user-defined type is "class"

    Prints a progress line ("index/total") whenever multipleof(index, 1000) is true.
    """
    classes = []
    for index, fullobject in enumerate(fullobjects):
        if multipleof(index, 1000):
            print(f"{index}/{len(fullobjects)}")
        if getuserdefinedtype(fullobject) == "class":
            classes.append(fullobject)
    return classes
888 |
def getfullstructs(fullobjects):
    """
    Filters raw objects down to those whose user-defined type is "struct"
    """
    return [candidate for candidate in fullobjects if getuserdefinedtype(candidate) == "struct"]
895 |
def getfullenums(fullobjects):
    """
    Filters raw objects down to those whose user-defined type is "enum"
    """
    return [candidate for candidate in fullobjects if getuserdefinedtype(candidate) == "enum"]
902 |
def getfullinterfaces(fullobjects):
    """
    Filters raw objects down to those whose user-defined type is "interface"

    Returns a new list; the input is not modified.
    """
    # The previous version appended to a misspelled name and raised NameError
    # as soon as the first interface was found
    return [candidate for candidate in fullobjects if getuserdefinedtype(candidate) == "interface"]
909 |
def getobjects(fullobjects,onlyclasses = False,getshared = True,namespacefilter = None,justnameandtypemodel = False,doalphabeticalsort = True,returntuple = True):
    """
    Parses raw objects into dictionaries

    fullobjects: the raw objects to parse
    onlyclasses: keep only objects that getfullclasses accepts
    getshared: if false, objects for which getisshared is true are skipped (unless the
        module-level flagremovedshared is already set); flagremovedshared is set afterwards
    namespacefilter: a namespace or list of namespaces to keep; None, False or [] keeps all
    justnameandtypemodel: only build "Name" and "TypeModel" for each object
    doalphabeticalsort: pass members and the final object list through alphabeticalsort
    returntuple: return a tuple instead of a list
    """
    global flagremovedshared
    if type(namespacefilter) == str:
        namespacefilter = [namespacefilter] #convert to list
    if namespacefilter == [] or namespacefilter is False:
        namespacefilter = None
    if onlyclasses:
        fullobjects = getfullclasses(fullobjects)
    objects = []
    for index, fullobject in enumerate(fullobjects):
        if multipleof(index, 1000):
            print(f"{index}/{len(fullobjects)}")
        # Skip shared objects when they are unwanted and have not been removed already
        if not flagremovedshared and not getshared and getisshared(fullobject):
            continue
        # Skip objects outside the requested namespaces
        if namespacefilter is not None and getobjectnamespace(fullobject) not in namespacefilter:
            continue
        if justnameandtypemodel:
            objects.append({
                "Name" : getobjectname(fullobject),
                "TypeModel" : buildtypemodel(fullobject),
            })
            continue
        objectname = getobjectname(fullobject)
        userdefinedtype = "class" if onlyclasses else getuserdefinedtypeofobject(fullobject)
        objecttype = getobjecttype(fullobject)
        objectnamespace = getobjectnamespace(fullobject)
        shared = getisshared(fullobject)
        fullmethods = getfullmethods(fullobject)
        methods = getmethods(fullmethods)
        methodsdict = getmethodsdict(methods)
        fullfields = getfullfields(fullobject)
        fields = getfields(fullfields)
        fieldsdict = getfieldsdict(fields)
        fullproperties = getfullproperties(fullobject)
        properties = getproperties(fullproperties)
        propertiesdict = getpropertiesdict(properties)
        typemodel = buildtypemodel(fullobject)
        if doalphabeticalsort:
            methods = alphabeticalsort(methods)
            fields = alphabeticalsort(fields)
            properties = alphabeticalsort(properties)
            methodsdict = alphabeticalsort(methodsdict)
            fieldsdict = alphabeticalsort(fieldsdict)
            propertiesdict = alphabeticalsort(propertiesdict)
        objects.append({
            "Name" : objectname,
            "Namespace" : objectnamespace,
            "UserDefinedType" : userdefinedtype,
            "Shared" : shared,
            "Type" : objecttype,
            "Content" : fullobject,
            "FullMethods" : fullmethods,
            "Methods" : methods,
            "MethodsDict" : methodsdict,
            "FullFields" : fullfields,
            "Fields" : fields,
            "FieldsDict" : fieldsdict,
            "FullProperties" : fullproperties,
            "Properties" : properties,
            "PropertiesDict" : propertiesdict,
            "TypeModel" : typemodel,
        })
    if doalphabeticalsort:
        objects = alphabeticalsort(objects)
    if not getshared:
        flagremovedshared = True
    return tuple(objects) if returntuple else objects
991 |
992 | findobject = getobject #same thing, but different name
993 |
def builddumpcshierarchy(dumpcspath,doalphabeticalsort = True,onlyclasses = False,getshared = True,namespacefilter = None,toremoveattributes = True,toremoveblanklines = True,toremoveallblanklines = False,rettype = "dict"):
    """
    Loads a dump.cs file and builds a namespace > object hierarchy from it

    dumpcspath: path passed to loaddumpcs
    doalphabeticalsort: whether raw objects and parsed objects are passed through alphabeticalsort
    onlyclasses, getshared, namespacefilter: forwarded to getobjects
    toremoveattributes, toremoveblanklines, toremoveallblanklines: forwarded to getfullobjects
    rettype: "dict" / dict, "list" / list or "tuple" / tuple (strings are case-insensitive)

    Returns a dictionary mapping each namespace to a dictionary of its objects keyed by
    object name, or the (namespace, objects) pairs of that dictionary as a list or tuple.
    An unknown string rettype falls back to the dictionary.
    Returns None if rettype is neither a string nor one of the three types, or if
    loaddumpcs returns None.
    """
    if rettype is dict:
        rettype = "dict"
    elif rettype is list:
        rettype = "list"
    elif rettype is tuple:
        rettype = "tuple"
    elif type(rettype) == str:
        rettype = rettype.lower()
    else:
        #Error
        return None
    dumpcs = loaddumpcs(dumpcspath)
    if dumpcs is None:
        return None
    # NOTE(review): getfullobjects is not given the loaded content - presumably it reads
    # state set by loaddumpcs; confirm against its definition
    fullobjects = getfullobjects(getshared,toremoveattributes,toremoveblanklines,toremoveallblanklines,returntuple = True)
    if doalphabeticalsort:
        fullobjects = alphabeticalsort(fullobjects)
    #Sort full objects into list of objects
    # Pass the flag itself (not the alphabeticalsort function) so sorting can be turned off
    objects = getobjects(fullobjects,onlyclasses,getshared,namespacefilter,False,doalphabeticalsort,returntuple = True)
    #Sort list of objects into dictionary of namespaces
    namespaces = getnamespaces(objects)
    #Change lists of objects to dictionaries of objects
    newnamespaces = {}
    for thisnamespace, thisobjectlist in namespaces.items():
        newnamespaces[thisnamespace] = {thisobject["Name"]: thisobject for thisobject in thisobjectlist}
    if rettype == "list":
        return list(newnamespaces.items())
    if rettype == "tuple":
        return tuple(newnamespaces.items())
    #"dict", or unknown ret type - assume dict
    return newnamespaces
1033 |
1034 |
1035 |
1036 | getdumpcshierarchy = builddumpcshierarchy #same thing, but different name
1037 | builddumpcshierarchy = builddumpcshierarchy #same thing, but different name
1038 |
def _countmatchedtypemodels(expected, candidates):
    """
    Counts how many items of expected can be paired one-to-one with an equal item of candidates
    """
    remaining = list(candidates)
    matched = 0
    for item in expected:
        if item in remaining:
            matched = matched + 1
            remaining.remove(item) #each candidate can only be matched once
    return matched

def typemodelsmatch(model1,model2,usetolerance = None,dosize = True,douserdefinedtype = True,doshared = True,dotype = True,donamespace = True,dofields = True,domethodparams = True,dopropertyattributes = True): #make sure model1 is the unobfuscated one!
    """
    Decides whether two type models describe the same object

    A score is accumulated from the enabled checks and compared, as a percentage of the
    maximum possible score, against the tolerance.

    model1: type model of the unobfuscated object
    model2: type model of the obfuscated object
    usetolerance: minimum percentage required for a match (defaults to _tolerance)
    dosize: penalize the difference in the number of fields, methods and properties
    douserdefinedtype / doshared / dotype: require "UserDefinedType" / "Shared" / "Type"
        to be equal - a mismatch immediately returns False
    donamespace: award _namespaceweighttrue if the "Namespace" entries are equal
    dofields: score fields of model1 that also exist in model2
    domethodparams: compare "Methods" (return and parameter types) instead of "MethodTypes"
    dopropertyattributes: compare "Properties" (type and attributes) instead of "PropertyTypes"

    Returns True if the percentage score is not less than the tolerance.
    Raises ZeroDivisionError if the maximum score is 0 (every check disabled and model1
    has no methods or properties).
    """
    if usetolerance is None:
        usetolerance = _tolerance
    #To-do: Number of shared classes for class
    maxscore = (len(model1.get("Methods")) * _methodweighttrue) + (len(model1.get("Properties")) * _propertyweighttrue) #calculate maximum score
    score = float(0)
    #Size
    if dosize:
        maxscore = maxscore + 8 #start off at 8, and subtract nothing for a perfect score
        size1 = (len(model1.get("Fields")) + len(model1.get("Methods")) + len(model1.get("Properties"))) #how many methods, fields, and properties are there?
        size2 = (len(model2.get("Fields")) + len(model2.get("Methods")) + len(model2.get("Properties"))) #how many methods, fields, and properties are there?
        score = score + 8 - ((abs(size2 - size1) / _sizebenchmark) * _sizeweightfalse) #depending on the difference in size, this could have a small impact, or be very bad
    #Userdefined Type
    if douserdefinedtype:
        maxscore = maxscore + _userdefinedtypeweighttrue
        if model1.get("UserDefinedType") != model2.get("UserDefinedType"):
            return False #userdefined type MUST match
        score = score + _userdefinedtypeweighttrue
    #Shared
    if doshared:
        maxscore = maxscore + _sharedweighttrue
        if model1.get("Shared") != model2.get("Shared"):
            return False #Is shared MUST match
        score = score + _sharedweighttrue
    #Type
    if dotype:
        maxscore = maxscore + _objecttypeweighttrue
        if model1.get("Type") != model2.get("Type"):
            return False #Object type MUST match
        score = score + _objecttypeweighttrue
    #Namespace
    if donamespace:
        maxscore = maxscore + _namespaceweighttrue
        # NOTE(review): if the models carry no "Namespace" entry, both lookups give None
        # and this always counts as a match
        if model1.get("Namespace") == model2.get("Namespace"):
            score = score + _namespaceweighttrue
    #It's very normal to add on things, but not as common to delete them. So, most of the
    #members in the unobfuscated (earlier) model should also exist in the obfuscated (newer) one
    #Fields
    if dofields:
        maxscore = maxscore + (len(model1.get("Fields")) * _fieldweighttrue)
        #We are using the fields type models, not the fields themselves
        score = score + (_countmatchedtypemodels(model1.get("Fields"), model2.get("Fields")) * _fieldweighttrue)
    #Methods
    #We are using the methods type models, not the methods themselves
    methodskey = "Methods" if domethodparams else "MethodTypes"
    score = score + (_countmatchedtypemodels(model1.get(methodskey), model2.get(methodskey)) * _methodweighttrue)
    #Properties
    #We are using the properties type models, not the properties themselves
    propertieskey = "Properties" if dopropertyattributes else "PropertyTypes"
    score = score + (_countmatchedtypemodels(model1.get(propertieskey), model2.get(propertieskey)) * _propertyweighttrue)
    #To do: method params, number of shared classes for class
    matchscore = ((score / maxscore) * 100)
    endspeedtest() # NOTE(review): kept from the original; no matching start call is visible here
    return not (matchscore < usetolerance) #is percentage score not less than tolerated percent?
1132 |
1133 | comparetypemodels = typemodelsmatch #same thing, but different name
1134 | checktypemodels = typemodelsmatch #same thing, but different name
1135 |
def objectscheckformatch(object1,object2,usetolerance = None,dosize = True,douserdefinedtype = True,doshared = True,dotype = True,donamespace = True,dofields = True,domethodparams = True,dopropertyattributes = True):
    """
    Decides whether two parsed objects are the same object

    If _trustnames is set and both objects have the same "Name", they match.
    Otherwise their "TypeModel" entries are compared with typemodelsmatch, and
    every option is forwarded to it unchanged.

    Make sure object1 is the unobfuscated one!
    """
    if _trustnames and str(object1.get("Name")) == str(object2.get("Name")):
        return True
    # Forward by keyword: passing a subset positionally shifted the flags onto the
    # wrong typemodelsmatch parameters
    return typemodelsmatch(
        object1.get("TypeModel"),
        object2.get("TypeModel"),
        usetolerance = usetolerance,
        dosize = dosize,
        douserdefinedtype = douserdefinedtype,
        doshared = doshared,
        dotype = dotype,
        donamespace = donamespace,
        dofields = dofields,
        domethodparams = domethodparams,
        dopropertyattributes = dopropertyattributes,
    )
1143 |
1144 | checkobjects = objectscheckformatch #same thing, but different name
1145 | compareobjects = objectscheckformatch #same thing, but different name
1146 | objectsmatch = objectscheckformatch #same thing, but different name
--------------------------------------------------------------------------------
/Python/DeobfuscationRefactored.py:
--------------------------------------------------------------------------------
1 | """
2 | TODO: Make everything lazy (only parse objects, methods, params, etc.) when needed
3 |
4 | TODO: Make methods able to access other objects and methods (after rewrite):
5 |
6 | TODO: Make is standard method detection for methods (Equals, etc.)
7 |
8 | TODO: Make is unity type detection
9 |
10 | TODO: Replace all terminology with terminology from tools like Il2cppDumper, to make sure it is correct
11 | """
12 |
13 | from __future__ import annotations
14 | import sys
15 | import os
16 | import warnings
17 | import copy
18 | from typing import *
19 | import string
20 | import regex as re
21 | import json
22 | from functools import cache, lru_cache
23 | from typing import Any
24 | # FIXME: Make this a package and make it use relative imports
25 | from unitydeobfuscatorexceptions import *
26 | import fileutils
27 |
28 | filehandler = fileutils.FileHandler()
29 |
30 |
def readafter(sub: str, s: str, backward=False, regex=False, mustcontain=True, lengthwarning=True) -> str:
    """
    Returns the substring after the delimiter
    If the substring is not found in the string, returns the whole string
    Also supports backward (right to left) and regex

    @param sub: The substring (or regex) to read after
    @param s: The initial string
    @param backward: Whether to read from right to left (use the last occurrence instead of the first)
    @param regex: Whether to treat the substring as regex
    @param mustcontain: Whether to throw an AssertionError if the substring (or regex) is not present
    in the initial string
    @param lengthwarning: Whether to raise a warning if the substring is longer than the initial string,
    which should never happen (only applies if regex is False)
    @return: The partition of the string after the substring (or regex)

    :raises ImportError: Wrong regex module: Expected regex module, got re module.
    To fix this, replace "import re" with "import regex as re"
    :raises AssertionError: param mustcontain was True and substring (or regex) was not present
    in the initial string
    :warns ValueWarning: param lengthwarning was True, param regex was False, and substring was longer
    than initial string

    Example:
        sub: "string"
        s: "Split this string by delimiter"
        return: " by delimiter"
    """
    class ValueWarning(Warning):
        pass
    # If re isn't imported at all, that will show later.
    # Since we only catch AttributeError, we don't have to worry about it here.
    try:
        re.REVERSE
    except AttributeError:
        raise ImportError("Wrong regex module: Expected regex module, got re module. To fix this, replace \"import "
                          "re\" with \"import regex as re\"")
    if regex:
        sub = f"({sub})"  # Enclose regex with parentheses to make it a group capture
        # If an invalid regex is passed, we let re.error throw - it can be handled by the caller
        # Raised explicitly (not with an assert statement) so the check survives python -O
        if mustcontain and not re.search(sub, s):
            raise AssertionError(f"regex {sub} was not found in string")
        if backward:
            # Searching in reverse finds the last match; everything after it is the result
            lastmatch = re.search(sub, s, flags=re.REVERSE)
            return s[lastmatch.end():] if lastmatch else s
        return re.split(sub, s, 1)[-1]
    if lengthwarning and len(sub) > len(s):
        warnings.warn(f"Call to readafter(sub=\"{sub}\", str=\"{s}\"): substring is longer than string",
                      ValueWarning)
    # Raised explicitly (not with an assert statement) so the check survives python -O
    if mustcontain and sub not in s:
        raise AssertionError("substring was not found in string")
    if backward:
        prefix, found, suffix = s.rpartition(sub)
    else:
        prefix, found, suffix = s.partition(sub)
    return suffix if found else s
93 |
94 |
95 |
def readbefore(sub: str, s: str, backward=False, regex=False, mustcontain=True, lengthwarning=True) -> str:
    """
    Returns the substring before the delimiter
    If the substring is not found in the string, returns the whole string
    Also supports backward (right to left) and regex

    @param sub: The substring (or regex) to read before
    @param s: The initial string
    @param backward: Whether to read from right to left (use the last occurrence instead of the first)
    @param regex: Whether to treat the substring as regex
    @param mustcontain: Whether to throw an AssertionError if the substring (or regex) is not present in the initial
    string
    @param lengthwarning: Whether to raise a warning if the substring is longer than the initial string,
    which should never happen (only applies if regex is False)
    @return: The partition of the string before the substring (or regex)

    :raises ImportError: Wrong regex module: Expected regex module, got re module.
    To fix this, replace "import re" with "import regex as re"
    :raises AssertionError: param mustcontain was True and substring (or regex) was not present
    in the initial string
    :warns ValueWarning: param lengthwarning was True, param regex was False, and substring was longer
    than initial string

    Example:
        sub: "string"
        s: "Split this string by delimiter"
        return: "Split this "
    """
    class ValueWarning(Warning):
        pass
    # If re isn't imported at all, that will show later.
    # Since we only catch AttributeError, we don't have to worry about it here.
    try:
        re.REVERSE
    except AttributeError:
        raise ImportError("Wrong regex module: Expected regex module, got re module. To fix this, replace \"import "
                          "re\" with \"import regex as re\"")
    if regex:
        sub = f"({sub})"  # Enclose regex with parentheses to make it a group capture
        # If an invalid regex is passed, we let re.error throw - it can be handled by the caller
        # Raised explicitly (not with an assert statement) so the check survives python -O
        if mustcontain and not re.search(sub, s):
            raise AssertionError(f"regex {sub} was not found in string")
        if backward:
            # Searching in reverse finds the last match; everything before it is the result.
            # Slicing by position avoids depending on the order of the pieces of a reverse split.
            lastmatch = re.search(sub, s, flags=re.REVERSE)
            return s[:lastmatch.start()] if lastmatch else s
        return re.split(sub, s, 1)[0]
    if lengthwarning and len(sub) > len(s):
        warnings.warn(f"Call to readbefore(sub=\"{sub}\", str=\"{s}\"): substring is longer than string",
                      ValueWarning)
    # Raised explicitly (not with an assert statement) so the check survives python -O
    if mustcontain and sub not in s:
        raise AssertionError("substring was not found in string")
    if backward:
        prefix, found, suffix = s.rpartition(sub)
    else:
        prefix, found, suffix = s.partition(sub)
    return prefix if found else s
158 |
159 |
def iswhitespace(s: str, totreatblankaswhitespace=True) -> bool:
    """
    Checks whether a string consists only of whitespace

    @param s: The string to check
    @param totreatblankaswhitespace: The result to give for an empty string ("")
    @return: Whether the string is whitespace
    """
    return totreatblankaswhitespace if s == "" else s.isspace()
172 |
173 |
def trim(s: str, leading=True, trailing=True) -> str:
    """
    Strips whitespace from one or both ends of a string

    @param s: The initial string
    @param leading: Whether to strip whitespace at the start
    @param trailing: Whether to strip whitespace at the end
    @return: The stripped string (unchanged if both leading and trailing are False)

    Example:
        s: "  hello world!  "
        leading: True
        trailing: True
        return: "hello world!"
    """
    if not leading and not trailing:
        return s
    if leading and trailing:
        return s.strip()
    return s.lstrip() if leading else s.rstrip()
198 |
199 |
200 | # FIXME: Why does caching this cause bugs?
201 | #@cache
def getwords(s: str, customregex=None) -> list[str]:
    """
    Breaks a string up into its words
    Any run of whitespace (spaces, tabs, newlines) counts as a single delimiter,
    and leading / trailing whitespace produces no empty words

    @param s: The string to split into words
    @param customregex: If given, split on this regex with re.split instead of on whitespace
    @return: List of words in the initial string (in order)

    Example:
        s: "the quick   brown fox  abcdefg "
        return: ["the","quick","brown","fox","abcdefg"]
    """
    if not customregex:
        return s.split()
    return re.split(customregex, s)
224 |
225 |
def wordstostring(words: list[str],
                  totrimwords=False,
                  toignoreblankwords=False,
                  concatenator=" ") -> str:
    """
    Concatenates a list of words into one string

    @param words: The list of words to concatenate
    @param totrimwords: Whether to trim leading and trailing whitespace from each word
    @param toignoreblankwords: Whether to leave out words that are only whitespace
    @param concatenator: Delimiter placed between words (default " ")
    @return: Words concatenated by concatenator
    """
    def prepared():
        for word in words:
            # Blank check happens on the untrimmed word
            if iswhitespace(word) and toignoreblankwords:
                continue
            yield trim(word, True, True) if totrimwords else word

    return concatenator.join(prepared())
247 |
248 |
def getlines(s: str,
             toremoveblanklines=False,
             totrimlines=False) -> list[str]:
    """
    Breaks a string up into its lines

    @param s: The string to split into lines
    @param toremoveblanklines: Whether to leave out lines that are blank or only whitespace
    @param totrimlines: Whether to trim leading and trailing whitespace from each line
    @return: List of lines in the string (in order)

    Example:
        s: three lines "a ", " b" and "c" separated by blank lines
        toremoveblanklines: True
        totrimlines: True
        return: ["a","b","c"]
    """
    result = []
    for rawline in s.splitlines():
        line = trim(rawline, True, True) if totrimlines else rawline
        if toremoveblanklines and iswhitespace(line):
            continue
        result.append(line)
    return result
279 | # return [trim(line, True, True) if totrimlines else line
280 | # for line in s.splitlines()
281 | # if not (toremoveblanklines and iswhitespace(line))]
282 |
283 |
def linestostring(lines: list[str],
                  totrimlines=False,
                  toignoreblanklines=False,
                  concatenator="\n") -> str:
    """
    Concatenates a list of lines into one string

    @param lines: The list of lines to concatenate
    @param totrimlines: Whether to trim leading and trailing whitespace from each line
    @param toignoreblanklines: Whether to leave out lines that are only whitespace
    @param concatenator: Delimiter placed between lines (default is a newline)
    @return: Lines concatenated by concatenator

    Example:
        lines: ["a","","b"]
        toignoreblanklines: False
        totrimlines: False
        return: "a", an empty line and "b", each separated by a newline
    """
    # Joining lines is the same job as joining words, just with a different
    # default delimiter, so wordstostring does the work
    return wordstostring(words=lines,
                         totrimwords=totrimlines,
                         toignoreblankwords=toignoreblanklines,
                         concatenator=concatenator)
311 |
312 |
@cache
def datatype_isreference(datatype: str) -> bool:
    # Not Done
    # Checks whether "ref" is one of the words of a data type, where the words are
    # produced by getwords with a custom regex.
    # NOTE(review): the regex literal below is truncated (the string is never closed) and
    # the statement that followed it is missing, so this function does not parse as is.
    # The original pattern must be restored from version control - TODO confirm.
    words = getwords(datatype, customregex="(? 1
    return "ref" in words
319 |
320 |
def dumpcs_isvalid(dumpcs: str) -> bool:
    # Not done
    """
    Performs a quick sanity check on a dumpcs file
    The file is considered valid if it contains every one of a few marker
    substrings. This is only a short check of the file as a whole; the
    dumpcs_checkformat function is meant for the detailed analysis.

    Arguments:
        dumpcs: the raw content of the dumpcs file

    Return:
        whether the dumpcs is valid
    """
    requiredmarkers = ("// Image 0:", "// Namespace: ", "class", "\n\t// Methods", "// RVA: 0x")
    return all(marker in dumpcs for marker in requiredmarkers)
342 |
343 |
def dumpcs_checkformat(dumpcs: str) -> list[dict]:
    # Not Done
    """
    Intended to scan a dumpcs file for unexpected formatting and report
    every problem found. Not implemented yet: calling it always raises.

    Arguments:
        dumpcs: the raw content of the dumpcs file

    Return:
        list of errors with the line number and error message

    :raises NotImplementedError: always, until the function is completed
    """
    message = "Dumpcs_checkformat function not completed"
    raise NotImplementedError(message)
357 |
358 |
def dumpcs_hasattributes(dumpcs: str, fastcheck=False) -> bool:
    # Done
    """
    Determines whether a dumpcs file has attributes

    The fast check only looks for the text "[CompilerGeneratedAttribute]".
    The thorough check looks for any line whose first non-whitespace
    character is an opening square bracket.

    Arguments:
        dumpcs: the raw content of the dumpcs file
        fastcheck: whether to perform the fast check instead of the thorough one

    Return:
        whether the dumpcs file has attributes
    """
    if fastcheck:
        return "[CompilerGeneratedAttribute]" in dumpcs
    # Only leading whitespace is trimmed; an attribute line then starts with "["
    leadingtrimmed = (trim(line, True, False) for line in getlines(dumpcs, False, False))
    return any(candidate and candidate[0] == "[" for candidate in leadingtrimmed)
389 |
390 |
def dumpcs_constructor(dumpcs: str, terminateifinvalid: bool = True) -> list[list[dict]]:
    # Done
    """
    Loads and initializes a dumpcs
    Checks validity of the dumpcs and searches for format errors
    Removes attributes (if there are any) before the format check
    Parses images and objects in dumpcs

    Arguments:
        dumpcs: the raw content of the dumpcs file
        terminateifinvalid: whether to terminate with an error or just throw a warning
        if the dumpcs appears to be invalid (default True)

    Return:
        The result of dumpcs_getobjects for the dumpcs file

    :raises InvalidDumpcsError: The dumpcs appears to be invalid, and terminateifinvalid was True
    :raises UnexpectedDumpcsFormatError: dumpcs_checkformat reported at least one format error
    (only the first one is raised)
    :warns InvalidDumpcsWarning: The dumpcs appears to be invalid, and terminateifinvalid was False
    """
    if not dumpcs_isvalid(dumpcs):
        if terminateifinvalid:
            raise InvalidDumpcsError(content=dumpcs)
        warnings.warn(InvalidDumpcsWarning(content=dumpcs))
    # Attributes must be removed before the dumpcs is checked for format errors
    if dumpcs_hasattributes(dumpcs):
        dumpcs = dumpcs_removeattributes(dumpcs)
    formaterrors = dumpcs_checkformat(dumpcs)
    if formaterrors:
        # NOTE(review): dumpcs_checkformat is annotated as returning dictionaries, but the
        # errors are read through attributes here - confirm once it is implemented
        firsterror = formaterrors[0]
        raise UnexpectedDumpcsFormatError(firsterror.message, firsterror.sample, line=firsterror.line)
    # The images result is not used here; the call is kept in case it has side effects
    # NOTE(review): confirm whether dumpcs_getimages is still needed
    dumpcs_getimages(dumpcs)
    return dumpcs_getobjects(dumpcs)
431 |
432 |
def dumpcs_removeattributes(dumpcs: str) -> str:
    # Done
    """
    Strips the attribute lines out of a dumpcs file

    A line is treated as an attribute when its first non-whitespace character is "[".
    Attributes are only dropped, never parsed, and blank lines are kept.

    Arguments:
        dumpcs: the raw content of the dumpcs file

    Return:
        dumpcs raw content with attributes removed
    """
    keptlines = [
        rawline
        for rawline in getlines(dumpcs, False, False)
        # Leading whitespace is trimmed for the check only; the untrimmed line is what gets kept.
        # An empty trimmed line does not start with "[", so blank lines survive.
        if not trim(rawline, True, False).startswith("[")
    ]
    return linestostring(keptlines, False, False)
462 |
463 |
@cache
def dumpcsobject_hasnamespace(rawobject: str) -> bool:
    # Not Done
    """
    Checks whether a dumpcs object has a namespace

    The first line of the object is its namespace line. When that line is exactly
    "// Namespace: " (nothing after the delimiter), the object has no namespace.

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        whether the dumpcs object has a namespace
    """
    emptynamespaceline = "// Namespace: "
    firstline = getlines(rawobject)[0]
    return firstline != emptynamespaceline
470 |
471 |
@cache
def dumpcsobject_getnamespace(rawobject: str) -> Optional[str]:
    # Done
    """
    Gets the namespace of a dumpcs object

    Example:
        namespaceline: // Namespace: Microsoft.Win32
        Return: Microsoft.Win32

    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        namespace of the dumpcs object, or None if the object has no namespace
    """
    if dumpcsobject_hasnamespace(rawobject):
        # The namespace line is the first line of the object;
        # the namespace is everything after the delimiter on that line
        firstline = getlines(rawobject)[0]
        return readafter("// Namespace: ", firstline)
    # No namespace
    return None
501 |
502 |
@cache
def dumpcsobject__getmodifiers(rawobject: str) -> list[str]:
    # Done
    """

    Possible Improvements:
        1. Directly returning instead of using type variable and breaking loop out of loop may be faster,
           but sacrifices readability and simplicity
        2. Object types should be a constant
        3. IDK if using a dictionary cache is faster or ironically slower

    Returns the list of words taken from the signature line of a dumpcs object
    (the part before the base class if the object is inherited, otherwise the part before the
    trailing comment)

    NOTE(review): judging by the function name and the comments below, this is presumably meant
    to return only the modifiers (the words before the object type and object name).
    The previous description and example were copied from dumpcsobject_getobjecttype - confirm the
    intended contract.

    Example:
        objectsignatureline: public static class Registry // TypeDefIndex: 4
        Presumably intended return: ["public", "static"] (TODO confirm)


    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        list of words from the object's signature line (see the review notes in the body)
    """
    lines = getlines(rawobject)
    # The signature line is the second line of the object (the first line is the namespace line)
    objectsignatureline = lines[1].strip()
    if dumpcsobject_isinherited(rawobject):
        # If the object is inherited, read before the base class (to get the derived class)
        prefix = readbefore(" : ", objectsignatureline)
    else:
        # If the object is not inherited, read before the typedefindex
        prefix = readbefore(" //", objectsignatureline)
    # NOTE(review): the next statement is truncated in this copy of the file - the regex string is
    # unterminated and the source line that followed it is missing. Restore it from version control.
    words = getwords(prefix, customregex="(? 1
    # Last word is object name, second to last word is object type
    # NOTE(review): words[-1:-2] is an empty slice, so this statement deletes nothing;
    # `del words[-2:]` would remove the object type and object name - confirm intent
    del words[-1:-2]
    # NOTE(review): `in` against a string is a substring test, not a membership test.
    # `datatype` is not assigned in the visible part of this function, so the `+=` would fail
    # if the loop body ran. This loop looks copied from parameter-parsing code - confirm
    while words[-1] in "ref, in, out":
        datatype += f"{words[-3]} "
        del words[-3]
    return words
543 |
544 |
@cache
def dumpcsobject_getobjecttype(rawobject: str) -> str:
    # Done
    """

    Possible Improvements:
        1. Directly returning instead of using type variable and breaking loop out of loop may be faster,
           but sacrifices readability and simplicity
        2. Object types should be a constant
        3. IDK if using a dictionary cache is faster or ironically slower

    Gets the type (struct, class, enum, or interface) of a dumpcs object

    The type is taken as the second to last word of the signature line, read up to the base class
    (for inherited objects) or up to the trailing comment (otherwise).

    Example:
        objectsignatureline: public static class Registry // TypeDefIndex: 4
        Return: class


    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        type of the dumpcs object
    """
    lines = getlines(rawobject)
    # The signature line is the second line of the object (the first line is the namespace line)
    objectsignatureline = lines[1].strip()
    if dumpcsobject_isinherited(rawobject):
        # If the object is inherited, read before the base class (to get the derived class)
        prefix = readbefore(" : ", objectsignatureline)
    else:
        # If the object is not inherited, read before the typedefindex
        prefix = readbefore(" //", objectsignatureline)
    # NOTE(review): the next statement is truncated in this copy of the file - the regex string is
    # unterminated and the source line that followed it is missing. Restore it from version control.
    words = getwords(prefix, customregex="(? 1
    # Last word is object name, second to last word is object type
    objecttype = words[-2]
    return objecttype
582 |
583 |
@cache
def dumpcsobject_getdatatype(rawobject: str) -> str:
    # Done
    """

    Possible Improvements:
        1. Directly returning instead of using type variable and breaking loop out of loop may be faster,
           but sacrifices readability and simplicity
        2. Using a string for data type instead of using a list and concatenating it into a string may be faster,
           but sacrifices readability and simplicity
        3. Object types should be a constant
        4. IDK if using a dictionary cache is faster or ironically slower

    Gets the data type of a dumpcs object
    (the words of the signature line that come before the object type and object name)

    Example:
        objectsignatureline: public static class Registry // TypeDefIndex: 4
        Return: public static

    NOTE(review): as written, the `del` statement in the body removes nothing, so the object type
    and object name are presumably still part of the returned string - confirm against the example.


    Arguments:
        rawobject: the raw content of the dumpcs object

    Return:
        data type of the dumpcs object
    """
    lines = getlines(rawobject)
    # The signature line is the second line of the object (the first line is the namespace line)
    objectsignatureline = lines[1].strip()
    if dumpcsobject_isinherited(rawobject):
        # If the object is inherited, read before the base class (to get the derived class)
        prefix = readbefore(" : ", objectsignatureline)
    else:
        # If the object is not inherited, read before the typedefindex
        prefix = readbefore(" //", objectsignatureline)
    # NOTE(review): the next statement is truncated in this copy of the file - the regex string is
    # unterminated and the source line that followed it is missing. Restore it from version control.
    words = getwords(prefix, customregex="(? 1
    # Delete object name and object type
    # NOTE(review): words[-1:-2] is an empty slice, so this statement deletes nothing;
    # `del words[-2:]` would remove the last two words - confirm intent
    del words[-1:-2]
    # Presumably joins the remaining words back into a single string - verify against wordstostring
    datatype = wordstostring(words)
    return datatype
624 |
625 |
626 | @cache
627 | def dumpcsobject_getname(rawobject: str, includenesting=False) -> str:
628 | # Done
629 | """
630 |
631 | Possible Improvements:
632 | 1. Directly returning instead of using variable may be faster,
633 | but sacrifices readability and simplicity
634 | 2. IDK if using a dictionary cache is faster or ironically slower
635 |
636 | Gets the name of a dumpcs object
637 |
638 | Examples:
639 | objectsignatureline: private enum SimpleCollator.ExtenderType // TypeDefIndex: 41
640 | includenesting: True
641 | Return: SimpleCollator.ExtenderType
642 |
643 | objectsignatureline: private enum SimpleCollator.ExtenderType // TypeDefIndex: 41
644 | includenesting: False
645 | Return: ExtenderType
646 |
647 |
648 | Arguments:
649 | rawobject: the raw content of the dumpcs object
650 | includenesting: whether to include the outer objects in the name
651 | (only applies if the object is nested)
652 |
653 | Return:
654 | the name of the dumpcs object
655 | """
656 | lines = getlines(rawobject)
657 | objectsignatureline = lines[1].strip()
658 | if dumpcsobject_isinherited(rawobject):
659 | # If the object is inherited, read before the base class (to get the derived class)
660 | prefix = readbefore(" : ", objectsignatureline)
661 | else:
662 | # If the object is not inherited, read before the typedefindex
663 | prefix = readbefore(" //", objectsignatureline)
664 | if dumpcsobject_isgeneric(rawobject):
665 | # Match generics, but not compiler generated symbols
666 | # EX: IEnumerator