// Repository layout:
//   ├── MonoString.cpp
//   ├── MonoString.h
//   └── README.md
//
// The three files are reproduced below as one self-contained listing:
// the header section comes first so that the implementation section compiles
// against it, and the README is kept as a comment at the end.

// ============================================================================
// /MonoString.h
// ============================================================================
//
// Created by Fate on 2020-06-01.
//

#ifndef U3DINJECT_MASTER_MONOSTRING_H
#define U3DINJECT_MASTER_MONOSTRING_H

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

// Kept for backward compatibility: existing callers (see the README section)
// spell `string` without the `std::` qualifier after including this header.
using namespace std;

/**
 * Overlay for a managed string object as laid out in memory by the runtime.
 *
 * The object is never constructed by this code; a pointer handed out by the
 * runtime is reinterpreted as a MonoString. `chars` is only the first byte of
 * a trailing buffer that holds `length` UTF-16 code units.
 *
 * NOTE(review): the field layout (klass, monitor, length, chars) is assumed to
 * match the target runtime's string object - confirm against the runtime
 * headers before changing it.
 */
class MonoString {
    void *klass;
    void *monitor;
    int length;      // number of UTF-16 code units stored in the trailing buffer
    char chars[1];   // first byte of the trailing UTF-16 buffer

    /** Start of the trailing UTF-16 buffer, as raw bytes. */
    char *getChars() {
        return chars;
    }

public:
    /**
     * Get the string length.
     * @return number of UTF-16 code units (not bytes, not code points)
     */
    int getLength() {
        return length;
    }

    /**
     * Convert the managed string to a UTF-8 C string.
     * @return pointer into a per-thread buffer; it stays valid until the next
     *         call to toChars() on the same thread. Copy it if it must live
     *         longer.
     */
    const char *toChars();

    /**
     * Convert the managed string to a UTF-8 std::string.
     * @return the converted text (empty when the length is not positive)
     */
    string toString();

    /**
     * Overwrite the managed string in place with UTF-8 text.
     *
     * The text is converted to UTF-16 and copied over the existing buffer,
     * followed by a terminating NUL code unit; `length` becomes the number of
     * UTF-16 code units written.
     *
     * WARNING: the capacity of the underlying buffer is unknown to this class.
     * Writing text that needs more UTF-16 code units than the string
     * originally held overruns the object.
     *
     * @param s UTF-8 text; a null pointer is treated as the empty string
     */
    void setMonoString(const char *s);

    /**
     * Overwrite the managed string in place with UTF-8 text.
     * Same contract and capacity warning as the `const char *` overload.
     * @param s UTF-8 text
     */
    void setMonoString(string s);
};

// Build UTF-8 from UTF-16 that starts with a BOM; the BOM selects the byte order.
std::string utf16_to_utf8(const std::u16string &u16str);

// Build UTF-8 from UTF-16 LE code units.
std::string utf16le_to_utf8(const std::u16string &u16str);

// Build UTF-8 from UTF-16 BE code units.
std::string utf16be_to_utf8(const std::u16string &u16str);

// Convert UTF-8 to UTF-16 LE code units.
std::u16string utf8_to_utf16le(const std::string &u8str, bool addbom = false, bool *ok = nullptr);

// Convert UTF-8 to UTF-16 BE code units.
std::u16string utf8_to_utf16be(const std::string &u8str, bool addbom = false, bool *ok = nullptr);

#endif //U3DINJECT_MASTER_MONOSTRING_H

// ============================================================================
// /MonoString.cpp
// ============================================================================
//
// Created by Fate on 2020-06-01.
//
// When this section lives in its own file it starts with:
//     #include <cstring>
//     #include "MonoString.h"
// In this single-file listing the header section above provides both.

void MonoString::setMonoString(const char *s) {
    // Constructing std::string from a null pointer is undefined behaviour,
    // so map null to the empty string before delegating.
    setMonoString(string(s != nullptr ? s : ""));
}

void MonoString::setMonoString(string s) {
    const u16string utf16 = utf8_to_utf16le(s);
    // The length field counts UTF-16 code units, not UTF-8 bytes; using the
    // byte count would also make the copy below read past `utf16`.
    length = static_cast<int>(utf16.size());
    // c_str() guarantees a trailing NUL unit, so copying size() + 1 units is
    // always in bounds on the source side and keeps the buffer NUL-terminated
    // when the new text is shorter than the old one.
    memcpy(getChars(), utf16.c_str(), (utf16.size() + 1) * sizeof(char16_t));
}

const char *MonoString::toChars() {
    // Returning c_str() of a local std::string would hand out a dangling
    // pointer; keep the bytes alive in a per-thread buffer instead.
    static thread_local string buffer;
    buffer = toString();
    return buffer.c_str();
}

string MonoString::toString() {
    if (length <= 0) {
        return string();
    }
    // Copy exactly `length` code units out of the raw byte buffer. memcpy
    // avoids reading the char buffer through a char16_t pointer and does not
    // stop at an embedded NUL.
    u16string utf16(static_cast<size_t>(length), u'\0');
    memcpy(&utf16[0], getChars(), utf16.size() * sizeof(char16_t));
    return utf16le_to_utf8(utf16);
}


/** Swap the two bytes of a 16-bit value. */
static inline uint16_t byteswap_ushort(uint16_t number) {
#if defined(_MSC_VER) && _MSC_VER > 1310
    return _byteswap_ushort(number);
#elif defined(__GNUC__)
    return __builtin_bswap16(number);
#else
    return static_cast<uint16_t>((number >> 8) | (number << 8));
#endif
}

/** Append the UTF-8 encoding of one code point (expected <= 0x10FFFF) to `out`. */
static void append_utf8(std::string &out, uint32_t cp) {
    if (cp < 0x80) {
        // U+0000 - U+007F: 0xxxxxxx
        out.push_back(static_cast<char>(cp));
    } else if (cp < 0x800) {
        // U+0080 - U+07FF: 110xxxxx 10xxxxxx
        out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000) {
        // U+0800 - U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
        out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
        // U+10000 - U+10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

/**
 * Shared UTF-16 -> UTF-8 decoder.
 *
 * @param u16str    code units as stored
 * @param swapBytes true when every stored unit must be byte-swapped before it
 *                  is interpreted (big-endian data read on a little-endian host)
 * @return UTF-8 text; a leading BOM is dropped and every unpaired surrogate
 *         becomes U+FFFD
 */
static std::string utf16_units_to_utf8(const std::u16string &u16str, bool swapBytes) {
    const auto unitAt = [&](std::u16string::size_type idx) -> uint32_t {
        const uint16_t raw = static_cast<uint16_t>(u16str[idx]);
        return swapBytes ? byteswap_ushort(raw) : raw;
    };

    const std::u16string::size_type len = u16str.length();
    std::u16string::size_type i = 0;
    // The BOM is tested after the optional swap, so a big-endian BOM (stored
    // as 0xFFFE on a little-endian host) is recognised as well.
    if (len > 0 && unitAt(0) == 0xFEFF) {
        i = 1;
    }

    std::string u8str;
    u8str.reserve((len - i) * 3); // a BMP unit needs at most 3 bytes; a pair (2 units) needs 4

    for (; i < len; ++i) {
        uint32_t codePoint = unitAt(i);
        if (codePoint >= 0xD800 && codePoint <= 0xDBFF) {
            // High surrogate: it is only meaningful when a low surrogate follows.
            const uint32_t low = (i + 1 < len) ? unitAt(i + 1) : 0;
            if (low >= 0xDC00 && low <= 0xDFFF) {
                // Each surrogate carries 10 payload bits; the pair encodes
                // (code point - 0x10000).
                codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                ++i;
            } else {
                codePoint = 0xFFFD; // unpaired high surrogate
            }
        } else if (codePoint >= 0xDC00 && codePoint <= 0xDFFF) {
            codePoint = 0xFFFD;     // low surrogate without a preceding high one
        }
        append_utf8(u8str, codePoint);
    }
    return u8str;
}


////////////////////////////////////////
//  All conversions below assume a    //
//  little-endian host.               //
////////////////////////////////////////

// Build UTF-8 from UTF-16 that must start with a BOM. The BOM selects the
// decoder; an empty string is returned when the input is empty or has no BOM.
std::string utf16_to_utf8(const std::u16string &u16str) {
    if (u16str.empty()) { return std::string(); }
    // Byte Order Mark
    switch (u16str[0]) {
        case 0xFEFF: // Little Endian
            return utf16le_to_utf8(u16str);
        case 0xFFFE: // Big Endian (a BE BOM as seen by a little-endian host)
            return utf16be_to_utf8(u16str);
        default:
            return std::string();
    }
}


// Build UTF-8 from UTF-16 LE code units. A leading BOM (0xFEFF) is skipped and
// unpaired surrogates are replaced with U+FFFD.
std::string utf16le_to_utf8(const std::u16string &u16str) {
    return utf16_units_to_utf8(u16str, false);
}


// Build UTF-8 from UTF-16 BE code units (each unit is byte-swapped first).
// A leading BOM is skipped and unpaired surrogates are replaced with U+FFFD.
std::string utf16be_to_utf8(const std::u16string &u16str) {
    return utf16_units_to_utf8(u16str, true);
}


// Convert UTF-8 to UTF-16 LE code units.
//   u8str  - UTF-8 input; a leading UTF-8 BOM (EF BB BF) is skipped
//   addbom - when true the result starts with 0xFEFF (bytes FF FE)
//   ok     - optional; receives false when any malformed byte was skipped
//            (stray continuation byte, invalid lead byte, truncated or broken
//            sequence, value above U+10FFFF), true otherwise
// Malformed bytes produce no output. Code points in the surrogate range and
// over-long encodings are passed through unchanged, as before.
std::u16string utf8_to_utf16le(const std::string &u8str, bool addbom, bool *ok) {
    std::u16string u16str;
    u16str.reserve(u8str.size() + (addbom ? 1 : 0));
    if (addbom) {
        u16str.push_back(0xFEFF); // BOM (bytes FF FE)
    }

    const unsigned char *p = reinterpret_cast<const unsigned char *>(u8str.data());
    std::string::size_type len = u8str.length();
    // Skip a UTF-8 BOM, including the case where the input is nothing but the BOM.
    if (len >= 3 && p[0] == 0xEF && p[1] == 0xBB && p[2] == 0xBF) {
        p += 3;
        len -= 3;
    }

    bool is_ok = true;
    std::string::size_type i = 0;
    while (i < len) {
        const uint32_t lead = p[i];
        if (lead < 0x80) {
            // Top bit clear: the byte is the code point.
            u16str.push_back(static_cast<char16_t>(lead));
            ++i;
            continue;
        }

        // Classify the lead byte: number of continuation bytes and payload bits.
        std::string::size_type extra = 0;
        uint32_t codePoint = 0;
        if ((lead & 0xE0) == 0xC0) {          // 110xxxxx: 2-byte sequence
            extra = 1;
            codePoint = lead & 0x1F;
        } else if ((lead & 0xF0) == 0xE0) {   // 1110xxxx: 3-byte sequence
            extra = 2;
            codePoint = lead & 0x0F;
        } else if ((lead & 0xF8) == 0xF0) {   // 11110xxx: 4-byte sequence
            extra = 3;
            codePoint = lead & 0x07;
        } else {
            // Stray continuation byte or invalid lead byte: skip it.
            is_ok = false;
            ++i;
            continue;
        }

        // Every continuation byte must exist and look like 10xxxxxx.
        bool wellFormed = (len - i > extra);
        for (std::string::size_type k = 1; wellFormed && k <= extra; ++k) {
            const uint32_t c = p[i + k];
            if ((c & 0xC0) != 0x80) {
                wellFormed = false;
            } else {
                codePoint = (codePoint << 6) | (c & 0x3F);
            }
        }
        if (!wellFormed || codePoint > 0x10FFFF) {
            // Drop only the lead byte and resynchronise on the next one.
            is_ok = false;
            ++i;
            continue;
        }
        i += extra + 1;

        if (codePoint >= 0x10000) {
            // U+10000 - U+10FFFF become a surrogate pair:
            //   1. subtract 0x10000, leaving a 20-bit value
            //   2. high surrogate = 0xD800 + upper 10 bits
            //   3. low surrogate  = 0xDC00 + lower 10 bits
            codePoint -= 0x10000;
            u16str.push_back(static_cast<char16_t>((codePoint >> 10) | 0xD800U));
            u16str.push_back(static_cast<char16_t>((codePoint & 0x03FFU) | 0xDC00U));
        } else {
            // In the BMP the UTF-16 code unit equals the code point.
            u16str.push_back(static_cast<char16_t>(codePoint));
        }
    }
    if (ok != nullptr) { *ok = is_ok; }

    return u16str;
}


// Convert UTF-8 to UTF-16 BE code units: convert to LE first, then swap the
// bytes of every unit (including the BOM when `addbom` is true).
std::u16string utf8_to_utf16be(const std::string &u8str, bool addbom, bool *ok) {
    std::u16string u16str = utf8_to_utf16le(u8str, addbom, ok);
    for (char16_t &unit : u16str) {
        unit = static_cast<char16_t>(byteswap_ushort(static_cast<uint16_t>(unit)));
    }
    return u16str;
}

// ============================================================================
// /README.md
// ============================================================================
//
// # MonoString
// Conversion between the string class of il2cpp games and char* / std::string,
// plus in-place modification of the managed string.
//
// ## Usage
//
// ```c++
// //public static string get_applicationBundleIdentifier() { }
//
// MonoString *(*get_applicationBundleIdentifier)();
// MonoString *$get_applicationBundleIdentifier() {
//     MonoString *str = get_applicationBundleIdentifier();
//     const char *s = str->toChars();     // to const char* (valid until the next toChars() on this thread)
//     std::string ss = str->toString();   // to std::string
//     str->setMonoString("monoString");
//     str->setMonoString(string("monoString"));
//     return str;
// }
// ```