├── LICENSE ├── README.md └── dexstrings.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Hugo Gonzalez 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dexstrings 2 | Extracting the strings from the .dex files with meaning. 3 | 4 | Instead of using the typical 'strings' command on the .dex file, we can extract each string with some knowledge about what type of string it is. 5 | 6 | You can request only the 'text strings', which means only the strings that are not defined with another specific function such as type, prototype, source or method name. 7 | 8 | The approach is simple, but you can find interesting stuff in this piece of information. 
9 | If the strings are in a different language (Chinese, Russian), you will need to have support for that encoding on your terminal to see them. 10 | 11 | H. 12 | 13 | ## Changelog 14 | 15 | 16 | ### v1.0 17 | After more testing, new release. 18 | [![DOI](https://zenodo.org/badge/36319360.svg)](https://zenodo.org/badge/latestdoi/36319360) 19 | ### 0.8 20 | New options added: -u -r -s 21 | 22 | -u 23 | 24 | Unicode string detection added in a very naive way, comparing the number of characters vs the number of utf8 characters. 25 | If the string is only one character it does not detect it. 26 | 27 | -r 28 | 29 | print the number of references to that string in the rest of the tables 30 | 31 | -s 32 | 33 | print the size in characters and in utf8_characters. 34 | 35 | Separator changed to |. 36 | -------------------------------------------------------------------------------- /dexstrings.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Hugo Gonzalez 2015. 3 | * dexstrings - Extract information from .dex files 4 | * 5 | * compile: 6 | * gcc -g -o dexstrings dexstrings.c -lm 7 | * some warnings will be shown because of the use of pointers, you can probably ignore that. 8 | * 9 | */ 10 | 11 | /* Jun 26, 2015 Check for unicode added, if the length of chars and unicode are 12 | * different, we have unicode in the string. 
/* (end of the file header comment; the program source follows) */

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define VERSION "0.8"

typedef uint8_t u1;
typedef uint16_t u2;
typedef uint32_t u4;
typedef uint64_t u8;

/*
 * On-disk record layouts of the dex format.
 *
 * The program never dereferences file bytes through these types; they are
 * used only with sizeof/offsetof so that every field is fetched with
 * read_le32(), which is safe for unaligned offsets and independent of the
 * byte order of the host.
 */
typedef struct {
    char dex[3];
    char newline[1];
    char ver[3];
    char zero[1];
} dex_magic;

typedef struct {
    dex_magic magic;
    u4 checksum[1];
    unsigned char signature[20];
    u4 file_size[1];
    u4 header_size[1];
    u4 endian_tag[1];
    u4 link_size[1];
    u4 link_off[1];
    u4 map_off[1];
    u4 string_ids_size[1];
    u4 string_ids_off[1];
    u4 type_ids_size[1];
    u4 type_ids_off[1];
    u4 proto_ids_size[1];
    u4 proto_ids_off[1];
    u4 field_ids_size[1];
    u4 field_ids_off[1];
    u4 method_ids_size[1];
    u4 method_ids_off[1];
    u4 class_defs_size[1];
    u4 class_defs_off[1];
    u4 data_size[1];
    u4 data_off[1];
} dex_header;

typedef struct {
    u4 class_idx[1];
    u4 access_flags[1];
    u4 superclass_idx[1];
    u4 interfaces_off[1];
    u4 source_file_idx[1];
    u4 annotations_off[1];
    u4 class_data_off[1];
    u4 static_values_off[1];
} class_def_struct;

typedef struct {
    u2 class_idx[1];
    u2 proto_idx[1];
    u4 name_idx[1];
} method_id_struct;

typedef struct {
    u4 string_data_off[1];
} string_id_struct;

typedef struct {
    u4 descriptor_idx[1];
} type_id_struct;

typedef struct {
    u4 shorty_idx[1];
    u4 return_type_idx[1];
    u4 parameters_off[1];
} proto_id_struct;

typedef struct {
    u2 class_idx[1];
    u2 type_idx[1];
    u4 name_idx[1];
} field_id_struct;

/* One fixed-size record table of the dex file, already clamped to the file. */
typedef struct {
    size_t off;        /* byte offset of the first record */
    u4 count;          /* number of records that lie completely inside the file */
    size_t entry_size; /* size of one record in bytes */
} id_table;

/* Result codes of load_file(). */
enum { LOAD_OK = 0, LOAD_NO_MEMORY, LOAD_READ_ERROR };

/* Decode a little-endian 32-bit value from four (possibly unaligned) bytes. */
static u4 read_le32(const u1 *p)
{
    return (u4)p[0] | ((u4)p[1] << 8) | ((u4)p[2] << 16) | ((u4)p[3] << 24);
}

/* Count the bytes in s[0..n) that are not UTF-8 continuation bytes (10xxxxxx). */
static size_t utf8_count(const u1 *s, size_t n)
{
    size_t chars = 0;
    for (size_t k = 0; k < n; k++) {
        if ((s[k] & 0xC0) != 0x80)
            chars++;
    }
    return chars;
}

/*
 * Number of UTF-8 encoded characters in the NUL-terminated string s:
 * every byte that is not a continuation byte starts one character.
 */
size_t utf8len(const char *s)
{
    return utf8_count((const u1 *)s, strlen(s));
}

/*
 * Skip one ULEB128 value stored at p, reading at most avail bytes.
 *
 * Returns the number of bytes the value occupies (1..5; decoding always
 * stops after the fifth byte), or 0 when the value is cut off by the end
 * of the available data.
 */
static size_t skip_uleb128(const u1 *p, size_t avail)
{
    size_t used = 0;
    while (used < avail && used < 5) {
        if ((p[used++] & 0x80) == 0)
            return used;
    }
    return used == 5 ? used : 0;
}

/*
 * Print one string_data_item: "<offset hex> | [<bytes> | <chars> | ][_U_ |].:<text>:."
 *
 * file/file_len  the loaded dex image and its size in bytes
 * offset         file offset of the string_data_item
 * iSize          non-zero: also print the byte length and the character count
 * iUnicode       non-zero: also print the "_U_" marker when the string holds
 *                multi-byte characters (byte length != character count)
 *
 * The ULEB128 prefix of the item is the string length in UTF-16 code units,
 * not in bytes, so the text is delimited by its terminating NUL byte instead
 * (MUTF-8 never contains an embedded NUL). Nothing outside
 * file[0..file_len) is read; a malformed item is reported on stderr and
 * printed as the (possibly empty) part that is inside the file.
 */
static void print_string_item(const u1 *file, size_t file_len, u4 offset,
                              int iSize, int iUnicode)
{
    const u1 *data = file; /* only read when byte_len > 0 */
    size_t byte_len = 0;
    int malformed = 1;

    printf("%x | ", (unsigned)offset);

    if (offset < file_len) {
        size_t avail = file_len - offset;
        size_t prefix = skip_uleb128(file + offset, avail);
        if (prefix != 0) {
            data = file + offset + prefix;
            avail -= prefix;
            while (byte_len < avail && data[byte_len] != '\0')
                byte_len++;
            malformed = (byte_len == avail); /* no terminator before EOF */
        }
    }
    if (malformed)
        fprintf(stderr, "Warning: malformed string data at offset 0x%x\n",
                (unsigned)offset);

    size_t char_count = utf8_count(data, byte_len);

    if (iSize != 0)
        printf("%zu | %zu | ", byte_len, char_count);
    if (iUnicode != 0) {
        if (byte_len != char_count)
            printf("_U_ |");
        else
            printf(" |");
    }
    /* fwrite: the text may lack a terminator when the file is truncated */
    fputs(".:", stdout);
    fwrite(data, 1, byte_len, stdout);
    fputs(":.\n", stdout);
}

/*
 * Print the string_data_item found at file + offset.
 *
 * Kept for callers of the original interface, which carries no file size:
 * the caller must guarantee that offset designates a complete,
 * NUL-terminated item. See print_string_item() for the output format.
 */
void printStrings2(u1 *file, u4 offset, int iSize, int iUnicode)
{
    print_string_item(file, SIZE_MAX, offset, iSize, iUnicode);
}

/* Print the command line synopsis on stderr; name is the program name. */
void help_show_message(const char name[])
{
    fprintf(stderr, "Usage: %s <file.dex> [options]\n", name);
    fprintf(stderr, " options:\n");
    fprintf(stderr, "\t-t\tprint only the text strings\n");
    fprintf(stderr, "\t-s\tprint the size of strings\n");
    fprintf(stderr, "\t-r\tprint how many references a string have\n");
    fprintf(stderr, "\t-u\tcheck if the string contain unicode\n");
}

/*
 * Read the whole stream into a freshly allocated buffer.
 *
 * On LOAD_OK, *out_data (to be released with free()) and *out_len describe
 * the content. On LOAD_NO_MEMORY / LOAD_READ_ERROR nothing is returned and
 * nothing is left allocated.
 */
static int load_file(FILE *input, u1 **out_data, size_t *out_len)
{
    u1 *buf = NULL;
    size_t cap = 0;
    size_t len = 0;

    for (;;) {
        if (len == cap) {
            size_t new_cap = (cap == 0) ? 65536 : cap * 2;
            if (cap > SIZE_MAX / 2) {
                free(buf);
                return LOAD_NO_MEMORY;
            }
            u1 *grown = realloc(buf, new_cap);
            if (grown == NULL) {
                free(buf);
                return LOAD_NO_MEMORY;
            }
            buf = grown;
            cap = new_cap;
        }
        size_t want = cap - len;
        size_t got = fread(buf + len, 1, want, input);
        len += got;
        if (got < want) /* end of file or read error */
            break;
    }
    if (ferror(input)) {
        free(buf);
        return LOAD_READ_ERROR;
    }
    *out_data = buf;
    *out_len = len;
    return LOAD_OK;
}

/*
 * Describe the record table whose count and offset are stored in the dex
 * header at count_field and off_field. The file must hold a full header.
 * A table that does not fit in the file is reported on stderr and cut down
 * to the records that do fit, so later scans never leave the buffer.
 */
static id_table load_table(const u1 *file, size_t file_len, size_t count_field,
                           size_t off_field, size_t entry_size, const char *what)
{
    id_table t;
    size_t fits = 0;

    t.count = read_le32(file + count_field);
    t.off = read_le32(file + off_field);
    t.entry_size = entry_size;

    if (t.off <= file_len)
        fits = (file_len - t.off) / entry_size;
    if (t.count > fits) {
        fprintf(stderr, "Warning: %s table does not fit in the file, truncated\n", what);
        t.count = (u4)fits;
    }
    return t;
}

/*
 * Count the records of table t whose 32-bit field at field_off equals idx.
 * Unless quiet is non-zero, tag is printed once per matching record.
 */
static int count_refs(const u1 *file, const id_table *t, size_t field_off,
                      u4 idx, char tag, int quiet)
{
    int hits = 0;
    for (u4 j = 0; j < t->count; j++) {
        const u1 *entry = file + t->off + (size_t)j * t->entry_size;
        if (read_le32(entry + field_off) == idx) {
            if (!quiet)
                putchar(tag);
            hits++;
        }
    }
    return hits;
}

/*
 * Count the uses of string idx by the class definitions: 'C' when the string
 * is the descriptor of a defined class, 'J' when it is a source file name.
 *
 * class_idx is an index into the type table, not into the string table, so
 * it is resolved through the type's descriptor_idx before being compared.
 */
static int count_class_refs(const u1 *file, const id_table *classes,
                            const id_table *types, u4 idx, int quiet)
{
    int hits = 0;
    for (u4 j = 0; j < classes->count; j++) {
        const u1 *def = file + classes->off + (size_t)j * classes->entry_size;
        u4 class_type = read_le32(def + offsetof(class_def_struct, class_idx));
        u4 source_file = read_le32(def + offsetof(class_def_struct, source_file_idx));

        if (class_type < types->count) {
            const u1 *type = file + types->off + (size_t)class_type * types->entry_size;
            if (read_le32(type + offsetof(type_id_struct, descriptor_idx)) == idx) {
                if (!quiet)
                    putchar('C');
                hits++;
            }
        }
        if (source_file == idx) {
            if (!quiet)
                putchar('J');
            hits++;
        }
    }
    return hits;
}

/*
 * dexstrings <file.dex> [-t] [-s] [-r] [-u]
 *
 * Lists every entry of the string table of a dex file together with the
 * tables that reference it (T type, P prototype, F field, M method, C class,
 * J source file, S none of them). Returns 0 on success and 1 on a usage,
 * I/O or format error.
 */
int main(int argc, char *argv[])
{
    const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "dexstrings";
    const char *dexfile = NULL;
    int iOnlyStrings = 0;
    int iSize = 0;
    int iRef = 0;
    int iUnicode = 0;
    int options_done = 0;

    printf("\n=== dexstrings %s - (c) 2015 Hugo Gonzalez @hugo_glez\n", VERSION);

    /*
     * Parsed by hand so that the options are accepted before or after the
     * file name on every platform (a POSIX getopt stops at the first
     * non-option argument). "--" ends the options; the first non-option
     * argument is the dex file, further ones are ignored.
     */
    for (int k = 1; k < argc; k++) {
        const char *arg = argv[k];

        if (!options_done && arg[0] == '-' && arg[1] != '\0') {
            if (strcmp(arg, "--") == 0) {
                options_done = 1;
                continue;
            }
            for (const char *c = arg + 1; *c != '\0'; c++) {
                switch (*c) {
                case 't':
                    iOnlyStrings = 2;
                    break;
                case 's':
                    iSize = 2;
                    break;
                case 'r':
                    iRef = 2;
                    break;
                case 'u':
                    iUnicode = 2;
                    break;
                default:
                    fprintf(stderr, "%s: invalid option -- '%c'\n", prog, *c);
                    help_show_message(prog);
                    return 1;
                }
            }
        } else if (dexfile == NULL) {
            dexfile = arg;
        }
    }

    if (dexfile == NULL) {
        help_show_message(prog);
        return 1;
    }

    FILE *input = fopen(dexfile, "rb");
    if (input == NULL) {
        fprintf(stderr, "ERROR: Can't open dex file!\n");
        perror(dexfile);
        return 1;
    }

    /* load all the file in memory */
    u1 *file = NULL;
    size_t file_len = 0;
    int status = load_file(input, &file, &file_len);
    if (status == LOAD_NO_MEMORY) {
        fprintf(stderr, "ERROR: Can't allocate memory!\n");
        perror("Memory for the file");
    } else if (status == LOAD_READ_ERROR) {
        fprintf(stderr, "ERROR: Can't read dex file!\n");
        perror(dexfile);
    }
    fclose(input);
    if (status != LOAD_OK)
        return 1;

    /* print dex header information */
    printf("Dex file: %s\n", dexfile);

    /* Everything below reads header fields, so a full header is required. */
    const u1 *magic = file + offsetof(dex_header, magic);
    if (file_len < sizeof(dex_header) ||
        memcmp(magic + offsetof(dex_magic, dex), "dex", 3) != 0 ||
        magic[offsetof(dex_magic, newline)] != '\n' ||
        magic[offsetof(dex_magic, zero)] != '\0') {
        fprintf(stderr, "ERROR: not a dex file\n");
        free(file);
        return 1;
    }

    if (memcmp(magic + offsetof(dex_magic, ver), "035", 3) != 0) {
        fprintf(stderr, "Warning: Dex file version != 035\n");
    }

    if (read_le32(file + offsetof(dex_header, header_size)) != 0x70) {
        fprintf(stderr, "Warning: Header size != 0x70\n");
    }

    if (read_le32(file + offsetof(dex_header, endian_tag)) != 0x12345678) {
        fprintf(stderr, "Warning: Endian tag != 0x12345678\n");
    }

    id_table strings = load_table(file, file_len,
                                  offsetof(dex_header, string_ids_size),
                                  offsetof(dex_header, string_ids_off),
                                  sizeof(string_id_struct), "string_ids");
    id_table types = load_table(file, file_len,
                                offsetof(dex_header, type_ids_size),
                                offsetof(dex_header, type_ids_off),
                                sizeof(type_id_struct), "type_ids");
    id_table protos = load_table(file, file_len,
                                 offsetof(dex_header, proto_ids_size),
                                 offsetof(dex_header, proto_ids_off),
                                 sizeof(proto_id_struct), "proto_ids");
    id_table fields = load_table(file, file_len,
                                 offsetof(dex_header, field_ids_size),
                                 offsetof(dex_header, field_ids_off),
                                 sizeof(field_id_struct), "field_ids");
    id_table methods = load_table(file, file_len,
                                  offsetof(dex_header, method_ids_size),
                                  offsetof(dex_header, method_ids_off),
                                  sizeof(method_id_struct), "method_ids");
    id_table classes = load_table(file, file_len,
                                  offsetof(dex_header, class_defs_size),
                                  offsetof(dex_header, class_defs_off),
                                  sizeof(class_def_struct), "class_defs");

    /* strings */
    printf("======================\n");
    for (u4 i = 0; i < strings.count; i++) {
        const u1 *string_id = file + strings.off + (size_t)i * strings.entry_size;
        u4 data_off = read_le32(string_id + offsetof(string_id_struct, string_data_off));
        int quiet = (iOnlyStrings != 0);
        int iFound = 0;

        if (!quiet)
            printf("%u | ", (unsigned)i);

        /* check if the other tables are using this string */
        iFound += count_refs(file, &types, offsetof(type_id_struct, descriptor_idx),
                             i, 'T', quiet);
        iFound += count_refs(file, &protos, offsetof(proto_id_struct, shorty_idx),
                             i, 'P', quiet);
        iFound += count_refs(file, &fields, offsetof(field_id_struct, name_idx),
                             i, 'F', quiet);
        iFound += count_refs(file, &methods, offsetof(method_id_struct, name_idx),
                             i, 'M', quiet);
        iFound += count_class_refs(file, &classes, &types, i, quiet);

        if (!quiet) {
            if (iFound == 0)
                printf("S");
            printf(" | ");
            if (iRef != 0)
                printf("%i | ", iFound);
            print_string_item(file, file_len, data_off, iSize, iUnicode);
        } else if (iFound == 0) {
            /* -t: only the strings nobody else references are text strings */
            printf("%u | ", (unsigned)i);
            print_string_item(file, file_len, data_off, iSize, iUnicode);
        }
    }

    free(file);

    return 0;
}