├── LICENSE
├── README.md
└── xstrings.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <http://unlicense.org>
25 | 
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # xstrings.py
2 | Similar to the famous [GNU binutils](http://www.gnu.org/software/binutils/) `strings` command-line utility, xstrings emits the printable strings found in files. Unlike `strings`, xstrings is capable of detecting obfuscated strings hidden in files using simple encodings such as bitwise XOR, bitwise rotate-right, bitwise shift-left, etc., which are fairly common in malware.
3 | 
4 | By default, xstrings looks for sequences of all printable characters (i.e. alphanumeric, punctuation and whitespace) using all available encoding methods. To reduce noise, the user may disable certain encoding methods or narrow down the printable character set to a smaller subset.
5 | 
6 | xstrings may be empowered by [GNU grep](http://www.gnu.org/software/grep/) to locate certain obfuscated strings or patterns in files.
7 | 
8 | xstrings is inspired by [xorsearch](http://blog.didierstevens.com/programs/xorsearch) by Didier Stevens
9 | 


--------------------------------------------------------------------------------
/xstrings.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | 
  3 | '''
  4 | xstrings - print the strings of encoded printable characters in files.
  5 | inspired by 'xorsearch' by didier stevens, http://blog.didierstevens.com/programs/xorsearch
  6 | 
  7 | @author Eli Cohen-Nehemia, https://github.com/elicn/xstrings
  8 | '''
  9 | 
 10 | import re
 11 | import argparse
 12 | 
 13 | from string import printable
 14 | from sys import stdin
 15 | 
 16 | PROG_NAME = 'xstrings.py'
 17 | PROG_DESC = 'Display printable strings in [file(s)] (stdin by default)'
 18 | 
 19 | def encoding(func):
 20 |     '''
 21 |     Wrapper method that constructs an encoded charmap according to given single-char encoding
 22 |     function.
 23 | 
 24 |     @param  func    Encoding function that accepts a single character and a key value
 25 | 
 26 |     @return A function for constructing a charmap
 27 |     '''
 28 | 
 29 |     def wrapped(charset, key):
 30 |         return ''.join('%c' % func(ord(char), key) for char in charset)
 31 | 
 32 |     return wrapped
 33 | 
 34 | # aggregate a few standard encoding functions along with the range of values (keys) they operate on.
 35 | # each entry in the dictionary consists of a function to encode a single character and its keys range
 36 | #
 37 | # by default the encoders dictionary encloses:
 38 | #   xor - bitwise xor of every character in charmap with the current key; keys range is [1, 255]
 39 | #   ror - bitwise rotate-right every character in charmap by 'key' places; keys range is [1, 7]
 40 | #   shl - bitwise shift-left every character in charmap by 'key' places; keys range is [1, 4]
 41 | #   add - binary add 'key' value to every character in charmap; keys range is [1, 133]
 42 | 
 43 | encoders = {'XOR' : (encoding(lambda char, key: char ^ key),                                 xrange(1, 256)),
 44 |             'ROR' : (encoding(lambda char, key: (char << (8 - key)) & 0xff | (char >> key)), xrange(1, 8)),
 45 |             'SHL' : (encoding(lambda char, key: (char << key) & 0xff),                       xrange(1, 5)),
 46 | #           'SHR' : (encoding(lambda char, key: (char >> key) & 0xff),                       xrange(1, 8))
 47 |             'ADD' : (encoding(lambda char, key: (char + key) % 0x100),                       xrange(1, 134))}
 48 | 
 49 | # required to support --use-encoding command line option
 50 | class filter_dict(argparse.Action):
 51 |     '''A helper Action class that filters a dictionary according to a list of keys specified by the user
 52 |     and returns a subset dictionary. Option must specify 'default' and assign the dictionary to filter.
 53 | 
 54 |     @sa argparse.Action
 55 |     '''
 56 | 
 57 |     def __call__(self, parser, namespace, values, option_string):
 58 |         unfiltered = self.default
 59 |         filtered = dict((k, unfiltered.get(k)) for k in values if unfiltered.has_key(k))
 60 | 
 61 |         setattr(namespace, self.dest, filtered)
 62 | 
 63 | # required to support --use-encoding command line option
 64 | def comma_list(val):
 65 |     '''A helper function for separating string elements delimited by commas into a list of strings.
 66 |     For exmaple: comma_list('A,B,C,D') results in: ['A', 'B', 'C', 'D']
 67 |     '''
 68 | 
 69 |     return val.split(',')
 70 | 
 71 | # required to support --use-encoding command line option
 72 | class xlist(list):
 73 |     '''A helper class to support multi-choice in argparse module.
 74 |     For a list L and xlist XL, the expression 'L in XL' will return True iff XL contains all elements in L
 75 |     '''
 76 | 
 77 |     def __contains__(self, keys):
 78 |         return all(list.__contains__(self, k) for k in keys)
 79 | 
 80 | def finditer(content, encodings, charset, min_size):
 81 |     '''Generator function that iterates over all string matches inside the given content which are at least
 82 |     min_size characters long.
 83 | 
 84 |     @param    content    Binary content to search in
 85 |     @param    encodings  Dictionary of encoding functions
 86 |     @param    charset    An interable object containing the characters to consider as part of a string
 87 |     @param    min_size   Minimal string size to consider as a string match
 88 | 
 89 |     @return A tuple containing the match offset in content, encoding name, encoding key and the deobfuscated
 90 |             string reconstructed from the blob found
 91 |     '''
 92 | 
 93 |     # iterate over available encoding fucntions
 94 |     for encoding_name, (encoding_function, encoding_range) in encodings.items():
 95 | 
 96 |         # iterate over all keys in range for that encoding function
 97 |         for key in encoding_range:
 98 |             encoded_charset = encoding_function(charset, key)
 99 | 
100 |             pattern = '[%s]{%d,}' % (re.escape(encoded_charset), min_size)
101 | 
102 |             for match in re.finditer(pattern, content):
103 |                 # deobfuscation: reconstruct the original string
104 |                 deobf = ''.join(charset[encoded_charset.index(c)] for c in match.group(0))
105 | 
106 |                 yield (match.start(0), encoding_name, key, deobf)
107 | 
108 |         # cleanup regex cache once in a while
109 |         re.purge()
110 | 
def main(args):
    '''Search every input file for encoded strings and print the findings to standard output, one per line.

    Each line consists of the optional file name, the optional offset of the finding, the encoding name with
    its key (in hex) and the deobfuscated string, separated by single spaces.

    @param  args    Parsed command line arguments; the attributes read are infiles, print_file_name, radix,
                    encodings, charset and bytes
    '''

    from sys import stdout

    # prepare the format string for file offsets if required
    if args.radix:
        radixfmt = '%%7%s' % args.radix

    # iterate over input files list
    for fd in args.infiles:

        # gnu strings emits '{standard input}' instead of 'stdin' if required to emit filename
        # stick with the gnu strings style if necessary
        if args.print_file_name:
            filename = '{standard input}' if fd is stdin else fd.name

        # input files were already opened by the arguments parser; once the content has been read the
        # file is no longer needed, so release it right away (standard input is left untouched)
        try:
            content = fd.read()
        finally:
            if fd is not stdin:
                fd.close()

        # iterate over findings in current input file
        # each iteration returns offset, encoding name, encoding key and deobfuscated string found
        for offset, enc_name, enc_key, deobf in finditer(content, args.encodings, args.charset, args.bytes):
            fields = []

            if args.print_file_name:
                fields.append('%s:' % filename)

            if args.radix:
                fields.append(radixfmt % offset)

            fields.append('%s(%x) %s' % (enc_name, enc_key, deobf))

            stdout.write(' '.join(fields) + '\n')
134 | 
if __name__ == '__main__':
    # command line interface; options are modeled after those of gnu strings where applicable
    parser = argparse.ArgumentParser(prog=PROG_NAME, description=PROG_DESC)

    parser.add_argument(
        '-f', '--print-file-name',
        action='store_true',
        help='Print the name of the file before each string')

    parser.add_argument(
        '-n', '--bytes',
        type=int,
        default=4,
        metavar='number',
        help='Locate & print any sequence of at least [number] characters')

    parser.add_argument(
        '-t', '--radix',
        type=str,
        choices=('o', 'd', 'x'),
        help='Print the location of the string in base 8, 10 or 16')

    parser.add_argument(
        '-c', '--charset',
        type=str,
        default=printable,
        help='Replace the default characters set to look for with a custom one')

    # the value is a comma separated list of encoding names; the whole list is validated at once
    # against the known encoders, then narrowed down to a subset dictionary of encoders
    parser.add_argument(
        '-e', '--encodings',
        type=comma_list,
        action=filter_dict,
        choices=xlist(encoders),
        default=encoders,
        help='Encodings list to try out (default: all)')

    # input files; fall back to standard input when none is given
    parser.add_argument(
        'infiles',
        type=argparse.FileType('rb'),
        default=(stdin,),
        metavar='file',
        nargs='*')

    main(parser.parse_args())
146 | 


--------------------------------------------------------------------------------