├── .gitignore ├── README.md ├── aplib.py ├── aprip.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Custom for this repo 2 | ## Don't commit any output files 3 | dump*.bin 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # IPython Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | 92 | # Rope project settings 93 | .ropeproject 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aplib-ripper 2 | Use this library to automatically extract PE files compressed with aplib from a binary 
blob. 3 | 4 | ## What is aPlib? 5 | Aplib is a lightweight LZ-based [compression library](http://ibsensoftware.com/products_aPLib.html) that is commonly used in packers and shell code. It is easy to spot a PE file that has been compressed using aPlib because the PE magic bytes MZ become **M8Z**. 6 | 7 | The aplib-ripper (**aprip.py**) simply automates the process of locating those magic bytes in a file and attempting to decompress the resulting data into a PE file. 8 | 9 | Let's get automating! 10 | 11 | ![](https://media.giphy.com/media/CGS2MNrIDLpVm/giphy.gif) 12 | 13 | ## Use Cases 14 | Aplib-ripper (**aprip.py**) can be imported as a module and used in your python tooling or it can be used as a standalone CLI tool. 15 | 16 | ### aprip module 17 | To use aprip.py as a module you simply need to import it and use the **extract_all** function to automatically extract all aplib compressed PE files from your data blob. 18 | ``` 19 | >>> import aprip 20 | >>> #Extract PE files from data 21 | >>> #pe_files is a list containing all extracted PE files 22 | >>> pe_files = aprip.extract_all(data) 23 | >>> 24 | ``` 25 | 26 | ### aprip CLI 27 | To use aprip.py as a tool from the command line you simply need to pass it the name of the file that you will be extracting the aPlib compressed PE files from. Each extracted file will be written to a file “dump0.bin”, “dump1.bin”, … 28 | 29 | ``` 30 | $aprip test.bin 31 | 32 | ----------------------------- 33 | 34 | APLIB RIPPER 1.1 35 | 36 | ----------------------------- 37 | 38 | Ripping PE files, this may take some time... 39 | - Ripped PE writing to file: dump0.bin 40 | - Ripped PE writing to file: dump1.bin 41 | ``` 42 | 43 | ## Aprip Function Reference 44 | **_find_candidates(blob)_**
45 |     Find potential aplib candidates.
46 |     **Args:**
47 |         blob (string): binary string of the blob to search
48 |     **Returns:**
49 |         list: offsets to each of the candidates (empty if none found) 50 | 51 | **_extract_candidate(blob, offset)_**
52 |     Attempt to decompress candidate and test DOS header.
53 |     **Args:**
54 |         blob (string): binary string of the blob to search
55 |         offset (int): offset in the blob (candidate start)
56 |     **Returns:**
57 |         string: extracted PE file (None if no PE is extracted)
58 | 59 | **_extract_all(blob)_**
60 |     Locate potential aplib candidates and attempt to decompress them.
61 |     **Args:**
62 |         blob (string): binary string of the blob to search
63 |     **Returns:**
64 |         list: list of PE files that have been extracted (empty if none are found)
# (README.md, continued)
#
# ## Acknowledgments
# A big thank you to the creator of the aplib python module: Kabopan http://code.google.com/p/kabopan/
#
# ## Feedback / Help
# * Any questions, comments, requests hit me up on twitter: @herrcore
# * Pull requests welcome!
#
# --------------------------------------------------------------------------------
# /aplib.py:
# --------------------------------------------------------------------------------
# this is a standalone single-file merge of aplib compression and decompression
# taken from my own library Kabopan http://code.google.com/p/kabopan/
# (no other clean-up or improvement)

# Ange Albertini, BSD Licence, 2007-2011

# from kbp\comp\_lz77.py ##################################################
def find_longest_match(s, sub):
    """Return how many bytes to look backward and how many bytes to copy.

    Grows a prefix of `sub` one character at a time and searches for it with
    `rfind`, restricted to the first `len(s) + 1` characters of `s` extended
    by the characters of `sub` matched so far.

    Args:
        s (string): data already processed (the search dictionary)
        sub (string): data still to be encoded

    Returns:
        tuple: (offset, length); (0, 0) when `sub` is empty or its first
        character is not found.
    """
    if sub == "":
        return 0, 0
    limit = len(s)
    dic = s
    offset = 0
    length = 0
    word = ""

    for char in sub:
        word += char
        # the match must end at or before index limit + 1
        pos = dic.rfind(word, 0, limit + 1)
        if pos == -1:
            break
        offset = limit - pos
        length = len(word)
        dic += char
    return offset, length

# from _misc.py ###############################

def int2lebin(value, size):
    """Return `value` as a string of `size` characters, little-endian."""
    return "".join(chr((value >> (8 * i)) & 0xFF) for i in range(size))

def modifystring(s, sub, offset):
    """Return `s` with `sub` written over it starting at `offset`."""
    return s[:offset] + sub + s[offset + len(sub):]

def getbinlen(value):
    """Return the bit length of an integer (1 for the value 0)."""
    result = 0
    if value == 0:
        return 1
    while value != 0:
        value >>= 1
        result += 1
    return result

# from kbp\_bits.py #################################
class _bits_compress():
    """bit machine for variable-sized auto-reloading tag compression"""
    def __init__(self, tagsize):
        """tagsize is the number of bytes that takes the tag"""
        self.out = ""

        self.__tagsize = tagsize
        self.__tag = 0
        self.__tagoffset = -1
        self.__maxbit = (self.__tagsize * 8) - 1
        self.__curbit = 0
        self.__isfirsttag = True

    def getdata(self):
        """Return the output built so far with the current tag written in.

        The tag being filled lives in `self.__tag`; its placeholder bytes in
        `self.out` are overwritten in the returned copy only.
        """
        tagstr = int2lebin(self.__tag, self.__tagsize)
        return modifystring(self.out, tagstr, self.__tagoffset)

    def write_bit(self, value):
        """Write one bit, reserving room for a new tag when the current one is full."""
        if self.__curbit != 0:
            self.__curbit -= 1
        else:
            if self.__isfirsttag:
                self.__isfirsttag = False
            else:
                # store the finished tag before starting a new one
                self.out = self.getdata()
            self.__tagoffset = len(self.out)
            self.out += "\x00" * self.__tagsize
            self.__curbit = self.__maxbit
            self.__tag = 0

        if value:
            self.__tag |= (1 << self.__curbit)
        return

    def write_bitstring(self, s):
        """Write a string of bits; the character "0" is a 0 bit, anything else a 1 bit."""
        for c in s:
            self.write_bit(0 if c == "0" else 1)
        return

    def write_byte(self, b):
        """Append a single character, or a number in 0..255, to the output."""
        assert len(b) == 1 if isinstance(b, str) else 0 <= b <= 255
        self.out += b[0:1] if isinstance(b, str) else chr(b)
        return

    def write_fixednumber(self, value, nbbit):
        """Write the `nbbit` low bits of `value`, highest bit first."""
        for i in range(nbbit - 1, -1, -1):
            self.write_bit((value >> i) & 1)
        return

    def write_variablenumber(self, value):
        """Write `value` (>= 2) as data bits separated by continuation bits.

        The highest bit is implicit. Every following data bit is preceded by
        a 1 bit, and a final 0 bit terminates the number.
        """
        assert value >= 2

        length = getbinlen(value) - 2  # the highest bit is 1
        self.write_bit(value & (1 << length))
        for i in range(length - 1, -1, -1):
            self.write_bit(1)
            self.write_bit(value & (1 << i))
        self.write_bit(0)
        return

class _bits_decompress():
    """bit machine for variable-sized auto-reloading tag decompression"""
    def __init__(self, data, tagsize):
        """`data` is the compressed input, `tagsize` the tag width in bytes."""
        self.__curbit = 0
        self.__offset = 0
        self.__tag = None
        self.__tagsize = tagsize
        self.__in = data
        self.out = ""

    def getoffset(self):
        """return the current byte offset"""
        return self.__offset

    def read_bit(self):
        """read next bit from the stream, reloads the tag if necessary"""
        if self.__curbit != 0:
            self.__curbit -= 1
        else:
            self.__curbit = (self.__tagsize * 8) - 1
            # the tag is stored little-endian
            self.__tag = ord(self.read_byte())
            for i in range(self.__tagsize - 1):
                self.__tag += ord(self.read_byte()) << (8 * (i + 1))

        bit = (self.__tag >> ((self.__tagsize * 8) - 1)) & 0x01
        self.__tag <<= 1
        return bit

    def is_end(self):
        """True when the read offset equals the input length and the bit counter is 1."""
        return self.__offset == len(self.__in) and self.__curbit == 1

    def read_byte(self):
        """Read the next byte from the stream.

        Objects with a `read` method are read one byte at a time; anything
        else is indexed at the current offset (IndexError past the end).
        """
        if hasattr(self.__in, "read"):
            result = self.__in.read(1)
        else:
            result = self.__in[self.__offset]
        self.__offset += 1
        return result

    def read_fixednumber(self, nbbit, init=0):
        """Read `nbbit` bits, highest first, shifted onto `init`."""
        result = init
        for _ in range(nbbit):
            result = (result << 1) + self.read_bit()
        return result

    def read_variablenumber(self):
        """return a variable bit-length number x, x >= 2

        reads a bit until the next bit in the pair is not set"""
        result = 1
        result = (result << 1) + self.read_bit()
        while self.read_bit():
            result = (result << 1) + self.read_bit()
        return result

    def read_setbits(self, max_, set_=1):
        """Count consecutive bits equal to `set_`, stopping at `max_`."""
        result = 0
        while result < max_ and self.read_bit() == set_:
            result += 1
        return result

    def back_copy(self, offset, length=1):
        """Append `length` characters, each copied from `offset` back in the output.

        Copies one character at a time so that the source may overlap the
        characters being appended.
        """
        for _ in range(length):
            self.out += self.out[-offset]
        return

    def read_literal(self, value=None):
        """Append `value`, or the next input byte when `value` is None; return False."""
        if value is None:
            self.out += self.read_byte()
        else:
            self.out += value
        return False

# from kbp\comp\aplib.py ###################################################
#
# aPLib, LZSS based lossless compression algorithm
#
# Jorgen Ibsen U{http://www.ibsensoftware.com}

def lengthdelta(offset):
    """Return the length adjustment (0, 1 or 2) applied to a block at `offset`."""
    if offset < 0x80 or 0x7D00 <= offset:
        return 2
    elif 0x500 <= offset:
        return 1
    return 0

class compress(_bits_compress):
    """
    aplib compression is based on lz77
    """
    def __init__(self, data, length=None):
        """`data` is the input; `length` defaults to len(data)."""
        _bits_compress.__init__(self, 1)
        self.__in = data
        self.__length = length if length is not None else len(data)
        self.__offset = 0
        self.__lastoffset = 0
        self.__pair = True
        return

    def __literal(self, marker=True):
        """Emit the current input byte, preceded by a 0 bit when `marker` is set."""
        if marker:
            self.write_bit(0)
        self.write_byte(self.__in[self.__offset])
        self.__offset += 1
        self.__pair = True
        return

    def __block(self, offset, length):
        """Emit a "10" block copy of `length` bytes from `offset` back."""
        assert offset >= 2
        self.write_bitstring("10")

        # if the last operations were literal or single byte
        # and the offset is unchanged since the last block copy
        # we can just store a 'null' offset and the length
        if self.__pair and self.__lastoffset == offset:
            self.write_variablenumber(2)    # 2-
            self.write_variablenumber(length)
        else:
            high = (offset >> 8) + 2
            if self.__pair:
                high += 1
            self.write_variablenumber(high)
            low = offset & 0xFF
            self.write_byte(low)
            self.write_variablenumber(length - lengthdelta(offset))
        self.__offset += length
        self.__lastoffset = offset
        self.__pair = False
        return

    def __shortblock(self, offset, length):
        """Emit a "110" short block: one byte packing offset (1-127) and length (2-3)."""
        assert 2 <= length <= 3
        assert 0 < offset <= 127
        self.write_bitstring("110")
        b = (offset << 1) + (length - 2)
        self.write_byte(b)
        self.__offset += length
        self.__lastoffset = offset
        self.__pair = False
        return

    def __singlebyte(self, offset):
        """Emit a "111" single-byte copy with a 4-bit offset (0 encodes a null byte)."""
        assert 0 <= offset < 16
        self.write_bitstring("111")
        self.write_fixednumber(offset, 4)
        self.__offset += 1
        self.__pair = True
        return

    def __end(self):
        """Emit the end marker: a short block whose byte is 0."""
        self.write_bitstring("110")
        self.write_byte(chr(0))
        return

    def do(self):
        """Compress the input and return the compressed string."""
        # the first byte is always stored as-is, without a marker bit
        self.__literal(False)
        while self.__offset < self.__length:
            offset, length = find_longest_match(self.__in[:self.__offset],
                                                self.__in[self.__offset:])
            if length == 0:
                c = self.__in[self.__offset]
                if c == "\x00":
                    self.__singlebyte(0)
                else:
                    self.__literal()
            elif length == 1 and 0 <= offset < 16:
                self.__singlebyte(offset)
            elif 2 <= length <= 3 and 0 < offset <= 127:
                self.__shortblock(offset, length)
            elif 3 <= length and 2 <= offset:
                self.__block(offset, length)
            else:
                self.__literal()
                #raise ValueError("no parsing found", offset, length)
        self.__end()
        return self.getdata()


class decompress(_bits_decompress):
    """aplib decompression; `do()` returns the output and the bytes consumed."""
    def __init__(self, data):
        _bits_decompress.__init__(self, data, tagsize=1)
        self.__pair = True    # paired sequence
        self.__lastoffset = 0
        # indexed by the number of leading 1 bits read (0 to 3)
        self.__functions = [
            self.__literal,
            self.__block,
            self.__shortblock,
            self.__singlebyte]
        return

    def __literal(self):
        """Copy one byte from the input to the output."""
        self.read_literal()
        self.__pair = True
        return False

    def __block(self):
        """Decode a block copy and append it to the output."""
        b = self.read_variablenumber()    # 2-
        if b == 2 and self.__pair:    # reuse the same offset
            offset = self.__lastoffset
            length = self.read_variablenumber()    # 2-
        else:
            high = b - 2    # 0-
            if self.__pair:
                high -= 1
            offset = (high << 8) + ord(self.read_byte())
            length = self.read_variablenumber()    # 2-
            length += lengthdelta(offset)
        self.__lastoffset = offset
        self.back_copy(offset, length)
        self.__pair = False
        return False

    def __shortblock(self):
        """Decode a short block; return True when its byte marks the end of the stream."""
        b = ord(self.read_byte())
        if b <= 1:    # likely 0
            return True
        length = 2 + (b & 0x01)    # 2-3
        offset = b >> 1    # 1-127
        self.back_copy(offset, length)
        self.__lastoffset = offset
        self.__pair = False
        return False

    def __singlebyte(self):
        """Decode a single-byte copy; offset 0 appends a null byte."""
        offset = self.read_fixednumber(4)    # 0-15
        if offset:
            self.back_copy(offset)
        else:
            self.read_literal('\x00')
        self.__pair = True
        return False

    def do(self):
        """returns decompressed buffer and consumed bytes counter"""
        self.read_literal()
        while True:
            # Deliberately best-effort: any error while decoding stops the
            # loop and whatever was decompressed so far is returned.
            try:
                if self.__functions[self.read_setbits(3)]():
                    break
            except Exception:
                break
        return self.out, self.getoffset()

if __name__ == "__main__":
    # from kbp\test\aplib_test.py ######################################################################
    assert decompress(compress("a").do()).do() == ("a", 3)
    assert decompress(compress("ababababababab").do()).do() == ('ababababababab', 9)
    assert decompress(compress("aaaaaaaaaaaaaacaaaaaa").do()).do() == ('aaaaaaaaaaaaaacaaaaaa', 11)

# --------------------------------------------------------------------------------
# /aprip.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
#############################################################################
#
# _ _ _ _____ _
# /\ | |(_)| | | __ \ (_)
# / \ _ __ | | _ | |__ | |__) | _ _ __ _ __ ___ _ __
# / /\ \ | '_ \ | || || '_ \ | _ / | || '_ \ | '_ \ / _ \| '__|
# -/ ____ \ | |_) || || || |_) | -| | \ \ | || |_) || |_) || __/| |
# --/_/ \_\| .__/ |_||_||_.__/ --|_| \_\|_|| .__/ | .__/ \___||_|
# -| | -| | -| |
# --|_| --|_| --|_|
#
# [ We eat aplib compressed binaries for breakfast! ]
#
# Use this library to automatically extract PE files compressed with aplib
# from a binary blob. This is especially fun to run on memory dumps from
# your sandbox...
#
#############################################################################

__author__ = "@herrcore"
__version__ = "1.2"

import aplib
import re
import argparse
import pefile
import os
import sys

# An aplib-compressed PE starts with "M8Z" instead of the usual "MZ".
aplib_magic = (r"M8Z")

def find_candidates(blob):
    """Find potential aplib candidates.

    Args:
        blob (string): binary string of the blob to search

    Returns:
        list: offsets to each of the candidates (empty if none found)
    """
    return [match.start() for match in re.finditer(aplib_magic, blob)]


def extract_candidate(blob, offset):
    """Attempt to decompress candidate and test DOS header

    Args:
        blob (string): binary string of the blob to search
        offset (int): offset in the blob (candidate start)

    Returns:
        string: extracted PE file (None if no PE is extracted)
    """
    try:
        candidate = blob[offset:]
        ptext = aplib.decompress(candidate).do()[0]
        # If this is a valid PE file find the length and trim it.
        # If it's not valid, pefile will throw an error and we will
        # return None.
        pe = pefile.PE(data=ptext)
        # Remove overlay
        return pe.trim()
    except Exception:
        # Deliberately broad: most candidates are false positives and any
        # failure simply means "not a PE file".
        return None


def extract_all(blob):
    """Locate potential aplib candidates and attempt to decompress them

    Args:
        blob (string): binary string of the blob to search

    Returns:
        list: list of PE files that have been extracted (empty if none are found)
    """
    out = []
    for ptr in find_candidates(blob):
        ptext = extract_candidate(blob, ptr)
        if ptext is not None:
            out.append(ptext)
    return out


#############################################################################
#
# Below here is just fancy stuff for the CLI
#
#############################################################################


def color(text, color_code):
    """Wrap text in the ANSI escape sequence for color_code.

    The text is returned unchanged on win32 unless TERM is "xterm".
    """
    if sys.platform == "win32" and os.getenv("TERM") != "xterm":
        return text
    return '\x1b[%dm%s\x1b[0m' % (color_code, text)


def red(text):
    """Format text as red
    """
    return color(text, 31)


def green(text):
    """Format text as green
    """
    return color(text, 32)


def banner():
    """Clear the screen and print a pretty banner for CLI use.
    """
    os.system('cls' if os.name == 'nt' else 'clear')
    # print("") emits a blank line on both Python 2 and Python 3
    print("")
    print('-----------------------------')
    print("")
    print(' APLIB RIPPER %s' % __version__)
    print("")
    print('-----------------------------')
    print("")


def main():
    """CLI entry point: rip PE files from infile into dump0.bin, dump1.bin, ..."""
    parser = argparse.ArgumentParser(description="Find and extract aplib packed PE files. Output: dump0.bin, dump1.bin, ...")
    parser.add_argument("infile", help="File containing the binary blob to search for aplib compressed binaries.")
    args = parser.parse_args()

    # Read data blob from file
    with open(args.infile, "rb") as fp:
        data = fp.read()

    # Some UI candy
    banner()
    print("Ripping PE files, this may take some time...")

    # Extract all aplib compressed PE files
    pe_files = extract_all(data)

    if not pe_files:
        print(red(" - No PE files found!"))
        return

    # Write extracted PE files to dump0.bin, dump1.bin etc.
    for index, pe_data in enumerate(pe_files):
        outfile = "dump%d.bin" % index
        print(green(" - Ripped PE writing to file: %s" % outfile))
        with open(outfile, "wb") as fp:
            fp.write(pe_data)


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
from setuptools import setup
import os


setup(
    name='aprip',
    # a string, so the version is passed through exactly as written
    version='1.2',
    url='https://github.com/herrcore/aplib-ripper',
    author="@herrcore",
    description="Automatically extract PE files compressed with aplib from a binary blob",
    install_requires=['pefile'],
    py_modules=['aprip', 'aplib'],
    entry_points={'console_scripts': ['aprip=aprip:main']}
)

# --------------------------------------------------------------------------------